examples/vhost_scsi/vhost_scsi.c (DPDK 18.08)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <unistd.h>
#include <stdbool.h>
#include <signal.h>
#include <assert.h>
#include <semaphore.h>
#include <pthread.h>
#include <linux/virtio_scsi.h>
#include <linux/virtio_ring.h>

#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_vhost.h>

#include "vhost_scsi.h"
#include "scsi_spec.h"

#define VIRTIO_SCSI_FEATURES ((1 << VIRTIO_F_NOTIFY_ON_EMPTY) |\
                              (1 << VIRTIO_SCSI_F_INOUT) |\
                              (1 << VIRTIO_SCSI_F_CHANGE))

/* Full path of the vhost-user socket file, created in the current
 * working directory at startup.
 */
static char dev_pathname[PATH_MAX] = "";

static struct vhost_scsi_ctrlr *g_vhost_ctrlr;
static int g_should_stop;
static sem_t exit_sem;

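/* Look up the controller that owns the given vhost-user socket. This
 * example manages a single controller, so the lookup ignores the name
 * and returns the global instance.
 */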
static struct vhost_scsi_ctrlr *
vhost_scsi_ctrlr_find(__rte_unused const char *ctrlr_name)
{
        /* currently we only support one vhost-user socket per process */
        return g_vhost_ctrlr;
}

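/* Translate a guest physical address (GPA) into a vhost virtual address
 * (VVA) that this process can dereference, using the controller's guest
 * memory table. *len is updated to the length of the contiguous region
 * actually mapped at that address.
 */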
static uint64_t gpa_to_vva(int vid, uint64_t gpa, uint64_t *len)
{
        char path[PATH_MAX];
        struct vhost_scsi_ctrlr *ctrlr;
        int ret = 0;

        ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
        if (ret) {
                fprintf(stderr, "Cannot get socket name\n");
                assert(ret == 0);
        }

        ctrlr = vhost_scsi_ctrlr_find(path);
        if (!ctrlr) {
                fprintf(stderr, "Controller is not ready\n");
                assert(ctrlr != NULL);
        }

        assert(ctrlr->mem != NULL);

        return rte_vhost_va_from_guest_pa(ctrlr->mem, gpa, len);
}

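/* Helpers for walking a vring descriptor chain: fetch the next descriptor,
 * test whether the chain continues, and test whether a descriptor is
 * device-writable.
 */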
static struct vring_desc *
descriptor_get_next(struct vring_desc *vq_desc, struct vring_desc *cur_desc)
{
        return &vq_desc[cur_desc->next];
}

static bool
descriptor_has_next(struct vring_desc *cur_desc)
{
        return !!(cur_desc->flags & VRING_DESC_F_NEXT);
}

static bool
descriptor_is_wr(struct vring_desc *cur_desc)
{
        return !!(cur_desc->flags & VRING_DESC_F_WRITE);
}

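/* Post a completed task to the "used" ring and notify the guest. */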
static void
submit_completion(struct vhost_scsi_task *task, uint32_t q_idx)
{
        struct rte_vhost_vring *vq;
        struct vring_used *used;

        vq = task->vq;
        used = vq->used;
        /* Fill out the next entry in the "used" ring.  id = the
         * index of the descriptor that contained the SCSI request.
         * len = the total amount of data transferred for the SCSI
         * request. We must report the correct len, for variable
         * length SCSI CDBs, where we may return less data than
         * allocated by the guest VM.
         */
        used->ring[used->idx & (vq->size - 1)].id = task->req_idx;
        used->ring[used->idx & (vq->size - 1)].len = task->data_len;
        used->idx++;

        /* Send an interrupt back to the guest VM so that it knows
         * a completion is ready to be processed.
         */
        rte_vhost_vring_call(task->bdev->vid, q_idx);
}

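/* Build the iovec list for a device-to-guest (read) transfer. The first
 * descriptor after the request header holds the virtio_scsi response; the
 * remaining device-writable descriptors are the data buffers the guest
 * expects to be filled.
 */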
static void
vhost_process_read_payload_chain(struct vhost_scsi_task *task)
{
        void *data;
        uint64_t chunk_len;

        task->iovs_cnt = 0;
        chunk_len = task->desc->len;
        task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
                                                   task->desc->addr,
                                                   &chunk_len);
        if (!task->resp || chunk_len != task->desc->len) {
                fprintf(stderr, "failed to translate desc address.\n");
                return;
        }

        while (descriptor_has_next(task->desc)) {
                task->desc = descriptor_get_next(task->vq->desc, task->desc);
                chunk_len = task->desc->len;
                data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
                                                     task->desc->addr,
                                                     &chunk_len);
                if (!data || chunk_len != task->desc->len) {
                        fprintf(stderr, "failed to translate desc address.\n");
                        return;
                }

                task->iovs[task->iovs_cnt].iov_base = data;
                task->iovs[task->iovs_cnt].iov_len = task->desc->len;
                task->data_len += task->desc->len;
                task->iovs_cnt++;
        }
}

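/* Build the iovec list for a guest-to-device (write) transfer. The
 * device-readable data buffers come first in the chain; the final,
 * device-writable descriptor holds the virtio_scsi response.
 */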
static void
vhost_process_write_payload_chain(struct vhost_scsi_task *task)
{
        void *data;
        uint64_t chunk_len;

        task->iovs_cnt = 0;

        do {
                chunk_len = task->desc->len;
                data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
                                                     task->desc->addr,
                                                     &chunk_len);
                if (!data || chunk_len != task->desc->len) {
                        fprintf(stderr, "failed to translate desc address.\n");
                        return;
                }

                task->iovs[task->iovs_cnt].iov_base = data;
                task->iovs[task->iovs_cnt].iov_len = task->desc->len;
                task->data_len += task->desc->len;
                task->iovs_cnt++;
                task->desc = descriptor_get_next(task->vq->desc, task->desc);
        } while (descriptor_has_next(task->desc));

        chunk_len = task->desc->len;
        task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
                                                   task->desc->addr,
                                                   &chunk_len);
        if (!task->resp || chunk_len != task->desc->len)
                fprintf(stderr, "failed to translate desc address.\n");
}

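/* Allocate and initialize an in-memory block device of blk_cnt blocks of
 * blk_size bytes. The data area is allocated with rte_zmalloc() from DPDK
 * (typically huge-page) memory and serves as the backing store for the
 * emulated SCSI LUN.
 */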
static struct vhost_block_dev *
vhost_scsi_bdev_construct(const char *bdev_name, const char *bdev_serial,
                          uint32_t blk_size, uint64_t blk_cnt,
                          bool wce_enable)
{
        struct vhost_block_dev *bdev;

        bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE);
        if (!bdev)
                return NULL;

        strncpy(bdev->name, bdev_name, sizeof(bdev->name));
        strncpy(bdev->product_name, bdev_serial, sizeof(bdev->product_name));
        bdev->blocklen = blk_size;
        bdev->blockcnt = blk_cnt;
        bdev->write_cache = wce_enable;

        /* use memory as disk storage space */
        bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0);
        if (!bdev->data) {
                fprintf(stderr, "not enough reserved huge-page memory for disk\n");
                rte_free(bdev);
                return NULL;
        }

        return bdev;
}

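/* Drain one request queue: for every new entry in the "avail" ring,
 * translate the descriptor chain (request header, optional payload
 * buffers, response), execute the SCSI command against the in-memory
 * block device, and post the completion back to the guest.
 */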
static void
process_requestq(struct vhost_scsi_ctrlr *ctrlr, uint32_t q_idx)
{
        int ret;
        struct vhost_scsi_queue *scsi_vq;
        struct rte_vhost_vring *vq;

        scsi_vq = &ctrlr->bdev->queues[q_idx];
        vq = &scsi_vq->vq;
        ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, q_idx, vq);
        assert(ret == 0);

        while (vq->avail->idx != scsi_vq->last_used_idx) {
                int req_idx;
                uint16_t last_idx;
                struct vhost_scsi_task *task;
                uint64_t chunk_len;

                last_idx = scsi_vq->last_used_idx & (vq->size - 1);
                req_idx = vq->avail->ring[last_idx];

                task = rte_zmalloc(NULL, sizeof(*task), 0);
                assert(task != NULL);

                task->ctrlr = ctrlr;
                task->bdev = ctrlr->bdev;
                task->vq = vq;
                task->req_idx = req_idx;
                task->desc = &task->vq->desc[task->req_idx];

                /* does not support indirect descriptors */
                assert((task->desc->flags & VRING_DESC_F_INDIRECT) == 0);
                scsi_vq->last_used_idx++;

                chunk_len = task->desc->len;
                task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
                                                          task->desc->addr,
                                                          &chunk_len);
                if (!task->req || chunk_len != task->desc->len) {
                        fprintf(stderr, "failed to translate desc address.\n");
                        rte_free(task);
                        return;
                }

                task->desc = descriptor_get_next(task->vq->desc, task->desc);
                if (!descriptor_has_next(task->desc)) {
                        task->dxfer_dir = SCSI_DIR_NONE;
                        chunk_len = task->desc->len;
                        task->resp = (void *)(uintptr_t)
                                              gpa_to_vva(task->bdev->vid,
                                                         task->desc->addr,
                                                         &chunk_len);
                        if (!task->resp || chunk_len != task->desc->len) {
                                fprintf(stderr, "failed to translate desc address.\n");
                                rte_free(task);
                                return;
                        }
                } else if (!descriptor_is_wr(task->desc)) {
                        task->dxfer_dir = SCSI_DIR_TO_DEV;
                        vhost_process_write_payload_chain(task);
                } else {
                        task->dxfer_dir = SCSI_DIR_FROM_DEV;
                        vhost_process_read_payload_chain(task);
                }

                ret = vhost_bdev_process_scsi_commands(ctrlr->bdev, task);
                if (ret) {
                        /* invalid response */
                        task->resp->response = VIRTIO_SCSI_S_BAD_TARGET;
                } else {
                        /* success */
                        task->resp->response = VIRTIO_SCSI_S_OK;
                        task->resp->status = 0;
                        task->resp->resid = 0;
                }
                submit_completion(task, q_idx);
                rte_free(task);
        }
}

/* Main worker thread: polls the IO request queue of the controller until
 * the device is destroyed.
 */
static void *
ctrlr_worker(void *arg)
{
        uint32_t idx, num;
        struct vhost_scsi_ctrlr *ctrlr = (struct vhost_scsi_ctrlr *)arg;
        cpu_set_t cpuset;
        pthread_t thread;

        thread = pthread_self();
        CPU_ZERO(&cpuset);
        CPU_SET(0, &cpuset);
        pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);

        num = rte_vhost_get_vring_num(ctrlr->bdev->vid);
        fprintf(stdout, "Ctrlr Worker Thread Started with %u Vrings\n", num);

        if (num != NUM_OF_SCSI_QUEUES) {
                fprintf(stderr, "Only 1 IO queue is supported\n");
                exit(0);
        }

        while (!g_should_stop && ctrlr->bdev != NULL) {
                /* There are at least 3 vrings: queues 0 and 1 are the
                 * virtio-scsi control and event queues, queue 2 is the
                 * single supported IO request queue. TMF and hotplug are
                 * not supported by this example application.
                 */
                for (idx = 2; idx < num; idx++)
                        process_requestq(ctrlr, idx);
        }

        fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
        sem_post(&exit_sem);
        return NULL;
}

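/* vhost new_device callback: invoked when a guest connects to the socket.
 * Fetches the guest memory table, constructs the in-memory block device,
 * disables guest notifications, and launches the worker thread.
 */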
static int
new_device(int vid)
{
        char path[PATH_MAX];
        struct vhost_scsi_ctrlr *ctrlr;
        struct vhost_scsi_queue *scsi_vq;
        struct rte_vhost_vring *vq;
        pthread_t tid;
        int i, ret;

        ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
        if (ret) {
                fprintf(stderr, "Cannot get socket name\n");
                return -1;
        }

        ctrlr = vhost_scsi_ctrlr_find(path);
        if (!ctrlr) {
                fprintf(stderr, "Controller is not ready\n");
                return -1;
        }

        ret = rte_vhost_get_mem_table(vid, &ctrlr->mem);
        if (ret) {
                fprintf(stderr, "Get Controller memory region failed\n");
                return -1;
        }
        assert(ctrlr->mem != NULL);

        /* hardcoded block device: 32768 blocks of 4 KiB = 128 MiB */
        ctrlr->bdev = vhost_scsi_bdev_construct("malloc0", "vhost_scsi_malloc0",
                                                4096, 32768, 0);
        if (!ctrlr->bdev)
                return -1;

        ctrlr->bdev->vid = vid;

        /* Disable Notifications */
        for (i = 0; i < NUM_OF_SCSI_QUEUES; i++) {
                rte_vhost_enable_guest_notification(vid, i, 0);
                /* restore used index */
                scsi_vq = &ctrlr->bdev->queues[i];
                vq = &scsi_vq->vq;
                ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq);
                assert(ret == 0);
                scsi_vq->last_used_idx = vq->used->idx;
                scsi_vq->last_avail_idx = vq->used->idx;
        }

        g_should_stop = 0;
        fprintf(stdout, "New Device %s, Device ID %d\n", path, vid);
        if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) != 0) {
                fprintf(stderr, "Failed to start worker thread\n");
                return -1;
        }
        pthread_detach(tid);
        return 0;
}

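/* vhost destroy_device callback: invoked when the guest disconnects.
 * Signals the worker thread to stop and waits for it to exit.
 */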
static void
destroy_device(int vid)
{
        char path[PATH_MAX];
        struct vhost_scsi_ctrlr *ctrlr;

        rte_vhost_get_ifname(vid, path, PATH_MAX);
        fprintf(stdout, "Destroy %s Device ID %d\n", path, vid);
        ctrlr = vhost_scsi_ctrlr_find(path);
        if (!ctrlr) {
                fprintf(stderr, "Destroy Ctrlr Failed\n");
                return;
        }
        ctrlr->bdev = NULL;
        g_should_stop = 1;

        sem_wait(&exit_sem);
}

static const struct vhost_device_ops vhost_scsi_device_ops = {
        .new_device = new_device,
        .destroy_device = destroy_device,
};

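/* Create the single controller instance: place the vhost-user socket in
 * the current working directory, advertise the virtio-scsi feature bits,
 * and register the new_device/destroy_device callbacks.
 */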
static struct vhost_scsi_ctrlr *
vhost_scsi_ctrlr_construct(const char *ctrlr_name)
{
        int ret;
        struct vhost_scsi_ctrlr *ctrlr;
        char *path;
        char cwd[PATH_MAX];

        /* always use the current working directory */
        path = getcwd(cwd, PATH_MAX);
        if (!path) {
                fprintf(stderr, "Cannot get current working directory\n");
                return NULL;
        }
        snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name);

        /* remove a stale socket file left over from a previous run */
        if (access(dev_pathname, F_OK) != -1) {
                if (unlink(dev_pathname) != 0)
                        rte_exit(EXIT_FAILURE, "Cannot remove %s.\n",
                                 dev_pathname);
        }

        if (rte_vhost_driver_register(dev_pathname, 0) != 0) {
                fprintf(stderr, "Failed to register vhost driver for %s\n",
                        dev_pathname);
                return NULL;
        }

        fprintf(stdout, "socket file: %s created\n", dev_pathname);

        ret = rte_vhost_driver_set_features(dev_pathname, VIRTIO_SCSI_FEATURES);
        if (ret != 0) {
                fprintf(stderr, "Set vhost driver features failed\n");
                return NULL;
        }

        ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE);
        if (!ctrlr)
                return NULL;

        rte_vhost_driver_callback_register(dev_pathname,
                                           &vhost_scsi_device_ops);

        return ctrlr;
}

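/* SIGINT handler: remove the socket file so the next run can bind to the
 * same path, then exit.
 */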
static void
signal_handler(__rte_unused int signum)
{
        if (access(dev_pathname, F_OK) == 0)
                unlink(dev_pathname);
        exit(0);
}

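/* Entry point: initialize the EAL, construct the controller, then start
 * the vhost-user driver and wait for guest connections.
 */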
int main(int argc, char *argv[])
{
        int ret;

        signal(SIGINT, signal_handler);

        /* init EAL */
        ret = rte_eal_init(argc, argv);
        if (ret < 0)
                rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");

        g_vhost_ctrlr = vhost_scsi_ctrlr_construct("vhost.socket");
        if (g_vhost_ctrlr == NULL) {
                fprintf(stderr, "Construct vhost scsi controller failed\n");
                return -1;
        }

        if (sem_init(&exit_sem, 0, 0) < 0) {
                fprintf(stderr, "Error init exit_sem\n");
                return -1;
        }

        rte_vhost_driver_start(dev_pathname);

        /* block here; the SIGINT handler terminates the application */
        while (1)
                sleep(1);

        return 0;
}