New upstream version 18.02
[deb_dpdk.git] / examples / vhost_scsi / vhost_scsi.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <unistd.h>
7 #include <stdbool.h>
8 #include <signal.h>
9 #include <assert.h>
10 #include <semaphore.h>
11 #include <linux/virtio_scsi.h>
12 #include <linux/virtio_ring.h>
13
14 #include <rte_atomic.h>
15 #include <rte_cycles.h>
16 #include <rte_log.h>
17 #include <rte_malloc.h>
18 #include <rte_vhost.h>
19
20 #include "vhost_scsi.h"
21 #include "scsi_spec.h"
22
23 #define VIRTIO_SCSI_FEATURES ((1 << VIRTIO_F_NOTIFY_ON_EMPTY) |\
24                               (1 << VIRTIO_SCSI_F_INOUT) |\
25                               (1 << VIRTIO_SCSI_F_CHANGE))
26
27 /* Path to folder where character device will be created. Can be set by user. */
28 static char dev_pathname[PATH_MAX] = "";
29
30 static struct vhost_scsi_ctrlr *g_vhost_ctrlr;
31 static int g_should_stop;
32 static sem_t exit_sem;
33
/* Map a vhost socket name to its controller.  The example supports a
 * single socket, so the name is ignored and the lone global controller
 * is returned (may be NULL before construction).
 */
static struct vhost_scsi_ctrlr *
vhost_scsi_ctrlr_find(__rte_unused const char *ctrlr_name)
{
	/* currently we only support 1 socket file fd */
	return g_vhost_ctrlr;
}
40
41 static uint64_t gpa_to_vva(int vid, uint64_t gpa)
42 {
43         char path[PATH_MAX];
44         struct vhost_scsi_ctrlr *ctrlr;
45         int ret = 0;
46
47         ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
48         if (ret) {
49                 fprintf(stderr, "Cannot get socket name\n");
50                 assert(ret != 0);
51         }
52
53         ctrlr = vhost_scsi_ctrlr_find(path);
54         if (!ctrlr) {
55                 fprintf(stderr, "Controller is not ready\n");
56                 assert(ctrlr != NULL);
57         }
58
59         assert(ctrlr->mem != NULL);
60
61         return rte_vhost_gpa_to_vva(ctrlr->mem, gpa);
62 }
63
/* Follow the "next" link of cur_desc within the descriptor table. */
static struct vring_desc *
descriptor_get_next(struct vring_desc *vq_desc, struct vring_desc *cur_desc)
{
	uint16_t next_idx = cur_desc->next;

	return vq_desc + next_idx;
}
69
/* True when the descriptor chains to a following descriptor. */
static bool
descriptor_has_next(struct vring_desc *cur_desc)
{
	return (cur_desc->flags & VRING_DESC_F_NEXT) != 0;
}
75
/* True when the buffer is device-writable (guest-readable data). */
static bool
descriptor_is_wr(struct vring_desc *cur_desc)
{
	return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
}
81
/*
 * Publish a finished task on the used ring of queue q_idx and kick the
 * guest so it can reap the completion.
 */
static void
submit_completion(struct vhost_scsi_task *task, uint32_t q_idx)
{
	struct rte_vhost_vring *vq;
	struct vring_used *used;

	vq = task->vq;
	used = vq->used;
	/* Fill out the next entry in the "used" ring.  id = the
	 * index of the descriptor that contained the SCSI request.
	 * len = the total amount of data transferred for the SCSI
	 * request. We must report the correct len, for variable
	 * length SCSI CDBs, where we may return less data than
	 * allocated by the guest VM.
	 */
	used->ring[used->idx & (vq->size - 1)].id = task->req_idx;
	used->ring[used->idx & (vq->size - 1)].len = task->data_len;
	/* NOTE(review): the ring entry is published by this idx increment
	 * with no intervening write barrier; on weakly-ordered CPUs the
	 * guest could observe the new idx before the entry — confirm
	 * whether rte_smp_wmb() is needed here.
	 */
	used->idx++;

	/* Send an interrupt back to the guest VM so that it knows
	 * a completion is ready to be processed.
	 */
	rte_vhost_vring_call(task->bdev->vid, q_idx);
}
106
/*
 * Map a guest-read (device -> guest) request chain: the descriptor in
 * task->desc holds the virtio-scsi response buffer, and every descriptor
 * after it is a data buffer collected into task->iovs.
 */
static void
vhost_process_read_payload_chain(struct vhost_scsi_task *task)
{
	void *data;

	task->iovs_cnt = 0;
	/* First writable descriptor is the response structure. */
	task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						   task->desc->addr);

	/* Walk the rest of the chain, one iovec per descriptor.
	 * NOTE(review): no bound check against the capacity of task->iovs
	 * — confirm the array is sized for the longest possible chain.
	 */
	while (descriptor_has_next(task->desc)) {
		task->desc = descriptor_get_next(task->vq->desc, task->desc);
		data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc->addr);
		task->iovs[task->iovs_cnt].iov_base = data;
		task->iovs[task->iovs_cnt].iov_len = task->desc->len;
		task->data_len += task->desc->len;
		task->iovs_cnt++;
	}
}
126
/*
 * Map a guest-write (guest -> device) request chain: every descriptor up
 * to (but not including) the last one is a data buffer collected into
 * task->iovs; the final descriptor is the virtio-scsi response buffer.
 */
static void
vhost_process_write_payload_chain(struct vhost_scsi_task *task)
{
	void *data;

	task->iovs_cnt = 0;

	/* NOTE(review): no bound check against the capacity of task->iovs
	 * — confirm the array is sized for the longest possible chain.
	 */
	do {
		data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc->addr);
		task->iovs[task->iovs_cnt].iov_base = data;
		task->iovs[task->iovs_cnt].iov_len = task->desc->len;
		task->data_len += task->desc->len;
		task->iovs_cnt++;
		task->desc = descriptor_get_next(task->vq->desc, task->desc);
	} while (descriptor_has_next(task->desc));

	/* Last descriptor of the chain is the response structure. */
	task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						   task->desc->addr);
}
147
148 static struct vhost_block_dev *
149 vhost_scsi_bdev_construct(const char *bdev_name, const char *bdev_serial,
150                           uint32_t blk_size, uint64_t blk_cnt,
151                           bool wce_enable)
152 {
153         struct vhost_block_dev *bdev;
154
155         bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE);
156         if (!bdev)
157                 return NULL;
158
159         strncpy(bdev->name, bdev_name, sizeof(bdev->name));
160         strncpy(bdev->product_name, bdev_serial, sizeof(bdev->product_name));
161         bdev->blocklen = blk_size;
162         bdev->blockcnt = blk_cnt;
163         bdev->write_cache = wce_enable;
164
165         /* use memory as disk storage space */
166         bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0);
167         if (!bdev->data) {
168                 fprintf(stderr, "no enough reseverd huge memory for disk\n");
169                 return NULL;
170         }
171
172         return bdev;
173 }
174
/*
 * Drain all pending requests on queue q_idx: turn each available
 * descriptor chain into a vhost_scsi_task, run the SCSI command against
 * the in-memory block device and post the completion to the guest.
 */
static void
process_requestq(struct vhost_scsi_ctrlr *ctrlr, uint32_t q_idx)
{
	int ret;
	struct vhost_scsi_queue *scsi_vq;
	struct rte_vhost_vring *vq;

	scsi_vq = &ctrlr->bdev->queues[q_idx];
	vq = &scsi_vq->vq;
	/* Refresh the cached vring addresses for this queue. */
	ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, q_idx, vq);
	assert(ret == 0);	/* NOTE(review): ret is unused when NDEBUG is set */

	/* Consume everything the guest made available since last pass. */
	while (vq->avail->idx != scsi_vq->last_used_idx) {
		int req_idx;
		uint16_t last_idx;
		struct vhost_scsi_task *task;

		/* vring size is a power of two; mask yields the ring slot */
		last_idx = scsi_vq->last_used_idx & (vq->size - 1);
		req_idx = vq->avail->ring[last_idx];

		task = rte_zmalloc(NULL, sizeof(*task), 0);
		assert(task != NULL);	/* example app: abort on OOM */

		task->ctrlr = ctrlr;
		task->bdev = ctrlr->bdev;
		task->vq = vq;
		task->req_idx = req_idx;
		task->desc = &task->vq->desc[task->req_idx];

		/* does not support indirect descriptors */
		assert((task->desc->flags & VRING_DESC_F_INDIRECT) == 0);
		scsi_vq->last_used_idx++;

		/* Head descriptor carries the virtio-scsi request header. */
		task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
							  task->desc->addr);

		task->desc = descriptor_get_next(task->vq->desc, task->desc);
		if (!descriptor_has_next(task->desc)) {
			/* Chain is just request + response: no data phase. */
			task->dxfer_dir = SCSI_DIR_NONE;
			task->resp = (void *)(uintptr_t)
					      gpa_to_vva(task->bdev->vid,
							 task->desc->addr);

		} else if (!descriptor_is_wr(task->desc)) {
			/* Device-readable buffers: data flows to the device. */
			task->dxfer_dir = SCSI_DIR_TO_DEV;
			vhost_process_write_payload_chain(task);
		} else {
			/* Device-writable buffers: data flows from the device. */
			task->dxfer_dir = SCSI_DIR_FROM_DEV;
			vhost_process_read_payload_chain(task);
		}

		ret = vhost_bdev_process_scsi_commands(ctrlr->bdev, task);
		if (ret) {
			/* invalid response */
			task->resp->response = VIRTIO_SCSI_S_BAD_TARGET;
		} else {
			/* successfully */
			task->resp->response = VIRTIO_SCSI_S_OK;
			task->resp->status = 0;
			task->resp->resid = 0;
		}
		submit_completion(task, q_idx);
		rte_free(task);
	}
}
240
241 /* Main framework for processing IOs */
242 static void *
243 ctrlr_worker(void *arg)
244 {
245         uint32_t idx, num;
246         struct vhost_scsi_ctrlr *ctrlr = (struct vhost_scsi_ctrlr *)arg;
247         cpu_set_t cpuset;
248         pthread_t thread;
249
250         thread = pthread_self();
251         CPU_ZERO(&cpuset);
252         CPU_SET(0, &cpuset);
253         pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
254
255         num =  rte_vhost_get_vring_num(ctrlr->bdev->vid);
256         fprintf(stdout, "Ctrlr Worker Thread Started with %u Vring\n", num);
257
258         if (num != NUM_OF_SCSI_QUEUES) {
259                 fprintf(stderr, "Only 1 IO queue are supported\n");
260                 exit(0);
261         }
262
263         while (!g_should_stop && ctrlr->bdev != NULL) {
264                 /* At least 3 vrings, currently only can support 1 IO queue
265                  * Queue 2 for IO queue, does not support TMF and hotplug
266                  * for the example application now
267                  */
268                 for (idx = 2; idx < num; idx++)
269                         process_requestq(ctrlr, idx);
270         }
271
272         fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
273         sem_post(&exit_sem);
274         return NULL;
275 }
276
/*
 * vhost new-device callback: resolve the controller for this socket,
 * capture the guest memory table, build the in-memory block device,
 * disable guest notifications (we poll) and spawn the worker thread.
 *
 * Returns 0 on success, -1 on any failure.
 */
static int
new_device(int vid)
{
	char path[PATH_MAX];
	struct vhost_scsi_ctrlr *ctrlr;
	struct vhost_scsi_queue *scsi_vq;
	struct rte_vhost_vring *vq;
	pthread_t tid;
	int i, ret;

	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
	if (ret) {
		fprintf(stderr, "Cannot get socket name\n");
		return -1;
	}

	ctrlr = vhost_scsi_ctrlr_find(path);
	if (!ctrlr) {
		fprintf(stderr, "Controller is not ready\n");
		return -1;
	}

	/* Guest memory map, used later by gpa_to_vva() translations. */
	ret = rte_vhost_get_mem_table(vid, &ctrlr->mem);
	if (ret) {
		fprintf(stderr, "Get Controller memory region failed\n");
		return -1;
	}
	assert(ctrlr->mem != NULL);

	/* hardcoded block device information with 128MiB */
	ctrlr->bdev = vhost_scsi_bdev_construct("malloc0", "vhost_scsi_malloc0",
						4096, 32768, 0);
	if (!ctrlr->bdev)
		return -1;

	ctrlr->bdev->vid = vid;

	/* Disable Notifications */
	for (i = 0; i < NUM_OF_SCSI_QUEUES; i++) {
		rte_vhost_enable_guest_notification(vid, i, 0);
		/* restore used index */
		scsi_vq = &ctrlr->bdev->queues[i];
		vq = &scsi_vq->vq;
		ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq);
		assert(ret == 0);	/* NOTE(review): ret unused when NDEBUG set */
		scsi_vq->last_used_idx = vq->used->idx;
		/* NOTE(review): last_avail_idx is seeded from used->idx, not
		 * avail->idx — fine for a freshly started guest, but confirm
		 * behavior for reconnect scenarios.
		 */
		scsi_vq->last_avail_idx = vq->used->idx;
	}

	g_should_stop = 0;
	fprintf(stdout, "New Device %s, Device ID %d\n", path, vid);
	if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) < 0) {
		fprintf(stderr, "Worker Thread Started Failed\n");
		return -1;
	}
	/* Detached: destroy_device() synchronizes via exit_sem instead. */
	pthread_detach(tid);
	return 0;
}
335
/*
 * vhost destroy callback: detach the block device, ask the worker thread
 * to stop and wait for its exit before returning to the vhost library.
 */
static void
destroy_device(int vid)
{
	char path[PATH_MAX];
	struct vhost_scsi_ctrlr *ctrlr;

	rte_vhost_get_ifname(vid, path, PATH_MAX);
	fprintf(stdout, "Destroy %s Device ID %d\n", path, vid);
	ctrlr = vhost_scsi_ctrlr_find(path);
	if (!ctrlr) {
		fprintf(stderr, "Destroy Ctrlr Failed\n");
		return;
	}
	/* NOTE(review): bdev and g_should_stop are read concurrently by
	 * ctrlr_worker without atomics — confirm the ordering is adequate
	 * for the target platforms.
	 */
	ctrlr->bdev = NULL;
	g_should_stop = 1;

	/* Block until ctrlr_worker posts exit_sem on its way out. */
	sem_wait(&exit_sem);
}
354
/* Device lifecycle callbacks registered with the vhost-user library. */
static const struct vhost_device_ops vhost_scsi_device_ops = {
	.new_device =  new_device,
	.destroy_device = destroy_device,
};
359
360 static struct vhost_scsi_ctrlr *
361 vhost_scsi_ctrlr_construct(const char *ctrlr_name)
362 {
363         int ret;
364         struct vhost_scsi_ctrlr *ctrlr;
365         char *path;
366         char cwd[PATH_MAX];
367
368         /* always use current directory */
369         path = getcwd(cwd, PATH_MAX);
370         if (!path) {
371                 fprintf(stderr, "Cannot get current working directory\n");
372                 return NULL;
373         }
374         snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name);
375
376         if (access(dev_pathname, F_OK) != -1) {
377                 if (unlink(dev_pathname) != 0)
378                         rte_exit(EXIT_FAILURE, "Cannot remove %s.\n",
379                                  dev_pathname);
380         }
381
382         if (rte_vhost_driver_register(dev_pathname, 0) != 0) {
383                 fprintf(stderr, "socket %s already exists\n", dev_pathname);
384                 return NULL;
385         }
386
387         fprintf(stdout, "socket file: %s created\n", dev_pathname);
388
389         ret = rte_vhost_driver_set_features(dev_pathname, VIRTIO_SCSI_FEATURES);
390         if (ret != 0) {
391                 fprintf(stderr, "Set vhost driver features failed\n");
392                 return NULL;
393         }
394
395         ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE);
396         if (!ctrlr)
397                 return NULL;
398
399         rte_vhost_driver_callback_register(dev_pathname,
400                                            &vhost_scsi_device_ops);
401
402         return ctrlr;
403 }
404
/* SIGINT handler: best-effort removal of the vhost socket file, then
 * terminate.
 * NOTE(review): exit() is not async-signal-safe (access/unlink/_exit
 * are); acceptable for an example app, but worth confirming.
 */
static void
signal_handler(__rte_unused int signum)
{

	if (access(dev_pathname, F_OK) == 0)
		unlink(dev_pathname);
	exit(0);
}
413
414 int main(int argc, char *argv[])
415 {
416         int ret;
417
418         signal(SIGINT, signal_handler);
419
420         /* init EAL */
421         ret = rte_eal_init(argc, argv);
422         if (ret < 0)
423                 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
424
425         g_vhost_ctrlr = vhost_scsi_ctrlr_construct("vhost.socket");
426         if (g_vhost_ctrlr == NULL) {
427                 fprintf(stderr, "Construct vhost scsi controller failed\n");
428                 return 0;
429         }
430
431         if (sem_init(&exit_sem, 0, 0) < 0) {
432                 fprintf(stderr, "Error init exit_sem\n");
433                 return -1;
434         }
435
436         rte_vhost_driver_start(dev_pathname);
437
438         /* loop for exit the application */
439         while (1)
440                 sleep(1);
441
442         return 0;
443 }
444