New upstream version 18.08
[deb_dpdk.git] / lib / librte_vhost / socket.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <limits.h>
8 #include <stdlib.h>
9 #include <unistd.h>
10 #include <string.h>
11 #include <sys/types.h>
12 #include <sys/socket.h>
13 #include <sys/un.h>
14 #include <sys/queue.h>
15 #include <errno.h>
16 #include <fcntl.h>
17 #include <pthread.h>
18
19 #include <rte_log.h>
20
21 #include "fd_man.h"
22 #include "vhost.h"
23 #include "vhost_user.h"
24
25
/* Tail-queue head type used for each socket's list of connections. */
TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);
27
28 /*
29  * Every time rte_vhost_driver_register() is invoked, an associated
30  * vhost_user_socket struct will be created.
31  */
32 struct vhost_user_socket {
33         struct vhost_user_connection_list conn_list;
34         pthread_mutex_t conn_mutex;
35         char *path;
36         int socket_fd;
37         struct sockaddr_un un;
38         bool is_server;
39         bool reconnect;
40         bool dequeue_zero_copy;
41         bool iommu_support;
42         bool use_builtin_virtio_net;
43
44         /*
45          * The "supported_features" indicates the feature bits the
46          * vhost driver supports. The "features" indicates the feature
47          * bits after the rte_vhost_driver_features_disable/enable().
48          * It is also the final feature bits used for vhost-user
49          * features negotiation.
50          */
51         uint64_t supported_features;
52         uint64_t features;
53
54         /*
55          * Device id to identify a specific backend device.
56          * It's set to -1 for the default software implementation.
57          * If valid, one socket can have 1 connection only.
58          */
59         int vdpa_dev_id;
60
61         struct vhost_device_ops const *notify_ops;
62 };
63
/* One established vhost-user connection, linked on its socket's conn_list. */
struct vhost_user_connection {
	struct vhost_user_socket *vsocket;	/* owning socket */
	int connfd;				/* connected socket descriptor */
	int vid;				/* handle from vhost_new_device() */

	TAILQ_ENTRY(vhost_user_connection) next;
};
71
72 #define MAX_VHOST_SOCKET 1024
73 struct vhost_user {
74         struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
75         struct fdset fdset;
76         int vsocket_cnt;
77         pthread_mutex_t mutex;
78 };
79
/* Backlog value handed to listen() for server-mode sockets. */
#define MAX_VIRTIO_BACKLOG 128
81
/* Forward declarations of helpers that are used before they are defined. */
static int create_unix_socket(struct vhost_user_socket *vsocket);
static int vhost_user_start_client(struct vhost_user_socket *vsocket);
static void vhost_user_read_cb(int fd, void *dat, int *remove);
static void vhost_user_server_new_connection(int fd, void *data, int *remove);
86
87 static struct vhost_user vhost_user = {
88         .fdset = {
89                 .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
90                 .fd_mutex = PTHREAD_MUTEX_INITIALIZER,
91                 .num = 0
92         },
93         .vsocket_cnt = 0,
94         .mutex = PTHREAD_MUTEX_INITIALIZER,
95 };
96
97 /* return bytes# of read on success or negative val on failure. */
98 int
99 read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
100 {
101         struct iovec iov;
102         struct msghdr msgh;
103         size_t fdsize = fd_num * sizeof(int);
104         char control[CMSG_SPACE(fdsize)];
105         struct cmsghdr *cmsg;
106         int got_fds = 0;
107         int ret;
108
109         memset(&msgh, 0, sizeof(msgh));
110         iov.iov_base = buf;
111         iov.iov_len  = buflen;
112
113         msgh.msg_iov = &iov;
114         msgh.msg_iovlen = 1;
115         msgh.msg_control = control;
116         msgh.msg_controllen = sizeof(control);
117
118         ret = recvmsg(sockfd, &msgh, 0);
119         if (ret <= 0) {
120                 RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
121                 return ret;
122         }
123
124         if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
125                 RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
126                 return -1;
127         }
128
129         for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
130                 cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
131                 if ((cmsg->cmsg_level == SOL_SOCKET) &&
132                         (cmsg->cmsg_type == SCM_RIGHTS)) {
133                         got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
134                         memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
135                         break;
136                 }
137         }
138
139         /* Clear out unused file descriptors */
140         while (got_fds < fd_num)
141                 fds[got_fds++] = -1;
142
143         return ret;
144 }
145
146 int
147 send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
148 {
149
150         struct iovec iov;
151         struct msghdr msgh;
152         size_t fdsize = fd_num * sizeof(int);
153         char control[CMSG_SPACE(fdsize)];
154         struct cmsghdr *cmsg;
155         int ret;
156
157         memset(&msgh, 0, sizeof(msgh));
158         iov.iov_base = buf;
159         iov.iov_len = buflen;
160
161         msgh.msg_iov = &iov;
162         msgh.msg_iovlen = 1;
163
164         if (fds && fd_num > 0) {
165                 msgh.msg_control = control;
166                 msgh.msg_controllen = sizeof(control);
167                 cmsg = CMSG_FIRSTHDR(&msgh);
168                 if (cmsg == NULL) {
169                         RTE_LOG(ERR, VHOST_CONFIG, "cmsg == NULL\n");
170                         errno = EINVAL;
171                         return -1;
172                 }
173                 cmsg->cmsg_len = CMSG_LEN(fdsize);
174                 cmsg->cmsg_level = SOL_SOCKET;
175                 cmsg->cmsg_type = SCM_RIGHTS;
176                 memcpy(CMSG_DATA(cmsg), fds, fdsize);
177         } else {
178                 msgh.msg_control = NULL;
179                 msgh.msg_controllen = 0;
180         }
181
182         do {
183                 ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL);
184         } while (ret < 0 && errno == EINTR);
185
186         if (ret < 0) {
187                 RTE_LOG(ERR, VHOST_CONFIG,  "sendmsg error\n");
188                 return ret;
189         }
190
191         return ret;
192 }
193
194 static void
195 vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
196 {
197         int vid;
198         size_t size;
199         struct vhost_user_connection *conn;
200         int ret;
201
202         if (vsocket == NULL)
203                 return;
204
205         conn = malloc(sizeof(*conn));
206         if (conn == NULL) {
207                 close(fd);
208                 return;
209         }
210
211         vid = vhost_new_device();
212         if (vid == -1) {
213                 goto err;
214         }
215
216         size = strnlen(vsocket->path, PATH_MAX);
217         vhost_set_ifname(vid, vsocket->path, size);
218
219         vhost_set_builtin_virtio_net(vid, vsocket->use_builtin_virtio_net);
220
221         vhost_attach_vdpa_device(vid, vsocket->vdpa_dev_id);
222
223         if (vsocket->dequeue_zero_copy)
224                 vhost_enable_dequeue_zero_copy(vid);
225
226         RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
227
228         if (vsocket->notify_ops->new_connection) {
229                 ret = vsocket->notify_ops->new_connection(vid);
230                 if (ret < 0) {
231                         RTE_LOG(ERR, VHOST_CONFIG,
232                                 "failed to add vhost user connection with fd %d\n",
233                                 fd);
234                         goto err;
235                 }
236         }
237
238         conn->connfd = fd;
239         conn->vsocket = vsocket;
240         conn->vid = vid;
241         ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
242                         NULL, conn);
243         if (ret < 0) {
244                 RTE_LOG(ERR, VHOST_CONFIG,
245                         "failed to add fd %d into vhost server fdset\n",
246                         fd);
247
248                 if (vsocket->notify_ops->destroy_connection)
249                         vsocket->notify_ops->destroy_connection(conn->vid);
250
251                 goto err;
252         }
253
254         pthread_mutex_lock(&vsocket->conn_mutex);
255         TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
256         pthread_mutex_unlock(&vsocket->conn_mutex);
257
258         fdset_pipe_notify(&vhost_user.fdset);
259         return;
260
261 err:
262         free(conn);
263         close(fd);
264 }
265
266 /* call back when there is new vhost-user connection from client  */
267 static void
268 vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
269 {
270         struct vhost_user_socket *vsocket = dat;
271
272         fd = accept(fd, NULL, NULL);
273         if (fd < 0)
274                 return;
275
276         RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd);
277         vhost_user_add_connection(fd, vsocket);
278 }
279
280 static void
281 vhost_user_read_cb(int connfd, void *dat, int *remove)
282 {
283         struct vhost_user_connection *conn = dat;
284         struct vhost_user_socket *vsocket = conn->vsocket;
285         int ret;
286
287         ret = vhost_user_msg_handler(conn->vid, connfd);
288         if (ret < 0) {
289                 close(connfd);
290                 *remove = 1;
291                 vhost_destroy_device(conn->vid);
292
293                 if (vsocket->notify_ops->destroy_connection)
294                         vsocket->notify_ops->destroy_connection(conn->vid);
295
296                 pthread_mutex_lock(&vsocket->conn_mutex);
297                 TAILQ_REMOVE(&vsocket->conn_list, conn, next);
298                 pthread_mutex_unlock(&vsocket->conn_mutex);
299
300                 free(conn);
301
302                 if (vsocket->reconnect) {
303                         create_unix_socket(vsocket);
304                         vhost_user_start_client(vsocket);
305                 }
306         }
307 }
308
309 static int
310 create_unix_socket(struct vhost_user_socket *vsocket)
311 {
312         int fd;
313         struct sockaddr_un *un = &vsocket->un;
314
315         fd = socket(AF_UNIX, SOCK_STREAM, 0);
316         if (fd < 0)
317                 return -1;
318         RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
319                 vsocket->is_server ? "server" : "client", fd);
320
321         if (!vsocket->is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
322                 RTE_LOG(ERR, VHOST_CONFIG,
323                         "vhost-user: can't set nonblocking mode for socket, fd: "
324                         "%d (%s)\n", fd, strerror(errno));
325                 close(fd);
326                 return -1;
327         }
328
329         memset(un, 0, sizeof(*un));
330         un->sun_family = AF_UNIX;
331         strncpy(un->sun_path, vsocket->path, sizeof(un->sun_path));
332         un->sun_path[sizeof(un->sun_path) - 1] = '\0';
333
334         vsocket->socket_fd = fd;
335         return 0;
336 }
337
338 static int
339 vhost_user_start_server(struct vhost_user_socket *vsocket)
340 {
341         int ret;
342         int fd = vsocket->socket_fd;
343         const char *path = vsocket->path;
344
345         /*
346          * bind () may fail if the socket file with the same name already
347          * exists. But the library obviously should not delete the file
348          * provided by the user, since we can not be sure that it is not
349          * being used by other applications. Moreover, many applications form
350          * socket names based on user input, which is prone to errors.
351          *
352          * The user must ensure that the socket does not exist before
353          * registering the vhost driver in server mode.
354          */
355         ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un));
356         if (ret < 0) {
357                 RTE_LOG(ERR, VHOST_CONFIG,
358                         "failed to bind to %s: %s; remove it and try again\n",
359                         path, strerror(errno));
360                 goto err;
361         }
362         RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
363
364         ret = listen(fd, MAX_VIRTIO_BACKLOG);
365         if (ret < 0)
366                 goto err;
367
368         ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
369                   NULL, vsocket);
370         if (ret < 0) {
371                 RTE_LOG(ERR, VHOST_CONFIG,
372                         "failed to add listen fd %d to vhost server fdset\n",
373                         fd);
374                 goto err;
375         }
376
377         return 0;
378
379 err:
380         close(fd);
381         return -1;
382 }
383
/* A client socket whose connect() has not succeeded yet and is retried. */
struct vhost_user_reconnect {
	struct sockaddr_un un;			/* address to connect to */
	int fd;					/* descriptor being connected */
	struct vhost_user_socket *vsocket;	/* socket this retry belongs to */

	TAILQ_ENTRY(vhost_user_reconnect) next;
};
391
TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);

/* Pending reconnect entries together with the mutex that guards them. */
struct vhost_user_reconnect_list {
	struct vhost_user_reconnect_tailq_list head;
	pthread_mutex_t mutex;
};

/* Set up by vhost_user_reconnect_init(). */
static struct vhost_user_reconnect_list reconn_list;
/* Thread id of the reconnect thread. */
static pthread_t reconn_tid;
400
401 static int
402 vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
403 {
404         int ret, flags;
405
406         ret = connect(fd, un, sz);
407         if (ret < 0 && errno != EISCONN)
408                 return -1;
409
410         flags = fcntl(fd, F_GETFL, 0);
411         if (flags < 0) {
412                 RTE_LOG(ERR, VHOST_CONFIG,
413                         "can't get flags for connfd %d\n", fd);
414                 return -2;
415         }
416         if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
417                 RTE_LOG(ERR, VHOST_CONFIG,
418                                 "can't disable nonblocking on fd %d\n", fd);
419                 return -2;
420         }
421         return 0;
422 }
423
424 static void *
425 vhost_user_client_reconnect(void *arg __rte_unused)
426 {
427         int ret;
428         struct vhost_user_reconnect *reconn, *next;
429
430         while (1) {
431                 pthread_mutex_lock(&reconn_list.mutex);
432
433                 /*
434                  * An equal implementation of TAILQ_FOREACH_SAFE,
435                  * which does not exist on all platforms.
436                  */
437                 for (reconn = TAILQ_FIRST(&reconn_list.head);
438                      reconn != NULL; reconn = next) {
439                         next = TAILQ_NEXT(reconn, next);
440
441                         ret = vhost_user_connect_nonblock(reconn->fd,
442                                                 (struct sockaddr *)&reconn->un,
443                                                 sizeof(reconn->un));
444                         if (ret == -2) {
445                                 close(reconn->fd);
446                                 RTE_LOG(ERR, VHOST_CONFIG,
447                                         "reconnection for fd %d failed\n",
448                                         reconn->fd);
449                                 goto remove_fd;
450                         }
451                         if (ret == -1)
452                                 continue;
453
454                         RTE_LOG(INFO, VHOST_CONFIG,
455                                 "%s: connected\n", reconn->vsocket->path);
456                         vhost_user_add_connection(reconn->fd, reconn->vsocket);
457 remove_fd:
458                         TAILQ_REMOVE(&reconn_list.head, reconn, next);
459                         free(reconn);
460                 }
461
462                 pthread_mutex_unlock(&reconn_list.mutex);
463                 sleep(1);
464         }
465
466         return NULL;
467 }
468
469 static int
470 vhost_user_reconnect_init(void)
471 {
472         int ret;
473
474         ret = pthread_mutex_init(&reconn_list.mutex, NULL);
475         if (ret < 0) {
476                 RTE_LOG(ERR, VHOST_CONFIG, "failed to initialize mutex");
477                 return ret;
478         }
479         TAILQ_INIT(&reconn_list.head);
480
481         ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL,
482                              vhost_user_client_reconnect, NULL);
483         if (ret != 0) {
484                 RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread");
485                 if (pthread_mutex_destroy(&reconn_list.mutex)) {
486                         RTE_LOG(ERR, VHOST_CONFIG,
487                                 "failed to destroy reconnect mutex");
488                 }
489         }
490
491         return ret;
492 }
493
494 static int
495 vhost_user_start_client(struct vhost_user_socket *vsocket)
496 {
497         int ret;
498         int fd = vsocket->socket_fd;
499         const char *path = vsocket->path;
500         struct vhost_user_reconnect *reconn;
501
502         ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&vsocket->un,
503                                           sizeof(vsocket->un));
504         if (ret == 0) {
505                 vhost_user_add_connection(fd, vsocket);
506                 return 0;
507         }
508
509         RTE_LOG(WARNING, VHOST_CONFIG,
510                 "failed to connect to %s: %s\n",
511                 path, strerror(errno));
512
513         if (ret == -2 || !vsocket->reconnect) {
514                 close(fd);
515                 return -1;
516         }
517
518         RTE_LOG(INFO, VHOST_CONFIG, "%s: reconnecting...\n", path);
519         reconn = malloc(sizeof(*reconn));
520         if (reconn == NULL) {
521                 RTE_LOG(ERR, VHOST_CONFIG,
522                         "failed to allocate memory for reconnect\n");
523                 close(fd);
524                 return -1;
525         }
526         reconn->un = vsocket->un;
527         reconn->fd = fd;
528         reconn->vsocket = vsocket;
529         pthread_mutex_lock(&reconn_list.mutex);
530         TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
531         pthread_mutex_unlock(&reconn_list.mutex);
532
533         return 0;
534 }
535
536 static struct vhost_user_socket *
537 find_vhost_user_socket(const char *path)
538 {
539         int i;
540
541         for (i = 0; i < vhost_user.vsocket_cnt; i++) {
542                 struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
543
544                 if (!strcmp(vsocket->path, path))
545                         return vsocket;
546         }
547
548         return NULL;
549 }
550
551 int
552 rte_vhost_driver_attach_vdpa_device(const char *path, int did)
553 {
554         struct vhost_user_socket *vsocket;
555
556         if (rte_vdpa_get_device(did) == NULL)
557                 return -1;
558
559         pthread_mutex_lock(&vhost_user.mutex);
560         vsocket = find_vhost_user_socket(path);
561         if (vsocket)
562                 vsocket->vdpa_dev_id = did;
563         pthread_mutex_unlock(&vhost_user.mutex);
564
565         return vsocket ? 0 : -1;
566 }
567
568 int
569 rte_vhost_driver_detach_vdpa_device(const char *path)
570 {
571         struct vhost_user_socket *vsocket;
572
573         pthread_mutex_lock(&vhost_user.mutex);
574         vsocket = find_vhost_user_socket(path);
575         if (vsocket)
576                 vsocket->vdpa_dev_id = -1;
577         pthread_mutex_unlock(&vhost_user.mutex);
578
579         return vsocket ? 0 : -1;
580 }
581
582 int
583 rte_vhost_driver_get_vdpa_device_id(const char *path)
584 {
585         struct vhost_user_socket *vsocket;
586         int did = -1;
587
588         pthread_mutex_lock(&vhost_user.mutex);
589         vsocket = find_vhost_user_socket(path);
590         if (vsocket)
591                 did = vsocket->vdpa_dev_id;
592         pthread_mutex_unlock(&vhost_user.mutex);
593
594         return did;
595 }
596
597 int
598 rte_vhost_driver_disable_features(const char *path, uint64_t features)
599 {
600         struct vhost_user_socket *vsocket;
601
602         pthread_mutex_lock(&vhost_user.mutex);
603         vsocket = find_vhost_user_socket(path);
604
605         /* Note that use_builtin_virtio_net is not affected by this function
606          * since callers may want to selectively disable features of the
607          * built-in vhost net device backend.
608          */
609
610         if (vsocket)
611                 vsocket->features &= ~features;
612         pthread_mutex_unlock(&vhost_user.mutex);
613
614         return vsocket ? 0 : -1;
615 }
616
617 int
618 rte_vhost_driver_enable_features(const char *path, uint64_t features)
619 {
620         struct vhost_user_socket *vsocket;
621
622         pthread_mutex_lock(&vhost_user.mutex);
623         vsocket = find_vhost_user_socket(path);
624         if (vsocket) {
625                 if ((vsocket->supported_features & features) != features) {
626                         /*
627                          * trying to enable features the driver doesn't
628                          * support.
629                          */
630                         pthread_mutex_unlock(&vhost_user.mutex);
631                         return -1;
632                 }
633                 vsocket->features |= features;
634         }
635         pthread_mutex_unlock(&vhost_user.mutex);
636
637         return vsocket ? 0 : -1;
638 }
639
640 int
641 rte_vhost_driver_set_features(const char *path, uint64_t features)
642 {
643         struct vhost_user_socket *vsocket;
644
645         pthread_mutex_lock(&vhost_user.mutex);
646         vsocket = find_vhost_user_socket(path);
647         if (vsocket) {
648                 vsocket->supported_features = features;
649                 vsocket->features = features;
650
651                 /* Anyone setting feature bits is implementing their own vhost
652                  * device backend.
653                  */
654                 vsocket->use_builtin_virtio_net = false;
655         }
656         pthread_mutex_unlock(&vhost_user.mutex);
657
658         return vsocket ? 0 : -1;
659 }
660
661 int
662 rte_vhost_driver_get_features(const char *path, uint64_t *features)
663 {
664         struct vhost_user_socket *vsocket;
665         uint64_t vdpa_features;
666         struct rte_vdpa_device *vdpa_dev;
667         int did = -1;
668         int ret = 0;
669
670         pthread_mutex_lock(&vhost_user.mutex);
671         vsocket = find_vhost_user_socket(path);
672         if (!vsocket) {
673                 RTE_LOG(ERR, VHOST_CONFIG,
674                         "socket file %s is not registered yet.\n", path);
675                 ret = -1;
676                 goto unlock_exit;
677         }
678
679         did = vsocket->vdpa_dev_id;
680         vdpa_dev = rte_vdpa_get_device(did);
681         if (!vdpa_dev || !vdpa_dev->ops->get_features) {
682                 *features = vsocket->features;
683                 goto unlock_exit;
684         }
685
686         if (vdpa_dev->ops->get_features(did, &vdpa_features) < 0) {
687                 RTE_LOG(ERR, VHOST_CONFIG,
688                                 "failed to get vdpa features "
689                                 "for socket file %s.\n", path);
690                 ret = -1;
691                 goto unlock_exit;
692         }
693
694         *features = vsocket->features & vdpa_features;
695
696 unlock_exit:
697         pthread_mutex_unlock(&vhost_user.mutex);
698         return ret;
699 }
700
701 int
702 rte_vhost_driver_get_protocol_features(const char *path,
703                 uint64_t *protocol_features)
704 {
705         struct vhost_user_socket *vsocket;
706         uint64_t vdpa_protocol_features;
707         struct rte_vdpa_device *vdpa_dev;
708         int did = -1;
709         int ret = 0;
710
711         pthread_mutex_lock(&vhost_user.mutex);
712         vsocket = find_vhost_user_socket(path);
713         if (!vsocket) {
714                 RTE_LOG(ERR, VHOST_CONFIG,
715                         "socket file %s is not registered yet.\n", path);
716                 ret = -1;
717                 goto unlock_exit;
718         }
719
720         did = vsocket->vdpa_dev_id;
721         vdpa_dev = rte_vdpa_get_device(did);
722         if (!vdpa_dev || !vdpa_dev->ops->get_protocol_features) {
723                 *protocol_features = VHOST_USER_PROTOCOL_FEATURES;
724                 goto unlock_exit;
725         }
726
727         if (vdpa_dev->ops->get_protocol_features(did,
728                                 &vdpa_protocol_features) < 0) {
729                 RTE_LOG(ERR, VHOST_CONFIG,
730                                 "failed to get vdpa protocol features "
731                                 "for socket file %s.\n", path);
732                 ret = -1;
733                 goto unlock_exit;
734         }
735
736         *protocol_features = VHOST_USER_PROTOCOL_FEATURES
737                 & vdpa_protocol_features;
738
739 unlock_exit:
740         pthread_mutex_unlock(&vhost_user.mutex);
741         return ret;
742 }
743
744 int
745 rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num)
746 {
747         struct vhost_user_socket *vsocket;
748         uint32_t vdpa_queue_num;
749         struct rte_vdpa_device *vdpa_dev;
750         int did = -1;
751         int ret = 0;
752
753         pthread_mutex_lock(&vhost_user.mutex);
754         vsocket = find_vhost_user_socket(path);
755         if (!vsocket) {
756                 RTE_LOG(ERR, VHOST_CONFIG,
757                         "socket file %s is not registered yet.\n", path);
758                 ret = -1;
759                 goto unlock_exit;
760         }
761
762         did = vsocket->vdpa_dev_id;
763         vdpa_dev = rte_vdpa_get_device(did);
764         if (!vdpa_dev || !vdpa_dev->ops->get_queue_num) {
765                 *queue_num = VHOST_MAX_QUEUE_PAIRS;
766                 goto unlock_exit;
767         }
768
769         if (vdpa_dev->ops->get_queue_num(did, &vdpa_queue_num) < 0) {
770                 RTE_LOG(ERR, VHOST_CONFIG,
771                                 "failed to get vdpa queue number "
772                                 "for socket file %s.\n", path);
773                 ret = -1;
774                 goto unlock_exit;
775         }
776
777         *queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num);
778
779 unlock_exit:
780         pthread_mutex_unlock(&vhost_user.mutex);
781         return ret;
782 }
783
784 static void
785 vhost_user_socket_mem_free(struct vhost_user_socket *vsocket)
786 {
787         if (vsocket && vsocket->path) {
788                 free(vsocket->path);
789                 vsocket->path = NULL;
790         }
791
792         if (vsocket) {
793                 free(vsocket);
794                 vsocket = NULL;
795         }
796 }
797
798 /*
799  * Register a new vhost-user socket; here we could act as server
800  * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
801  * is set.
802  */
803 int
804 rte_vhost_driver_register(const char *path, uint64_t flags)
805 {
806         int ret = -1;
807         struct vhost_user_socket *vsocket;
808
809         if (!path)
810                 return -1;
811
812         pthread_mutex_lock(&vhost_user.mutex);
813
814         if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
815                 RTE_LOG(ERR, VHOST_CONFIG,
816                         "error: the number of vhost sockets reaches maximum\n");
817                 goto out;
818         }
819
820         vsocket = malloc(sizeof(struct vhost_user_socket));
821         if (!vsocket)
822                 goto out;
823         memset(vsocket, 0, sizeof(struct vhost_user_socket));
824         vsocket->path = strdup(path);
825         if (vsocket->path == NULL) {
826                 RTE_LOG(ERR, VHOST_CONFIG,
827                         "error: failed to copy socket path string\n");
828                 vhost_user_socket_mem_free(vsocket);
829                 goto out;
830         }
831         TAILQ_INIT(&vsocket->conn_list);
832         ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
833         if (ret) {
834                 RTE_LOG(ERR, VHOST_CONFIG,
835                         "error: failed to init connection mutex\n");
836                 goto out_free;
837         }
838         vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
839
840         /*
841          * Set the supported features correctly for the builtin vhost-user
842          * net driver.
843          *
844          * Applications know nothing about features the builtin virtio net
845          * driver (virtio_net.c) supports, thus it's not possible for them
846          * to invoke rte_vhost_driver_set_features(). To workaround it, here
847          * we set it unconditionally. If the application want to implement
848          * another vhost-user driver (say SCSI), it should call the
849          * rte_vhost_driver_set_features(), which will overwrite following
850          * two values.
851          */
852         vsocket->use_builtin_virtio_net = true;
853         vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
854         vsocket->features           = VIRTIO_NET_SUPPORTED_FEATURES;
855
856         /* Dequeue zero copy can't assure descriptors returned in order */
857         if (vsocket->dequeue_zero_copy) {
858                 vsocket->supported_features &= ~(1ULL << VIRTIO_F_IN_ORDER);
859                 vsocket->features &= ~(1ULL << VIRTIO_F_IN_ORDER);
860         }
861
862         if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
863                 vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
864                 vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
865         }
866
867         if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
868                 vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
869                 if (vsocket->reconnect && reconn_tid == 0) {
870                         if (vhost_user_reconnect_init() != 0)
871                                 goto out_mutex;
872                 }
873         } else {
874                 vsocket->is_server = true;
875         }
876         ret = create_unix_socket(vsocket);
877         if (ret < 0) {
878                 goto out_mutex;
879         }
880
881         vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
882
883         pthread_mutex_unlock(&vhost_user.mutex);
884         return ret;
885
886 out_mutex:
887         if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
888                 RTE_LOG(ERR, VHOST_CONFIG,
889                         "error: failed to destroy connection mutex\n");
890         }
891 out_free:
892         vhost_user_socket_mem_free(vsocket);
893 out:
894         pthread_mutex_unlock(&vhost_user.mutex);
895
896         return ret;
897 }
898
899 static bool
900 vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
901 {
902         int found = false;
903         struct vhost_user_reconnect *reconn, *next;
904
905         pthread_mutex_lock(&reconn_list.mutex);
906
907         for (reconn = TAILQ_FIRST(&reconn_list.head);
908              reconn != NULL; reconn = next) {
909                 next = TAILQ_NEXT(reconn, next);
910
911                 if (reconn->vsocket == vsocket) {
912                         TAILQ_REMOVE(&reconn_list.head, reconn, next);
913                         close(reconn->fd);
914                         free(reconn);
915                         found = true;
916                         break;
917                 }
918         }
919         pthread_mutex_unlock(&reconn_list.mutex);
920         return found;
921 }
922
923 /**
924  * Unregister the specified vhost socket
925  */
926 int
927 rte_vhost_driver_unregister(const char *path)
928 {
929         int i;
930         int count;
931         struct vhost_user_connection *conn, *next;
932
933         pthread_mutex_lock(&vhost_user.mutex);
934
935         for (i = 0; i < vhost_user.vsocket_cnt; i++) {
936                 struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
937
938                 if (!strcmp(vsocket->path, path)) {
939 again:
940                         pthread_mutex_lock(&vsocket->conn_mutex);
941                         for (conn = TAILQ_FIRST(&vsocket->conn_list);
942                              conn != NULL;
943                              conn = next) {
944                                 next = TAILQ_NEXT(conn, next);
945
946                                 /*
947                                  * If r/wcb is executing, release the
948                                  * conn_mutex lock, and try again since
949                                  * the r/wcb may use the conn_mutex lock.
950                                  */
951                                 if (fdset_try_del(&vhost_user.fdset,
952                                                   conn->connfd) == -1) {
953                                         pthread_mutex_unlock(
954                                                         &vsocket->conn_mutex);
955                                         goto again;
956                                 }
957
958                                 RTE_LOG(INFO, VHOST_CONFIG,
959                                         "free connfd = %d for device '%s'\n",
960                                         conn->connfd, path);
961                                 close(conn->connfd);
962                                 vhost_destroy_device(conn->vid);
963                                 TAILQ_REMOVE(&vsocket->conn_list, conn, next);
964                                 free(conn);
965                         }
966                         pthread_mutex_unlock(&vsocket->conn_mutex);
967
968                         if (vsocket->is_server) {
969                                 fdset_del(&vhost_user.fdset,
970                                                 vsocket->socket_fd);
971                                 close(vsocket->socket_fd);
972                                 unlink(path);
973                         } else if (vsocket->reconnect) {
974                                 vhost_user_remove_reconnect(vsocket);
975                         }
976
977                         pthread_mutex_destroy(&vsocket->conn_mutex);
978                         vhost_user_socket_mem_free(vsocket);
979
980                         count = --vhost_user.vsocket_cnt;
981                         vhost_user.vsockets[i] = vhost_user.vsockets[count];
982                         vhost_user.vsockets[count] = NULL;
983                         pthread_mutex_unlock(&vhost_user.mutex);
984
985                         return 0;
986                 }
987         }
988         pthread_mutex_unlock(&vhost_user.mutex);
989
990         return -1;
991 }
992
993 /*
994  * Register ops so that we can add/remove device to data core.
995  */
996 int
997 rte_vhost_driver_callback_register(const char *path,
998         struct vhost_device_ops const * const ops)
999 {
1000         struct vhost_user_socket *vsocket;
1001
1002         pthread_mutex_lock(&vhost_user.mutex);
1003         vsocket = find_vhost_user_socket(path);
1004         if (vsocket)
1005                 vsocket->notify_ops = ops;
1006         pthread_mutex_unlock(&vhost_user.mutex);
1007
1008         return vsocket ? 0 : -1;
1009 }
1010
1011 struct vhost_device_ops const *
1012 vhost_driver_callback_get(const char *path)
1013 {
1014         struct vhost_user_socket *vsocket;
1015
1016         pthread_mutex_lock(&vhost_user.mutex);
1017         vsocket = find_vhost_user_socket(path);
1018         pthread_mutex_unlock(&vhost_user.mutex);
1019
1020         return vsocket ? vsocket->notify_ops : NULL;
1021 }
1022
1023 int
1024 rte_vhost_driver_start(const char *path)
1025 {
1026         struct vhost_user_socket *vsocket;
1027         static pthread_t fdset_tid;
1028
1029         pthread_mutex_lock(&vhost_user.mutex);
1030         vsocket = find_vhost_user_socket(path);
1031         pthread_mutex_unlock(&vhost_user.mutex);
1032
1033         if (!vsocket)
1034                 return -1;
1035
1036         if (fdset_tid == 0) {
1037                 /**
1038                  * create a pipe which will be waited by poll and notified to
1039                  * rebuild the wait list of poll.
1040                  */
1041                 if (fdset_pipe_init(&vhost_user.fdset) < 0) {
1042                         RTE_LOG(ERR, VHOST_CONFIG,
1043                                 "failed to create pipe for vhost fdset\n");
1044                         return -1;
1045                 }
1046
1047                 int ret = rte_ctrl_thread_create(&fdset_tid,
1048                         "vhost-events", NULL, fdset_event_dispatch,
1049                         &vhost_user.fdset);
1050                 if (ret != 0) {
1051                         RTE_LOG(ERR, VHOST_CONFIG,
1052                                 "failed to create fdset handling thread");
1053
1054                         fdset_pipe_uninit(&vhost_user.fdset);
1055                         return -1;
1056                 }
1057         }
1058
1059         if (vsocket->is_server)
1060                 return vhost_user_start_server(vsocket);
1061         else
1062                 return vhost_user_start_client(vsocket);
1063 }