New upstream version 16.11.7
[deb_dpdk.git] / lib / librte_vhost / socket.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdint.h>
35 #include <stdio.h>
36 #include <stdbool.h>
37 #include <limits.h>
38 #include <stdlib.h>
39 #include <unistd.h>
40 #include <string.h>
41 #include <sys/types.h>
42 #include <sys/socket.h>
43 #include <sys/un.h>
44 #include <sys/queue.h>
45 #include <errno.h>
46 #include <fcntl.h>
47 #include <pthread.h>
48
49 #include <rte_log.h>
50
51 #include "fd_man.h"
52 #include "vhost.h"
53 #include "vhost_user.h"
54
55
/* Head type of the per-socket list of live connections. */
TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);

/*
 * State kept for one registered socket path. rte_vhost_driver_register()
 * allocates one of these per call; rte_vhost_driver_unregister() frees it.
 */
struct vhost_user_socket {
        /* Connections currently established on this socket. */
        struct vhost_user_connection_list conn_list;
        /* Serializes every access to conn_list. */
        pthread_mutex_t conn_mutex;
        /* Heap copy (strdup) of the path given at registration. */
        char *path;
        /* Listening descriptor; assigned by vhost_user_create_server(). */
        int listenfd;
        /* True when registered without RTE_VHOST_USER_CLIENT. */
        bool is_server;
        /* Client mode only: re-create the client after a connection drops. */
        bool reconnect;
        /* Set from RTE_VHOST_USER_DEQUEUE_ZERO_COPY at registration. */
        bool dequeue_zero_copy;
};
71
/*
 * One established vhost-user connection. Instances are linked into the
 * conn_list of the socket they were accepted/connected on.
 */
struct vhost_user_connection {
        /* Owning socket. */
        struct vhost_user_socket *vsocket;
        /* Descriptor of the connected unix socket. */
        int connfd;
        /* Device handle returned by vhost_new_device(). */
        int vid;

        /* Linkage inside vsocket->conn_list. */
        TAILQ_ENTRY(vhost_user_connection) next;
};
79
80 #define MAX_VHOST_SOCKET 1024
81 struct vhost_user {
82         struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
83         struct fdset fdset;
84         int vsocket_cnt;
85         pthread_mutex_t mutex;
86 };
87
/* Backlog passed to listen() for server-mode sockets. */
#define MAX_VIRTIO_BACKLOG 128

/* Forward declarations: these callbacks/helpers reference each other. */
static void vhost_user_server_new_connection(int fd, void *dat, int *remove);
static void vhost_user_read_cb(int connfd, void *dat, int *remove);
static int vhost_user_create_client(struct vhost_user_socket *vsocket);
93
94 static struct vhost_user vhost_user = {
95         .fdset = {
96                 .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
97                 .fd_mutex = PTHREAD_MUTEX_INITIALIZER,
98                 .num = 0
99         },
100         .vsocket_cnt = 0,
101         .mutex = PTHREAD_MUTEX_INITIALIZER,
102 };
103
104 /* return bytes# of read on success or negative val on failure. */
105 int
106 read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
107 {
108         struct iovec iov;
109         struct msghdr msgh;
110         size_t fdsize = fd_num * sizeof(int);
111         char control[CMSG_SPACE(fdsize)];
112         struct cmsghdr *cmsg;
113         int ret;
114
115         memset(&msgh, 0, sizeof(msgh));
116         iov.iov_base = buf;
117         iov.iov_len  = buflen;
118
119         msgh.msg_iov = &iov;
120         msgh.msg_iovlen = 1;
121         msgh.msg_control = control;
122         msgh.msg_controllen = sizeof(control);
123
124         ret = recvmsg(sockfd, &msgh, 0);
125         if (ret <= 0) {
126                 RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
127                 return ret;
128         }
129
130         if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
131                 RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
132                 return -1;
133         }
134
135         for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
136                 cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
137                 if ((cmsg->cmsg_level == SOL_SOCKET) &&
138                         (cmsg->cmsg_type == SCM_RIGHTS)) {
139                         memcpy(fds, CMSG_DATA(cmsg), fdsize);
140                         break;
141                 }
142         }
143
144         return ret;
145 }
146
147 int
148 send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
149 {
150
151         struct iovec iov;
152         struct msghdr msgh;
153         size_t fdsize = fd_num * sizeof(int);
154         char control[CMSG_SPACE(fdsize)];
155         struct cmsghdr *cmsg;
156         int ret;
157
158         memset(&msgh, 0, sizeof(msgh));
159         iov.iov_base = buf;
160         iov.iov_len = buflen;
161
162         msgh.msg_iov = &iov;
163         msgh.msg_iovlen = 1;
164
165         if (fds && fd_num > 0) {
166                 msgh.msg_control = control;
167                 msgh.msg_controllen = sizeof(control);
168                 cmsg = CMSG_FIRSTHDR(&msgh);
169                 if (cmsg == NULL) {
170                         RTE_LOG(ERR, VHOST_CONFIG, "cmsg == NULL\n");
171                         errno = EINVAL;
172                         return -1;
173                 }
174                 cmsg->cmsg_len = CMSG_LEN(fdsize);
175                 cmsg->cmsg_level = SOL_SOCKET;
176                 cmsg->cmsg_type = SCM_RIGHTS;
177                 memcpy(CMSG_DATA(cmsg), fds, fdsize);
178         } else {
179                 msgh.msg_control = NULL;
180                 msgh.msg_controllen = 0;
181         }
182
183         do {
184                 ret = sendmsg(sockfd, &msgh, 0);
185         } while (ret < 0 && errno == EINTR);
186
187         if (ret < 0) {
188                 RTE_LOG(ERR, VHOST_CONFIG,  "sendmsg error\n");
189                 return ret;
190         }
191
192         return ret;
193 }
194
195 static void
196 vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
197 {
198         int vid;
199         size_t size;
200         struct vhost_user_connection *conn;
201         int ret;
202
203         conn = malloc(sizeof(*conn));
204         if (conn == NULL) {
205                 close(fd);
206                 return;
207         }
208
209         vid = vhost_new_device();
210         if (vid == -1) {
211                 close(fd);
212                 free(conn);
213                 return;
214         }
215
216         size = strnlen(vsocket->path, PATH_MAX);
217         vhost_set_ifname(vid, vsocket->path, size);
218
219         if (vsocket->dequeue_zero_copy)
220                 vhost_enable_dequeue_zero_copy(vid);
221
222         RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
223
224         conn->connfd = fd;
225         conn->vsocket = vsocket;
226         conn->vid = vid;
227         ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
228                         NULL, conn);
229         if (ret < 0) {
230                 conn->connfd = -1;
231                 free(conn);
232                 close(fd);
233                 RTE_LOG(ERR, VHOST_CONFIG,
234                         "failed to add fd %d into vhost server fdset\n",
235                         fd);
236                 return;
237         }
238
239         pthread_mutex_lock(&vsocket->conn_mutex);
240         TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
241         pthread_mutex_unlock(&vsocket->conn_mutex);
242 }
243
244 /* call back when there is new vhost-user connection from client  */
245 static void
246 vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
247 {
248         struct vhost_user_socket *vsocket = dat;
249
250         fd = accept(fd, NULL, NULL);
251         if (fd < 0)
252                 return;
253
254         RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd);
255         vhost_user_add_connection(fd, vsocket);
256 }
257
258 static void
259 vhost_user_read_cb(int connfd, void *dat, int *remove)
260 {
261         struct vhost_user_connection *conn = dat;
262         struct vhost_user_socket *vsocket = conn->vsocket;
263         int ret;
264
265         ret = vhost_user_msg_handler(conn->vid, connfd);
266         if (ret < 0) {
267                 close(connfd);
268                 *remove = 1;
269                 vhost_destroy_device(conn->vid);
270
271                 pthread_mutex_lock(&vsocket->conn_mutex);
272                 TAILQ_REMOVE(&vsocket->conn_list, conn, next);
273                 pthread_mutex_unlock(&vsocket->conn_mutex);
274
275                 free(conn);
276
277                 if (vsocket->reconnect)
278                         vhost_user_create_client(vsocket);
279         }
280 }
281
282 static int
283 create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server)
284 {
285         int fd;
286
287         fd = socket(AF_UNIX, SOCK_STREAM, 0);
288         if (fd < 0)
289                 return -1;
290         RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
291                 is_server ? "server" : "client", fd);
292
293         if (!is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
294                 RTE_LOG(ERR, VHOST_CONFIG,
295                         "vhost-user: can't set nonblocking mode for socket, fd: "
296                         "%d (%s)\n", fd, strerror(errno));
297                 close(fd);
298                 return -1;
299         }
300
301         memset(un, 0, sizeof(*un));
302         un->sun_family = AF_UNIX;
303         strncpy(un->sun_path, path, sizeof(un->sun_path));
304         un->sun_path[sizeof(un->sun_path) - 1] = '\0';
305
306         return fd;
307 }
308
309 static int
310 vhost_user_create_server(struct vhost_user_socket *vsocket)
311 {
312         int fd;
313         int ret;
314         struct sockaddr_un un;
315         const char *path = vsocket->path;
316
317         fd = create_unix_socket(path, &un, vsocket->is_server);
318         if (fd < 0)
319                 return -1;
320
321         ret = bind(fd, (struct sockaddr *)&un, sizeof(un));
322         if (ret < 0) {
323                 RTE_LOG(ERR, VHOST_CONFIG,
324                         "failed to bind to %s: %s; remove it and try again\n",
325                         path, strerror(errno));
326                 goto err;
327         }
328         RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
329
330         ret = listen(fd, MAX_VIRTIO_BACKLOG);
331         if (ret < 0)
332                 goto err;
333
334         vsocket->listenfd = fd;
335         ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
336                   NULL, vsocket);
337         if (ret < 0) {
338                 RTE_LOG(ERR, VHOST_CONFIG,
339                         "failed to add listen fd %d to vhost server fdset\n",
340                         fd);
341                 goto err;
342         }
343
344         return 0;
345
346 err:
347         close(fd);
348         return -1;
349 }
350
/* A client socket whose connect() has not succeeded yet. */
struct vhost_user_reconnect {
        /* Address to connect to. */
        struct sockaddr_un un;
        /* Socket descriptor used for the connect attempts. */
        int fd;
        /* Socket on whose behalf the connection is being made. */
        struct vhost_user_socket *vsocket;

        /* Linkage inside reconn_list. */
        TAILQ_ENTRY(vhost_user_reconnect) next;
};

TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);

/* Pending reconnects plus the mutex that protects the list. */
struct vhost_user_reconnect_list {
        struct vhost_user_reconnect_tailq_list head;
        pthread_mutex_t mutex;
};

/* Entries waiting to be retried by the reconnect thread. */
static struct vhost_user_reconnect_list reconn_list;
/* Reconnect thread id; written by pthread_create() in the init routine. */
static pthread_t reconn_tid;
367
368 static int
369 vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
370 {
371         int ret, flags;
372
373         ret = connect(fd, un, sz);
374         if (ret < 0 && errno != EISCONN)
375                 return -1;
376
377         flags = fcntl(fd, F_GETFL, 0);
378         if (flags < 0) {
379                 RTE_LOG(ERR, VHOST_CONFIG,
380                         "can't get flags for connfd %d\n", fd);
381                 return -2;
382         }
383         if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
384                 RTE_LOG(ERR, VHOST_CONFIG,
385                                 "can't disable nonblocking on fd %d\n", fd);
386                 return -2;
387         }
388         return 0;
389 }
390
391 static void *
392 vhost_user_client_reconnect(void *arg __rte_unused)
393 {
394         int ret;
395         struct vhost_user_reconnect *reconn, *next;
396
397         while (1) {
398                 pthread_mutex_lock(&reconn_list.mutex);
399
400                 /*
401                  * An equal implementation of TAILQ_FOREACH_SAFE,
402                  * which does not exist on all platforms.
403                  */
404                 for (reconn = TAILQ_FIRST(&reconn_list.head);
405                      reconn != NULL; reconn = next) {
406                         next = TAILQ_NEXT(reconn, next);
407
408                         ret = vhost_user_connect_nonblock(reconn->fd,
409                                                 (struct sockaddr *)&reconn->un,
410                                                 sizeof(reconn->un));
411                         if (ret == -2) {
412                                 close(reconn->fd);
413                                 RTE_LOG(ERR, VHOST_CONFIG,
414                                         "reconnection for fd %d failed\n",
415                                         reconn->fd);
416                                 goto remove_fd;
417                         }
418                         if (ret == -1)
419                                 continue;
420
421                         RTE_LOG(INFO, VHOST_CONFIG,
422                                 "%s: connected\n", reconn->vsocket->path);
423                         vhost_user_add_connection(reconn->fd, reconn->vsocket);
424 remove_fd:
425                         TAILQ_REMOVE(&reconn_list.head, reconn, next);
426                         free(reconn);
427                 }
428
429                 pthread_mutex_unlock(&reconn_list.mutex);
430                 sleep(1);
431         }
432
433         return NULL;
434 }
435
436 static int
437 vhost_user_reconnect_init(void)
438 {
439         int ret;
440
441         pthread_mutex_init(&reconn_list.mutex, NULL);
442         TAILQ_INIT(&reconn_list.head);
443
444         ret = pthread_create(&reconn_tid, NULL,
445                              vhost_user_client_reconnect, NULL);
446         if (ret != 0)
447                 RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread");
448
449         return ret;
450 }
451
452 static int
453 vhost_user_create_client(struct vhost_user_socket *vsocket)
454 {
455         int fd;
456         int ret;
457         struct sockaddr_un un;
458         const char *path = vsocket->path;
459         struct vhost_user_reconnect *reconn;
460
461         fd = create_unix_socket(path, &un, vsocket->is_server);
462         if (fd < 0)
463                 return -1;
464
465         ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&un,
466                                           sizeof(un));
467         if (ret == 0) {
468                 vhost_user_add_connection(fd, vsocket);
469                 return 0;
470         }
471
472         RTE_LOG(WARNING, VHOST_CONFIG,
473                 "failed to connect to %s: %s\n",
474                 path, strerror(errno));
475
476         if (ret == -2 || !vsocket->reconnect) {
477                 close(fd);
478                 return -1;
479         }
480
481         RTE_LOG(INFO, VHOST_CONFIG, "%s: reconnecting...\n", path);
482         reconn = malloc(sizeof(*reconn));
483         if (reconn == NULL) {
484                 RTE_LOG(ERR, VHOST_CONFIG,
485                         "failed to allocate memory for reconnect\n");
486                 close(fd);
487                 return -1;
488         }
489         reconn->un = un;
490         reconn->fd = fd;
491         reconn->vsocket = vsocket;
492         pthread_mutex_lock(&reconn_list.mutex);
493         TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
494         pthread_mutex_unlock(&reconn_list.mutex);
495
496         return 0;
497 }
498
499 /*
500  * Register a new vhost-user socket; here we could act as server
501  * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
502  * is set.
503  */
504 int
505 rte_vhost_driver_register(const char *path, uint64_t flags)
506 {
507         int ret = -1;
508         struct vhost_user_socket *vsocket;
509
510         if (!path)
511                 return -1;
512
513         pthread_mutex_lock(&vhost_user.mutex);
514
515         if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
516                 RTE_LOG(ERR, VHOST_CONFIG,
517                         "error: the number of vhost sockets reaches maximum\n");
518                 goto out;
519         }
520
521         vsocket = malloc(sizeof(struct vhost_user_socket));
522         if (!vsocket)
523                 goto out;
524         memset(vsocket, 0, sizeof(struct vhost_user_socket));
525         vsocket->path = strdup(path);
526         TAILQ_INIT(&vsocket->conn_list);
527         pthread_mutex_init(&vsocket->conn_mutex, NULL);
528         vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
529
530         if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
531                 vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
532                 if (vsocket->reconnect && reconn_tid == 0) {
533                         if (vhost_user_reconnect_init() != 0) {
534                                 free(vsocket->path);
535                                 free(vsocket);
536                                 goto out;
537                         }
538                 }
539                 ret = vhost_user_create_client(vsocket);
540         } else {
541                 vsocket->is_server = true;
542                 ret = vhost_user_create_server(vsocket);
543         }
544         if (ret < 0) {
545                 free(vsocket->path);
546                 free(vsocket);
547                 goto out;
548         }
549
550         vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
551
552 out:
553         pthread_mutex_unlock(&vhost_user.mutex);
554
555         return ret;
556 }
557
558 static bool
559 vhost_user_remove_reconnect(struct vhost_user_socket *vsocket)
560 {
561         int found = false;
562         struct vhost_user_reconnect *reconn, *next;
563
564         pthread_mutex_lock(&reconn_list.mutex);
565
566         for (reconn = TAILQ_FIRST(&reconn_list.head);
567              reconn != NULL; reconn = next) {
568                 next = TAILQ_NEXT(reconn, next);
569
570                 if (reconn->vsocket == vsocket) {
571                         TAILQ_REMOVE(&reconn_list.head, reconn, next);
572                         close(reconn->fd);
573                         free(reconn);
574                         found = true;
575                         break;
576                 }
577         }
578         pthread_mutex_unlock(&reconn_list.mutex);
579         return found;
580 }
581
582 /**
583  * Unregister the specified vhost socket
584  */
585 int
586 rte_vhost_driver_unregister(const char *path)
587 {
588         int i;
589         int count;
590         struct vhost_user_connection *conn, *next;
591
592         pthread_mutex_lock(&vhost_user.mutex);
593
594         for (i = 0; i < vhost_user.vsocket_cnt; i++) {
595                 struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
596
597                 if (!strcmp(vsocket->path, path)) {
598                         if (vsocket->is_server) {
599                                 fdset_del(&vhost_user.fdset, vsocket->listenfd);
600                                 close(vsocket->listenfd);
601                                 unlink(path);
602                         } else if (vsocket->reconnect) {
603                                 vhost_user_remove_reconnect(vsocket);
604                         }
605
606 again:
607                         pthread_mutex_lock(&vsocket->conn_mutex);
608                         for (conn = TAILQ_FIRST(&vsocket->conn_list);
609                              conn != NULL;
610                              conn = next) {
611                                 next = TAILQ_NEXT(conn, next);
612
613                                 /*
614                                  * If r/wcb is executing, release the
615                                  * conn_mutex lock, and try again since
616                                  * the r/wcb may use the conn_mutex lock.
617                                  */
618                                 if (fdset_try_del(&vhost_user.fdset,
619                                                   conn->connfd) == -1) {
620                                         pthread_mutex_unlock(
621                                                         &vsocket->conn_mutex);
622                                         goto again;
623                                 }
624
625                                 RTE_LOG(INFO, VHOST_CONFIG,
626                                         "free connfd = %d for device '%s'\n",
627                                         conn->connfd, path);
628                                 close(conn->connfd);
629                                 vhost_destroy_device(conn->vid);
630                                 TAILQ_REMOVE(&vsocket->conn_list, conn, next);
631                                 free(conn);
632                         }
633                         pthread_mutex_unlock(&vsocket->conn_mutex);
634
635                         free(vsocket->path);
636                         free(vsocket);
637
638                         count = --vhost_user.vsocket_cnt;
639                         vhost_user.vsockets[i] = vhost_user.vsockets[count];
640                         vhost_user.vsockets[count] = NULL;
641                         pthread_mutex_unlock(&vhost_user.mutex);
642
643                         return 0;
644                 }
645         }
646         pthread_mutex_unlock(&vhost_user.mutex);
647
648         return -1;
649 }
650
651 int
652 rte_vhost_driver_session_start(void)
653 {
654         fdset_event_dispatch(&vhost_user.fdset);
655         return 0;
656 }