Imported Upstream version 16.07-rc1
[deb_dpdk.git] / lib / librte_vhost / vhost_user / vhost-net-user.c
index df2bd64..94f1b92 100644 (file)
@@ -33,6 +33,7 @@
 
 #include <stdint.h>
 #include <stdio.h>
+#include <stdbool.h>
 #include <limits.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -40,6 +41,7 @@
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/un.h>
+#include <sys/queue.h>
 #include <errno.h>
 #include <pthread.h>
 
 #include "vhost-net.h"
 #include "virtio-net-user.h"
 
-#define MAX_VIRTIO_BACKLOG 128
-
-static void vserver_new_vq_conn(int fd, void *data, int *remove);
-static void vserver_message_handler(int fd, void *dat, int *remove);
+/*
+ * Every time rte_vhost_driver_register() is invoked, an associated
+ * vhost_user_socket struct will be created. It is freed again by
+ * rte_vhost_driver_unregister().
+ */
+struct vhost_user_socket {
+       char *path; /* strdup()'ed copy of the registered socket path */
+       int listenfd; /* listening fd; only assigned in server mode */
+       bool is_server; /* true unless RTE_VHOST_USER_CLIENT was passed */
+       bool reconnect; /* client mode: retry a failed or lost connection */
+};
 
-struct connfd_ctx {
-       struct vhost_server *vserver;
-       uint32_t fh;
+struct vhost_user_connection { /* context handed to vhost_user_msg_handler() */
+       struct vhost_user_socket *vsocket; /* socket this connection was created for */
+       int vid; /* device id returned by vhost_new_device() */
 };
 
-#define MAX_VHOST_SERVER 1024
-struct _vhost_server {
-       struct vhost_server *server[MAX_VHOST_SERVER];
+#define MAX_VHOST_SOCKET 1024 /* capacity of the vsockets[] table below */
+struct vhost_user { /* registry of all registered vhost-user sockets */
+       struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET]; /* entries [0, vsocket_cnt) are in use */
        struct fdset fdset;
-       int vserver_cnt;
-       pthread_mutex_t server_mutex;
+       int vsocket_cnt; /* number of registered sockets */
+       pthread_mutex_t mutex; /* taken by register/unregister around table updates */
 };
 
-static struct _vhost_server g_vhost_server = {
+#define MAX_VIRTIO_BACKLOG 128 /* backlog argument for listen() on server sockets */
+
+static void vhost_user_server_new_connection(int fd, void *data, int *remove);
+static void vhost_user_msg_handler(int fd, void *dat, int *remove);
+static int vhost_user_create_client(struct vhost_user_socket *vsocket); /* called by the message handler before its definition */
+
+static struct vhost_user vhost_user = { /* the file-scope registry instance */
        .fdset = {
                .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
                .fd_mutex = PTHREAD_MUTEX_INITIALIZER,
                .num = 0
        },
-       .vserver_cnt = 0,
-       .server_mutex = PTHREAD_MUTEX_INITIALIZER,
+       .vsocket_cnt = 0,
+       .mutex = PTHREAD_MUTEX_INITIALIZER,
 };
 
 static const char *vhost_message_str[VHOST_USER_MAX] = {
@@ -102,48 +116,6 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
        [VHOST_USER_SEND_RARP]  = "VHOST_USER_SEND_RARP",
 };
 
-/**
- * Create a unix domain socket, bind to path and listen for connection.
- * @return
- *  socket fd or -1 on failure
- */
-static int
-uds_socket(const char *path)
-{
-       struct sockaddr_un un;
-       int sockfd;
-       int ret;
-
-       if (path == NULL)
-               return -1;
-
-       sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
-       if (sockfd < 0)
-               return -1;
-       RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
-
-       memset(&un, 0, sizeof(un));
-       un.sun_family = AF_UNIX;
-       snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
-       ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
-       if (ret == -1) {
-               RTE_LOG(ERR, VHOST_CONFIG, "fail to bind fd:%d, remove file:%s and try again.\n",
-                       sockfd, path);
-               goto err;
-       }
-       RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
-
-       ret = listen(sockfd, MAX_VIRTIO_BACKLOG);
-       if (ret == -1)
-               goto err;
-
-       return sockfd;
-
-err:
-       close(sockfd);
-       return -1;
-}
-
 /* return bytes# of read on success or negative val on failure. */
 static int
 read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
@@ -278,62 +250,66 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg)
        return ret;
 }
 
-/* call back when there is new virtio connection.  */
+/*
+ * Create a vhost device for the already-connected socket fd and add fd
+ * to vhost_user.fdset with vhost_user_msg_handler() as its callback.
+ * The device's ifname is set to the socket path. If the connection
+ * context or the device cannot be created, fd is closed and nothing
+ * is added.
+ */
 static void
-vserver_new_vq_conn(int fd, void *dat, __rte_unused int *remove)
+vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
 {
-       struct vhost_server *vserver = (struct vhost_server *)dat;
-       int conn_fd;
-       struct connfd_ctx *ctx;
-       int fh;
-       struct vhost_device_ctx vdev_ctx = { (pid_t)0, 0 };
-       unsigned int size;
-
-       conn_fd = accept(fd, NULL, NULL);
-       RTE_LOG(INFO, VHOST_CONFIG,
-               "new virtio connection is %d\n", conn_fd);
-       if (conn_fd < 0)
-               return;
+       int vid;
+       size_t size;
+       struct vhost_user_connection *conn;
 
-       ctx = calloc(1, sizeof(*ctx));
-       if (ctx == NULL) {
-               close(conn_fd);
+       conn = malloc(sizeof(*conn)); /* freed by vhost_user_msg_handler() when the connection ends */
+       if (conn == NULL) {
+               close(fd);
                return;
        }
 
-       fh = vhost_new_device(vdev_ctx);
-       if (fh == -1) {
-               free(ctx);
-               close(conn_fd);
+       vid = vhost_new_device(); /* -1 is treated as "no device created" */
+       if (vid == -1) {
+               close(fd);
+               free(conn);
                return;
        }
 
-       vdev_ctx.fh = fh;
-       size = strnlen(vserver->path, PATH_MAX);
-       vhost_set_ifname(vdev_ctx, vserver->path,
-               size);
+       size = strnlen(vsocket->path, PATH_MAX); /* length passed on is capped at PATH_MAX */
+       vhost_set_ifname(vid, vsocket->path, size);
+
+       RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
 
-       RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
+       conn->vsocket = vsocket;
+       conn->vid = vid;
+       fdset_add(&vhost_user.fdset, fd, vhost_user_msg_handler, NULL, conn); /* NOTE(review): result not checked; confirm whether fdset_add() can fail */
+}
+
+/* call back when there is a new vhost-user connection from a client:
+ * accepts it on the listening fd and passes the accepted fd, together
+ * with the vhost_user_socket received as dat, on to
+ * vhost_user_add_connection(). A failed accept() is ignored. */
+static void
+vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
+{
+       struct vhost_user_socket *vsocket = dat;
 
-       ctx->vserver = vserver;
-       ctx->fh = fh;
-       fdset_add(&g_vhost_server.fdset,
-               conn_fd, vserver_message_handler, NULL, ctx);
+       fd = accept(fd, NULL, NULL); /* fd is reused: from here on it is the accepted connection */
+       if (fd < 0)
+               return;
+
+       RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd);
+       vhost_user_add_connection(fd, vsocket);
 }
 
 /* callback when there is message on the connfd */
 static void
-vserver_message_handler(int connfd, void *dat, int *remove)
+vhost_user_msg_handler(int connfd, void *dat, int *remove)
 {
-       struct vhost_device_ctx ctx;
-       struct connfd_ctx *cfd_ctx = (struct connfd_ctx *)dat;
+       int vid;
+       struct vhost_user_connection *conn = dat;
        struct VhostUserMsg msg;
        uint64_t features;
        int ret;
 
-       ctx.fh = cfd_ctx->fh;
+       vid = conn->vid;
        ret = read_vhost_message(connfd, &msg);
        if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
+               struct vhost_user_socket *vsocket = conn->vsocket;
+
                if (ret < 0)
                        RTE_LOG(ERR, VHOST_CONFIG,
                                "vhost read message failed\n");
@@ -346,8 +322,11 @@ vserver_message_handler(int connfd, void *dat, int *remove)
 
                close(connfd);
                *remove = 1;
-               free(cfd_ctx);
-               vhost_destroy_device(ctx);
+               free(conn);
+               vhost_destroy_device(vid);
+
+               if (vsocket->reconnect)
+                       vhost_user_create_client(vsocket);
 
                return;
        }
@@ -356,14 +335,14 @@ vserver_message_handler(int connfd, void *dat, int *remove)
                vhost_message_str[msg.request]);
        switch (msg.request) {
        case VHOST_USER_GET_FEATURES:
-               ret = vhost_get_features(ctx, &features);
+               ret = vhost_get_features(vid, &features);
                msg.payload.u64 = features;
                msg.size = sizeof(msg.payload.u64);
                send_vhost_message(connfd, &msg);
                break;
        case VHOST_USER_SET_FEATURES:
                features = msg.payload.u64;
-               vhost_set_features(ctx, &features);
+               vhost_set_features(vid, &features);
                break;
 
        case VHOST_USER_GET_PROTOCOL_FEATURES:
@@ -372,22 +351,22 @@ vserver_message_handler(int connfd, void *dat, int *remove)
                send_vhost_message(connfd, &msg);
                break;
        case VHOST_USER_SET_PROTOCOL_FEATURES:
-               user_set_protocol_features(ctx, msg.payload.u64);
+               user_set_protocol_features(vid, msg.payload.u64);
                break;
 
        case VHOST_USER_SET_OWNER:
-               vhost_set_owner(ctx);
+               vhost_set_owner(vid);
                break;
        case VHOST_USER_RESET_OWNER:
-               vhost_reset_owner(ctx);
+               vhost_reset_owner(vid);
                break;
 
        case VHOST_USER_SET_MEM_TABLE:
-               user_set_mem_table(ctx, &msg);
+               user_set_mem_table(vid, &msg);
                break;
 
        case VHOST_USER_SET_LOG_BASE:
-               user_set_log_base(ctx, &msg);
+               user_set_log_base(vid, &msg);
 
                /* it needs a reply */
                msg.size = sizeof(msg.payload.u64);
@@ -399,26 +378,26 @@ vserver_message_handler(int connfd, void *dat, int *remove)
                break;
 
        case VHOST_USER_SET_VRING_NUM:
-               vhost_set_vring_num(ctx, &msg.payload.state);
+               vhost_set_vring_num(vid, &msg.payload.state);
                break;
        case VHOST_USER_SET_VRING_ADDR:
-               vhost_set_vring_addr(ctx, &msg.payload.addr);
+               vhost_set_vring_addr(vid, &msg.payload.addr);
                break;
        case VHOST_USER_SET_VRING_BASE:
-               vhost_set_vring_base(ctx, &msg.payload.state);
+               vhost_set_vring_base(vid, &msg.payload.state);
                break;
 
        case VHOST_USER_GET_VRING_BASE:
-               ret = user_get_vring_base(ctx, &msg.payload.state);
+               ret = user_get_vring_base(vid, &msg.payload.state);
                msg.size = sizeof(msg.payload.state);
                send_vhost_message(connfd, &msg);
                break;
 
        case VHOST_USER_SET_VRING_KICK:
-               user_set_vring_kick(ctx, &msg);
+               user_set_vring_kick(vid, &msg);
                break;
        case VHOST_USER_SET_VRING_CALL:
-               user_set_vring_call(ctx, &msg);
+               user_set_vring_call(vid, &msg);
                break;
 
        case VHOST_USER_SET_VRING_ERR:
@@ -434,10 +413,10 @@ vserver_message_handler(int connfd, void *dat, int *remove)
                break;
 
        case VHOST_USER_SET_VRING_ENABLE:
-               user_set_vring_enable(ctx, &msg.payload.state);
+               user_set_vring_enable(vid, &msg.payload.state);
                break;
        case VHOST_USER_SEND_RARP:
-               user_send_rarp(ctx, &msg);
+               user_send_rarp(vid, &msg);
                break;
 
        default:
@@ -446,50 +425,222 @@ vserver_message_handler(int connfd, void *dat, int *remove)
        }
 }
 
-/**
- * Creates and initialise the vhost server.
- */
-int
-rte_vhost_driver_register(const char *path)
+/*
+ * Create an AF_UNIX stream socket and fill *un with the address of path.
+ * The socket is neither bound nor connected here; is_server is only used
+ * to label the log messages.
+ *
+ * Returns the new fd, or -1 if path does not fit into un->sun_path or
+ * socket() fails.
+ */
+static int
+create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server)
 {
-       struct vhost_server *vserver;
+       int fd;
+
+       /*
+        * sun_path must hold the whole path plus its terminating NUL;
+        * refuse longer paths instead of using a truncated or
+        * unterminated address.
+        */
+       if (strlen(path) >= sizeof(un->sun_path)) {
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "vhost-user %s: socket path %s is too long\n",
+                       is_server ? "server" : "client", path);
+               return -1;
+       }
 
-       pthread_mutex_lock(&g_vhost_server.server_mutex);
+       fd = socket(AF_UNIX, SOCK_STREAM, 0);
+       if (fd < 0)
+               return -1;
+       RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
+               is_server ? "server" : "client", fd);
 
-       if (g_vhost_server.vserver_cnt == MAX_VHOST_SERVER) {
-               RTE_LOG(ERR, VHOST_CONFIG,
-                       "error: the number of servers reaches maximum\n");
-               pthread_mutex_unlock(&g_vhost_server.server_mutex);
+       memset(un, 0, sizeof(*un));
+       un->sun_family = AF_UNIX;
+       snprintf(un->sun_path, sizeof(un->sun_path), "%s", path);
+
+       return fd;
+}
+
+/*
+ * Set up vsocket as a vhost-user server: create the unix socket, bind it
+ * to vsocket->path, start listening and add the listening fd to
+ * vhost_user.fdset with vhost_user_server_new_connection() as callback.
+ *
+ * Returns 0 on success; on failure the fd is closed and -1 is returned.
+ */
+static int
+vhost_user_create_server(struct vhost_user_socket *vsocket)
+{
+       int fd;
+       int ret;
+       struct sockaddr_un un;
+       const char *path = vsocket->path;
+
+       fd = create_unix_socket(path, &un, vsocket->is_server);
+       if (fd < 0)
                return -1;
+
+       ret = bind(fd, (struct sockaddr *)&un, sizeof(un));
+       if (ret < 0) {
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "failed to bind to %s: %s; remove it and try again\n",
+                       path, strerror(errno));
+               goto err;
        }
+       RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
 
-       vserver = calloc(sizeof(struct vhost_server), 1);
-       if (vserver == NULL) {
-               pthread_mutex_unlock(&g_vhost_server.server_mutex);
-               return -1;
+       ret = listen(fd, MAX_VIRTIO_BACKLOG);
+       if (ret < 0) {
+               /* report the reason, as the bind() failure path does */
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "failed to listen on %s: %s\n",
+                       path, strerror(errno));
+               goto err;
+       }
+
+       vsocket->listenfd = fd;
+       fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
+                 NULL, vsocket);
+
+       return 0;
+
+err:
+       close(fd);
+       return -1;
+}
+
+struct vhost_user_reconnect { /* one client connection still waiting to be established */
+       struct sockaddr_un un; /* address passed to connect() on every retry */
+       int fd; /* socket whose first connect() attempt failed */
+       struct vhost_user_socket *vsocket; /* socket the connection will belong to */
+
+       TAILQ_ENTRY(vhost_user_reconnect) next;
+};
+
+TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
+struct vhost_user_reconnect_list {
+       struct vhost_user_reconnect_tailq_list head;
+       pthread_mutex_t mutex; /* held while head is walked or modified */
+};
+
+static struct vhost_user_reconnect_list reconn_list; /* initialised by vhost_user_reconnect_init() */
+static pthread_t reconn_tid; /* reconnect thread; register compares it with 0 to see if it was started */
+/*
+ * Body of the reconnect thread. Once per second it walks reconn_list
+ * and retries connect() on every queued entry; an entry that connects
+ * is handed to vhost_user_add_connection(), removed from the list and
+ * freed. The loop has no exit condition.
+ */
+static void *
+vhost_user_client_reconnect(void *arg __rte_unused)
+{
+       struct vhost_user_reconnect *reconn, *next;
+
+       while (1) {
+               pthread_mutex_lock(&reconn_list.mutex);
+
+               /*
+                * An equivalent of TAILQ_FOREACH_SAFE, which does not
+                * exist on all platforms: the successor is read before
+                * the current entry may be removed and freed.
+                */
+               for (reconn = TAILQ_FIRST(&reconn_list.head);
+                    reconn != NULL; reconn = next) {
+                       next = TAILQ_NEXT(reconn, next);
+
+                       if (connect(reconn->fd, (struct sockaddr *)&reconn->un,
+                                   sizeof(reconn->un)) < 0)
+                               continue; /* still failing: keep the entry for the next round */
+
+                       RTE_LOG(INFO, VHOST_CONFIG,
+                               "%s: connected\n", reconn->vsocket->path);
+                       vhost_user_add_connection(reconn->fd, reconn->vsocket);
+                       TAILQ_REMOVE(&reconn_list.head, reconn, next);
+                       free(reconn);
+               }
+
+               pthread_mutex_unlock(&reconn_list.mutex);
+               sleep(1); /* retry interval; the list mutex is not held while sleeping */
        }
 
-       vserver->listenfd = uds_socket(path);
-       if (vserver->listenfd < 0) {
-               free(vserver);
-               pthread_mutex_unlock(&g_vhost_server.server_mutex);
+       return NULL;
+}
+
+/*
+ * Initialise reconn_list and start the reconnect thread.
+ *
+ * Returns 0 on success, -1 if the thread could not be created.
+ */
+static int
+vhost_user_reconnect_init(void)
+{
+       int ret;
+
+       pthread_mutex_init(&reconn_list.mutex, NULL);
+       TAILQ_INIT(&reconn_list.head);
+
+       /*
+        * pthread_create() reports failure as a positive error number,
+        * not as a negative value; map it to the -1 the caller tests for.
+        */
+       ret = pthread_create(&reconn_tid, NULL,
+                            vhost_user_client_reconnect, NULL);
+       if (ret != 0) {
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "failed to create reconnect thread: %s\n",
+                       strerror(ret));
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Connect to the vhost-user server at vsocket->path.
+ *
+ * If connect() succeeds the fd is handed to vhost_user_add_connection().
+ * If it fails and vsocket->reconnect is set, the socket is queued on
+ * reconn_list for the reconnect thread to retry; that case also returns 0.
+ *
+ * Returns 0 on success, -1 on failure (the fd is closed in that case).
+ */
+static int
+vhost_user_create_client(struct vhost_user_socket *vsocket)
+{
+       int fd;
+       int ret;
+       struct sockaddr_un un;
+       const char *path = vsocket->path;
+       struct vhost_user_reconnect *reconn;
+
+       fd = create_unix_socket(path, &un, vsocket->is_server);
+       if (fd < 0)
                return -1;
+
+       ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
+       if (ret == 0) {
+               vhost_user_add_connection(fd, vsocket);
+               return 0;
        }
 
-       vserver->path = strdup(path);
+       RTE_LOG(ERR, VHOST_CONFIG,
+               "failed to connect to %s: %s\n",
+               path, strerror(errno));
 
-       fdset_add(&g_vhost_server.fdset, vserver->listenfd,
-               vserver_new_vq_conn, NULL, vserver);
+       if (!vsocket->reconnect) {
+               close(fd);
+               return -1;
+       }
 
-       g_vhost_server.server[g_vhost_server.vserver_cnt++] = vserver;
-       pthread_mutex_unlock(&g_vhost_server.server_mutex);
+       RTE_LOG(ERR, VHOST_CONFIG, "%s: reconnecting...\n", path);
+       reconn = malloc(sizeof(*reconn));
+       if (reconn == NULL) {
+               /* without a list entry nobody would retry or close fd */
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "failed to allocate memory for reconnect\n");
+               close(fd);
+               return -1;
+       }
+       reconn->un = un;
+       reconn->fd = fd;
+       reconn->vsocket = vsocket;
+       pthread_mutex_lock(&reconn_list.mutex);
+       TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
+       pthread_mutex_unlock(&reconn_list.mutex);
 
        return 0;
 }
 
+/*
+ * Register a new vhost-user socket; here we could act as server
+ * (the default case), or client (when the RTE_VHOST_USER_CLIENT flag
+ * is set).
+ *
+ * In client mode a failed or lost connection is retried unless
+ * RTE_VHOST_USER_NO_RECONNECT is set as well; the reconnect thread is
+ * started the first time it is needed.
+ *
+ * Returns 0 on success, -1 on failure (NULL path, socket table full,
+ * out of memory, or the socket could not be set up).
+ */
+int
+rte_vhost_driver_register(const char *path, uint64_t flags)
+{
+       int ret = -1;
+       struct vhost_user_socket *vsocket;
+
+       if (!path)
+               return -1;
+
+       pthread_mutex_lock(&vhost_user.mutex);
+
+       if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "error: the number of vhost sockets reaches maximum\n");
+               goto out;
+       }
+
+       vsocket = malloc(sizeof(struct vhost_user_socket));
+       if (!vsocket)
+               goto out;
+       memset(vsocket, 0, sizeof(struct vhost_user_socket));
+       vsocket->path = strdup(path);
+       if (!vsocket->path)
+               goto out_free;
+
+       if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
+               vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
+               if (vsocket->reconnect && reconn_tid == 0) {
+                       if (vhost_user_reconnect_init() < 0)
+                               goto out_free;
+               }
+               ret = vhost_user_create_client(vsocket);
+       } else {
+               vsocket->is_server = true;
+               ret = vhost_user_create_server(vsocket);
+       }
+       if (ret < 0)
+               goto out_free;
+
+       vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
+       goto out;
+
+out_free:
+       /* ret is negative on every path that gets here */
+       free(vsocket->path);
+       free(vsocket);
+out:
+       pthread_mutex_unlock(&vhost_user.mutex);
+
+       return ret;
+}
 
 /**
- * Unregister the specified vhost server
+ * Unregister the specified vhost socket
  */
 int
 rte_vhost_driver_unregister(const char *path)
@@ -497,28 +648,29 @@ rte_vhost_driver_unregister(const char *path)
        int i;
        int count;
 
-       pthread_mutex_lock(&g_vhost_server.server_mutex);
-
-       for (i = 0; i < g_vhost_server.vserver_cnt; i++) {
-               if (!strcmp(g_vhost_server.server[i]->path, path)) {
-                       fdset_del(&g_vhost_server.fdset,
-                               g_vhost_server.server[i]->listenfd);
+       pthread_mutex_lock(&vhost_user.mutex);
 
-                       close(g_vhost_server.server[i]->listenfd);
-                       free(g_vhost_server.server[i]->path);
-                       free(g_vhost_server.server[i]);
+       for (i = 0; i < vhost_user.vsocket_cnt; i++) {
+               if (!strcmp(vhost_user.vsockets[i]->path, path)) {
+                       if (vhost_user.vsockets[i]->is_server) {
+                               fdset_del(&vhost_user.fdset,
+                                       vhost_user.vsockets[i]->listenfd);
+                               close(vhost_user.vsockets[i]->listenfd);
+                               unlink(path);
+                       }
 
-                       unlink(path);
+                       free(vhost_user.vsockets[i]->path);
+                       free(vhost_user.vsockets[i]);
 
-                       count = --g_vhost_server.vserver_cnt;
-                       g_vhost_server.server[i] = g_vhost_server.server[count];
-                       g_vhost_server.server[count] = NULL;
-                       pthread_mutex_unlock(&g_vhost_server.server_mutex);
+                       count = --vhost_user.vsocket_cnt;
+                       vhost_user.vsockets[i] = vhost_user.vsockets[count];
+                       vhost_user.vsockets[count] = NULL;
+                       pthread_mutex_unlock(&vhost_user.mutex);
 
                        return 0;
                }
        }
-       pthread_mutex_unlock(&g_vhost_server.server_mutex);
+       pthread_mutex_unlock(&vhost_user.mutex);
 
        return -1;
 }
@@ -526,6 +678,6 @@ rte_vhost_driver_unregister(const char *path)
 int
 rte_vhost_driver_session_start(void)
 {
-       fdset_event_dispatch(&g_vhost_server.fdset);
+       fdset_event_dispatch(&vhost_user.fdset); /* NOTE(review): presumably runs the event loop for every fd added to this fdset; whether it returns is not visible here */
        return 0;
 }