Imported Upstream version 16.04
[deb_dpdk.git] / lib / librte_vhost / vhost_cuse / vhost-net-cdev.c
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
new file mode 100644 (file)
index 0000000..c613e68
--- /dev/null
@@ -0,0 +1,426 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <fuse/cuse_lowlevel.h>
+#include <linux/limits.h>
+#include <linux/vhost.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include <rte_virtio_net.h>
+
+#include "virtio-net-cdev.h"
+#include "vhost-net.h"
+#include "eventfd_copy.h"
+
+/*
+ * Argument strings copied into the argv vector that
+ * rte_vhost_driver_register() hands to cuse_lowlevel_setup(). Each literal
+ * carries explicit trailing NUL bytes. FUSE_OPT_DUMMY fills the argv[0] slot.
+ * NOTE(review): "-f" / "-s" are presumably libfuse's foreground and
+ * single-threaded switches, as the macro names suggest - confirm against the
+ * libfuse documentation.
+ */
+#define FUSE_OPT_DUMMY "\0\0"
+#define FUSE_OPT_FORE  "-f\0\0"
+#define FUSE_OPT_NOMULTI "-s\0\0"
+
+/* Stored in cuse_info.dev_major / cuse_info.dev_minor at registration. */
+static const uint32_t default_major = 231;
+static const uint32_t default_minor = 1;
+/* Node that must be readable and writable for registration to proceed. */
+static const char cuse_device_name[] = "/dev/cuse";
+/* NOTE(review): not referenced anywhere in the visible part of this file. */
+static const char default_cdev[] = "vhost-net";
+
+/*
+ * Session returned by cuse_lowlevel_setup() in rte_vhost_driver_register()
+ * and run by rte_vhost_driver_session_start().
+ */
+static struct fuse_session *session;
+
+/*
+ * Build a vhost_device_ctx for a FUSE request: the pid comes from the
+ * request's fuse_ctx and the file handle from the supplied fuse_file_info.
+ * For an open request the handle is not meaningful yet; vhost_net_open()
+ * stores the real one in fi->fh once the device has been created.
+ */
+static struct vhost_device_ctx
+fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
+{
+       struct vhost_device_ctx dev_ctx;
+
+       dev_ctx.fh = fi->fh;
+       dev_ctx.pid = fuse_req_ctx(req)->pid;
+
+       return dev_ctx;
+}
+
+/*
+ * CUSE open handler. Asks the vhost layer for a new device and, on success,
+ * stores the value returned by vhost_new_device() in fi->fh before replying
+ * to the open request. A return of -1 from vhost_new_device() is reported
+ * to the caller as EPERM.
+ */
+static void
+vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
+{
+       int dev_fh = vhost_new_device(fuse_req_to_vhost_ctx(req, fi));
+
+       if (dev_fh != -1) {
+               fi->fh = dev_fh;
+
+               RTE_LOG(INFO, VHOST_CONFIG,
+                       "(%"PRIu64") Device configuration started\n", fi->fh);
+               fuse_reply_open(req, fi);
+               return;
+       }
+
+       fuse_reply_err(req, EPERM);
+}
+
+/*
+ * CUSE release handler: destroys the vhost device identified by the
+ * request's context and always answers the request with error code 0.
+ */
+static void
+vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
+{
+       struct vhost_device_ctx dev_ctx = fuse_req_to_vhost_ctx(req, fi);
+
+       vhost_destroy_device(dev_ctx);
+       RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n",
+               dev_ctx.fh);
+
+       fuse_reply_err(req, 0);
+}
+
+/*
+ * Common tail of an argument-less CUSE ioctl: run the handler on the device
+ * context and send its return value back with no payload.
+ * Implicit arguments: ctx, req, result.
+ */
+#define VHOST_IOCTL(func)                              \
+       do {                                            \
+               result = (func)(ctx);                   \
+               fuse_reply_ioctl(req, result, NULL, 0); \
+       } while (0)
+
+/*
+ * Boilerplate IOCTL RETRY
+ * Implicit arguments: req.
+ */
+#define VHOST_IOCTL_RETRY(size_r, size_w) do { \
+       struct iovec iov_r = { arg, (size_r) }; \
+       struct iovec iov_w = { arg, (size_w) }; \
+       fuse_reply_ioctl_retry(req, &iov_r,     \
+               (size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
+} while (0)
+
+/*
+ * Boilerplate code for CUSE Read IOCTL
+ * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
+ */
+#define VHOST_IOCTL_R(type, var, func) do {    \
+       if (!in_bufsz) {        \
+               VHOST_IOCTL_RETRY(sizeof(type), 0);\
+       } else {        \
+               (var) = *(const type*)in_buf;   \
+               result = func(ctx, &(var));     \
+               fuse_reply_ioctl(req, result, NULL, 0);\
+       }       \
+} while (0)
+
+/*
+ * Boilerplate code for CUSE Write IOCTL
+ * Implicit arguments: ctx, req, result, out_bufsz.
+ */
+#define VHOST_IOCTL_W(type, var, func) do {    \
+       if (!out_bufsz) {       \
+               VHOST_IOCTL_RETRY(0, sizeof(type));\
+       } else {        \
+               result = (func)(ctx, &(var));\
+               fuse_reply_ioctl(req, result, &(var), sizeof(type));\
+       } \
+} while (0)
+
+/*
+ * Boilerplate code for CUSE Read/Write IOCTL
+ * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
+ */
+#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do {    \
+       if (!in_bufsz) {        \
+               VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
+       } else {        \
+               (var1) = *(const type1*) (in_buf);      \
+               result = (func)(ctx, (var1), &(var2));  \
+               fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
+       }       \
+} while (0)
+
+/*
+ * CUSE ioctl handler for the vhost character device.
+ *
+ * A request that arrives without the buffer it needs is answered with a
+ * retry (VHOST_IOCTL_RETRY) describing that buffer; the handler is then
+ * entered again with the data attached and dispatches to the matching
+ * vhost/cuse helper. Every path sends exactly one reply on req.
+ *
+ * req        FUSE request to reply to.
+ * cmd        ioctl command number.
+ * arg        caller-side address of the ioctl argument; used only to build
+ *            retry iovecs.
+ * fi         file info; fi->fh supplies the device handle placed in ctx.
+ * flags      unused.
+ * in_buf     input data delivered for this pass, or NULL if none.
+ * in_bufsz   number of bytes available at in_buf.
+ * out_bufsz  number of bytes the caller accepts as output.
+ *
+ * VHOST_SET_MEM_TABLE takes three passes: no data, then the fixed header,
+ * then header plus regions. The region count for the last pass is read from
+ * the buffer delivered with that pass and checked against in_bufsz, so no
+ * state is carried between passes and cuse_set_mem_table() is never handed a
+ * count larger than the data actually present.
+ */
+static void
+vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
+               struct fuse_file_info *fi, __rte_unused unsigned flags,
+               const void *in_buf, size_t in_bufsz, size_t out_bufsz)
+{
+       struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
+       struct vhost_vring_file file;
+       struct vhost_vring_state state;
+       struct vhost_vring_addr addr;
+       uint64_t features;
+       uint32_t index;
+       int result = 0;
+
+       switch (cmd) {
+       case VHOST_NET_SET_BACKEND:
+               LOG_DEBUG(VHOST_CONFIG,
+                       "(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
+               if (!in_buf) {
+                       VHOST_IOCTL_RETRY(sizeof(file), 0);
+                       break;
+               }
+               file = *(const struct vhost_vring_file *)in_buf;
+               result = cuse_set_backend(ctx, &file);
+               fuse_reply_ioctl(req, result, NULL, 0);
+               break;
+
+       case VHOST_GET_FEATURES:
+               LOG_DEBUG(VHOST_CONFIG,
+                       "(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
+               VHOST_IOCTL_W(uint64_t, features, vhost_get_features);
+               break;
+
+       case VHOST_SET_FEATURES:
+               LOG_DEBUG(VHOST_CONFIG,
+                       "(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
+               VHOST_IOCTL_R(uint64_t, features, vhost_set_features);
+               break;
+
+       case VHOST_RESET_OWNER:
+               LOG_DEBUG(VHOST_CONFIG,
+                       "(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
+               VHOST_IOCTL(vhost_reset_owner);
+               break;
+
+       case VHOST_SET_OWNER:
+               LOG_DEBUG(VHOST_CONFIG,
+                       "(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
+               VHOST_IOCTL(vhost_set_owner);
+               break;
+
+       case VHOST_SET_MEM_TABLE: {
+               const struct vhost_memory *mem = in_buf;
+               const size_t hdr_sz = sizeof(struct vhost_memory);
+               const size_t reg_sz = sizeof(struct vhost_memory_region);
+
+               LOG_DEBUG(VHOST_CONFIG,
+                       "(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
+
+               if (in_bufsz == 0) {
+                       /* First pass: ask for the fixed-size header. */
+                       VHOST_IOCTL_RETRY(hdr_sz, 0);
+               } else if (in_bufsz < hdr_sz) {
+                       /* Too short to even hold the region count. */
+                       result = -1;
+                       fuse_reply_err(req, EINVAL);
+               } else if (in_bufsz == hdr_sz) {
+                       /* Second pass: header only, ask for the regions. */
+                       if (mem->nregions > 0) {
+                               VHOST_IOCTL_RETRY(hdr_sz +
+                                       (reg_sz * mem->nregions), 0);
+                       } else {
+                               result = -1;
+                               fuse_reply_ioctl(req, result, NULL, 0);
+                       }
+               } else {
+                       /*
+                        * Final pass: the buffer must hold exactly the
+                        * header plus nregions regions. Dividing the payload
+                        * avoids overflow in nregions * reg_sz.
+                        */
+                       const size_t payload = in_bufsz - hdr_sz;
+
+                       if (payload % reg_sz != 0 ||
+                                       payload / reg_sz != mem->nregions) {
+                               result = -1;
+                               fuse_reply_err(req, EINVAL);
+                       } else {
+                               result = cuse_set_mem_table(ctx, in_buf,
+                                       mem->nregions);
+                               if (result)
+                                       fuse_reply_err(req, EINVAL);
+                               else
+                                       fuse_reply_ioctl(req, result, NULL, 0);
+                       }
+               }
+               break;
+       }
+
+       case VHOST_SET_VRING_NUM:
+               LOG_DEBUG(VHOST_CONFIG,
+                       "(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
+               VHOST_IOCTL_R(struct vhost_vring_state, state,
+                       vhost_set_vring_num);
+               break;
+
+       case VHOST_SET_VRING_BASE:
+               LOG_DEBUG(VHOST_CONFIG,
+                       "(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
+               VHOST_IOCTL_R(struct vhost_vring_state, state,
+                       vhost_set_vring_base);
+               break;
+
+       case VHOST_GET_VRING_BASE:
+               LOG_DEBUG(VHOST_CONFIG,
+                       "(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
+               VHOST_IOCTL_RW(uint32_t, index,
+                       struct vhost_vring_state, state, vhost_get_vring_base);
+               break;
+
+       case VHOST_SET_VRING_ADDR:
+               LOG_DEBUG(VHOST_CONFIG,
+                       "(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
+               VHOST_IOCTL_R(struct vhost_vring_addr, addr,
+                       vhost_set_vring_addr);
+               break;
+
+       case VHOST_SET_VRING_KICK:
+       case VHOST_SET_VRING_CALL:
+               if (cmd == VHOST_SET_VRING_KICK)
+                       LOG_DEBUG(VHOST_CONFIG,
+                               "(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n",
+                       ctx.fh);
+               else
+                       LOG_DEBUG(VHOST_CONFIG,
+                               "(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n",
+                       ctx.fh);
+               if (!in_buf) {
+                       VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0);
+               } else {
+                       int fd;
+                       file = *(const struct vhost_vring_file *)in_buf;
+                       LOG_DEBUG(VHOST_CONFIG,
+                               "idx:%d fd:%d\n", file.index, file.fd);
+                       /*
+                        * file.fd is a descriptor number of the calling
+                        * process (ctx.pid); eventfd_copy() returns the
+                        * descriptor to use here, or a negative value.
+                        */
+                       fd = eventfd_copy(file.fd, ctx.pid);
+                       if (fd < 0) {
+                               fuse_reply_ioctl(req, -1, NULL, 0);
+                               result = -1;
+                               break;
+                       }
+                       file.fd = fd;
+                       if (cmd == VHOST_SET_VRING_KICK) {
+                               result = vhost_set_vring_kick(ctx, &file);
+                               fuse_reply_ioctl(req, result, NULL, 0);
+                       } else {
+                               result = vhost_set_vring_call(ctx, &file);
+                               fuse_reply_ioctl(req, result, NULL, 0);
+                       }
+               }
+               break;
+
+       default:
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
+               result = -1;
+               fuse_reply_ioctl(req, result, NULL, 0);
+       }
+
+       /* A pass that only requested a retry leaves result at 0. */
+       if (result < 0)
+               LOG_DEBUG(VHOST_CONFIG,
+                       "(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
+       else
+               LOG_DEBUG(VHOST_CONFIG,
+                       "(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
+}
+
+/*
+ * CUSE callbacks implemented in this file; passed to cuse_lowlevel_setup()
+ * when the device is registered.
+ */
+static const struct cuse_lowlevel_ops vhost_net_ops = {
+       .ioctl = vhost_net_ioctl,
+       .open = vhost_net_open,
+       .release = vhost_net_release,
+};
+
+/*
+ * Register a CUSE character device called dev_name and create the FUSE
+ * session that will serve it.
+ *
+ * Steps: check that /dev/cuse is readable and writable, initialise the
+ * eventfd helper, refuse to continue if /dev/<dev_name> already exists, then
+ * fill in cuse_info and call cuse_lowlevel_setup() with vhost_net_ops. The
+ * resulting session is stored in the file-scope session pointer.
+ *
+ * dev_name  name of the device node to create under /dev; must not be NULL
+ *           and must fit, with its prefix, in PATH_MAX bytes.
+ *
+ * Returns 0 on success and -1 on any failure (invalid or over-long name,
+ * inaccessible /dev/cuse, eventfd_init() failure, device already present,
+ * or cuse_lowlevel_setup() returning NULL).
+ */
+int
+rte_vhost_driver_register(const char *dev_name)
+{
+       struct cuse_info cuse_info;
+       char device_name[PATH_MAX] = "";
+       char char_device_name[PATH_MAX] = "";
+       const char *device_argv[] = { device_name };
+       int devname_len;
+       int path_len;
+
+       char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
+       char fuse_opt_fore[] = FUSE_OPT_FORE;
+       char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
+       char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
+
+       /* Passing NULL to a "%s" conversion below would be undefined. */
+       if (dev_name == NULL) {
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "char device name must not be NULL\n");
+               return -1;
+       }
+
+       if (access(cuse_device_name, R_OK | W_OK) < 0) {
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "char device %s can't be accessed, maybe not exist\n",
+                       cuse_device_name);
+               return -1;
+       }
+
+       if (eventfd_init() < 0)
+               return -1;
+
+       /*
+        * device_name is the "DEVNAME=<name>" entry given to CUSE through
+        * cuse_info.dev_info_argv; char_device_name is the resulting node
+        * path, used only for the existence check below.
+        */
+       devname_len = snprintf(device_name, sizeof(device_name),
+                       "DEVNAME=%s", dev_name);
+       path_len = snprintf(char_device_name, sizeof(char_device_name),
+                       "/dev/%s", dev_name);
+
+       /* Reject a truncated name instead of registering a different one. */
+       if (devname_len < 0 || (size_t)devname_len >= sizeof(device_name) ||
+                       path_len < 0 ||
+                       (size_t)path_len >= sizeof(char_device_name)) {
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "char device name %s is too long\n", dev_name);
+               return -1;
+       }
+
+       /* Check if device already exists. */
+       if (access(char_device_name, F_OK) != -1) {
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "char device %s already exists\n", char_device_name);
+               return -1;
+       }
+
+       memset(&cuse_info, 0, sizeof(cuse_info));
+       cuse_info.dev_major = default_major;
+       cuse_info.dev_minor = default_minor;
+       cuse_info.dev_info_argc = 1;
+       cuse_info.dev_info_argv = device_argv;
+       cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
+
+       /* argc is derived from fuse_argv so the two cannot drift apart. */
+       session = cuse_lowlevel_setup(
+                       (int)(sizeof(fuse_argv) / sizeof(fuse_argv[0])),
+                       fuse_argv, &cuse_info, &vhost_net_ops, 0, NULL);
+       if (session == NULL)
+               return -1;
+
+       return 0;
+}
+
+/**
+ * Unregister counterpart of rte_vhost_driver_register(). This
+ * implementation ignores dev_name, performs no work and always returns 0.
+ */
+int
+rte_vhost_driver_unregister(const char *dev_name __rte_unused)
+{
+       const int status = 0;
+
+       return status;
+}
+
+/**
+ * Run the CUSE session created by rte_vhost_driver_register() so that the
+ * open, release and ioctl handlers start receiving requests. The call
+ * returns when fuse_session_loop() returns.
+ *
+ * Returns -1 without entering the loop if no session has been set up (the
+ * session pointer is still NULL), 0 otherwise. The value returned by
+ * fuse_session_loop() is not propagated.
+ */
+int
+rte_vhost_driver_session_start(void)
+{
+       /* session stays NULL until rte_vhost_driver_register() succeeds. */
+       if (session == NULL) {
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "CUSE session has not been set up\n");
+               return -1;
+       }
+
+       fuse_session_loop(session);
+
+       return 0;
+}