From 6fbef2322870bbe1768537caecc3ca06bfd70dd7 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 15 Oct 2018 12:52:30 -0700 Subject: [PATCH] vlib: support Hyper-v/Azure VMBus This patch adds support for VMBus to the VPP infrastructure. Since the only device that matters is the netvsc Poll Mode Driver in DPDK, the infrastructure is much simpler than PCI. Change-Id: Ie96c897ad9c426716c2398e4528688ce2217419b Signed-off-by: Stephen Hemminger --- Makefile | 5 +- src/plugins/dpdk/device/init.c | 32 ++++ src/vlib/CMakeLists.txt | 21 ++- src/vlib/linux/vmbus.c | 405 +++++++++++++++++++++++++++++++++++++++++ src/vlib/vmbus/vmbus.c | 48 +++++ src/vlib/vmbus/vmbus.h | 45 +++++ 6 files changed, 553 insertions(+), 3 deletions(-) create mode 100644 src/vlib/linux/vmbus.c create mode 100644 src/vlib/vmbus/vmbus.c create mode 100644 src/vlib/vmbus/vmbus.h diff --git a/Makefile b/Makefile index 6bc871c39a8..9868d9ae0d3 100644 --- a/Makefile +++ b/Makefile @@ -66,7 +66,7 @@ DEB_DEPENDS += libconfuse-dev git-review exuberant-ctags cscope pkg-config DEB_DEPENDS += lcov chrpath autoconf indent clang-format libnuma-dev DEB_DEPENDS += python-all python-dev python-virtualenv python-pip libffi6 check DEB_DEPENDS += libboost-all-dev libffi-dev python-ply libmbedtls-dev -DEB_DEPENDS += cmake ninja-build +DEB_DEPENDS += cmake ninja-build uuid-dev ifeq ($(OS_VERSION_ID),14.04) DEB_DEPENDS += openjdk-8-jdk-headless DEB_DEPENDS += libssl-dev @@ -89,6 +89,7 @@ RPM_DEPENDS += check check-devel RPM_DEPENDS += boost boost-devel RPM_DEPENDS += selinux-policy selinux-policy-devel RPM_DEPENDS += ninja-build +RPM_DEPENDS += libuuid-devel ifeq ($(OS_ID),fedora) RPM_DEPENDS += dnf-utils @@ -119,7 +120,7 @@ RPM_SUSE_BUILDTOOLS_DEPS = autoconf automake ccache check-devel chrpath RPM_SUSE_BUILDTOOLS_DEPS += clang cmake indent libtool make ninja python-ply RPM_SUSE_DEVEL_DEPS = glibc-devel-static java-1_8_0-openjdk-devel libnuma-devel -RPM_SUSE_DEVEL_DEPS += libopenssl-devel openssl-devel 
mbedtls-devel
+RPM_SUSE_DEVEL_DEPS += libopenssl-devel openssl-devel mbedtls-devel libuuid-devel
 
 RPM_SUSE_PYTHON_DEPS = python-devel python3-devel python-pip python3-pip
 RPM_SUSE_PYTHON_DEPS += python-rpm-macros python3-rpm-macros
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
index c0a927acae5..c5c98ab3359 100644
--- a/src/plugins/dpdk/device/init.c
+++ b/src/plugins/dpdk/device/init.c
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include
 
 #include
 
@@ -973,6 +974,35 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
   vlib_pci_free_device_info (d);
 }
 
+/*
+ * Try to bind every VMBus device address returned by
+ * vlib_vmbus_get_all_dev_addrs () to the uio driver.  A failure on one
+ * device is reported and the loop moves on to the next device.
+ */
+static void
+dpdk_bind_vmbus_devices_to_uio (dpdk_config_main_t * conf)
+{
+  clib_error_t *error;
+  vlib_vmbus_addr_t *addrs, *addr = 0;
+
+  addrs = vlib_vmbus_get_all_dev_addrs ();
+
+  /* *INDENT-OFF* */
+  vec_foreach (addr, addrs)
+    {
+      error = vlib_vmbus_bind_to_uio (addr);
+
+      if (error)
+        {
+          clib_error_report (error);
+        }
+    }
+  /* *INDENT-ON* */
+
+  /* the scan hands back a freshly built vector; release it here */
+  vec_free (addrs);
+}
+
 static clib_error_t *
 dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr,
                     unformat_input_t * input, u8 is_default)
@@ -1121,6 +1151,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
   int ret, i;
   int num_whitelisted = 0;
   u8 no_pci = 0;
+  u8 no_vmbus = 0;
   u8 no_huge = 0;
   u8 huge_dir = 0;
   u8 file_prefix = 0;
@@ -1214,6 +1245,12 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
           vec_add1 (conf->blacklist_by_pci_vendor_and_device,
                     blacklist_entry);
         }
+      else if (unformat (input, "no-vmbus"))
+        {
+          no_vmbus = 1;
+          tmp = format (0, "--no-vmbus%c", 0);
+          vec_add1 (conf->eal_init_args, tmp);
+        }
 
 #define _(a) \
       else if (unformat(input, #a)) \
@@ -1417,6 +1454,9 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
   if (no_pci == 0 && geteuid () == 0)
     dpdk_bind_devices_to_uio (conf);
 
+  if (no_vmbus == 0 && geteuid () == 0)
+    dpdk_bind_vmbus_devices_to_uio (conf);
+
 #define _(x) \
     if (devconf->x == 0 && conf->default_devconf.x > 0) \
       devconf->x = conf->default_devconf.x ;
diff --git
a/src/vlib/CMakeLists.txt b/src/vlib/CMakeLists.txt
index 2a6cbd54da2..2209923547a 100644
--- a/src/vlib/CMakeLists.txt
+++ b/src/vlib/CMakeLists.txt
@@ -25,6 +25,22 @@ install(
   COMPONENT vpp-dev
 )
 
+##############################################################################
+# Find lib and include files
+##############################################################################
+message(STATUS "Looking for libuuid")
+find_path(UUID_INCLUDE_DIR NAMES uuid/uuid.h)
+find_library(UUID_LIB NAMES uuid)
+
+if(UUID_INCLUDE_DIR AND UUID_LIB)
+  include_directories(${UUID_INCLUDE_DIR})
+  set(VMBUS_SOURCE linux/vmbus.c)
+  set(VMBUS_LIBS uuid)
+  message(STATUS "Found uuid in ${UUID_INCLUDE_DIR}")
+else()
+  message(WARNING "-- libuuid not found - vmbus support disabled")
+endif()
+
 ##############################################################################
 # vlib shared library
 ##############################################################################
@@ -56,6 +72,8 @@ add_vpp_library(vlib
   unix/main.c
   unix/plugin.c
   unix/util.c
+  vmbus/vmbus.c
+  ${VMBUS_SOURCE}
 
   INSTALL_HEADERS
   buffer_funcs.h
@@ -89,6 +107,7 @@ add_vpp_library(vlib
   unix/plugin.h
   unix/unix.h
   vlib.h
+  vmbus/vmbus.h
 
-  LINK_LIBRARIES vppinfra svm ${CMAKE_DL_LIBS}
+  LINK_LIBRARIES vppinfra svm ${VMBUS_LIBS} ${CMAKE_DL_LIBS}
 )
diff --git a/src/vlib/linux/vmbus.c b/src/vlib/linux/vmbus.c
new file mode 100644
index 00000000000..2af62241d4d
--- /dev/null
+++ b/src/vlib/linux/vmbus.c
@@ -0,0 +1,437 @@
+/*
+ * Copyright (c) 2018, Microsoft Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * vmbus.c: Linux user space VMBus bus management.
+ */
+
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+static const char sysfs_vmbus_dev_path[] = "/sys/bus/vmbus/devices";
+static const char sysfs_vmbus_drv_path[] = "/sys/bus/vmbus/drivers";
+static const char sysfs_class_net_path[] = "/sys/class/net";
+static const char uio_drv_name[] = "uio_hv_generic";
+static const char netvsc_uuid[] = "f8615163-df3e-46c5-913f-f2d2f965ed0e";
+
+typedef struct
+{
+  int fd;
+  void *addr;
+  size_t size;
+} linux_vmbus_region_t;
+
+typedef struct
+{
+  int fd;
+  u32 clib_file_index;
+} linux_vmbus_irq_t;
+
+typedef struct
+{
+  vlib_vmbus_dev_handle_t handle;
+  vlib_vmbus_addr_t addr;
+
+  /* Device File descriptor */
+  int fd;
+
+  /* Minor device for uio device. */
+  u32 uio_minor;
+
+  /* private data */
+  uword private_data;
+
+} linux_vmbus_device_t;
+
+/* Pool of VMBUS devices. */
+typedef struct
+{
+  vlib_main_t *vlib_main;
+  linux_vmbus_device_t *linux_vmbus_devices;
+
+} linux_vmbus_main_t;
+
+linux_vmbus_main_t linux_vmbus_main;
+
+/* Return the device pool element that handle h indexes. */
+static linux_vmbus_device_t *
+linux_vmbus_get_device (vlib_vmbus_dev_handle_t h)
+{
+  linux_vmbus_main_t *lpm = &linux_vmbus_main;
+  return pool_elt_at_index (lpm->linux_vmbus_devices, h);
+}
+
+/* Return the opaque per-device word stored by the set call below. */
+uword
+vlib_vmbus_get_private_data (vlib_vmbus_dev_handle_t h)
+{
+  linux_vmbus_device_t *d = linux_vmbus_get_device (h);
+  return d->private_data;
+}
+
+/* Store an opaque per-device word on the device with handle h. */
+void
+vlib_vmbus_set_private_data (vlib_vmbus_dev_handle_t h, uword private_data)
+{
+  linux_vmbus_device_t *d = linux_vmbus_get_device (h);
+  d->private_data = private_data;
+}
+
+/* Return a pointer to the bus address stored in the device entry. */
+vlib_vmbus_addr_t *
+vlib_vmbus_get_addr (vlib_vmbus_dev_handle_t h)
+{
+  linux_vmbus_device_t *d = linux_vmbus_get_device (h);
+  return &d->addr;
+}
+
+/* Call to allocate/initialize the vmbus subsystem.
+   This is not an init function so that users can explicitly enable
+   vmbus only when it's needed. */
+clib_error_t *vmbus_bus_init (vlib_main_t * vm);
+
+linux_vmbus_main_t linux_vmbus_main;
+
+/*
+ * Take VMBus address represented in standard form like:
+ * "f2c086b2-ff2e-11e8-88de-7bad0a57de05" and convert
+ * it to u8[16].  Returns 1 on success and 0 otherwise.
+ */
+static uword
+unformat_vlib_vmbus_addr (unformat_input_t * input, va_list * args)
+{
+  vlib_vmbus_addr_t *addr = va_arg (*args, vlib_vmbus_addr_t *);
+  uword ret = 0;
+  u8 *s;
+
+  if (!unformat (input, "%s", &s))
+    return 0;
+
+  if (uuid_parse ((char *) s, addr->guid) == 0)
+    ret = 1;
+
+  vec_free (s);
+
+  return ret;
+}
+
+/* Convert bus address to standard UUID string */
+static u8 *
+format_vlib_vmbus_addr (u8 * s, va_list * va)
+{
+  vlib_vmbus_addr_t *addr = va_arg (*va, vlib_vmbus_addr_t *);
+  char tmp[40];
+
+  uuid_unparse (addr->guid, tmp);
+  return format (s, "%s", tmp);
+}
+
+/*
+ * workaround for mlx bug, bring lower device up before unbind
+ *
+ * Scans /sys/class/net/<upper_name> for a "lower_<ifname>" entry and,
+ * when that interface is not already up, sets IFF_UP on it using the
+ * socket fd.  Returns 0 when there is no lower device.
+ */
+static clib_error_t *
+vlib_vmbus_raise_lower (int fd, const char *upper_name)
+{
+  clib_error_t *error = 0;
+  struct dirent *e;
+  struct ifreq ifr;
+  u8 *dev_net_dir;
+  DIR *dir;
+
+  memset (&ifr, 0, sizeof (ifr));
+
+  dev_net_dir = format (0, "%s/%s%c", sysfs_class_net_path, upper_name, 0);
+
+  dir = opendir ((char *) dev_net_dir);
+
+  if (!dir)
+    {
+      error = clib_error_return (0, "VMBUS failed to open %s", dev_net_dir);
+      goto done;
+    }
+
+  while ((e = readdir (dir)))
+    {
+      /* look for lower_enXXXX */
+      if (strncmp (e->d_name, "lower_", 6))
+        continue;
+
+      /* ifr was zeroed above; copying one byte less keeps it terminated */
+      strncpy (ifr.ifr_name, e->d_name + 6, IFNAMSIZ - 1);
+      break;
+    }
+  closedir (dir);
+
+  if (!e)
+    goto done;                  /* no lower device */
+
+  if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0)
+    error = clib_error_return_unix (0, "ioctl fetch intf %s flags",
+                                    ifr.ifr_name);
+  else if (!(ifr.ifr_flags & IFF_UP))
+    {
+      ifr.ifr_flags |= IFF_UP;
+
+      if (ioctl (fd, SIOCSIFFLAGS, &ifr) < 0)
+        error = clib_error_return_unix (0, "ioctl set intf %s flags",
+                                        ifr.ifr_name);
+    }
+done:
+  vec_free (dev_net_dir);
+  return error;
+}
+
+/*
+ * Move one VMBus device from the kernel hv_netvsc driver to
+ * uio_hv_generic.
+ *
+ * Devices not bound to hv_netvsc are skipped without error.  A device
+ * whose kernel network interface is up is left untouched and reported
+ * as an error.  Returns 0 on success or skip.
+ */
+clib_error_t *
+vlib_vmbus_bind_to_uio (vlib_vmbus_addr_t * addr)
+{
+  clib_error_t *error = 0;
+  u8 *dev_dir_name;
+  char *ifname = 0;
+  static int uio_new_id_needed = 1;
+  struct dirent *e;
+  struct ifreq ifr;
+  u8 *s, *driver_name;
+  DIR *dir;
+  int fd;
+
+  dev_dir_name = format (0, "%s/%U", sysfs_vmbus_dev_path,
+                         format_vlib_vmbus_addr, addr);
+  s = format (0, "%v/driver%c", dev_dir_name, 0);
+
+  driver_name = clib_sysfs_link_to_name ((char *) s);
+  vec_reset_length (s);
+
+  /* skip if not using the Linux kernel netvsc driver */
+  if (!driver_name || strcmp ("hv_netvsc", (char *) driver_name) != 0)
+    goto done;
+
+  s = format (s, "%v/net%c", dev_dir_name, 0);
+  dir = opendir ((char *) s);
+  if (!dir)
+    {
+      /* leave through done so the path vectors are released */
+      error = clib_error_return (0, "VMBUS failed to open %s", s);
+      goto done;
+    }
+  vec_reset_length (s);
+
+  while ((e = readdir (dir)))
+    {
+      if (e->d_name[0] == '.')  /* skip . and .. */
+        continue;
+
+      ifname = strdup (e->d_name);
+      break;
+    }
+  closedir (dir);
+
+  if (!ifname)
+    {
+      error = clib_error_return (0,
+                                 "VMBUS device %U eth not found",
+                                 format_vlib_vmbus_addr, addr);
+      goto done;
+    }
+
+
+  memset (&ifr, 0, sizeof (ifr));
+  strncpy (ifr.ifr_name, ifname, IFNAMSIZ - 1);
+
+  /* read up/down flags */
+  fd = socket (PF_INET, SOCK_DGRAM, 0);
+  if (fd < 0)
+    {
+      error = clib_error_return_unix (0, "socket");
+      goto done;
+    }
+
+  if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0)
+    {
+      error = clib_error_return_unix (0, "ioctl fetch intf %s flags",
+                                      ifr.ifr_name);
+      close (fd);
+      goto done;
+    }
+
+  if (ifr.ifr_flags & IFF_UP)
+    {
+      /* use our copy: the dirent is invalid once the directory is closed */
+      error = clib_error_return (0,
+                                 "Skipping VMBUS device %U as host interface %s is up",
+                                 format_vlib_vmbus_addr, addr, ifname);
+      close (fd);
+      goto done;
+    }
+
+  error = vlib_vmbus_raise_lower (fd, ifname);
+  close (fd);
+
+  if (error)
+    goto done;
+
+
+  /* tell uio_hv_generic about netvsc device type */
+  if (uio_new_id_needed)
+    {
+      uio_new_id_needed = 0;
+
+      vec_reset_length (s);
+      s = format (s, "%s/%s/new_id%c", sysfs_vmbus_drv_path, uio_drv_name, 0);
+      error = clib_sysfs_write ((char *) s, "%s", netvsc_uuid);
+
+      if (error)
+        goto done;
+
+    }
+
+  /* prefer the simpler driver_override model */
+  vec_reset_length (s);
+  s = format (s, "%v/driver_override%c", dev_dir_name, 0);
+  if (access ((char *) s, F_OK) == 0)
+    {
+      error = clib_sysfs_write ((char *) s, "%s", uio_drv_name);
+    }
+  else
+    {
+      vec_reset_length (s);
+
+      s = format (s, "%v/driver/unbind%c", dev_dir_name, 0);
+      error =
+        clib_sysfs_write ((char *) s, "%U", format_vlib_vmbus_addr, addr);
+
+      if (error)
+        goto done;
+
+      vec_reset_length (s);
+
+      s = format (s, "%s/%s/bind%c", sysfs_vmbus_drv_path, uio_drv_name, 0);
+      error =
+        clib_sysfs_write ((char *) s, "%U", format_vlib_vmbus_addr, addr);
+    }
+  vec_reset_length (s);
+
+done:
+  free (ifname);
+  vec_free (s);
+  vec_free (dev_dir_name);
+  vec_free (driver_name);
+  return error;
+}
+
+/*
+ * foreach_directory_file callback: parse the device UUID out of the
+ * sysfs path and append it to the address vector passed through arg.
+ */
+static clib_error_t *
+scan_vmbus_addr (void *arg, u8 * dev_dir_name, u8 * ignored)
+{
+  vlib_vmbus_addr_t addr, **addrv = arg;
+  unformat_input_t input;
+  clib_error_t *err = 0;
+
+  unformat_init_string (&input, (char *) dev_dir_name,
+                        vec_len (dev_dir_name));
+
+  if (!unformat (&input, "/sys/bus/vmbus/devices/%U",
+                 unformat_vlib_vmbus_addr, &addr))
+    err = clib_error_return (0, "unformat error `%v`", dev_dir_name);
+
+  unformat_free (&input);
+
+  if (err)
+    return err;
+
+  vec_add1 (*addrv, addr);
+  return 0;
+}
+
+/* vec_sort_with_function comparator: order addresses by GUID. */
+static int
+vmbus_addr_cmp (void *v1, void *v2)
+{
+  vlib_vmbus_addr_t *a1 = v1;
+  vlib_vmbus_addr_t *a2 = v2;
+
+  return uuid_compare (a1->guid, a2->guid);
+}
+
+/*
+ * Build a vector of every device address under /sys/bus/vmbus/devices,
+ * sorted by UUID.  Returns 0 when the scan reports an error; otherwise
+ * the caller releases the vector with vec_free.
+ */
+vlib_vmbus_addr_t *
+vlib_vmbus_get_all_dev_addrs (void)
+{
+  vlib_vmbus_addr_t *addrs = 0;
+  clib_error_t *err;
+
+  err =
+    foreach_directory_file ((char *) sysfs_vmbus_dev_path, scan_vmbus_addr,
+                            &addrs, /* scan_dirs */ 0);
+  if (err)
+    {
+      clib_error_free (err);
+      vec_free (addrs);
+      return 0;
+    }
+
+  vec_sort_with_function (addrs, vmbus_addr_cmp);
+
+  return addrs;
+}
+
+/* Record the vlib main pointer and chain to unix_input_init. */
+clib_error_t *
+linux_vmbus_init (vlib_main_t * vm)
+{
+  linux_vmbus_main_t *pm = &linux_vmbus_main;
+
+  pm->vlib_main = vm;
+
+  return vlib_call_init_function (vm, unix_input_init);
+}
+
+VLIB_INIT_FUNCTION (linux_vmbus_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/vmbus/vmbus.c b/src/vlib/vmbus/vmbus.c
new file mode 100644
index 00000000000..ea395ece6a8
--- /dev/null
+++ b/src/vlib/vmbus/vmbus.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018, Microsoft Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* this is a stub replaced by the Linux specific version */
+vlib_vmbus_addr_t * __attribute__ ((weak)) vlib_vmbus_get_all_dev_addrs (void)
+{
+  return NULL;
+}
+
+/*
+ * Stub used when linux/vmbus.c is not built (libuuid missing), so that
+ * callers of the bind API still link.
+ */
+clib_error_t * __attribute__ ((weak))
+vlib_vmbus_bind_to_uio (vlib_vmbus_addr_t * addr)
+{
+  return clib_error_return (0, "VMBUS support not compiled in");
+}
+
+
+/*
+ * Init hook for the generic VMBus layer.
+ * NOTE(review): passes itself to vlib_call_init_function - confirm intent.
+ */
+clib_error_t *
+vmbus_bus_init (vlib_main_t * vm)
+{
+  return vlib_call_init_function (vm, vmbus_bus_init);
+}
+
+VLIB_INIT_FUNCTION (vmbus_bus_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/vmbus/vmbus.h b/src/vlib/vmbus/vmbus.h
new file mode 100644
index 00000000000..0927b8990d6
--- /dev/null
+++ b/src/vlib/vmbus/vmbus.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018, Microsoft Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * vmbus.h: VMBus definitions.
+ */
+
+#ifndef included_vlib_vmbus_h
+#define included_vlib_vmbus_h
+
+#include
+
+/* VMBus device address: the 16 byte device GUID */
+typedef struct
+{
+  u8 guid[16];
+} vlib_vmbus_addr_t;
+/* Index of a device in the bus driver's device pool */
+typedef u32 vlib_vmbus_dev_handle_t;
+
+/* Returns a vector of the VMBus device addresses found, or 0 */
+vlib_vmbus_addr_t *vlib_vmbus_get_all_dev_addrs (void);
+/* Per-device accessors, keyed by device handle */
+vlib_vmbus_addr_t *vlib_vmbus_get_addr (vlib_vmbus_dev_handle_t h);
+uword vlib_vmbus_get_private_data (vlib_vmbus_dev_handle_t h);
+void vlib_vmbus_set_private_data (vlib_vmbus_dev_handle_t h,
+                                  uword private_data);
+
+/* Rebind the device at addr from the kernel netvsc driver to the uio driver */
+clib_error_t *vlib_vmbus_bind_to_uio (vlib_vmbus_addr_t * addr);
+#endif /* included_vlib_vmbus_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
--
2.16.6