From 44db1caefbf5067b0cf0073299c9f21265331412 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Thu, 24 Dec 2020 09:16:09 +0000 Subject: [PATCH] linux-cp: Linux Interface Mirroring for Control Plane Integration Type: feature please see FEATURE.yaml for details. Signed-off-by: Neale Ranns Signed-off-by: Matthew Smith Signed-off-by: Jon Loeliger Signed-off-by: Pim van Pelt Change-Id: I04a45c15c0838906aa787e06660fa29f39f755fa --- MAINTAINERS | 6 + Makefile | 2 + src/plugins/linux-cp/CMakeLists.txt | 61 ++ src/plugins/linux-cp/FEATURE.yaml | 25 + src/plugins/linux-cp/lcp.api | 166 +++++ src/plugins/linux-cp/lcp.c | 85 +++ src/plugins/linux-cp/lcp.h | 49 ++ src/plugins/linux-cp/lcp.rst | 96 +++ src/plugins/linux-cp/lcp_adj.c | 151 +++++ src/plugins/linux-cp/lcp_adj.h | 84 +++ src/plugins/linux-cp/lcp_api.c | 232 +++++++ src/plugins/linux-cp/lcp_cli.c | 236 +++++++ src/plugins/linux-cp/lcp_interface.c | 1016 ++++++++++++++++++++++++++++ src/plugins/linux-cp/lcp_interface.h | 160 +++++ src/plugins/linux-cp/lcp_node.c | 919 +++++++++++++++++++++++++ src/plugins/linux-cp/test/lcp_unittest.c | 101 +++ src/plugins/linux-cp/test/test_linux_cp.py | 174 +++++ src/vlibapi/api_helper_macros.h | 66 +- src/vnet/interface_funcs.h | 8 + src/vnet/l2/l2_input.h | 6 +- 20 files changed, 3614 insertions(+), 29 deletions(-) create mode 100644 src/plugins/linux-cp/CMakeLists.txt create mode 100644 src/plugins/linux-cp/FEATURE.yaml create mode 100644 src/plugins/linux-cp/lcp.api create mode 100644 src/plugins/linux-cp/lcp.c create mode 100644 src/plugins/linux-cp/lcp.h create mode 100644 src/plugins/linux-cp/lcp.rst create mode 100644 src/plugins/linux-cp/lcp_adj.c create mode 100644 src/plugins/linux-cp/lcp_adj.h create mode 100644 src/plugins/linux-cp/lcp_api.c create mode 100644 src/plugins/linux-cp/lcp_cli.c create mode 100644 src/plugins/linux-cp/lcp_interface.c create mode 100644 src/plugins/linux-cp/lcp_interface.h create mode 100644 src/plugins/linux-cp/lcp_node.c create mode 100644 
src/plugins/linux-cp/test/lcp_unittest.c create mode 100644 src/plugins/linux-cp/test/test_linux_cp.py diff --git a/MAINTAINERS b/MAINTAINERS index 3231cf8222c..b32f1eeaaa6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -714,6 +714,12 @@ I: geneve M: community vpp-dev@lists.fd.io F: src/plugins/geneve/ +Plugin - linux-cp +I: linux-cp +M: neale@graphiant.com +M: Matthew Smith +F: src/plugins/linux-cp/ + THE REST I: misc C: Contact vpp-dev Mailing List diff --git a/Makefile b/Makefile index 1ae1684e891..7ecc2c69ed1 100644 --- a/Makefile +++ b/Makefile @@ -71,6 +71,7 @@ DEB_DEPENDS += libboost-all-dev libffi-dev python3-ply libmbedtls-dev DEB_DEPENDS += cmake ninja-build uuid-dev python3-jsonschema python3-yaml DEB_DEPENDS += python3-venv # ensurepip DEB_DEPENDS += python3-dev # needed for python3 -m pip install psutil +DEB_DEPENDS += libnl-3-dev libnl-route-3-dev # python3.6 on 16.04 requires python36-dev LIBFFI=libffi6 # works on all but 20.04 and debian-testing @@ -114,6 +115,7 @@ RPM_DEPENDS += mbedtls-devel RPM_DEPENDS += ccache RPM_DEPENDS += xmlto RPM_DEPENDS += elfutils-libelf-devel +RPM_DEPENDS += libnl3-devel ifeq ($(OS_ID),fedora) RPM_DEPENDS += dnf-utils diff --git a/src/plugins/linux-cp/CMakeLists.txt b/src/plugins/linux-cp/CMakeLists.txt new file mode 100644 index 00000000000..6b6ccb3d0b1 --- /dev/null +++ b/src/plugins/linux-cp/CMakeLists.txt @@ -0,0 +1,61 @@ +# Copyright (c) 2020 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +find_path(LIBNL3_INCLUDE_DIR NAMES libnl3/netlink/route/link/vlan.h) + +if (NOT LIBNL3_INCLUDE_DIR) + message(WARNING "-- libnl3 headers not found - linux-cp plugin disabled") + return() +endif() + +vpp_plugin_find_library(linux-cp LIBNL3_LIB libnl-3.so) +vpp_plugin_find_library(linux-cp LIBNL3_ROUTE_LIB libnl-route-3.so.200) + +include_directories(${LIBNL3_INCLUDE_DIR}/libnl3) +include_directories(${LIBMNL_INCLUDE_DIR}) + +add_vpp_library(lcp + SOURCES + lcp_interface.c + lcp_adj.c + lcp.c + + LINK_LIBRARIES + ${LIBNL3_LIB} + ${LIBNL3_ROUTE_LIB} + + INSTALL_HEADERS + lcp_interface.h + lcp.h +) + +add_vpp_plugin(linux_cp + SOURCES + lcp_api.c + lcp_cli.c + lcp_node.c + + API_FILES + lcp.api + + LINK_LIBRARIES + lcp +) + +add_vpp_plugin(linux_cp_unittest + SOURCES + test/lcp_unittest.c + + LINK_LIBRARIES + lcp +) diff --git a/src/plugins/linux-cp/FEATURE.yaml b/src/plugins/linux-cp/FEATURE.yaml new file mode 100644 index 00000000000..088b0606f58 --- /dev/null +++ b/src/plugins/linux-cp/FEATURE.yaml @@ -0,0 +1,25 @@ +--- +name: Linux Control Plane (integration) +maintainer: Neale Ranns + +description: |- + This plugin provides the beginnings of an integration with the + Linux network stack. + The plugin provides the capability to 'mirror' VPP interfaces in + the Linux kernel. This means that for any interface in VPP the user + can create a corresponding TAP or TUN device in the Linux kernel + and have VPP plumb them together. + The plumbing mechanics is different in each direction. + In the RX direction, all packets received on a given VPP interface + that are punted (i.e. are not dropped or forwarded) are transmitted + on its mirror interface (this includes for example ARP, ND etc, + so the recommendation is to disable ARP, ND, ping plugin). + In the TX direction, packets received by VPP on the mirror Tap/Tun + are cross-connected to the VPP interfaces. For IP packets, IP output + features are applied. 
+ This is the beginnings of integration, because there needs to be + an external agent that will configure (and synchronize) the IP + configuration of the paired interfaces. + +state: experimental +properties: [API, CLI, MULTITHREAD] diff --git a/src/plugins/linux-cp/lcp.api b/src/plugins/linux-cp/lcp.api new file mode 100644 index 00000000000..49fdeddf886 --- /dev/null +++ b/src/plugins/linux-cp/lcp.api @@ -0,0 +1,166 @@ +/* Hey Emacs use -*- mode: C -*- */ +/* + * Linux Control Plane API + * + * Copyright 2020 Rubicon Communications, LLC. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +option version = "1.0.0"; + +import "vnet/interface_types.api"; + +/** \brief Set the default Linux Control Plane namespace + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param namespace - the new default namespace; namespace[0] == 0 iff none +*/ +autoreply define lcp_default_ns_set +{ + u32 client_index; + u32 context; + string namespace[32]; /* LCP_NS_LEN */ +}; + +/** \brief get the default Linux Control Plane namespace + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define lcp_default_ns_get +{ + u32 client_index; + u32 context; +}; + +/** \brief get the default Linux Control Plane namespace + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param namespace - the default namespace; namespace[0] == 0 iff none +*/ +define lcp_default_ns_get_reply +{ + u32 context; + string namespace[32]; /* LCP_NS_LEN */ +}; + +enum lcp_itf_host_type : u8 +{ + LCP_API_ITF_HOST_TAP = 0, + LCP_API_ITF_HOST_TUN = 1, +}; + +/** \brief Add or delete a Linux Control Plane interface pair + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - 0 if deleting, != 0 if adding + @param sw_if_index - index of VPP PHY SW interface + @param host_if_name - host tap interface name + @param host_if_type - the type of host interface to create (tun, tap) + @param namespace - optional tap namespace; namespace[0] == 0 iff none +*/ +autoreply autoendian define lcp_itf_pair_add_del +{ + u32 client_index; + u32 context; + bool is_add; + vl_api_interface_index_t sw_if_index; + string host_if_name[16]; /* IFNAMSIZ */ + vl_api_lcp_itf_host_type_t host_if_type; + string namespace[32]; /* LCP_NS_LEN */ +}; + +/** \brief Dump Linux Control Plane interface pair data + @param client_index - 
opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface to use as filter (~0 == "all") +*/ +autoendian define lcp_itf_pair_get +{ + u32 client_index; + u32 context; + u32 cursor; +}; +autoendian define lcp_itf_pair_get_reply +{ + u32 context; + i32 retval; + u32 cursor; +}; + +/** \brief Linux Control Plane interface pair dump response + @param context - sender context which was passed in the request + @param phy_sw_if_index - VPP's sw_if_index for the PHY + @param host_sw_if_index - VPP's sw_if_index for the host tap + @param vif_index - tap linux index + @param host_if_name - host interface name + @param host_if_type - host interface type (tun, tap) + @param namespace - host interface namespace +*/ +autoendian define lcp_itf_pair_details +{ + u32 context; + vl_api_interface_index_t phy_sw_if_index; + vl_api_interface_index_t host_sw_if_index; + u32 vif_index; + string host_if_name[16]; /* IFNAMSIZ */ + vl_api_lcp_itf_host_type_t host_if_type; + string namespace[32]; /* LCP_NS_LEN */ +}; + +service { + rpc lcp_itf_pair_get returns lcp_itf_pair_get_reply + stream lcp_itf_pair_details; +}; + +/** \brief Replace end/begin + */ +autoreply define lcp_itf_pair_replace_begin +{ + u32 client_index; + u32 context; +}; +autoreply define lcp_itf_pair_replace_end +{ + u32 client_index; + u32 context; +}; + +/* + * Linux-CP Error counters/messages + */ +counters linuxcp { + packets { + severity info; + type counter64; + units "packets"; + description "ARP packets processed"; + }; + copies { + severity info; + type counter64; + units "packets"; + description "ARP replies copied to host"; + }; +}; + +paths { + "/err/linux-cp-arp-phy" "linuxcp"; + "/err/linux-cp-arp-host" "linuxcp"; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/linux-cp/lcp.c b/src/plugins/linux-cp/lcp.c new file mode 100644 index 00000000000..f4c491c9cb3 --- /dev/null +++ 
b/src/plugins/linux-cp/lcp.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include + +lcp_main_t lcp_main; + +u8 * +lcp_get_default_ns (void) +{ + lcp_main_t *lcpm = &lcp_main; + + if (lcpm->default_namespace[0] == 0) + return 0; + return lcpm->default_namespace; +} + +int +lcp_get_default_ns_fd (void) +{ + lcp_main_t *lcpm = &lcp_main; + + return lcpm->default_ns_fd; +} + +/* + * ns is expected to be or look like a NUL-terminated C string. 
+ */ +int +lcp_set_default_ns (u8 *ns) +{ + lcp_main_t *lcpm = &lcp_main; + char *p; + int len; + u8 *s; + + p = (char *) ns; + + /* NULL or empty name: forget the default namespace and release its fd. + * Tested first so that a NULL pointer is never handed to clib_strnlen. */ + if (!p || *p == 0) + { + clib_memset (lcpm->default_namespace, 0, + sizeof (lcpm->default_namespace)); + if (lcpm->default_ns_fd > 0) + close (lcpm->default_ns_fd); + lcpm->default_ns_fd = 0; + return 0; + } + + /* the name plus its terminator must fit in LCP_NS_LEN bytes */ + len = clib_strnlen (p, LCP_NS_LEN); + if (len >= LCP_NS_LEN) + return -1; + + clib_strncpy ((char *) lcpm->default_namespace, p, LCP_NS_LEN - 1); + + /* close the descriptor of the previous default namespace, if any, + * before it is overwritten below; otherwise it would be leaked */ + if (lcpm->default_ns_fd > 0) + close (lcpm->default_ns_fd); + + s = format (0, "/var/run/netns/%s%c", (char *) lcpm->default_namespace, 0); + lcpm->default_ns_fd = open ((char *) s, O_RDONLY); + vec_free (s); + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/linux-cp/lcp.h b/src/plugins/linux-cp/lcp.h new file mode 100644 index 00000000000..7fdad3798bc --- /dev/null +++ b/src/plugins/linux-cp/lcp.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __LCP_H__ +#define __LCP_H__ + +#include + +#define LCP_NS_LEN 32 + +typedef struct lcp_main_s +{ + u16 msg_id_base; /* API message ID base */ + u8 default_namespace[LCP_NS_LEN]; /* default namespace if set */ + int default_ns_fd; + u8 auto_intf; + /* Set when Unit testing */ + u8 test_mode; +} lcp_main_t; + +extern lcp_main_t lcp_main; + +/** + * Get/Set the default namespace for LCP host taps. 
+ */ +int lcp_set_default_ns (u8 *ns); +u8 *lcp_get_default_ns (void); /* Returns NULL or shared string */ +int lcp_get_default_ns_fd (void); + +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/linux-cp/lcp.rst b/src/plugins/linux-cp/lcp.rst new file mode 100644 index 00000000000..6d81901cf7b --- /dev/null +++ b/src/plugins/linux-cp/lcp.rst @@ -0,0 +1,96 @@ +.. _Linux_control_plane: + +.. toctree:: + +Linux Control Plane Integration +=============================== + +Overview +________ + +This plugin allows VPP to integrate with Linux. The +general model is that Linux is the network stack, i.e. it has the +control plane protocols, like ARP, IPv6 ND/MLD, Ping, etc, and VPP +provides a SW based ASIC for forwarding. + +Interfaces +__________ + +VPP owns the interfaces in the system; physical (e.g. PCI), quasi +physical (e.g. vhost), or virtual (e.g. tunnel). However, +for the Linux networking stack to function it needs a representation +of these interfaces; it needs a mirror image in the kernel. For this +mirror we use a Tap interface if the VPP interface is multi-point, or a +Tun if it's point-to-point. A physical and its mirror form an +interface 'pair'. + +The host interface has two identities; the sw_if_index of the Tap and +the virtual interface index in the kernel. It may be in a Linux namespace. + +The creation of the interface pairs is required from the control +plane. It can be statically configured in the VPP startup +configuration file. The intent here was to make the pair creation +explicit, rather than have VPP guess which of the interfaces it owns +require a mirror. + +Configuration +_____________ + +Linux will send and receive packets on the mirrored tap/tun +interfaces. Any configuration that is made on these Linux interfaces +also needs to be applied on the corresponding physical interface in +VPP. 
+ +This functionality is not provided in this plugin, but it can be +achieved in various ways, for example by listening to the netlink +messages and applying the config. As a result, e.g. all routes +programmed in Linux will also be present in VPP's FIB. + +Linux will own the [ARP/ND] neighbor tables (which will be copied via +netlink to VPP also). This means that Linux will send packets with the +peer's MAC address in the rewrite to VPP. The receiving TAP interface +must therefore be in promiscuous mode. + + +Forwarding +__________ + +The basic principle is to x-connect traffic from a Linux host interface +(received on the Tap/Tun) to its paired physical, and vice-versa. + +Host to Physical +^^^^^^^^^^^^^^^^ + +All packets sent by the host, and received by VPP on a Tap/Tun should +be sent to its paired physical interface. However, they should be sent +with the same consequences as if they had originated from VPP, +i.e. they should be subject to all output features on the physical +interface. To achieve this there is a per-IP-address-family (AF) node +inserted in the per-AF input feature arc. The node must be per-AF, +since it must be a sibling of a start node for the ipX-output feature +arc. This node uses the packet's L2 rewrite to search for the +adjacency that VPP would have used to send this packet; this adjacency +is stored in the buffer's metadata so that it is available to all +output features. Then the packet is sent through the physical +interface's IP output feature arc. +All ARP packets are x-connected from the tap to the physical. + +Physical to Host +^^^^^^^^^^^^^^^^ + +All ARP packets received on the physical are sent to the paired +Tap. This allows the Linux network stack to build the neighbor table. + +IP packets that are punted are sent to the host. They are sent on the +tap that is paired with the physical on which they were originally +received. The packet is sent on the Tap/Tun 'exactly' as it was +received (i.e. 
with the L2 rewrite) but post any translations that +input features may have made. + + +Recommendations +^^^^^^^^^^^^^^^ + +When using this plugin disable the ARP, ND, IGMP plugins; this is the +task for Linux. +Disable ping plugin, since Linux will now respond. diff --git a/src/plugins/linux-cp/lcp_adj.c b/src/plugins/linux-cp/lcp_adj.c new file mode 100644 index 00000000000..9a08591a0c1 --- /dev/null +++ b/src/plugins/linux-cp/lcp_adj.c @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include + +static adj_delegate_type_t adj_type; + +/** + * The table of adjacencies indexed by the rewrite string + */ +BVT (clib_bihash) lcp_adj_tbl; + +static_always_inline void +lcp_adj_mk_key_adj (const ip_adjacency_t *adj, lcp_adj_key_t *key) +{ + lcp_adj_mk_key (adj->rewrite_header.data, adj->rewrite_header.data_bytes, + adj->rewrite_header.sw_if_index, key); +} + +static u8 * +lcp_adj_delegate_format (const adj_delegate_t *aed, u8 *s) +{ + return (format (s, "lcp")); +} + +static void +lcp_adj_delegate_adj_deleted (adj_delegate_t *aed) +{ + ip_adjacency_t *adj; + lcp_adj_kv_t kv; + + adj = adj_get (aed->ad_adj_index); + + lcp_adj_mk_key_adj (adj, &kv.k); + + BV (clib_bihash_add_del) (&lcp_adj_tbl, &kv.kv, 0); +} + +static void +lcp_adj_delegate_adj_modified (adj_delegate_t *aed) +{ + ip_adjacency_t *adj; + lcp_adj_kv_t kv; + + adj = adj_get (aed->ad_adj_index); + + if (IP_LOOKUP_NEXT_REWRITE != adj->lookup_next_index) + return; + + lcp_adj_mk_key_adj (adj, &kv.k); + kv.v = aed->ad_adj_index; + + BV (clib_bihash_add_del) (&lcp_adj_tbl, &kv.kv, 1); +} + +static void +lcp_adj_delegate_adj_created (adj_index_t ai) +{ + ip_adjacency_t *adj; + lcp_adj_kv_t kv; + + adj = adj_get (ai); + + if (IP_LOOKUP_NEXT_REWRITE != adj->lookup_next_index) + return; + + lcp_adj_mk_key_adj (adj, &kv.k); + kv.v = ai; + + BV (clib_bihash_add_del) (&lcp_adj_tbl, &kv.kv, 1); +} + +u8 * +format_lcp_adj_kvp (u8 *s, va_list *args) +{ + BVT (clib_bihash_kv) *kv = va_arg (*args, BVT (clib_bihash_kv) *); + CLIB_UNUSED (int verbose) = va_arg (*args, int); + lcp_adj_kv_t *akv = (lcp_adj_kv_t *) kv; + + s = format (s, " %U:%U\n %U", format_vnet_sw_if_index_name, + vnet_get_main (), akv->k.sw_if_index, format_hex_bytes, + akv->k.rewrite, 18, format_adj_nbr, akv->v, 4); + + return (s); +} + +static clib_error_t * +lcp_adj_show_cmd (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + u8 verbose = 0; + + if (unformat (input, 
"verbose")) + verbose = 1; + + vlib_cli_output (vm, "Linux-CP Adjs:\n%U", BV (format_bihash), &lcp_adj_tbl, + verbose); + + return 0; +} + +VLIB_CLI_COMMAND (lcp_itf_pair_show_cmd_node, static) = { + .path = "show lcp adj", + .function = lcp_adj_show_cmd, + .short_help = "show lcp adj", + .is_mp_safe = 1, +}; + +const adj_delegate_vft_t lcp_adj_vft = { + .adv_format = lcp_adj_delegate_format, + .adv_adj_deleted = lcp_adj_delegate_adj_deleted, + .adv_adj_modified = lcp_adj_delegate_adj_modified, + .adv_adj_created = lcp_adj_delegate_adj_created, +}; + +static clib_error_t * +lcp_adj_init (vlib_main_t *vm) +{ + adj_type = adj_delegate_register_new_type (&lcp_adj_vft); + + BV (clib_bihash_init) (&lcp_adj_tbl, "linux-cp ADJ table", 1024, 1 << 24); + BV (clib_bihash_set_kvp_format_fn) (&lcp_adj_tbl, format_lcp_adj_kvp); + + return (NULL); +} + +VLIB_INIT_FUNCTION (lcp_adj_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/linux-cp/lcp_adj.h b/src/plugins/linux-cp/lcp_adj.h new file mode 100644 index 00000000000..006d183461e --- /dev/null +++ b/src/plugins/linux-cp/lcp_adj.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LCP_ADJ_DELEGATE_H__ +#define __LCP_ADJ_DELEGATE_H__ + +#include + +typedef struct lcp_adj_key_t_ +{ + u32 sw_if_index; + u8 rewrite[28]; +} lcp_adj_key_t; + +STATIC_ASSERT (sizeof (lcp_adj_key_t) == 32, "LCP ADJ Key size changed"); + +typedef struct lcp_adj_kv_t_ +{ + union + { + BVT (clib_bihash_kv) kv; + struct + { + lcp_adj_key_t k; + u64 v; + }; + }; +} lcp_adj_kv_t; + +STATIC_ASSERT (sizeof (lcp_adj_kv_t) == sizeof (BVT (clib_bihash_kv)), + "LCP ADJ Key size changed"); + +/** + * The table of adjacencies indexed by the rewrite string + */ +extern BVT (clib_bihash) lcp_adj_tbl; + +static_always_inline void +lcp_adj_mk_key (const u8 *rewrite, u8 len, u32 sw_if_index, lcp_adj_key_t *key) +{ + /* + * Construct the key from the provided rewrite, then pad with zeros + * to ensure the key does not have garbage bytes + */ + ASSERT (len <= sizeof (key->rewrite)); + clib_memcpy_fast (key->rewrite, rewrite, len); + clib_memset (key->rewrite + len, 0, sizeof (key->rewrite) - len); + key->sw_if_index = sw_if_index; +} + +static_always_inline adj_index_t +lcp_adj_lkup (const u8 *rewrite, u8 len, u32 sw_if_index) +{ + lcp_adj_kv_t kv; + + lcp_adj_mk_key (rewrite, len, sw_if_index, &kv.k); + + if (!BV (clib_bihash_search_inline) (&lcp_adj_tbl, &kv.kv)) + return (kv.v); + + return (ADJ_INDEX_INVALID); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + +#endif diff --git a/src/plugins/linux-cp/lcp_api.c b/src/plugins/linux-cp/lcp_api.c new file mode 100644 index 00000000000..409aa7224af --- /dev/null +++ b/src/plugins/linux-cp/lcp_api.c @@ -0,0 +1,232 @@ +/* + * Copyright 2020 Rubicon Communications, LLC. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +static u16 lcp_msg_id_base; +#define REPLY_MSG_ID_BASE lcp_msg_id_base +#include + +static lip_host_type_t +api_decode_host_type (vl_api_lcp_itf_host_type_t type) +{ + if (type == LCP_API_ITF_HOST_TUN) + return LCP_ITF_HOST_TUN; + + return LCP_ITF_HOST_TAP; +} + +static vl_api_lcp_itf_host_type_t +api_encode_host_type (lip_host_type_t type) +{ + if (type == LCP_ITF_HOST_TUN) + return LCP_API_ITF_HOST_TUN; + + return LCP_API_ITF_HOST_TAP; +} + +void +lcp_set_auto_intf (u8 is_auto) +{ + lcp_main_t *lcpm = &lcp_main; + + lcpm->auto_intf = (is_auto != 0); +} + +int +lcp_auto_intf (void) +{ + lcp_main_t *lcpm = &lcp_main; + + return lcpm->auto_intf; +} + +static void +vl_api_lcp_itf_pair_add_del_t_handler (vl_api_lcp_itf_pair_add_del_t *mp) +{ + u32 phy_sw_if_index; + vl_api_lcp_itf_pair_add_del_reply_t *rmp; + lip_host_type_t lip_host_type; + int rv; + + if (!vnet_sw_if_index_is_api_valid (mp->sw_if_index)) + { + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + goto bad_sw_if_index; + } + + phy_sw_if_index = mp->sw_if_index; + lip_host_type = api_decode_host_type (mp->host_if_type); + if (mp->is_add) + { + u8 *host_if_name, *netns; + int host_len, netns_len; + + host_if_name = netns = 0; + + /* lcp_itf_pair_create expects vec of u8 */ + host_len = clib_strnlen ((char *) mp->host_if_name, + sizeof (mp->host_if_name) - 1); + vec_add (host_if_name, mp->host_if_name, host_len); + vec_add1 (host_if_name, 0); + + netns_len = + clib_strnlen ((char *) mp->namespace, sizeof (mp->namespace) - 1); + vec_add (netns, mp->namespace, netns_len); + vec_add1 (netns, 0); + + rv = lcp_itf_pair_create (phy_sw_if_index, host_if_name, lip_host_type, + netns); + + vec_free (host_if_name); + vec_free (netns); + } + else + { + rv = lcp_itf_pair_delete (phy_sw_if_index); + } + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO 
(VL_API_LCP_ITF_PAIR_ADD_DEL_REPLY); +} + +static void +send_lcp_itf_pair_details (index_t lipi, vl_api_registration_t *rp, + u32 context) +{ + vl_api_lcp_itf_pair_details_t *rmp; + lcp_itf_pair_t *lcp_pair = lcp_itf_pair_get (lipi); + + REPLY_MACRO_DETAILS4 ( + VL_API_LCP_ITF_PAIR_DETAILS, rp, context, ({ + rmp->phy_sw_if_index = lcp_pair->lip_phy_sw_if_index; + rmp->host_sw_if_index = lcp_pair->lip_host_sw_if_index; + rmp->vif_index = lcp_pair->lip_vif_index; + rmp->host_if_type = api_encode_host_type (lcp_pair->lip_host_type); + + clib_strncpy ((char *) rmp->host_if_name, + (char *) lcp_pair->lip_host_name, + vec_len (lcp_pair->lip_host_name) - 1); + + clib_strncpy ((char *) rmp->namespace, (char *) lcp_pair->lip_namespace, + vec_len (lcp_pair->lip_namespace)); + })); +} + +static void +vl_api_lcp_itf_pair_get_t_handler (vl_api_lcp_itf_pair_get_t *mp) +{ + vl_api_lcp_itf_pair_get_reply_t *rmp; + i32 rv = 0; + + REPLY_AND_DETAILS_MACRO ( + VL_API_LCP_ITF_PAIR_GET_REPLY, lcp_itf_pair_pool, + ({ send_lcp_itf_pair_details (cursor, rp, mp->context); })); +} + +static void +vl_api_lcp_default_ns_set_t_handler (vl_api_lcp_default_ns_set_t *mp) +{ + vl_api_lcp_default_ns_set_reply_t *rmp; + int rv; + + mp->namespace[LCP_NS_LEN - 1] = 0; + rv = lcp_set_default_ns (mp->namespace); + + REPLY_MACRO (VL_API_LCP_DEFAULT_NS_SET_REPLY); +} + +static void +vl_api_lcp_default_ns_get_t_handler (vl_api_lcp_default_ns_get_t *mp) +{ + lcp_main_t *lcpm = &lcp_main; + vl_api_lcp_default_ns_get_reply_t *rmp; + vl_api_registration_t *reg; + char *ns; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = (VL_API_LCP_DEFAULT_NS_GET_REPLY + lcpm->msg_id_base); + rmp->context = mp->context; + + ns = (char *) lcp_get_default_ns (); + if (ns) + clib_strncpy ((char *) rmp->namespace, ns, LCP_NS_LEN - 1); + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void 
+vl_api_lcp_itf_pair_replace_begin_t_handler ( + vl_api_lcp_itf_pair_replace_begin_t *mp) +{ + vl_api_lcp_itf_pair_replace_begin_reply_t *rmp; + int rv; + + rv = lcp_itf_pair_replace_begin (); + + REPLY_MACRO (VL_API_LCP_ITF_PAIR_REPLACE_BEGIN_REPLY); +} + +static void +vl_api_lcp_itf_pair_replace_end_t_handler ( + vl_api_lcp_itf_pair_replace_end_t *mp) +{ + vl_api_lcp_itf_pair_replace_end_reply_t *rmp; + int rv = 0; + + rv = lcp_itf_pair_replace_end (); + + REPLY_MACRO (VL_API_LCP_ITF_PAIR_REPLACE_END_REPLY); +} + +/* + * Set up the API message handling tables + */ +#include + +static clib_error_t * +lcp_plugin_api_hookup (vlib_main_t *vm) +{ + /* Ask for a correctly-sized block of API message decode slots */ + lcp_msg_id_base = setup_message_id_table (); + + return (NULL); +} + +VLIB_INIT_FUNCTION (lcp_plugin_api_hookup); + +#include +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "Linux Control Plane - Interface Mirror", + .default_disabled = 1, +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/linux-cp/lcp_cli.c b/src/plugins/linux-cp/lcp_cli.c new file mode 100644 index 00000000000..3b0598c1b66 --- /dev/null +++ b/src/plugins/linux-cp/lcp_cli.c @@ -0,0 +1,236 @@ +/* Hey Emacs use -*- mode: C -*- */ +/* + * Copyright 2020 Rubicon Communications, LLC. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+/**
+ * CLI handler for "lcp create".
+ *
+ * Parses a PHY interface (numeric sw_if_index or interface name), an
+ * optional "host-if <name>", an optional "netns <name>" and an optional
+ * "tun" keyword, then calls lcp_itf_pair_create ().
+ *
+ * @return NULL on success, otherwise a clib error describing the failure.
+ */
+static clib_error_t *
+lcp_itf_pair_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                                vlib_cli_command_t *cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error = NULL;
+  u32 sw_if_index;
+  u8 *host_if_name;
+  lip_host_type_t host_if_type;
+  u8 *ns;
+  int r;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  sw_if_index = ~0;
+  host_if_name = ns = NULL;
+  host_if_type = LCP_ITF_HOST_TAP;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%d", &sw_if_index))
+        ;
+      else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+                         &sw_if_index))
+        ;
+      else if (unformat (line_input, "host-if %s", &host_if_name))
+        ;
+      else if (unformat (line_input, "netns %s", &ns))
+        ;
+      else if (unformat (line_input, "tun"))
+        host_if_type = LCP_ITF_HOST_TUN;
+      else
+        {
+          /*
+           * Report the offending token from the line being parsed, and do
+           * so before that line is freed.
+           */
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          goto done;
+        }
+    }
+
+  if (sw_if_index == ~0)
+    {
+      error = clib_error_return (0, "interface name or sw_if_index required");
+      goto done;
+    }
+
+  if (vec_len (ns) >= LCP_NS_LEN)
+    {
+      error = clib_error_return (
+        0, "Namespace name should be fewer than %d characters", LCP_NS_LEN);
+      goto done;
+    }
+
+  r = lcp_itf_pair_create (sw_if_index, host_if_name, host_if_type, ns);
+
+  if (r)
+    error = clib_error_return (0, "linux-cp pair creation failed (%d)", r);
+
+done:
+  /* single exit: every path releases the line input and parsed strings */
+  unformat_free (line_input);
+  vec_free (host_if_name);
+  vec_free (ns);
+
+  return error;
+}
+
+VLIB_CLI_COMMAND (lcp_itf_pair_create_command, static) = {
+  .path = "lcp create",
+  .short_help = "lcp create <sw_if_index>|<if-name> host-if <host-if-name> "
+                "netns <namespace> [tun]",
+  .function = lcp_itf_pair_create_command_fn,
+};
+
+/**
+ * CLI handler for "lcp default".
+ *
+ * Accepts "netns <name>" or "clear netns" and passes the resulting
+ * namespace (NULL when none was given) to lcp_set_default_ns ().
+ *
+ * @return NULL on success, otherwise a clib error.
+ */
+static clib_error_t *
+lcp_default_netns_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                              vlib_cli_command_t *cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error;
+  u8 *ns;
+  int r;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  ns = 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "netns %s", &ns))
+        ;
+      else if (unformat (line_input, "clear netns"))
+        ;
+      else
+        {
+          /*
+           * Nothing was consumed: without this branch the loop would
+           * re-test the same token forever.
+           */
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          unformat_free (line_input);
+          vec_free (ns);
+          return error;
+        }
+    }
+
+  unformat_free (line_input);
+
+  vlib_cli_output (vm, "lcp set default netns '%s'\n", (char *) ns);
+
+  /*
+   * NOTE(review): ns is not freed here because it is not visible whether
+   * lcp_set_default_ns () copies the string or keeps the pointer - confirm.
+   */
+  r = lcp_set_default_ns (ns);
+
+  if (r)
+    return clib_error_return (0, "linux-cp set default netns failed (%d)", r);
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (lcp_default_netns_command, static) = {
+  .path = "lcp default",
+  .short_help = "lcp default netns [<namespace>]",
+  .function = lcp_default_netns_command_fn,
+};
+
+/**
+ * CLI handler for "lcp delete".
+ *
+ * Parses a PHY interface (numeric sw_if_index or interface name) and calls
+ * lcp_itf_pair_delete () on it.
+ *
+ * @return NULL on success, otherwise a clib error.
+ */
+static clib_error_t *
+lcp_itf_pair_delete_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                                vlib_cli_command_t *cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error;
+  u32 sw_if_index;
+  int r;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  sw_if_index = ~0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%d", &sw_if_index))
+        ;
+      else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+                         &sw_if_index))
+        ;
+      else
+        {
+          /* format the error first, then release the line input */
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          unformat_free (line_input);
+          return error;
+        }
+    }
+
+  unformat_free (line_input);
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0, "interface name or sw_if_index required");
+
+  r = lcp_itf_pair_delete (sw_if_index);
+
+  if (r)
+    return clib_error_return (0, "linux-cp pair deletion failed (%d)", r);
+  return 0;
+}
+
+VLIB_CLI_COMMAND (lcp_itf_pair_delete_command, static) = {
+  .path = "lcp delete",
+  .short_help = "lcp delete <sw_if_index>|<if-name>",
+  .function = lcp_itf_pair_delete_command_fn,
+};
+
+/**
+ * CLI handler for "show lcp".
+ *
+ * With "phy <interface>" only the pair for that PHY is shown; otherwise
+ * ~0 is passed to lcp_itf_pair_show (), which walks every pair.
+ */
+static clib_error_t *
+lcp_itf_pair_show_cmd (vlib_main_t *vm, unformat_input_t *input,
+                       vlib_cli_command_t *cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 phy_sw_if_index;
+
+  phy_sw_if_index = ~0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "phy %U", unformat_vnet_sw_interface, vnm,
+                    &phy_sw_if_index))
+        ;
+      else
+        return clib_error_return (0, "unknown input '%U'",
+                                  format_unformat_error, input);
+    }
+
+  lcp_itf_pair_show (phy_sw_if_index);
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (lcp_itf_pair_show_cmd_node, static) = {
+  .path = "show lcp",
+  .function = lcp_itf_pair_show_cmd,
+  .short_help = "show lcp [phy <interface>]",
+  .is_mp_safe = 1,
+};
+
+/** Init hook for the CLI file; nothing to set up. */
+clib_error_t *
+lcp_cli_init (vlib_main_t *vm)
+{
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (lcp_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_interface.c b/src/plugins/linux-cp/lcp_interface.c
new file mode 100644
index 00000000000..534d974c7b0
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_interface.c
@@ -0,0 +1,1016 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static vlib_log_class_t lcp_itf_pair_logger;
+
+/**
+ * Pool of LIP objects
+ */
+lcp_itf_pair_t *lcp_itf_pair_pool;
+
+/** Number of interface pairs currently allocated in the pool. */
+u32
+lcp_itf_num_pairs (void)
+{
+  return pool_elts (lcp_itf_pair_pool);
+}
+
+/**
+ * DBs of interface-pair objects:
+ *  - key'd by VIF (linux ID)
+ *  - key'd by VPP's physical interface
+ *  - key'd by VPP's tap/host interface
+ */
+static uword *lip_db_by_vif;
+index_t *lip_db_by_phy;
+u32 *lip_db_by_host;
+
+/* No trailing ';' in the macros: every call site supplies its own. */
+#define LCP_ITF_PAIR_DBG(...)                                                 \
+  vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__)
+
+#define LCP_ITF_PAIR_INFO(...)                                                \
+  vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__)
+
+/**
+ * format () helper for one interface pair.
+ * Expects a single lcp_itf_pair_t * argument.
+ */
+u8 *
+format_lcp_itf_pair (u8 *s, va_list *args)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  lcp_itf_pair_t *lip = va_arg (*args, lcp_itf_pair_t *);
+  vnet_sw_interface_t *swif_phy;
+  vnet_sw_interface_t *swif_host;
+
+  s = format (s, "itf-pair: [%d]", lip - lcp_itf_pair_pool);
+
+  swif_phy = vnet_get_sw_interface_or_null (vnm, lip->lip_phy_sw_if_index);
+  if (!swif_phy)
+    s = format (s, " ");
+  else
+    s = format (s, " %U", format_vnet_sw_interface_name, vnm, swif_phy);
+
+  swif_host = vnet_get_sw_interface_or_null (vnm, lip->lip_host_sw_if_index);
+  if (!swif_host)
+    s = format (s, " ");
+  else
+    s = format (s, " %U", format_vnet_sw_interface_name, vnm, swif_host);
+
+  s = format (s, " %s %d type %s", lip->lip_host_name, lip->lip_vif_index,
+              (lip->lip_host_type == LCP_ITF_HOST_TAP) ? "tap" : "tun");
+
+  if (lip->lip_namespace)
+    s = format (s, " netns %s", lip->lip_namespace);
+
+  return s;
+}
+
+/** Walk callback: print one pair on the CLI. */
+static walk_rc_t
+lcp_itf_pair_walk_show_cb (index_t api, void *ctx)
+{
+  vlib_main_t *vm;
+  lcp_itf_pair_t *lip;
+
+  lip = lcp_itf_pair_get (api);
+  if (!lip)
+    return WALK_STOP;
+
+  vm = vlib_get_main ();
+  vlib_cli_output (vm, "%U\n", format_lcp_itf_pair, lip);
+
+  return WALK_CONTINUE;
+}
+
+/**
+ * Print the default netns followed by either every pair
+ * (phy_sw_if_index == ~0) or only the pair of the given PHY.
+ */
+void
+lcp_itf_pair_show (u32 phy_sw_if_index)
+{
+  vlib_main_t *vm;
+  u8 *ns;
+  index_t api;
+
+  vm = vlib_get_main ();
+  ns = lcp_get_default_ns ();
+  vlib_cli_output (vm, "lcp default netns '%s'\n",
+                   ns ? (char *) ns : "");
+
+  if (phy_sw_if_index == ~0)
+    {
+      lcp_itf_pair_walk (lcp_itf_pair_walk_show_cb, 0);
+    }
+  else
+    {
+      api = lcp_itf_pair_find_by_phy (phy_sw_if_index);
+      if (api != INDEX_INVALID)
+        lcp_itf_pair_walk_show_cb (api, 0);
+    }
+}
+
+/** Return the pair stored at the given pool index (index must be valid). */
+lcp_itf_pair_t *
+lcp_itf_pair_get (u32 index)
+{
+  return pool_elt_at_index (lcp_itf_pair_pool, index);
+}
+
+/** Look up a pair by its Linux interface index; INDEX_INVALID if absent. */
+index_t
+lcp_itf_pair_find_by_vif (u32 vif_index)
+{
+  uword *p;
+
+  p = hash_get (lip_db_by_vif, vif_index);
+
+  if (p)
+    return p[0];
+
+  return INDEX_INVALID;
+}
+
+/**
+ * Add a pair for a sub-interface, reusing the host interface and host type
+ * of the pair that already exists for phy_sw_if_index.
+ *
+ * @return VNET_API_ERROR_INVALID_SW_IF_INDEX when phy_sw_if_index has no
+ *         pair, otherwise the result of lcp_itf_pair_add ().
+ */
+int
+lcp_itf_pair_add_sub (u32 vif, u8 *host_if_name, u32 sub_sw_if_index,
+                      u32 phy_sw_if_index, u8 *ns)
+{
+  lcp_itf_pair_t *lip;
+  index_t lipi;
+
+  /* the pool must not be indexed with INDEX_INVALID */
+  lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
+  if (lipi == INDEX_INVALID)
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  lip = lcp_itf_pair_get (lipi);
+
+  return lcp_itf_pair_add (lip->lip_host_sw_if_index, sub_sw_if_index,
+                           host_if_name, vif, lip->lip_host_type, ns);
+}
+
+const char *lcp_itf_l3_feat_names[N_LCP_ITF_HOST][N_AF] = {
+  [LCP_ITF_HOST_TAP] = {
+    [AF_IP4] = "linux-cp-xc-ip4",
+    [AF_IP6] = "linux-cp-xc-ip6",
+  },
+  [LCP_ITF_HOST_TUN] = {
+    [AF_IP4] = "linux-cp-xc-l3-ip4",
+    [AF_IP6] = "linux-cp-xc-l3-ip6",
+  },
+};
+
+const fib_route_path_flags_t lcp_itf_route_path_flags[N_LCP_ITF_HOST] = {
+  [LCP_ITF_HOST_TAP] = FIB_ROUTE_PATH_DVR,
+  [LCP_ITF_HOST_TUN] = FIB_ROUTE_PATH_FLAG_NONE,
+};
+
+/** Release the locks taken on the PHY adjacencies by lcp_itf_set_adjs (). */
+static void
+lcp_itf_unset_adjs (lcp_itf_pair_t *lip)
+{
+  adj_unlock (lip->lip_phy_adjs.adj_index[AF_IP4]);
+  adj_unlock (lip->lip_phy_adjs.adj_index[AF_IP6]);
+}
+
+/**
+ * Lock the IPv4/IPv6 adjacencies on the PHY (neighbour adjacencies for TUN
+ * pairs, mcast adjacencies otherwise) and record the IPv4 adjacency's
+ * rewrite length in the pair.
+ */
+static void
+lcp_itf_set_adjs (lcp_itf_pair_t *lip)
+{
+  if (lip->lip_host_type == LCP_ITF_HOST_TUN)
+    {
+      lip->lip_phy_adjs.adj_index[AF_IP4] = adj_nbr_add_or_lock (
+        FIB_PROTOCOL_IP4, VNET_LINK_IP4, &zero_addr, lip->lip_phy_sw_if_index);
+      lip->lip_phy_adjs.adj_index[AF_IP6] = adj_nbr_add_or_lock (
+        FIB_PROTOCOL_IP6, VNET_LINK_IP6, &zero_addr, lip->lip_phy_sw_if_index);
+    }
+  else
+    {
+      lip->lip_phy_adjs.adj_index[AF_IP4] = adj_mcast_add_or_lock (
+        FIB_PROTOCOL_IP4, VNET_LINK_IP4, lip->lip_phy_sw_if_index);
+      lip->lip_phy_adjs.adj_index[AF_IP6] = adj_mcast_add_or_lock (
+        FIB_PROTOCOL_IP6, VNET_LINK_IP6, lip->lip_phy_sw_if_index);
+    }
+
+  ip_adjacency_t *adj;
+
+  adj = adj_get (lip->lip_phy_adjs.adj_index[AF_IP4]);
+
+  lip->lip_rewrite_len = adj->rewrite_header.data_bytes;
+}
+
+/** Weak default; does nothing unless another object provides the symbol. */
+int __clib_weak
+lcp_nl_drain_messages (void)
+{
+  return 0;
+}
+
+/**
+ * Record a new host/PHY interface pair and, when a host interface is given,
+ * enable the cross-connect, punt and ARP features that tie the two together.
+ *
+ * @return 0 on success, VNET_API_ERROR_VALUE_EXIST when the PHY is already
+ *         paired.
+ */
+int
+lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
+                  u32 host_index, lip_host_type_t host_type, u8 *ns)
+{
+  index_t lipi;
+  lcp_itf_pair_t *lip;
+
+  lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
+
+  LCP_ITF_PAIR_INFO ("add: host:%U phy:%U, host_if:%v vif:%d ns:%v",
+                     format_vnet_sw_if_index_name, vnet_get_main (),
+                     host_sw_if_index, format_vnet_sw_if_index_name,
+                     vnet_get_main (), phy_sw_if_index, host_name, host_index,
+                     ns);
+
+  if (lipi != INDEX_INVALID)
+    return VNET_API_ERROR_VALUE_EXIST;
+
+  /*
+   * Drain netlink messages before adding the new pair.
+   * This avoids unnecessarily applying messages that were generated by
+   * the creation of the tap/tun interface. By processing them before we
+   * store the pair data, we will ensure that they are ignored.
+   */
+  lcp_nl_drain_messages ();
+
+  /*
+   * Create a new pair.
+   */
+  pool_get (lcp_itf_pair_pool, lip);
+
+  lipi = lip - lcp_itf_pair_pool;
+
+  vec_validate_init_empty (lip_db_by_phy, phy_sw_if_index, INDEX_INVALID);
+  lip_db_by_phy[phy_sw_if_index] = lipi;
+
+  /*
+   * Only index the host DB when there is a host interface: validating the
+   * vector up to index ~0 would try to grow it to 2^32 entries.
+   */
+  if (host_sw_if_index != ~0)
+    {
+      vec_validate_init_empty (lip_db_by_host, host_sw_if_index,
+                               INDEX_INVALID);
+      lip_db_by_host[host_sw_if_index] = lipi;
+    }
+  hash_set (lip_db_by_vif, host_index, lipi);
+
+  lip->lip_host_sw_if_index = host_sw_if_index;
+  lip->lip_phy_sw_if_index = phy_sw_if_index;
+  lip->lip_host_name = vec_dup (host_name);
+  lip->lip_host_type = host_type;
+  lip->lip_vif_index = host_index;
+  lip->lip_namespace = vec_dup (ns);
+  lip->lip_create_ts = vlib_time_now (vlib_get_main ());
+
+  /*
+   * pool_get () does not clear the element; give the remaining fields
+   * defined values so that a recycled slot cannot appear STALE or carry
+   * old adjacency indices.
+   */
+  lip->lip_flags = 0;
+  lip->lip_rewrite_len = 0;
+  lip->lip_phy_adjs.adj_index[AF_IP4] = ADJ_INDEX_INVALID;
+  lip->lip_phy_adjs.adj_index[AF_IP6] = ADJ_INDEX_INVALID;
+
+  if (lip->lip_host_sw_if_index == ~0)
+    return 0;
+
+  /*
+   * First use of this host interface.
+   * Enable the x-connect feature on the host to send
+   * all packets to the phy.
+   */
+  ip_address_family_t af;
+
+  FOR_EACH_IP_ADDRESS_FAMILY (af)
+  ip_feature_enable_disable (af, N_SAFI, IP_FEATURE_INPUT,
+                             lcp_itf_l3_feat_names[lip->lip_host_type][af],
+                             lip->lip_host_sw_if_index, 1, NULL, 0);
+
+  /*
+   * Configure passive punt to the host interface.
+   */
+  fib_route_path_t *rpaths = NULL, rpath = {
+    .frp_flags = lcp_itf_route_path_flags[lip->lip_host_type],
+    .frp_proto = DPO_PROTO_IP4,
+    .frp_sw_if_index = lip->lip_host_sw_if_index,
+    .frp_weight = 1,
+    .frp_fib_index = ~0,
+  };
+
+  vec_add1 (rpaths, rpath);
+
+  ip4_punt_redirect_add_paths (lip->lip_phy_sw_if_index, rpaths);
+
+  rpaths[0].frp_proto = DPO_PROTO_IP6;
+
+  ip6_punt_redirect_add_paths (lip->lip_phy_sw_if_index, rpaths);
+
+  vec_free (rpaths);
+
+  lcp_itf_set_adjs (lip);
+
+  /* enable ARP feature node for broadcast interfaces */
+  if (lip->lip_host_type != LCP_ITF_HOST_TUN)
+    {
+      vnet_feature_enable_disable ("arp", "linux-cp-arp-phy",
+                                   lip->lip_phy_sw_if_index, 1, NULL, 0);
+      vnet_feature_enable_disable ("arp", "linux-cp-arp-host",
+                                   lip->lip_host_sw_if_index, 1, NULL, 0);
+    }
+  else
+    {
+      vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 1, NULL,
+                                   0);
+      vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 1, NULL,
+                                   0);
+    }
+
+  return 0;
+}
+
+/**
+ * Create a Linux VLAN link named 'name' with the given VLAN id on top of
+ * the Linux interface with index 'parent'.
+ *
+ * @return NULL on success, otherwise a clib error. The netlink socket and
+ *         link object are released on every path.
+ */
+static clib_error_t *
+lcp_netlink_add_link_vlan (int parent, u32 vlan, const char *name)
+{
+  struct rtnl_link *link = NULL;
+  clib_error_t *error = NULL;
+  struct nl_sock *sk;
+  int err;
+
+  sk = nl_socket_alloc ();
+  if (!sk)
+    return clib_error_return (NULL, "Unable to allocate netlink socket");
+
+  if ((err = nl_connect (sk, NETLINK_ROUTE)) < 0)
+    {
+      error = clib_error_return (NULL, "Unable to connect socket: %d", err);
+      goto done;
+    }
+
+  link = rtnl_link_vlan_alloc ();
+  if (!link)
+    {
+      error = clib_error_return (NULL, "Unable to allocate link %s", name);
+      goto done;
+    }
+
+  rtnl_link_set_link (link, parent);
+  rtnl_link_set_name (link, name);
+
+  rtnl_link_vlan_set_id (link, vlan);
+
+  if ((err = rtnl_link_add (sk, link, NLM_F_CREATE)) < 0)
+    error = clib_error_return (NULL, "Unable to add link %s: %d", name, err);
+
+done:
+  if (link)
+    rtnl_link_put (link);
+  /* nl_socket_free () also closes the socket if it is connected */
+  nl_socket_free (sk);
+
+  return error;
+}
+
+/**
+ * Delete the Linux link with the given name.
+ *
+ * @return NULL on success, otherwise a clib error. The netlink socket and
+ *         link object are released on every path.
+ */
+static clib_error_t *
+lcp_netlink_del_link (const char *name)
+{
+  struct rtnl_link *link = NULL;
+  clib_error_t *error = NULL;
+  struct nl_sock *sk;
+  int err;
+
+  sk = nl_socket_alloc ();
+  if (!sk)
+    return clib_error_return (NULL, "Unable to allocate netlink socket");
+
+  if ((err = nl_connect (sk, NETLINK_ROUTE)) < 0)
+    {
+      error = clib_error_return (NULL, "Unable to connect socket: %d", err);
+      goto done;
+    }
+
+  link = rtnl_link_alloc ();
+  if (!link)
+    {
+      error = clib_error_return (NULL, "Unable to allocate link %s", name);
+      goto done;
+    }
+
+  rtnl_link_set_name (link, name);
+
+  if ((err = rtnl_link_delete (sk, link)) < 0)
+    error = clib_error_return (NULL, "Unable to del link %s: %d", name, err);
+
+done:
+  if (link)
+    rtnl_link_put (link);
+  /* nl_socket_free () also closes the socket if it is connected */
+  nl_socket_free (sk);
+
+  return error;
+}
+
+/**
+ * Remove the pair attached to phy_sw_if_index: disable the features that
+ * lcp_itf_pair_add () enabled, drop the DB entries and free the pair.
+ * The host interface itself is left in place.
+ *
+ * @return 0 on success, VNET_API_ERROR_INVALID_SW_IF_INDEX when the PHY has
+ *         no pair.
+ */
+int
+lcp_itf_pair_del (u32 phy_sw_if_index)
+{
+  ip_address_family_t af;
+  lcp_itf_pair_t *lip;
+  u32 lipi;
+
+  lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
+
+  if (lipi == INDEX_INVALID)
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  lip = lcp_itf_pair_get (lipi);
+
+  LCP_ITF_PAIR_INFO ("pair delete: {%U, %U, %s}", format_vnet_sw_if_index_name,
+                     vnet_get_main (), lip->lip_phy_sw_if_index,
+                     format_vnet_sw_if_index_name, vnet_get_main (),
+                     lip->lip_host_sw_if_index, lip->lip_host_name);
+
+  /*
+   * Mirror lcp_itf_pair_add (): nothing below was set up when the pair was
+   * added without a host interface.
+   */
+  if (lip->lip_host_sw_if_index != ~0)
+    {
+      FOR_EACH_IP_ADDRESS_FAMILY (af)
+      ip_feature_enable_disable (af, N_SAFI, IP_FEATURE_INPUT,
+                                 lcp_itf_l3_feat_names[lip->lip_host_type][af],
+                                 lip->lip_host_sw_if_index, 0, NULL, 0);
+
+      lcp_itf_unset_adjs (lip);
+
+      ip4_punt_redirect_del (lip->lip_phy_sw_if_index);
+      ip6_punt_redirect_del (lip->lip_phy_sw_if_index);
+
+      /* disable ARP feature node for broadcast interfaces */
+      if (lip->lip_host_type != LCP_ITF_HOST_TUN)
+        {
+          vnet_feature_enable_disable ("arp", "linux-cp-arp-phy",
+                                       lip->lip_phy_sw_if_index, 0, NULL, 0);
+          vnet_feature_enable_disable ("arp", "linux-cp-arp-host",
+                                       lip->lip_host_sw_if_index, 0, NULL, 0);
+        }
+      else
+        {
+          vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 0,
+                                       NULL, 0);
+          vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 0,
+                                       NULL, 0);
+        }
+
+      /* the host index belongs in the host DB, not the phy DB */
+      lip_db_by_host[lip->lip_host_sw_if_index] = INDEX_INVALID;
+    }
+
+  lip_db_by_phy[phy_sw_if_index] = INDEX_INVALID;
+  /* drop the vif entry too, or it would keep pointing at a freed slot */
+  hash_unset (lip_db_by_vif, lip->lip_vif_index);
+
+  vec_free (lip->lip_host_name);
+  vec_free (lip->lip_namespace);
+  pool_put (lcp_itf_pair_pool, lip);
+
+  return 0;
+}
+
+/**
+ * Delete a pair and its host interface: the Linux VLAN link plus VPP
+ * sub-interface when the host side is a sub-interface, the tap otherwise.
+ */
+static void
+lcp_itf_pair_delete_by_index (index_t lipi)
+{
+  u32 host_sw_if_index;
+  lcp_itf_pair_t *lip;
+  u8 *host_name;
+
+  lip = lcp_itf_pair_get (lipi);
+
+  /* copy what is needed: lcp_itf_pair_del () frees the pair */
+  host_name = vec_dup (lip->lip_host_name);
+  host_sw_if_index = lip->lip_host_sw_if_index;
+
+  lcp_itf_pair_del (lip->lip_phy_sw_if_index);
+
+  if (vnet_sw_interface_is_sub (vnet_get_main (), host_sw_if_index))
+    {
+      lcp_netlink_del_link ((const char *) host_name);
+      vnet_delete_sub_interface (host_sw_if_index);
+    }
+  else
+    tap_delete_if (vlib_get_main (), host_sw_if_index);
+
+  vec_free (host_name);
+}
+
+/**
+ * Delete the pair attached to phy_sw_if_index together with its host
+ * interface.
+ *
+ * @return 0 on success, VNET_API_ERROR_INVALID_SW_IF_INDEX when the PHY has
+ *         no pair.
+ */
+int
+lcp_itf_pair_delete (u32 phy_sw_if_index)
+{
+  index_t lipi;
+
+  lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
+
+  if (lipi == INDEX_INVALID)
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  lcp_itf_pair_delete_by_index (lipi);
+
+  return 0;
+}
+
+/** Invoke cb on every pair index; stop when cb returns 0. */
+void
+lcp_itf_pair_walk (lcp_itf_pair_walk_cb_t cb, void *ctx)
+{
+  u32 api;
+
+  pool_foreach_index (api, lcp_itf_pair_pool)
+    {
+      if (!cb (api, ctx))
+        break;
+    };
+}
+
+typedef struct lcp_itf_pair_names_t_
+{
+  u8 *lipn_host_name;
+  u8 *lipn_phy_name;
+  u8 *lipn_namespace;
+  u32 lipn_phy_sw_if_index;
+} lcp_itf_pair_names_t;
+
+static lcp_itf_pair_names_t *lipn_names;
+
+/**
+ * Startup-config parser for the "linux-cp" section.
+ *
+ * Recognises "pair <phy> <host> [<netns>]", "default netns <name>" and
+ * "interface-auto-create". Pairs are stored in lipn_names.
+ *
+ * @return NULL on success, otherwise a clib error. The temporary parse
+ *         vectors are freed on every path.
+ */
+static clib_error_t *
+lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input)
+{
+  clib_error_t *error = NULL;
+  u8 *host, *phy;
+  u8 *ns;
+  u8 *default_ns;
+
+  host = phy = ns = default_ns = NULL;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      vec_reset_length (host);
+
+      if (unformat (input, "pair %s %s %s", &phy, &host, &ns))
+        {
+          lcp_itf_pair_names_t *lipn;
+
+          if (vec_len (ns) > LCP_NS_LEN)
+            {
+              error = clib_error_return (0,
+                                         "linux-cp IF namespace must"
+                                         " be less than %d characters",
+                                         LCP_NS_LEN);
+              break;
+            }
+
+          vec_add2 (lipn_names, lipn, 1);
+
+          lipn->lipn_host_name = vec_dup (host);
+          lipn->lipn_phy_name = vec_dup (phy);
+          lipn->lipn_namespace = vec_dup (ns);
+        }
+      else if (unformat (input, "pair %v %v", &phy, &host))
+        {
+          lcp_itf_pair_names_t *lipn;
+
+          vec_add2 (lipn_names, lipn, 1);
+
+          lipn->lipn_host_name = vec_dup (host);
+          lipn->lipn_phy_name = vec_dup (phy);
+          lipn->lipn_namespace = 0;
+        }
+      else if (unformat (input, "default netns %v", &default_ns))
+        {
+          vec_add1 (default_ns, 0);
+          if (lcp_set_default_ns (default_ns) < 0)
+            {
+              error = clib_error_return (0,
+                                         "linux-cp default namespace must"
+                                         " be less than %d characters",
+                                         LCP_NS_LEN);
+              break;
+            }
+        }
+      else if (unformat (input, "interface-auto-create"))
+        lcp_set_auto_intf (1 /* is_auto */);
+      else
+        {
+          error = clib_error_return (0, "interfaces not found");
+          break;
+        }
+    }
+
+  /* the stored names are copies, so the parse buffers can always go */
+  vec_free (host);
+  vec_free (phy);
+  vec_free (ns);
+  vec_free (default_ns);
+
+  return error;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (lcp_itf_pair_config, "linux-cp");
+
+/*
+ * Returns 1 if the tap name is valid.
+ * Returns 0 if the tap name is invalid (including a NULL name).
+ */
+static int
+lcp_validate_if_name (u8 *name)
+{
+  int len;
+  char *p;
+
+  /* a missing name cannot be valid and must not be dereferenced */
+  if (!name)
+    return 0;
+
+  p = (char *) name;
+  len = clib_strnlen (p, IFNAMSIZ);
+  if (len >= IFNAMSIZ)
+    return 0;
+
+  for (; *p; ++p)
+    {
+      /* <ctype.h> functions require an unsigned char value */
+      if (isalnum ((unsigned char) *p))
+        continue;
+
+      switch (*p)
+        {
+        case '-':
+        case '_':
+        case '%':
+        case '@':
+        case ':':
+        case '.':
+          continue;
+        }
+
+      return 0;
+    }
+
+  return 1;
+}
+
+/**
+ * Open the netns file for ns_name under /var/run/netns, or the calling
+ * process's own netns when ns_name is NULL.
+ *
+ * @return the file descriptor from open (), -1 on failure.
+ */
+static int
+lcp_itf_get_ns_fd (char *ns_name)
+{
+  char ns_path[256] = "/proc/self/ns/net";
+
+  if (ns_name)
+    snprintf (ns_path, sizeof (ns_path) - 1, "/var/run/netns/%s", ns_name);
+
+  return open (ns_path, O_RDONLY);
+}
+
+/**
+ * Set the link state of a Linux interface, temporarily switching into the
+ * namespace 'ns' when one is given and switching back afterwards.
+ */
+static void
+lcp_itf_set_vif_link_state (u32 vif_index, u8 up, u8 *ns)
+{
+  int curr_ns_fd, vif_ns_fd;
+
+  curr_ns_fd = vif_ns_fd = -1;
+
+  if (ns)
+    {
+      u8 *ns_path = 0;
+
+      curr_ns_fd = open ("/proc/self/ns/net", O_RDONLY);
+      ns_path = format (0, "/var/run/netns/%s%c", (char *) ns, 0);
+      vif_ns_fd = open ((char *) ns_path, O_RDONLY);
+      /* the path is only needed for open () */
+      vec_free (ns_path);
+      if (vif_ns_fd != -1)
+        setns (vif_ns_fd, CLONE_NEWNET);
+    }
+
+  vnet_netlink_set_link_state (vif_index, up);
+
+  if (vif_ns_fd != -1)
+    close (vif_ns_fd);
+
+  if (curr_ns_fd != -1)
+    {
+      setns (curr_ns_fd, CLONE_NEWNET);
+      close (curr_ns_fd);
+    }
+}
+
+/**
+ * Create the host side for a PHY and pair the two.
+ *
+ * For a sub-interface PHY a Linux VLAN link is created on the parent's host
+ * interface, plus a matching VPP sub-interface on the parent's tap. For any
+ * other PHY a tap (or tun) is created.
+ *
+ * @param phy_sw_if_index VPP interface to mirror
+ * @param host_if_name    name of the Linux interface
+ * @param host_if_type    tap or tun
+ * @param ns              network namespace; the default namespace is used
+ *                        when NULL or empty
+ * @return 0 on success, a VNET_API_ERROR_* value, the tap creation error,
+ *         or -1 when no Linux interface index could be determined.
+ */
+int
+lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
+                     lip_host_type_t host_if_type, u8 *ns)
+{
+  vlib_main_t *vm;
+  vnet_main_t *vnm;
+  /* ~0 until a host interface exists, so failure paths never read junk */
+  u32 vif_index = 0, host_sw_if_index = ~0;
+  const vnet_sw_interface_t *sw;
+  const vnet_hw_interface_t *hw;
+
+  if (!vnet_sw_if_index_is_api_valid (phy_sw_if_index))
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  if (!lcp_validate_if_name (host_if_name))
+    return VNET_API_ERROR_INVALID_ARGUMENT;
+
+  vnm = vnet_get_main ();
+  sw = vnet_get_sw_interface (vnm, phy_sw_if_index);
+  hw = vnet_get_sup_hw_interface (vnm, phy_sw_if_index);
+
+  /*
+   * Use interface-specific netns if supplied.
+   * Otherwise, use default netns if defined.
+   * Otherwise ignore a netns and use the OS default.
+   */
+  if (ns == 0 || ns[0] == 0)
+    ns = lcp_get_default_ns ();
+
+  /* sub interfaces do not need a tap created */
+  if (vnet_sw_interface_is_sub (vnm, phy_sw_if_index))
+    {
+      const lcp_itf_pair_t *lip;
+      int orig_ns_fd, ns_fd;
+      clib_error_t *err;
+      index_t parent_lipi;
+      int sub_failed = 0;
+      u16 vlan;
+
+      /*
+       * Find the parent tap by finding the pair from the parent phy.
+       * Without a parent pair there is nothing to attach the VLAN to.
+       */
+      parent_lipi = lcp_itf_pair_find_by_phy (sw->sup_sw_if_index);
+      if (parent_lipi == INDEX_INVALID)
+        return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+      lip = lcp_itf_pair_get (parent_lipi);
+      vlan = sw->sub.eth.outer_vlan_id;
+
+      /*
+       * see if the requested host interface has already been created
+       */
+      orig_ns_fd = ns_fd = -1;
+      err = NULL;
+
+      if (ns && ns[0] != 0)
+        {
+          orig_ns_fd = lcp_itf_get_ns_fd (NULL);
+          ns_fd = lcp_itf_get_ns_fd ((char *) ns);
+          if (orig_ns_fd == -1 || ns_fd == -1)
+            goto socket_close;
+
+          setns (ns_fd, CLONE_NEWNET);
+        }
+
+      vif_index = if_nametoindex ((const char *) host_if_name);
+
+      if (!vif_index)
+        {
+          /*
+           * no existing host interface, create it now
+           */
+          err = lcp_netlink_add_link_vlan (lip->lip_vif_index, vlan,
+                                           (const char *) host_if_name);
+
+          /* resolve the new link's index before using it below */
+          if (!err)
+            vif_index = if_nametoindex ((char *) host_if_name);
+
+          if (!err && vif_index && -1 != ns_fd)
+            err = vnet_netlink_set_link_netns (vif_index, ns_fd, NULL);
+        }
+
+      /*
+       * create a sub-interface on the tap
+       */
+      if (!err && vnet_create_sub_interface (lip->lip_host_sw_if_index,
+                                             sw->sub.id, sw->sub.eth.raw_flags,
+                                             sw->sub.eth.inner_vlan_id, vlan,
+                                             &host_sw_if_index))
+        {
+          LCP_ITF_PAIR_INFO ("failed create vlan: %d on %U", vlan,
+                             format_vnet_sw_if_index_name, vnet_get_main (),
+                             lip->lip_host_sw_if_index);
+          sub_failed = 1;
+        }
+
+    socket_close:
+      if (orig_ns_fd != -1)
+        {
+          setns (orig_ns_fd, CLONE_NEWNET);
+          close (orig_ns_fd);
+        }
+      if (ns_fd != -1)
+        close (ns_fd);
+
+      if (err)
+        {
+          clib_error_free (err);
+          return VNET_API_ERROR_INVALID_ARGUMENT;
+        }
+
+      /* no VPP sub-interface was created, so there is nothing to pair */
+      if (sub_failed)
+        return VNET_API_ERROR_INVALID_ARGUMENT;
+    }
+  else
+    {
+      tap_create_if_args_t args = {
+        .num_rx_queues = clib_max (1, vlib_num_workers ()),
+        .id = hw->hw_if_index,
+        .sw_if_index = ~0,
+        .rx_ring_sz = 256,
+        .tx_ring_sz = 256,
+        .host_if_name = host_if_name,
+        .host_namespace = 0,
+      };
+      ethernet_interface_t *ei;
+
+      if (host_if_type == LCP_ITF_HOST_TUN)
+        args.tap_flags |= TAP_FLAG_TUN;
+      else
+        {
+          ei = pool_elt_at_index (ethernet_main.interfaces, hw->hw_instance);
+          mac_address_copy (&args.host_mac_addr, &ei->address.mac);
+        }
+
+      if (sw->mtu[VNET_MTU_L3])
+        {
+          args.host_mtu_set = 1;
+          args.host_mtu_size = sw->mtu[VNET_MTU_L3];
+        }
+
+      if (ns && ns[0] != 0)
+        args.host_namespace = ns;
+
+      vm = vlib_get_main ();
+      tap_create_if (vm, &args);
+
+      if (args.rv < 0)
+        {
+          return args.rv;
+        }
+
+      /*
+       * get the hw and ethernet of the tap
+       */
+      hw = vnet_get_sup_hw_interface (vnm, args.sw_if_index);
+
+      /*
+       * Set the interface down on the host side.
+       * This controls whether the host can RX/TX.
+       */
+      virtio_main_t *mm = &virtio_main;
+      virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
+
+      lcp_itf_set_vif_link_state (vif->ifindex, 0 /* down */,
+                                  args.host_namespace);
+
+      /*
+       * Leave the TAP permanently up on the VPP side.
+       * This TAP will be shared by many sub-interface.
+       * Therefore we can't use it to manage admin state.
+       * force the tap in promiscuous mode.
+       */
+      if (host_if_type == LCP_ITF_HOST_TAP)
+        {
+          ei = pool_elt_at_index (ethernet_main.interfaces, hw->hw_instance);
+          ei->flags |= ETHERNET_INTERFACE_FLAG_STATUS_L3;
+        }
+
+      vif_index = vif->ifindex;
+      host_sw_if_index = args.sw_if_index;
+    }
+
+  if (!vif_index)
+    {
+      LCP_ITF_PAIR_INFO ("failed pair add (no vif index): {%U, %U, %s}",
+                         format_vnet_sw_if_index_name, vnet_get_main (),
+                         phy_sw_if_index, format_vnet_sw_if_index_name,
+                         vnet_get_main (), host_sw_if_index, host_if_name);
+      return -1;
+    }
+
+  vnet_sw_interface_admin_up (vnm, host_sw_if_index);
+  lcp_itf_pair_add (host_sw_if_index, phy_sw_if_index, host_if_name, vif_index,
+                    host_if_type, ns);
+
+  LCP_ITF_PAIR_INFO ("pair create: {%U, %U, %s}", format_vnet_sw_if_index_name,
+                     vnet_get_main (), phy_sw_if_index,
+                     format_vnet_sw_if_index_name, vnet_get_main (),
+                     host_sw_if_index, host_if_name);
+
+  return 0;
+}
+
+/** Walk callback: flag a pair as stale. */
+static walk_rc_t
+lcp_itf_pair_walk_mark (index_t lipi, void *ctx)
+{
+  lcp_itf_pair_t *lip;
+
+  lip = lcp_itf_pair_get (lipi);
+
+  lip->lip_flags |= LIP_FLAG_STALE;
+
+  return (WALK_CONTINUE);
+}
+
+/** Begin a replace: mark every existing pair stale. Always returns 0. */
+int
+lcp_itf_pair_replace_begin (void)
+{
+  lcp_itf_pair_walk (lcp_itf_pair_walk_mark, NULL);
+
+  return (0);
+}
+
+typedef struct lcp_itf_pair_sweep_ctx_t_
+{
+  index_t *indicies;
+} lcp_itf_pair_sweep_ctx_t;
+
+/** Walk callback: collect the indices of the pairs still flagged stale. */
+static walk_rc_t
+lcp_itf_pair_walk_sweep (index_t lipi, void *arg)
+{
+  lcp_itf_pair_sweep_ctx_t *ctx = arg;
+  lcp_itf_pair_t *lip;
+
+  lip = lcp_itf_pair_get (lipi);
+
+  if (lip->lip_flags & LIP_FLAG_STALE)
+    vec_add1 (ctx->indicies, lipi);
+
+  return (WALK_CONTINUE);
+}
+
+/**
+ * End a replace: delete every pair that is still flagged stale.
+ * Indices are collected first so the pool is not modified while walked.
+ * Always returns 0.
+ */
+int
+lcp_itf_pair_replace_end (void)
+{
+  lcp_itf_pair_sweep_ctx_t ctx = {
+    .indicies = NULL,
+  };
+  index_t *lipi;
+
+  lcp_itf_pair_walk (lcp_itf_pair_walk_sweep, &ctx);
+
+  vec_foreach (lipi, ctx.indicies)
+    lcp_itf_pair_delete_by_index (*lipi);
+
+  vec_free (ctx.indicies);
+  return (0);
+}
+
+/**
+ * Process node: each event datum is an index into lipn_names; a tap pair
+ * is created for the corresponding configured entry.
+ */
+static uword
+lcp_itf_pair_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
+                      vlib_frame_t *f)
+{
+  uword *event_data = 0;
+  uword *lipn_index;
+
+  while (1)
+    {
+      vlib_process_wait_for_event (vm);
+
+      vlib_process_get_events (vm, &event_data);
+
+      vec_foreach (lipn_index, event_data)
+        {
+          lcp_itf_pair_names_t *lipn;
+
+          lipn = &lipn_names[*lipn_index];
+          lcp_itf_pair_create (lipn->lipn_phy_sw_if_index,
+                               lipn->lipn_host_name, LCP_ITF_HOST_TAP,
+                               lipn->lipn_namespace);
+        }
+
+      vec_reset_length (event_data);
+    }
+
+  return 0;
+}
+
+VLIB_REGISTER_NODE (lcp_itf_pair_process_node, static) = {
+  .function = lcp_itf_pair_process,
+  .name = "linux-cp-itf-process",
+  .type = VLIB_NODE_TYPE_PROCESS,
+};
+
+/**
+ * Interface add/del callback. When a non-sub interface is created whose
+ * hardware name matches a configured pair, signal the process node to
+ * create the pair.
+ */
+static clib_error_t *
+lcp_itf_phy_add (vnet_main_t *vnm, u32 sw_if_index, u32 is_create)
+{
+  lcp_itf_pair_names_t *lipn;
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_hw_interface_t *hw;
+
+  if (!is_create || vnet_sw_interface_is_sub (vnm, sw_if_index))
+    return NULL;
+
+  hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+  vec_foreach (lipn, lipn_names)
+    {
+      if (!vec_cmp (hw->name, lipn->lipn_phy_name))
+        {
+          lipn->lipn_phy_sw_if_index = sw_if_index;
+
+          vlib_process_signal_event (vm, lcp_itf_pair_process_node.index, 0,
+                                     lipn - lipn_names);
+          break;
+        }
+    }
+
+  return NULL;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_phy_add);
+
+/**
+ * Hardware link up/down callback: mirror the PHY's carrier, and its speed
+ * on link up, onto the paired tap. Skipped in test mode.
+ */
+static clib_error_t *
+lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+  vnet_hw_interface_t *hi;
+  vnet_sw_interface_t *si;
+  index_t lipi;
+  lcp_itf_pair_t *lip;
+
+  hi = vnet_get_hw_interface_or_null (vnm, hw_if_index);
+  if (!hi)
+    return 0;
+
+  lipi = lcp_itf_pair_find_by_phy (hi->sw_if_index);
+  if (lipi == INDEX_INVALID)
+    return 0;
+
+  lip = lcp_itf_pair_get (lipi);
+  si = vnet_get_sw_interface_or_null (vnm, lip->lip_host_sw_if_index);
+  if (!si)
+    return 0;
+
+  if (!lcp_main.test_mode)
+    {
+      tap_set_carrier (si->hw_if_index,
+                       (flags & VNET_HW_INTERFACE_FLAG_LINK_UP));
+
+      if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP)
+        {
+          tap_set_speed (si->hw_if_index, hi->link_speed / 1000);
+        }
+    }
+
+  return 0;
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_itf_pair_link_up_down);
+
+/*
+ * Plugin init for the interface-pair code: registers "linux-cp" as a punt
+ * client, registers the "linux-cp-punt" node for one IPsec punt reason,
+ * turns on punting of UDP/TCP packets to unknown ports and registers the
+ * log class used by this file. Always returns NULL.
+ */
+static clib_error_t *
+lcp_itf_pair_init (vlib_main_t *vm)
+{
+  vlib_punt_hdl_t punt_hdl = vlib_punt_client_register ("linux-cp");
+
+  /* punt IKE */
+  vlib_punt_register (punt_hdl, ipsec_punt_reason[IPSEC_PUNT_IP4_SPI_UDP_0],
+                      "linux-cp-punt");
+
+  /* punt all unknown ports */
+  /* NOTE(review): 2nd argument presumably selects IPv4 (0) / IPv6 (1) */
+  udp_punt_unknown (vm, 0, 1);
+  udp_punt_unknown (vm, 1, 1);
+  tcp_punt_unknown (vm, 0, 1);
+  tcp_punt_unknown (vm, 1, 1);
+
+  lcp_itf_pair_logger = vlib_log_register_class ("linux-cp", "itf");
+
+  return NULL;
+}
+
+/* run only after the interface, TCP and UDP init functions */
+VLIB_INIT_FUNCTION (lcp_itf_pair_init) = {
+  .runs_after = VLIB_INITS ("vnet_interface_init", "tcp_init", "udp_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_interface.h b/src/plugins/linux-cp/lcp_interface.h
new file mode 100644
index 00000000000..d2f19e8481b
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_interface.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __LCP_ITF_PAIR_H__
+#define __LCP_ITF_PAIR_H__
+
+#include
+#include
+#include
+
+#include
+
+/* X-macro list of pair flags: (name, bit position, description) */
+#define foreach_lcp_itf_pair_flag _ (STALE, 0, "stale")
+
+typedef enum lip_flag_t_
+{
+#define _(a, b, c) LIP_FLAG_##a = (1 << b),
+  foreach_lcp_itf_pair_flag
+#undef _
+} lip_flag_t;
+
+/* Kind of Linux interface backing the host side of a pair */
+typedef enum
+{
+  LCP_ITF_HOST_TAP = 0,
+  LCP_ITF_HOST_TUN = 1,
+} lip_host_type_t;
+
+#define N_LCP_ITF_HOST (LCP_ITF_HOST_TUN + 1)
+
+/* One adjacency index per address family */
+typedef struct lcp_itf_phy_adj
+{
+  adj_index_t adj_index[N_AF];
+} lcp_itf_phy_adj_t;
+
+/**
+ * A pair of interfaces
+ */
+typedef struct lcp_itf_pair_t_
+{
+  u32 lip_host_sw_if_index;       /* VPP's sw_if_index for the host tap */
+  u32 lip_phy_sw_if_index;        /* VPP's sw_if_index for the phy */
+  u8 *lip_host_name;              /* linux's name for the tap */
+  u32 lip_vif_index;              /* linux's index for the tap */
+  u8 *lip_namespace;              /* namespace in which the tap lives */
+  lip_host_type_t lip_host_type;  /* type of host interface */
+  lcp_itf_phy_adj_t lip_phy_adjs; /* adjacencies for phy l3 interface */
+  lip_flag_t lip_flags;           /* Flags */
+  u8 lip_rewrite_len;             /* The length of an L2 MAC rewrite */
+  f64 lip_create_ts;              /* Timestamp of creation */
+} lcp_itf_pair_t;
+extern lcp_itf_pair_t *lcp_itf_pair_pool;
+
+extern vlib_node_registration_t lcp_ethernet_node;
+
+u8 *format_lcp_itf_pair (u8 *s, va_list *args);
+void lcp_itf_pair_show (u32 phy_sw_if_index);
+u32 lcp_itf_num_pairs (void);
+
+/**
+ * Get an interface-pair object from its VPP index
+ */
+extern lcp_itf_pair_t *lcp_itf_pair_get (index_t index);
+
+/**
+ * Find an interface-pair object from the Linux interface index
+ *
+ * @param vif_index Linux interface index of the host interface
+ * @return VPP's object index
+ */
+extern index_t lcp_itf_pair_find_by_vif (u32 vif_index);
+
+/**
+ * Create an interface-pair
+ *
+ * @return error code
+ */
+extern int lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index,
+                             u8 *host_name, u32 host_index,
+                             lip_host_type_t host_type, u8 *ns);
+extern int lcp_itf_pair_add_sub (u32 vif, u8 *host_name, u32 sub_sw_if_index,
+                                 u32 phy_sw_if_index, u8 *ns);
+extern int lcp_itf_pair_del (u32 phy_sw_if_index);
+
+/**
+ * Create an interface-pair from PHY sw_if_index and tap name.
+ *
+ * @return error code
+ */
+extern int lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
+                                lip_host_type_t host_if_type, u8 *ns);
+
+/**
+ * Delete a LCP_ITF_PAIR
+ */
+extern int lcp_itf_pair_delete (u32 phy_sw_if_index);
+
+/**
+ * Callback function invoked during a walk of all interface-pairs
+ */
+typedef walk_rc_t (*lcp_itf_pair_walk_cb_t) (index_t index, void *ctx);
+
+/**
+ * Walk/visit each of the interface pairs
+ */
+extern void lcp_itf_pair_walk (lcp_itf_pair_walk_cb_t cb, void *ctx);
+
+/**
+ * Begin and End the replace process
+ */
+extern int lcp_itf_pair_replace_begin (void);
+extern int lcp_itf_pair_replace_end (void);
+
+/**
+ * Retrieve the pair in the DP
+ */
+extern index_t *lip_db_by_phy;
+extern u32 *lip_db_by_host;
+
+/**
+ * Find the pair index for a PHY sw_if_index.
+ * Returns INDEX_INVALID when the index is beyond the end of the DB.
+ */
+always_inline index_t
+lcp_itf_pair_find_by_phy (u32 phy_sw_if_index)
+{
+  if (phy_sw_if_index >= vec_len (lip_db_by_phy))
+    return INDEX_INVALID;
+  return (lip_db_by_phy[phy_sw_if_index]);
+}
+
+/**
+ * Find the pair index for a host sw_if_index.
+ * Returns INDEX_INVALID when the index is beyond the end of the DB.
+ */
+always_inline index_t
+lcp_itf_pair_find_by_host (u32 host_sw_if_index)
+{
+  if (host_sw_if_index >= vec_len (lip_db_by_host))
+    return INDEX_INVALID;
+  return (lip_db_by_host[host_sw_if_index]);
+}
+
+/**
+ * manage interface auto creation
+ */
+void lcp_set_auto_intf (u8 is_auto);
+int lcp_auto_intf (void);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
+#endif
diff --git a/src/plugins/linux-cp/lcp_node.c b/src/plugins/linux-cp/lcp_node.c
new file mode 100644
index 00000000000..7f099ffe856
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_node.c
@@ -0,0 +1,919 @@
+/*
+ * lcp_node.c : linux control plane ethernet node
+ *
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* X-macro list of next nodes for linux-cp-punt: (symbol, description) */
+#define foreach_lip_punt                                                      \
+  _ (IO, "punt to host")                                                      \
+  _ (DROP, "unknown input interface")
+
+typedef enum
+{
+#define _(sym, str) LIP_PUNT_NEXT_##sym,
+  foreach_lip_punt
+#undef _
+    LIP_PUNT_N_NEXT,
+} lip_punt_next_t;
+
+/* Per-packet trace record: RX (phy) interface and chosen host interface */
+typedef struct lip_punt_trace_t_
+{
+  u32 phy_sw_if_index;
+  u32 host_sw_if_index;
+} lip_punt_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_lip_punt_trace (u8 *s, va_list *args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  lip_punt_trace_t *t = va_arg (*args, lip_punt_trace_t *);
+
+  s =
+    format (s, "lip-punt: %u -> %u", t->phy_sw_if_index, t->host_sw_if_index);
+
+  return s;
+}
+
+/**
+ * Pass punted packets from the PHY to the HOST.
+ *
+ * Each buffer's RX interface is looked up in the PHY DB. Without a pair the
+ * buffer goes to the DROP next; otherwise its TX interface is set to the
+ * pair's host interface and it goes to the IO next. For TAP pairs the
+ * buffer is first rewound to its ethernet header.
+ */
+VLIB_NODE_FN (lip_punt_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, *to_next, n_left_to_next;
+  lip_punt_next_t next_index;
+
+  next_index = node->cached_next_index;
+  n_left_from = frame->n_vectors;
+  from = vlib_frame_vector_args (frame);
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          vlib_buffer_t *b0;
+          const lcp_itf_pair_t *lip0 = NULL;
+          u32 next0 = ~0;
+          u32 bi0, lipi0;
+          u32 sw_if_index0;
+          u8 len0;
+
+          bi0 = to_next[0] = from[0];
+
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+          /* drop unless a pair is found below */
+          next0 = LIP_PUNT_NEXT_DROP;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+          lipi0 = lcp_itf_pair_find_by_phy (sw_if_index0);
+          if (PREDICT_FALSE (lipi0 == INDEX_INVALID))
+            goto trace0;
+
+          lip0 = lcp_itf_pair_get (lipi0);
+          next0 = LIP_PUNT_NEXT_IO;
+          vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip0->lip_host_sw_if_index;
+
+          if (PREDICT_TRUE (lip0->lip_host_type == LCP_ITF_HOST_TAP))
+            {
+              /*
+               * rewind to ethernet header
+               */
+              len0 = ((u8 *) vlib_buffer_get_current (b0) -
+                      (u8 *) ethernet_buffer_get_header (b0));
+              vlib_buffer_advance (b0, -len0);
+            }
+          /* Tun packets don't need any special treatment, just need to
+           * be escorted past the TTL decrement. If we still want to use
+           * ip[46]-punt-redirect with these, we could just set the
+           * VNET_BUFFER_F_LOCALLY_ORIGINATED in an 'else {}' here and
+           * then pass to the next node on the ip[46]-punt feature arc
+           */
+
+        trace0:
+          if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              lip_punt_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->phy_sw_if_index = sw_if_index0;
+              /* lip0 is only valid when a pair was found */
+              t->host_sw_if_index =
+                (lipi0 == INDEX_INVALID) ? ~0 : lip0->lip_host_sw_if_index;
+            }
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                           n_left_to_next, bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (lip_punt_node) = {
+  .name = "linux-cp-punt",
+  .vector_size = sizeof (u32),
+  .format_trace = format_lip_punt_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_next_nodes = LIP_PUNT_N_NEXT,
+  .next_nodes = {
+    [LIP_PUNT_NEXT_DROP] = "error-drop",
+    [LIP_PUNT_NEXT_IO] = "interface-output",
+  },
+};
+
+/* X-macro list of next nodes for linux-cp-punt-l3: (symbol, description) */
+#define foreach_lcp_punt_l3 _ (DROP, "unknown error")
+
+typedef enum
+{
+#define _(sym, str) LCP_LOCAL_NEXT_##sym,
+  foreach_lcp_punt_l3
+#undef _
+    LCP_LOCAL_N_NEXT,
+} lcp_punt_l3_next_t;
+
+/* Per-packet trace record for linux-cp-punt-l3 */
+typedef struct lcp_punt_l3_trace_t_
+{
+  u32 phy_sw_if_index;
+} lcp_punt_l3_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_lcp_punt_l3_trace (u8 *s, va_list *args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  lcp_punt_l3_trace_t *t = va_arg (*args, lcp_punt_l3_trace_t *);
+
+  s = format (s, "linux-cp-punt-l3: %u", t->phy_sw_if_index);
+
+  return s;
+}
+
+VLIB_NODE_FN (lcp_punt_l3_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, *to_next, n_left_to_next;
+  lip_punt_next_t next_index;
+
+  next_index = node->cached_next_index;
+  n_left_from = frame->n_vectors;
+  from = vlib_frame_vector_args (frame);
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          vlib_buffer_t *b0;
+          u32 next0 = LCP_LOCAL_NEXT_DROP;
+          u32 bi0;
+          index_t lipi0;
+          lcp_itf_pair_t *lip0;
+
+          bi0 = to_next[0] = from[0];
+
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          vnet_feature_next (&next0, b0);
+
+          lipi0 =
+
lcp_itf_pair_find_by_phy (vnet_buffer (b0)->sw_if_index[VLIB_RX]); + if (lipi0 != INDEX_INVALID) + { + /* + * Avoid TTL check for packets which arrived on a tunnel and + * are being punted to the local host. + */ + lip0 = lcp_itf_pair_get (lipi0); + if (lip0->lip_host_type == LCP_ITF_HOST_TUN) + b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + } + + if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED))) + { + lcp_punt_l3_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->phy_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (lcp_punt_l3_node) = { + .name = "linux-cp-punt-l3", + .vector_size = sizeof (u32), + .format_trace = format_lcp_punt_l3_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = 1, + .next_nodes = { + [LCP_LOCAL_NEXT_DROP] = "error-drop", + }, +}; + +VNET_FEATURE_INIT (lcp_punt_l3_ip4, static) = { + .arc_name = "ip4-punt", + .node_name = "linux-cp-punt-l3", + .runs_before = VNET_FEATURES ("ip4-punt-redirect"), +}; + +VNET_FEATURE_INIT (lip_punt_l3_ip6, static) = { + .arc_name = "ip6-punt", + .node_name = "linux-cp-punt-l3", + .runs_before = VNET_FEATURES ("ip6-punt-redirect"), +}; + +#define foreach_lcp_xc \ + _ (DROP, "drop") \ + _ (XC_IP4, "x-connnect-ip4") \ + _ (XC_IP6, "x-connnect-ip6") + +typedef enum +{ +#define _(sym, str) LCP_XC_NEXT_##sym, + foreach_lcp_xc +#undef _ + LCP_XC_N_NEXT, +} lcp_xc_next_t; + +typedef struct lcp_xc_trace_t_ +{ + u32 phy_sw_if_index; + adj_index_t adj_index; +} lcp_xc_trace_t; + +/* packet trace format function */ +static u8 * +format_lcp_xc_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + lcp_xc_trace_t *t = va_arg (*args, lcp_xc_trace_t *); + 
+ s = format (s, "lcp-xc: itf:%d adj:%d", t->phy_sw_if_index, t->adj_index); + + return s; +} + +/** + * X-connect all packets from the HOST to the PHY. + * + * This runs in either the IP4 or IP6 path. The MAC rewrite on the received + * packet from the host is used as a key to find the adjacency used on the phy. + * This allows this code to start the feature arc on that adjacency. + * Consequently, all packet sent from the host are also subject to output + * features, which is symmetric w.r.t. to input features. + */ +static_always_inline u32 +lcp_xc_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, + ip_address_family_t af) +{ + u32 n_left_from, *from, *to_next, n_left_to_next; + lcp_xc_next_t next_index; + ip_lookup_main_t *lm; + + next_index = 0; + n_left_from = frame->n_vectors; + from = vlib_frame_vector_args (frame); + + if (AF_IP4 == af) + lm = &ip4_main.lookup_main; + else + lm = &ip6_main.lookup_main; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + const ethernet_header_t *eth; + const lcp_itf_pair_t *lip; + u32 next0, bi0, lipi, ai; + vlib_buffer_t *b0; + + bi0 = to_next[0] = from[0]; + + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + lipi = + lcp_itf_pair_find_by_host (vnet_buffer (b0)->sw_if_index[VLIB_RX]); + lip = lcp_itf_pair_get (lipi); + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip->lip_phy_sw_if_index; + vlib_buffer_advance (b0, -lip->lip_rewrite_len); + eth = vlib_buffer_get_current (b0); + + if (ethernet_address_cast (eth->dst_address)) + ai = lip->lip_phy_adjs.adj_index[af]; + else + ai = lcp_adj_lkup ((u8 *) eth, lip->lip_rewrite_len, + vnet_buffer (b0)->sw_if_index[VLIB_TX]); + + if (ADJ_INDEX_INVALID != ai) + { + const ip_adjacency_t *adj; + + adj = adj_get (ai); + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai; + next0 = 
adj->rewrite_header.next_index; + vnet_buffer (b0)->ip.save_rewrite_length = lip->lip_rewrite_len; + + if (PREDICT_FALSE (adj->rewrite_header.flags & + VNET_REWRITE_HAS_FEATURES)) + vnet_feature_arc_start_w_cfg_index ( + lm->output_feature_arc_index, + vnet_buffer (b0)->sw_if_index[VLIB_TX], &next0, b0, + adj->ia_cfg_index); + } + else + next0 = LCP_XC_NEXT_DROP; + + if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED))) + { + lcp_xc_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->phy_sw_if_index = lip->lip_phy_sw_if_index; + t->adj_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_NODE_FN (lcp_xc_ip4) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return (lcp_xc_inline (vm, node, frame, AF_IP4)); +} + +VLIB_NODE_FN (lcp_xc_ip6) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return (lcp_xc_inline (vm, node, frame, AF_IP6)); +} + +VLIB_REGISTER_NODE (lcp_xc_ip4) = { .name = "linux-cp-xc-ip4", + .vector_size = sizeof (u32), + .format_trace = format_lcp_xc_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .sibling_of = "ip4-rewrite" }; + +VNET_FEATURE_INIT (lcp_xc_ip4_ucast_node, static) = { + .arc_name = "ip4-unicast", + .node_name = "linux-cp-xc-ip4", +}; +VNET_FEATURE_INIT (lcp_xc_ip4_mcast_node, static) = { + .arc_name = "ip4-multicast", + .node_name = "linux-cp-xc-ip4", +}; + +VLIB_REGISTER_NODE (lcp_xc_ip6) = { .name = "linux-cp-xc-ip6", + .vector_size = sizeof (u32), + .format_trace = format_lcp_xc_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .sibling_of = "ip6-rewrite" }; + +VNET_FEATURE_INIT (lcp_xc_ip6_ucast_node, static) = { + .arc_name = "ip6-unicast", + .node_name = "linux-cp-xc-ip6", +}; +VNET_FEATURE_INIT (lcp_xc_ip6_mcast_node, static) = { + .arc_name = "ip6-multicast", + .node_name = 
"linux-cp-xc-ip6", +}; + +typedef enum +{ + LCP_XC_L3_NEXT_XC, + LCP_XC_L3_N_NEXT, +} lcp_xc_l3_next_t; + +/** + * X-connect all packets from the HOST to the PHY on L3 interfaces + * + * There's only one adjacency that can be used on thises links. + */ +static_always_inline u32 +lcp_xc_l3_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, ip_address_family_t af) +{ + u32 n_left_from, *from, *to_next, n_left_to_next; + lcp_xc_next_t next_index; + + next_index = 0; + n_left_from = frame->n_vectors; + from = vlib_frame_vector_args (frame); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t *b0; + const lcp_itf_pair_t *lip; + u32 next0 = ~0; + u32 bi0, lipi; + + bi0 = to_next[0] = from[0]; + + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* Flag buffers as locally originated. Otherwise their TTL will + * be checked & decremented. That would break services like BGP + * which set a TTL of 1 by default. + */ + b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + + lipi = + lcp_itf_pair_find_by_host (vnet_buffer (b0)->sw_if_index[VLIB_RX]); + lip = lcp_itf_pair_get (lipi); + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip->lip_phy_sw_if_index; + next0 = LCP_XC_L3_NEXT_XC; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = + lip->lip_phy_adjs.adj_index[af]; + + if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED))) + { + lcp_xc_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->phy_sw_if_index = lip->lip_phy_sw_if_index; + t->adj_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +/** + * X-connect all packets from the HOST to the PHY. 
+ */ +VLIB_NODE_FN (lcp_xc_l3_ip4_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return (lcp_xc_l3_inline (vm, node, frame, AF_IP4)); +} + +VLIB_NODE_FN (lcp_xc_l3_ip6_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return (lcp_xc_l3_inline (vm, node, frame, AF_IP6)); +} + +VLIB_REGISTER_NODE (lcp_xc_l3_ip4_node) = { + .name = "linux-cp-xc-l3-ip4", + .vector_size = sizeof (u32), + .format_trace = format_lcp_xc_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = LCP_XC_L3_N_NEXT, + .next_nodes = { + [LCP_XC_L3_NEXT_XC] = "ip4-midchain", + }, +}; + +VNET_FEATURE_INIT (lcp_xc_node_l3_ip4_unicast, static) = { + .arc_name = "ip4-unicast", + .node_name = "linux-cp-xc-l3-ip4", +}; + +VNET_FEATURE_INIT (lcp_xc_node_l3_ip4_multicaast, static) = { + .arc_name = "ip4-multicast", + .node_name = "linux-cp-xc-l3-ip4", +}; + +VLIB_REGISTER_NODE (lcp_xc_l3_ip6_node) = { + .name = "linux-cp-xc-l3-ip6", + .vector_size = sizeof (u32), + .format_trace = format_lcp_xc_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = LCP_XC_L3_N_NEXT, + .next_nodes = { + [LCP_XC_L3_NEXT_XC] = "ip6-midchain", + }, +}; + +VNET_FEATURE_INIT (lcp_xc_node_l3_ip6_unicast, static) = { + .arc_name = "ip6-unicast", + .node_name = "linux-cp-xc-l3-ip6", +}; + +VNET_FEATURE_INIT (lcp_xc_node_l3_ip6_multicast, static) = { + .arc_name = "ip6-multicast", + .node_name = "linux-cp-xc-l3-ip6", +}; + +#define foreach_lcp_arp \ + _ (DROP, "error-drop") \ + _ (IO, "interface-output") + +typedef enum +{ +#define _(sym, str) LCP_ARP_NEXT_##sym, + foreach_lcp_arp +#undef _ + LCP_ARP_N_NEXT, +} lcp_arp_next_t; + +typedef struct lcp_arp_trace_t_ +{ + u32 rx_sw_if_index; + u16 arp_opcode; +} lcp_arp_trace_t; + +/* packet trace format function */ +static u8 * +format_lcp_arp_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + 
lcp_arp_trace_t *t = va_arg (*args, lcp_arp_trace_t *);
+
+  s = format (s, "rx-sw-if-index: %u opcode: %u", t->rx_sw_if_index,
+              t->arp_opcode);
+
+  return s;
+}
+
+/**
+ * Punt ARP replies to the host.
+ *
+ * Every packet continues along the feature arc it arrived on
+ * (vnet_feature_next). In addition, each ARP reply received on an
+ * interface that has an interface-pair is copied, with its ethernet header
+ * restored, and the copy is sent to interface-output towards the pair's
+ * host interface.
+ *
+ * The copy is best effort: vlib_buffer_copy() returns NULL when no buffer
+ * can be allocated, in which case the reply is not mirrored to the host
+ * and the original packet is processed as usual.
+ *
+ * @return the number of packets in the input frame.
+ */
+VLIB_NODE_FN (lcp_arp_phy_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, *to_next, n_left_to_next;
+  lcp_arp_next_t next_index;
+  /* at most one copy per input packet, so a frame's worth is enough */
+  u32 reply_copies[VLIB_FRAME_SIZE];
+  u32 n_copies = 0;
+
+  next_index = node->cached_next_index;
+  n_left_from = frame->n_vectors;
+  from = vlib_frame_vector_args (frame);
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from >= 2 && n_left_to_next >= 2)
+        {
+          u32 next0, next1, bi0, bi1;
+          vlib_buffer_t *b0, *b1;
+          ethernet_arp_header_t *arp0, *arp1;
+
+          bi0 = to_next[0] = from[0];
+          bi1 = to_next[1] = from[1];
+
+          from += 2;
+          n_left_from -= 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+
+          next0 = next1 = LCP_ARP_NEXT_DROP;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          b1 = vlib_get_buffer (vm, bi1);
+
+          arp0 = vlib_buffer_get_current (b0);
+          arp1 = vlib_buffer_get_current (b1);
+
+          vnet_feature_next (&next0, b0);
+          vnet_feature_next (&next1, b1);
+
+          /*
+           * Replies might need to be received by the host, so we
+           * make a copy of them.
+           */
+          if (arp0->opcode == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
+            {
+              lcp_itf_pair_t *lip0 = 0;
+              u32 lipi0;
+              vlib_buffer_t *c0;
+              u8 len0;
+
+              lipi0 = lcp_itf_pair_find_by_phy (
+                vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+              lip0 = lcp_itf_pair_get (lipi0);
+
+              if (lip0)
+                {
+                  /*
+                   * rewind to eth header, copy, advance back to current
+                   */
+                  len0 = ((u8 *) vlib_buffer_get_current (b0) -
+                          (u8 *) ethernet_buffer_get_header (b0));
+                  vlib_buffer_advance (b0, -len0);
+                  c0 = vlib_buffer_copy (vm, b0);
+                  vlib_buffer_advance (b0, len0);
+
+                  /* the copy fails when buffers are exhausted */
+                  if (c0)
+                    {
+                      /* Send to the host */
+                      vnet_buffer (c0)->sw_if_index[VLIB_TX] =
+                        lip0->lip_host_sw_if_index;
+                      reply_copies[n_copies++] =
+                        vlib_get_buffer_index (vm, c0);
+                    }
+                }
+            }
+          if (arp1->opcode == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
+            {
+              lcp_itf_pair_t *lip1 = 0;
+              u32 lipi1;
+              vlib_buffer_t *c1;
+              u8 len1;
+
+              lipi1 = lcp_itf_pair_find_by_phy (
+                vnet_buffer (b1)->sw_if_index[VLIB_RX]);
+              lip1 = lcp_itf_pair_get (lipi1);
+
+              if (lip1)
+                {
+                  /*
+                   * rewind to reveal the ethernet header
+                   */
+                  len1 = ((u8 *) vlib_buffer_get_current (b1) -
+                          (u8 *) ethernet_buffer_get_header (b1));
+                  vlib_buffer_advance (b1, -len1);
+                  c1 = vlib_buffer_copy (vm, b1);
+                  vlib_buffer_advance (b1, len1);
+
+                  /* the copy fails when buffers are exhausted */
+                  if (c1)
+                    {
+                      /* Send to the host */
+                      vnet_buffer (c1)->sw_if_index[VLIB_TX] =
+                        lip1->lip_host_sw_if_index;
+                      reply_copies[n_copies++] =
+                        vlib_get_buffer_index (vm, c1);
+                    }
+                }
+            }
+
+          if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              lcp_arp_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->rx_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+              /* the trace formatter prints the opcode, so record it */
+              t->arp_opcode = clib_net_to_host_u16 (arp0->opcode);
+            }
+          if (PREDICT_FALSE ((b1->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              lcp_arp_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t));
+              t->rx_sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+              t->arp_opcode = clib_net_to_host_u16 (arp1->opcode);
+            }
+
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+                                           n_left_to_next, bi0, bi1, next0,
+                                           next1);
+        }
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 next0, bi0;
+          vlib_buffer_t *b0;
+          ethernet_arp_header_t *arp0;
+          u16 arp_opcode;
+
+          bi0 = to_next[0] = from[0];
+
+          from += 1;
+          n_left_from -= 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+          next0 = LCP_ARP_NEXT_DROP;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          arp0 = vlib_buffer_get_current (b0);
+
+          vnet_feature_next (&next0, b0);
+
+          /*
+           * Replies might need to be received by the host, so we
+           * make a copy of them.
+           */
+          arp_opcode = clib_net_to_host_u16 (arp0->opcode);
+
+          if (arp_opcode == ETHERNET_ARP_OPCODE_reply)
+            {
+              lcp_itf_pair_t *lip0 = 0;
+              vlib_buffer_t *c0;
+              u32 lipi0;
+              u8 len0;
+
+              lipi0 = lcp_itf_pair_find_by_phy (
+                vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+              lip0 = lcp_itf_pair_get (lipi0);
+
+              if (lip0)
+                {
+
+                  /*
+                   * rewind to reveal the ethernet header
+                   */
+                  len0 = ((u8 *) vlib_buffer_get_current (b0) -
+                          (u8 *) ethernet_buffer_get_header (b0));
+                  vlib_buffer_advance (b0, -len0);
+                  c0 = vlib_buffer_copy (vm, b0);
+                  vlib_buffer_advance (b0, len0);
+
+                  /* the copy fails when buffers are exhausted */
+                  if (c0)
+                    {
+                      /* Send to the host */
+                      vnet_buffer (c0)->sw_if_index[VLIB_TX] =
+                        lip0->lip_host_sw_if_index;
+                      reply_copies[n_copies++] =
+                        vlib_get_buffer_index (vm, c0);
+                    }
+                }
+            }
+
+          if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              lcp_arp_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->rx_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+              t->arp_opcode = arp_opcode;
+            }
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                           n_left_to_next, bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  /* hand all the copies made above to interface-output in one go */
+  if (n_copies)
+    vlib_buffer_enqueue_to_single_next (vm, node, reply_copies,
+                                        LCP_ARP_NEXT_IO, n_copies);
+
+  return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (lcp_arp_phy_node) = {
+  .name = "linux-cp-arp-phy",
+  .vector_size = sizeof (u32),
+  .format_trace = format_lcp_arp_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = LINUXCP_N_ERROR,
+  .error_counters = linuxcp_error_counters,
+
+  .n_next_nodes = LCP_ARP_N_NEXT,
+  .next_nodes = {
+
[LCP_ARP_NEXT_DROP] = "error-drop",
+    [LCP_ARP_NEXT_IO] = "interface-output",
+  },
+};
+
+VNET_FEATURE_INIT (lcp_arp_phy_arp_feat, static) = {
+  .arc_name = "arp",
+  .node_name = "linux-cp-arp-phy",
+  .runs_before = VNET_FEATURES ("arp-reply"),
+};
+
+/**
+ * x-connect ARP packets from the host to the phy
+ *
+ * Each packet's TX interface is set to the phy of the interface-pair found
+ * via the RX (host) interface, the buffer is rewound to its ethernet header
+ * and it is sent to interface-output.
+ *
+ * @return the number of packets in the input frame.
+ */
+VLIB_NODE_FN (lcp_arp_host_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, *to_next, n_left_to_next;
+  lcp_arp_next_t next_index;
+
+  next_index = node->cached_next_index;
+  n_left_from = frame->n_vectors;
+  from = vlib_frame_vector_args (frame);
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          const ethernet_arp_header_t *arp0;
+          const lcp_itf_pair_t *lip0;
+          lcp_arp_next_t next0;
+          vlib_buffer_t *b0;
+          u32 bi0, lipi0;
+          u8 len0;
+
+          bi0 = to_next[0] = from[0];
+
+          from += 1;
+          n_left_from -= 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+          next0 = LCP_ARP_NEXT_IO;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          /*
+           * Remember the ARP header before rewinding; the current data is
+           * read as the ARP header here, as linux-cp-arp-phy does on the
+           * same arc.
+           */
+          arp0 = vlib_buffer_get_current (b0);
+
+          lipi0 =
+            lcp_itf_pair_find_by_host (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+          lip0 = lcp_itf_pair_get (lipi0);
+
+          /* Send to the phy */
+          vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip0->lip_phy_sw_if_index;
+
+          /* rewind to reveal the ethernet header */
+          len0 = ((u8 *) vlib_buffer_get_current (b0) -
+                  (u8 *) ethernet_buffer_get_header (b0));
+          vlib_buffer_advance (b0, -len0);
+
+          if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              lcp_arp_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->rx_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+              /* the trace formatter prints the opcode, so record it */
+              t->arp_opcode = clib_net_to_host_u16 (arp0->opcode);
+            }
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                           n_left_to_next, bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (lcp_arp_host_node) = {
+  .name = "linux-cp-arp-host",
+  .vector_size = sizeof (u32),
+  .format_trace = format_lcp_arp_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors =
LINUXCP_N_ERROR, + .error_counters = linuxcp_error_counters, + + .n_next_nodes = LCP_ARP_N_NEXT, + .next_nodes = { + [LCP_ARP_NEXT_DROP] = "error-drop", + [LCP_ARP_NEXT_IO] = "interface-output", + }, +}; + +VNET_FEATURE_INIT (lcp_arp_host_arp_feat, static) = { + .arc_name = "arp", + .node_name = "linux-cp-arp-host", + .runs_before = VNET_FEATURES ("arp-reply"), +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/linux-cp/test/lcp_unittest.c b/src/plugins/linux-cp/test/lcp_unittest.c new file mode 100644 index 00000000000..57858eb8df0 --- /dev/null +++ b/src/plugins/linux-cp/test/lcp_unittest.c @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2021 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include + +#include + +static u32 host_vif; +const static char *host_template = "tap%d"; + +static clib_error_t * +lcp_add_pair_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + u32 phy_sw_if_index, host_sw_if_index; + u8 is_add, *host_name; + vnet_main_t *vnm = vnet_get_main (); + + ++host_vif; + host_name = format (NULL, host_template, host_vif); + phy_sw_if_index = host_sw_if_index = ~0; + is_add = 1; + lcp_main.test_mode = 1; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "add")) + is_add = 1; + else if (unformat (input, "del")) + is_add = 0; + else if (unformat (input, "phy %U", unformat_vnet_sw_interface, vnm, + &phy_sw_if_index)) + ; + else if (unformat (input, "host %U", unformat_vnet_sw_interface, vnm, + &host_sw_if_index)) + ; + else + return clib_error_return (0, "unknown input:%U", format_unformat_error, + input); + } + + if (phy_sw_if_index == ~0) + return clib_error_return (0, "ERROR; no phy:%U", format_unformat_error, + input); + + lip_host_type_t host_type = + (vnet_sw_interface_is_p2p (vnm, phy_sw_if_index) ? 
LCP_ITF_HOST_TUN : + LCP_ITF_HOST_TAP); + + int rv; + + if (is_add) + { + if (host_sw_if_index == ~0) + return clib_error_return (0, "ERROR no-host:%U", format_unformat_error, + input); + + rv = lcp_itf_pair_add (host_sw_if_index, phy_sw_if_index, host_name, + host_vif, host_type, NULL); + } + else + rv = lcp_itf_pair_del (phy_sw_if_index); + + if (rv) + return clib_error_return (0, "ERROR rv:%d", rv); + + return (NULL); +} + +VLIB_CLI_COMMAND (test_time_range_command, static) = { + .path = "test lcp", + .short_help = "lcp [add|del] phy host ", + .function = lcp_add_pair_command_fn, +}; + +#include +#include +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "Linux Control Plane - Unit Test", + .default_disabled = 1, +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/linux-cp/test/test_linux_cp.py b/src/plugins/linux-cp/test/test_linux_cp.py new file mode 100644 index 00000000000..df38681b16e --- /dev/null +++ b/src/plugins/linux-cp/test/test_linux_cp.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 + +import unittest + +from scapy.layers.inet import IP, UDP +from scapy.layers.inet6 import IPv6, Raw +from scapy.layers.l2 import Ether, ARP, Dot1Q + +from vpp_object import VppObject +from framework import VppTestCase, VppTestRunner + + +class VppLcpPair(VppObject): + def __init__(self, test, phy, host): + self._test = test + self.phy = phy + self.host = host + + def add_vpp_config(self): + self._test.vapi.cli("test lcp add phy %s host %s" % + (self.phy, self.host)) + self._test.registry.register(self, self._test.logger) + return self + + def remove_vpp_config(self): + self._test.vapi.cli("test lcp del phy %s host %s" % + (self.phy, self.host)) + + def object_id(self): + return "lcp:%d:%d" % (self.phy.sw_if_index, + self.host.sw_if_index) + + def query_vpp_config(self): + pairs = list(self._test.vapi.vpp.details_iter( + 
self._test.vapi.lcp_itf_pair_get)) + + for p in pairs: + if p.phy_sw_if_index == self.phy.sw_if_index and \ + p.host_sw_if_index == self.host.sw_if_index: + return True + return False + + +class TestLinuxCP(VppTestCase): + """ Linux Control Plane """ + + extra_vpp_plugin_config = ["plugin", + "linux_cp_plugin.so", + "{", "enable", "}", + "plugin", + "linux_cp_unittest_plugin.so", + "{", "enable", "}"] + + @classmethod + def setUpClass(cls): + super(TestLinuxCP, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(TestLinuxCP, cls).tearDownClass() + + def setUp(self): + super(TestLinuxCP, self).setUp() + + # create 4 pg interfaces so there are a few addresses + # in the FIB + self.create_pg_interfaces(range(4)) + + for i in self.pg_interfaces: + i.admin_up() + + def tearDown(self): + for i in self.pg_interfaces: + i.admin_down() + super(TestLinuxCP, self).tearDown() + + def test_linux_cp_tap(self): + """ Linux CP TAP """ + + # + # Setup + # + + arp_opts = {"who-has": 1, "is-at": 2} + + # create two pairs, with a bunch of hosts on the phys + hosts = [self.pg0, self.pg1] + phys = [self.pg2, self.pg3] + N_HOSTS = 4 + + for phy in phys: + phy.config_ip4() + phy.generate_remote_hosts(4) + phy.configure_ipv4_neighbors() + + pair1 = VppLcpPair(self, phys[0], hosts[0]).add_vpp_config() + pair2 = VppLcpPair(self, phys[1], hosts[1]).add_vpp_config() + + self.logger.info(self.vapi.cli("sh lcp adj verbose")) + self.logger.info(self.vapi.cli("sh lcp")) + + # + # Traffic Tests + # + + # hosts to phys + for phy, host in zip(phys, hosts): + for j in range(N_HOSTS): + p = (Ether(src=phy.local_mac, + dst=phy.remote_hosts[j].mac) / + IP(src=phy.local_ip4, + dst=phy.remote_hosts[j].ip4) / + UDP(sport=1234, dport=1234) / + Raw()) + + rxs = self.send_and_expect(host, [p], phy) + + # verify packet is unchanged + for rx in rxs: + self.assertEqual(p.show2(True), rx.show2(True)) + + # ARPs x-connect to phy + p = (Ether(dst="ff:ff:ff:ff:ff:ff", + src=phy.remote_hosts[j].mac) / 
+ ARP(op="who-has", + hwdst=phy.remote_hosts[j].mac, + hwsrc=phy.local_mac, + psrc=phy.local_ip4, + pdst=phy.remote_hosts[j].ip4)) + + rxs = self.send_and_expect(host, [p], phy) + + # verify packet is unchanged + for rx in rxs: + self.assertEqual(p.show2(True), rx.show2(True)) + + # phy to host + for phy, host in zip(phys, hosts): + for j in range(N_HOSTS): + p = (Ether(dst=phy.local_mac, + src=phy.remote_hosts[j].mac) / + IP(dst=phy.local_ip4, + src=phy.remote_hosts[j].ip4) / + UDP(sport=1234, dport=1234) / + Raw()) + + rxs = self.send_and_expect(phy, [p], host) + + # verify packet is unchanged + for rx in rxs: + self.assertEqual(p.show2(True), rx.show2(True)) + + # ARPs rx'd on the phy are sent to the host + p = (Ether(dst="ff:ff:ff:ff:ff:ff", + src=phy.remote_hosts[j].mac) / + ARP(op="is-at", + hwsrc=phy.remote_hosts[j].mac, + hwdst=phy.local_mac, + pdst=phy.local_ip4, + psrc=phy.remote_hosts[j].ip4)) + + rxs = self.send_and_expect(phy, [p], host) + + # verify packet is unchanged + for rx in rxs: + self.assertEqual(p.show2(True), rx.show2(True)) + + # cleanup + for phy in phys: + phy.unconfig_ip4() + + +if __name__ == '__main__': + unittest.main(testRunner=VppTestRunner) diff --git a/src/vlibapi/api_helper_macros.h b/src/vlibapi/api_helper_macros.h index 8064d67a14e..d49282e9e65 100644 --- a/src/vlibapi/api_helper_macros.h +++ b/src/vlibapi/api_helper_macros.h @@ -201,33 +201,45 @@ do { \ vl_api_send_msg (rp, (u8 *)rmp); \ } while(0); -#define REPLY_AND_DETAILS_MACRO(t, p, body) \ -do { \ - vl_api_registration_t *rp; \ - rp = vl_api_client_index_to_registration (mp->client_index); \ - if (rp == 0) \ - return; \ - u32 cursor = clib_net_to_host_u32 (mp->cursor); \ - vlib_main_t *vm = vlib_get_main (); \ - f64 start = vlib_time_now (vm); \ - if (pool_is_free_index (p, cursor)) { \ - cursor = pool_next_index (p, cursor); \ - if (cursor == ~0) \ - rv = VNET_API_ERROR_INVALID_VALUE; \ - } \ - while (cursor != ~0) { \ - do {body;} while (0); \ - cursor = 
pool_next_index (p, cursor); \ - if (vl_api_process_may_suspend (vm, rp, start)) { \ - if (cursor != ~0) \ - rv = VNET_API_ERROR_EAGAIN; \ - break; \ - } \ - } \ - REPLY_MACRO2 (t, ({ \ - rmp->cursor = clib_host_to_net_u32 (cursor); \ - })); \ -} while(0); +#define REPLY_AND_DETAILS_MACRO(t, p, body) \ + do \ + { \ + if (pool_elts (p) == 0) \ + { \ + REPLY_MACRO (t); \ + break; \ + } \ + vl_api_registration_t *rp; \ + rp = vl_api_client_index_to_registration (mp->client_index); \ + if (rp == 0) \ + return; \ + u32 cursor = clib_net_to_host_u32 (mp->cursor); \ + vlib_main_t *vm = vlib_get_main (); \ + f64 start = vlib_time_now (vm); \ + if (pool_is_free_index (p, cursor)) \ + { \ + cursor = pool_next_index (p, cursor); \ + if (cursor == ~0) \ + rv = VNET_API_ERROR_INVALID_VALUE; \ + } \ + while (cursor != ~0) \ + { \ + do \ + { \ + body; \ + } \ + while (0); \ + cursor = pool_next_index (p, cursor); \ + if (vl_api_process_may_suspend (vm, rp, start)) \ + { \ + if (cursor != ~0) \ + rv = VNET_API_ERROR_EAGAIN; \ + break; \ + } \ + } \ + REPLY_MACRO2 (t, ({ rmp->cursor = clib_host_to_net_u32 (cursor); })); \ + } \ + while (0); #define REPLY_AND_DETAILS_VEC_MACRO(t, v, mp, rmp, rv, body) \ do { \ diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h index 120d92cb11d..dbb0549314a 100644 --- a/src/vnet/interface_funcs.h +++ b/src/vnet/interface_funcs.h @@ -342,6 +342,14 @@ vnet_sw_interface_is_up (vnet_main_t * vnm, u32 sw_if_index) vnet_sw_interface_is_link_up (vnm, sw_if_index)); } +always_inline uword +vnet_sw_interface_is_sub (vnet_main_t *vnm, u32 sw_if_index) +{ + vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index); + + return (sw->sw_if_index != sw->sup_sw_if_index); +} + always_inline vlib_frame_t * vnet_get_frame_to_sw_interface (vnet_main_t * vnm, u32 sw_if_index) { diff --git a/src/vnet/l2/l2_input.h b/src/vnet/l2/l2_input.h index 96a0a1afa86..adc130e21fc 100644 --- a/src/vnet/l2/l2_input.h +++ b/src/vnet/l2/l2_input.h @@ -294,8 
+294,8 @@ u32 set_int_l2_mode (vlib_main_t * vm, u32 bd_index, l2_bd_port_type_t port_type, u32 shg, u32 xc_sw_if_index); -static inline void -vnet_update_l2_len (vlib_buffer_t * b) +static inline u16 +vnet_update_l2_len (vlib_buffer_t *b) { ethernet_header_t *eth; u16 ethertype; @@ -326,6 +326,8 @@ vnet_update_l2_len (vlib_buffer_t * b) } } ethernet_buffer_set_vlan_count (b, vlan_count); + + return (ethertype); } /* -- 2.16.6