linux-cp: Linux Interface Mirroring for Control Plane Integration 59/30759/11
authorNeale Ranns <nranns@cisco.com>
Thu, 24 Dec 2020 09:16:09 +0000 (09:16 +0000)
committerOle Trøan <otroan@employees.org>
Thu, 4 Feb 2021 14:16:34 +0000 (14:16 +0000)
Type: feature

please see FEATURE.yaml for details.

Signed-off-by: Neale Ranns <nranns@cisco.com>
Signed-off-by: Matthew Smith <mgsmith@netgate.com>
Signed-off-by: Jon Loeliger <jdl@netgate.com>
Signed-off-by: Pim van Pelt <pim@ipng.nl>
Change-Id: I04a45c15c0838906aa787e06660fa29f39f755fa

20 files changed:
MAINTAINERS
Makefile
src/plugins/linux-cp/CMakeLists.txt [new file with mode: 0644]
src/plugins/linux-cp/FEATURE.yaml [new file with mode: 0644]
src/plugins/linux-cp/lcp.api [new file with mode: 0644]
src/plugins/linux-cp/lcp.c [new file with mode: 0644]
src/plugins/linux-cp/lcp.h [new file with mode: 0644]
src/plugins/linux-cp/lcp.rst [new file with mode: 0644]
src/plugins/linux-cp/lcp_adj.c [new file with mode: 0644]
src/plugins/linux-cp/lcp_adj.h [new file with mode: 0644]
src/plugins/linux-cp/lcp_api.c [new file with mode: 0644]
src/plugins/linux-cp/lcp_cli.c [new file with mode: 0644]
src/plugins/linux-cp/lcp_interface.c [new file with mode: 0644]
src/plugins/linux-cp/lcp_interface.h [new file with mode: 0644]
src/plugins/linux-cp/lcp_node.c [new file with mode: 0644]
src/plugins/linux-cp/test/lcp_unittest.c [new file with mode: 0644]
src/plugins/linux-cp/test/test_linux_cp.py [new file with mode: 0644]
src/vlibapi/api_helper_macros.h
src/vnet/interface_funcs.h
src/vnet/l2/l2_input.h

index 3231cf8..b32f1ee 100644 (file)
@@ -714,6 +714,12 @@ I: geneve
 M:     community vpp-dev@lists.fd.io
 F:     src/plugins/geneve/
 
+Plugin - linux-cp
+I:     linux-cp
+M:     neale@graphiant.com
+M:     Matthew Smith <mgsmith@netgate.com>
+F:     src/plugins/linux-cp/
+
 THE REST
 I:     misc
 C:     Contact vpp-dev Mailing List <vpp-dev@fd.io>
index 1ae1684..7ecc2c6 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -71,6 +71,7 @@ DEB_DEPENDS += libboost-all-dev libffi-dev python3-ply libmbedtls-dev
 DEB_DEPENDS += cmake ninja-build uuid-dev python3-jsonschema python3-yaml
 DEB_DEPENDS += python3-venv  # ensurepip
 DEB_DEPENDS += python3-dev   # needed for python3 -m pip install psutil
+DEB_DEPENDS += libnl-3-dev libnl-route-3-dev
 # python3.6 on 16.04 requires python36-dev
 
 LIBFFI=libffi6 # works on all but 20.04 and debian-testing
@@ -114,6 +115,7 @@ RPM_DEPENDS += mbedtls-devel
 RPM_DEPENDS += ccache
 RPM_DEPENDS += xmlto
 RPM_DEPENDS += elfutils-libelf-devel
+RPM_DEPENDS += libnl3-devel
 
 ifeq ($(OS_ID),fedora)
        RPM_DEPENDS += dnf-utils
diff --git a/src/plugins/linux-cp/CMakeLists.txt b/src/plugins/linux-cp/CMakeLists.txt
new file mode 100644 (file)
index 0000000..6b6ccb3
--- /dev/null
@@ -0,0 +1,61 @@
+# Copyright (c) 2020 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The plugin needs the libnl3 headers; skip the whole plugin when they are
+# not installed.
+find_path(LIBNL3_INCLUDE_DIR NAMES libnl3/netlink/route/link/vlan.h)
+
+if (NOT LIBNL3_INCLUDE_DIR)
+  message(WARNING "-- libnl3 headers not found - linux-cp plugin disabled")
+  return()
+endif()
+
+vpp_plugin_find_library(linux-cp LIBNL3_LIB libnl-3.so)
+vpp_plugin_find_library(linux-cp LIBNL3_ROUTE_LIB libnl-route-3.so.200)
+
+# Only libnl3 is used; LIBMNL_INCLUDE_DIR is never set in this file, so the
+# former include_directories() call on it was dropped.
+include_directories(${LIBNL3_INCLUDE_DIR}/libnl3)
+
+# Shared library holding the core linux-cp code; it is the only target that
+# links against libnl3. Both plugins below link against it.
+add_vpp_library(lcp
+  SOURCES
+  lcp_interface.c
+  lcp_adj.c
+  lcp.c
+
+  LINK_LIBRARIES
+  ${LIBNL3_LIB}
+  ${LIBNL3_ROUTE_LIB}
+
+  INSTALL_HEADERS
+  lcp_interface.h
+  lcp.h
+)
+
+# The plugin proper: binary API, CLI and graph nodes.
+add_vpp_plugin(linux_cp
+  SOURCES
+  lcp_api.c
+  lcp_cli.c
+  lcp_node.c
+
+  API_FILES
+  lcp.api
+
+  LINK_LIBRARIES
+  lcp
+)
+
+# Separate plugin carrying the unit-test code.
+add_vpp_plugin(linux_cp_unittest
+  SOURCES
+  test/lcp_unittest.c
+
+  LINK_LIBRARIES
+  lcp
+)
diff --git a/src/plugins/linux-cp/FEATURE.yaml b/src/plugins/linux-cp/FEATURE.yaml
new file mode 100644 (file)
index 0000000..088b060
--- /dev/null
@@ -0,0 +1,25 @@
+---
+name: Linux Control Plane (integration)
+maintainer: Neale Ranns <neale@graphiant.com>
+
+description: |-
+        This plugin provides the beginnings of an integration with the
+        Linux network stack.
+        The plugin provides the capability to 'mirror' VPP interfaces in
+        the Linux kernel. This means that for any interface in VPP the user
+        can create a corresponding TAP or TUN device in the Linux kernel
+        and have VPP plumb them together.
+        The plumbing mechanics are different in each direction.
+        In the RX direction, all packets received on a given VPP interface
+        that are punted (i.e. are not dropped or forwarded) are transmitted
+        on its mirror interface (this includes for example ARP, ND etc,
+        so the recommendation is to disable ARP, ND, ping plugin).
+        In the TX direction, packets received by VPP on the mirror Tap/Tun
+        are cross-connected to the VPP interfaces. For IP packets, IP output
+        features are applied.
+        This is the beginnings of integration, because there needs to be
+        an external agent that will configure (and synchronize) the IP
+        configuration of the paired interfaces.
+
+state: experimental
+properties: [API, CLI, MULTITHREAD]
diff --git a/src/plugins/linux-cp/lcp.api b/src/plugins/linux-cp/lcp.api
new file mode 100644 (file)
index 0000000..49fdedd
--- /dev/null
@@ -0,0 +1,166 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Linux Control Plane API
+ *
+ * Copyright 2020 Rubicon Communications, LLC.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "1.0.0";
+
+import "vnet/interface_types.api";
+
+/** \brief Set the default Linux Control Plane namespace
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param namespace - the new default namespace; namespace[0] == 0 iff none
+*/
+autoreply define lcp_default_ns_set
+{
+  u32 client_index;
+  u32 context;
+  string namespace[32];                /* LCP_NS_LEN */
+};
+
+/** \brief get the default Linux Control Plane namespace
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define lcp_default_ns_get
+{
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Reply with the default Linux Control Plane namespace
+    @param context - sender context, to match reply w/ request
+    @param namespace - the default namespace; namespace[0] == 0 iff none
+*/
+define lcp_default_ns_get_reply
+{
+  u32 context;
+  string namespace[32];                /* LCP_NS_LEN */
+};
+
+/** \brief Type of host (Linux) interface paired with a VPP interface:
+    a tap or a tun device.
+*/
+enum lcp_itf_host_type : u8
+{
+  LCP_API_ITF_HOST_TAP = 0,
+  LCP_API_ITF_HOST_TUN = 1,
+};
+
+/** \brief Add or delete a Linux Control Plane interface pair
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_add - 0 if deleting, != 0 if adding
+    @param sw_if_index - index of VPP PHY SW interface
+    @param host_if_name - host tap interface name
+    @param host_if_type - the type of host interface to create (tun, tap)
+    @param namespace - optional tap namespace; namespace[0] == 0 iff none
+*/
+autoreply autoendian define lcp_itf_pair_add_del
+{
+  u32 client_index;
+  u32 context;
+  bool is_add;
+  vl_api_interface_index_t sw_if_index;
+  string host_if_name[16];             /* IFNAMSIZ */
+  vl_api_lcp_itf_host_type_t host_if_type;
+  string namespace[32];                        /* LCP_NS_LEN */
+};
+
+/** \brief Dump Linux Control Plane interface pair data
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param cursor - position from which to start (or resume) the dump
+*/
+autoendian define lcp_itf_pair_get
+{
+  u32 client_index;
+  u32 context;
+  u32 cursor;
+};
+/** \brief Reply to lcp_itf_pair_get, sent in addition to the streamed
+    lcp_itf_pair_details messages
+    @param context - sender context which was passed in the request
+    @param retval - return code for the request
+    @param cursor - presumably the position to pass in the next
+                    lcp_itf_pair_get to continue the dump; TODO confirm
+                    the value that signals the end of the dump
+*/
+autoendian define lcp_itf_pair_get_reply
+{
+  u32 context;
+  i32 retval;
+  u32 cursor;
+};
+
+/** \brief Linux Control Plane interface pair dump response
+    @param context - sender context which was passed in the request
+    @param phy_sw_if_index - VPP's sw_if_index for the PHY
+    @param host_sw_if_index - VPP's sw_if_index for the host tap
+    @param vif_index - tap linux index
+    @param host_if_name - host interface name
+    @param host_if_type - host interface type (tun, tap)
+    @param namespace - host interface namespace
+*/
+autoendian define lcp_itf_pair_details
+{
+  u32 context;
+  vl_api_interface_index_t phy_sw_if_index;
+  vl_api_interface_index_t host_sw_if_index;
+  u32 vif_index;
+  string host_if_name[16];     /* IFNAMSIZ */
+  vl_api_lcp_itf_host_type_t host_if_type;
+  string namespace[32];                /* LCP_NS_LEN */
+};
+
+service {
+  rpc lcp_itf_pair_get returns lcp_itf_pair_get_reply
+    stream lcp_itf_pair_details;
+};
+
+/** \brief Replace end/begin
+ */
+autoreply define lcp_itf_pair_replace_begin
+{
+  u32 client_index;
+  u32 context;
+};
+autoreply define lcp_itf_pair_replace_end
+{
+  u32 client_index;
+  u32 context;
+};
+
+/*
+ * Linux-CP Error counters/messages
+ */
+counters linuxcp {
+  packets {
+    severity info;
+    type counter64;
+    units "packets";
+    description "ARP packets processed";
+  };
+  copies {
+    severity info;
+    type counter64;
+    units "packets";
+    description "ARP replies copied to host";
+  };
+};
+
+paths {
+  "/err/linux-cp-arp-phy" "linuxcp";
+  "/err/linux-cp-arp-host" "linuxcp";
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp.c b/src/plugins/linux-cp/lcp.c
new file mode 100644 (file)
index 0000000..f4c491c
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sched.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <sys/socket.h>
+#include <net/if.h>
+
+#include <plugins/linux-cp/lcp.h>
+
+lcp_main_t lcp_main;
+
+/**
+ * Return the default namespace name stored in lcp_main, or NULL when the
+ * stored name is empty (no default namespace).
+ * The returned pointer refers to the shared buffer in lcp_main.
+ */
+u8 *
+lcp_get_default_ns (void)
+{
+  u8 *ns = lcp_main.default_namespace;
+
+  return (0 != ns[0]) ? ns : NULL;
+}
+
+/**
+ * Return the file descriptor recorded in lcp_main for the default
+ * namespace.
+ */
+int
+lcp_get_default_ns_fd (void)
+{
+  return lcp_main.default_ns_fd;
+}
+
+/*
+ * Set, replace or clear the default namespace.
+ *
+ * ns is expected to be or look like a NUL-terminated C string. A NULL
+ * pointer or an empty string clears the default namespace.
+ *
+ * Returns 0 on success, -1 when the name (plus its terminating NUL) does
+ * not fit in LCP_NS_LEN bytes; in that case no state is modified.
+ */
+int
+lcp_set_default_ns (u8 *ns)
+{
+  lcp_main_t *lcpm = &lcp_main;
+  char *p;
+  int len;
+  u8 *s;
+
+  p = (char *) ns;
+
+  /* Only measure the string once we know there is one */
+  if (p && *p)
+    {
+      len = clib_strnlen (p, LCP_NS_LEN);
+      if (len >= LCP_NS_LEN)
+        return -1;
+    }
+
+  /*
+   * Drop the descriptor of the namespace that is being replaced or
+   * cleared; it would otherwise be leaked every time the default changes.
+   */
+  if (lcpm->default_ns_fd > 0)
+    close (lcpm->default_ns_fd);
+  lcpm->default_ns_fd = 0;
+
+  if (!p || *p == 0)
+    {
+      clib_memset (lcpm->default_namespace, 0,
+                   sizeof (lcpm->default_namespace));
+      return 0;
+    }
+
+  clib_strncpy ((char *) lcpm->default_namespace, p, LCP_NS_LEN - 1);
+
+  /*
+   * As before, the result of open () is stored as-is (-1 if the namespace
+   * file cannot be opened) and is not reflected in the return value.
+   */
+  s = format (0, "/var/run/netns/%s%c", (char *) lcpm->default_namespace, 0);
+  lcpm->default_ns_fd = open ((char *) s, O_RDONLY);
+  vec_free (s);
+
+  return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp.h b/src/plugins/linux-cp/lcp.h
new file mode 100644 (file)
index 0000000..7fdad37
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __LCP_H__
+#define __LCP_H__
+
+#include <vlib/vlib.h>
+
+#define LCP_NS_LEN 32
+
+/**
+ * Global state of the linux-cp plugin; the single instance is lcp_main.
+ */
+typedef struct lcp_main_s
+{
+  u16 msg_id_base;                 /* API message ID base */
+  u8 default_namespace[LCP_NS_LEN]; /* default namespace if set */
+  /* descriptor for the default namespace, see lcp_set_default_ns () */
+  int default_ns_fd;
+  /* 0/1 flag; NOTE(review): meaning not visible in this header, confirm */
+  u8 auto_intf;
+  /* Set when Unit testing */
+  u8 test_mode;
+} lcp_main_t;
+
+extern lcp_main_t lcp_main;
+
+/**
+ * Get/Set the default namespace for LCP host taps.
+ */
+int lcp_set_default_ns (u8 *ns);
+u8 *lcp_get_default_ns (void); /* Returns NULL or shared string */
+int lcp_get_default_ns_fd (void);
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp.rst b/src/plugins/linux-cp/lcp.rst
new file mode 100644 (file)
index 0000000..6d81901
--- /dev/null
@@ -0,0 +1,96 @@
+.. _Linux_control_plane:
+
+.. toctree::
+
+Linux Control Plane Integration
+===============================
+
+Overview
+________
+
+This plugin allows VPP to integrate with Linux. The
+general model is that Linux is the network stack, i.e. it has the
+control plane protocols, like ARP, IPv6 ND/MLD, Ping, etc, and VPP
+provides a SW based ASIC for forwarding.
+
+Interfaces
+__________
+
+VPP owns the interfaces in the system; physical (e.g. PCI), quasi
+physical (e.g. vhost), or virtual (e.g. tunnel). However,
+for the Linux networking stack to function it needs a representation
+of these interfaces; it needs a mirror image in the kernel. For this
+mirror we use a Tap interface, if the VPP interface is multi-point, a
+Tun if it's point-to-point. A physical and its mirror form an
+interface 'pair'.
+
+The host interface has two identities; the sw_if_index of the Tap and
+the virtual interface index in the kernel. It may be in a Linux namespace.
+
+The creation of the interface pairs is required from the control
+plane. It can be statically configured in the VPP startup
+configuration file. The intent here was to make the pair creation
+explicit, rather than have VPP guess which of the interfaces it owns
+require a mirror.
+
+Configuration
+_____________
+
+Linux will send and receive packets on the mirrored tap/tun
+interfaces. Any configuration that is made on these Linux interfaces,
+also needs to be applied on the corresponding physical interface in
+VPP.
+
+This functionality is not provided in this plugin, but it can be
+achieved in various ways, for example by listening to the netlink
+messages and applying the config. As a result all e.g. routes
+programmed in Linux, will also be present in VPP's FIB.
+
+Linux will own the [ARP/ND] neighbor tables (which will be copied via
+netlink to VPP also). This means that Linux will send packets with the
+peer's MAC address in the rewrite to VPP. The receiving TAP interface
+must therefore be in promiscuous mode.
+
+
+Forwarding
+__________
+
+The basic principle is to x-connect traffic from a Linux host interface
+(received on the Tap/Tun) to its paired physical interface, and vice-versa.
+
+Host to Physical
+^^^^^^^^^^^^^^^^
+
+All packets sent by the host, and received by VPP on a Tap/Tun should
+be sent to its paired physical interface. However, they should be sent
+with the same consequences as if they had originated from VPP,
+i.e. they should be subject to all output features on the physical
+interface. To achieve this there is a per-IP-address-family (AF) node
+inserted in the per-AF input feature arc. The node must be per-AF,
+since it must be a sibling of a start node for the ipX-output feature
+arc. This node uses the packet's L2 rewrite to search for the
+adjacency that VPP would have used to send this packet; this adjacency
+is stored in the buffer's meta data so that it is available to all
+output features. Then the packet is sent through the physical
+interface's IP output feature arc.
+All ARP packets are x-connected from the tap to the physical.
+
+Physical to Host
+^^^^^^^^^^^^^^^^
+
+All ARP packets received on the physical are sent to the paired
+Tap. This allows the Linux network stack to build the neighbour table.
+
+IP packets that are punted are sent to the host. They are sent on the
+tap that is paired with the physical on which they were originally
+received. The packet is sent on the Tap/Tun 'exactly' as it was
+received (i.e. with the L2 rewrite) but post any translations that
+input features may have made.
+
+
+Recommendations
+^^^^^^^^^^^^^^^
+
+When using this plugin disable the ARP, ND, IGMP plugins; this is the
+task for Linux.
+Disable ping plugin, since Linux will now respond.
diff --git a/src/plugins/linux-cp/lcp_adj.c b/src/plugins/linux-cp/lcp_adj.c
new file mode 100644 (file)
index 0000000..9a08591
--- /dev/null
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj_delegate.h>
+#include <linux-cp/lcp_adj.h>
+
+#include <vppinfra/bihash_32_8.h>
+#include <vppinfra/bihash_template.c>
+
+static adj_delegate_type_t adj_type;
+
+/**
+ * The table of adjacencies indexed by the rewrite string
+ */
+BVT (clib_bihash) lcp_adj_tbl;
+
+/**
+ * Build the table key for an adjacency from the rewrite string, rewrite
+ * length and interface held in the adjacency's rewrite header.
+ */
+static_always_inline void
+lcp_adj_mk_key_adj (const ip_adjacency_t *adj, lcp_adj_key_t *key)
+{
+  const u8 *rewrite = adj->rewrite_header.data;
+  u8 n_bytes = adj->rewrite_header.data_bytes;
+  u32 sw_if_index = adj->rewrite_header.sw_if_index;
+
+  lcp_adj_mk_key (rewrite, n_bytes, sw_if_index, key);
+}
+
+/**
+ * Delegate format callback: appends the fixed tag "lcp" to s.
+ */
+static u8 *
+lcp_adj_delegate_format (const adj_delegate_t *aed, u8 *s)
+{
+  s = format (s, "lcp");
+
+  return (s);
+}
+
+/**
+ * Delegate callback for a deleted adjacency: remove the entry keyed on the
+ * adjacency's current rewrite from the table.
+ */
+static void
+lcp_adj_delegate_adj_deleted (adj_delegate_t *aed)
+{
+  lcp_adj_kv_t kv;
+
+  lcp_adj_mk_key_adj (adj_get (aed->ad_adj_index), &kv.k);
+
+  BV (clib_bihash_add_del) (&lcp_adj_tbl, &kv.kv, 0 /* is_add */);
+}
+
+/**
+ * Delegate callback for a modified adjacency: when the adjacency is a
+ * rewrite adjacency, (re-)add it to the table keyed on its current
+ * rewrite.
+ *
+ * NOTE(review): an entry keyed on the adjacency's previous rewrite, if
+ * there was one, is not removed here.
+ */
+static void
+lcp_adj_delegate_adj_modified (adj_delegate_t *aed)
+{
+  const adj_index_t ai = aed->ad_adj_index;
+  ip_adjacency_t *adj = adj_get (ai);
+  lcp_adj_kv_t kv;
+
+  if (IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index)
+    {
+      lcp_adj_mk_key_adj (adj, &kv.k);
+      kv.v = ai;
+
+      BV (clib_bihash_add_del) (&lcp_adj_tbl, &kv.kv, 1 /* is_add */);
+    }
+}
+
+/**
+ * Delegate callback for a newly created adjacency: when it is already a
+ * rewrite adjacency, add it to the table keyed on its rewrite.
+ */
+static void
+lcp_adj_delegate_adj_created (adj_index_t ai)
+{
+  ip_adjacency_t *adj = adj_get (ai);
+  lcp_adj_kv_t kv;
+
+  if (IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index)
+    {
+      lcp_adj_mk_key_adj (adj, &kv.k);
+      kv.v = ai;
+
+      BV (clib_bihash_add_del) (&lcp_adj_tbl, &kv.kv, 1 /* is_add */);
+    }
+}
+
+/**
+ * Format one key/value entry of the adjacency table; installed as the
+ * table's kvp format function in lcp_adj_init ().
+ *
+ * va_args: pointer to the bihash kv pair, then an int 'verbose' (unused).
+ * Prints the interface name, the first 18 bytes of the rewrite in hex and
+ * the adjacency the entry maps to.
+ */
+u8 *
+format_lcp_adj_kvp (u8 *s, va_list *args)
+{
+  BVT (clib_bihash_kv) *kv = va_arg (*args, BVT (clib_bihash_kv) *);
+  CLIB_UNUSED (int verbose) = va_arg (*args, int);
+  lcp_adj_kv_t *akv = (lcp_adj_kv_t *) kv;
+  /*
+   * The table value is a u64 but holds an adjacency index. Narrow it
+   * before handing it to format_adj_nbr through varargs, so the argument
+   * has the width of an adjacency index rather than 64 bits.
+   */
+  adj_index_t ai = akv->v;
+
+  s = format (s, "  %U:%U\n    %U", format_vnet_sw_if_index_name,
+              vnet_get_main (), akv->k.sw_if_index, format_hex_bytes,
+              akv->k.rewrite, 18, format_adj_nbr, ai, 4);
+
+  return (s);
+}
+
+/**
+ * CLI handler for "show lcp adj": dump the adjacency table, passing the
+ * optional "verbose" keyword on to the table formatter.
+ */
+static clib_error_t *
+lcp_adj_show_cmd (vlib_main_t *vm, unformat_input_t *input,
+                  vlib_cli_command_t *cmd)
+{
+  u8 verbose = unformat (input, "verbose") ? 1 : 0;
+
+  vlib_cli_output (vm, "Linux-CP Adjs:\n%U", BV (format_bihash), &lcp_adj_tbl,
+                   verbose);
+
+  return (NULL);
+}
+
+/*
+ * Registers the "show lcp adj" CLI command.
+ * NOTE(review): the registration is named lcp_itf_pair_show_cmd_node
+ * although it registers the adjacency show command.
+ */
+VLIB_CLI_COMMAND (lcp_itf_pair_show_cmd_node, static) = {
+  .path = "show lcp adj",
+  .function = lcp_adj_show_cmd,
+  .short_help = "show lcp adj",
+  .is_mp_safe = 1,
+};
+
+/*
+ * Adjacency delegate callbacks; registered as a new delegate type in
+ * lcp_adj_init ().
+ */
+const adj_delegate_vft_t lcp_adj_vft = {
+  .adv_format = lcp_adj_delegate_format,
+  .adv_adj_deleted = lcp_adj_delegate_adj_deleted,
+  .adv_adj_modified = lcp_adj_delegate_adj_modified,
+  .adv_adj_created = lcp_adj_delegate_adj_created,
+};
+
+/**
+ * Init function: register the adjacency delegate type, then create the
+ * adjacency table and install its entry formatter.
+ */
+static clib_error_t *
+lcp_adj_init (vlib_main_t *vm)
+{
+  BVT (clib_bihash) *tbl = &lcp_adj_tbl;
+
+  adj_type = adj_delegate_register_new_type (&lcp_adj_vft);
+
+  BV (clib_bihash_init) (tbl, "linux-cp ADJ table", 1024, 1 << 24);
+  BV (clib_bihash_set_kvp_format_fn) (tbl, format_lcp_adj_kvp);
+
+  return (NULL);
+}
+
+VLIB_INIT_FUNCTION (lcp_adj_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_adj.h b/src/plugins/linux-cp/lcp_adj.h
new file mode 100644 (file)
index 0000000..006d183
--- /dev/null
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LCP_ADJ_DELEGATE_H__
+#define __LCP_ADJ_DELEGATE_H__
+
+#include <vppinfra/bihash_32_8.h>
+
+/**
+ * Key of the adjacency table: an interface index plus the (zero padded)
+ * rewrite string. Must be exactly 32 bytes to match the 32_8 bihash.
+ */
+typedef struct lcp_adj_key_t_
+{
+  u32 sw_if_index;
+  u8 rewrite[28];
+} lcp_adj_key_t;
+
+STATIC_ASSERT (sizeof (lcp_adj_key_t) == 32, "LCP ADJ Key size changed");
+
+/**
+ * Key/value pair of the adjacency table: a typed view (k, v) overlaid on
+ * the generic bihash kv so either form can be passed around.
+ */
+typedef struct lcp_adj_kv_t_
+{
+  union
+  {
+    BVT (clib_bihash_kv) kv;
+    struct
+    {
+      lcp_adj_key_t k;
+      u64 v;
+    };
+  };
+} lcp_adj_kv_t;
+
+/* the typed view must overlay the bihash kv exactly */
+STATIC_ASSERT (sizeof (lcp_adj_kv_t) == sizeof (BVT (clib_bihash_kv)),
+              "LCP ADJ Key size changed");
+
+/**
+ * The table of adjacencies indexed by the rewrite string
+ */
+extern BVT (clib_bihash) lcp_adj_tbl;
+
+/**
+ * Build a table key from a rewrite string and an interface index.
+ *
+ * @param rewrite      the rewrite bytes
+ * @param len          number of rewrite bytes; anything beyond the size of
+ *                     the key's rewrite field is ignored
+ * @param sw_if_index  interface the rewrite applies to
+ * @param key          the key to fill in
+ */
+static_always_inline void
+lcp_adj_mk_key (const u8 *rewrite, u8 len, u32 sw_if_index, lcp_adj_key_t *key)
+{
+  /*
+   * Never copy more than the key can hold. An ASSERT alone gives no
+   * protection in images built without assertions, where an over-long
+   * rewrite would write past the end of the key.
+   */
+  len = clib_min (len, sizeof (key->rewrite));
+
+  /*
+   * Construct the key from the provided rewrite, then pad with zeros
+   * to ensure the key does not have garbage bytes
+   */
+  clib_memcpy_fast (key->rewrite, rewrite, len);
+  clib_memset (key->rewrite + len, 0, sizeof (key->rewrite) - len);
+  key->sw_if_index = sw_if_index;
+}
+
+/**
+ * Look up the adjacency recorded for a rewrite string on an interface.
+ *
+ * @return the adjacency index stored in the table, or ADJ_INDEX_INVALID
+ *         when there is no matching entry
+ */
+static_always_inline adj_index_t
+lcp_adj_lkup (const u8 *rewrite, u8 len, u32 sw_if_index)
+{
+  lcp_adj_kv_t kv;
+
+  lcp_adj_mk_key (rewrite, len, sw_if_index, &kv.k);
+
+  /* the search returns non-zero when the key is not present */
+  if (BV (clib_bihash_search_inline) (&lcp_adj_tbl, &kv.kv))
+    return (ADJ_INDEX_INVALID);
+
+  return (kv.v);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
+#endif
diff --git a/src/plugins/linux-cp/lcp_api.c b/src/plugins/linux-cp/lcp_api.c
new file mode 100644 (file)
index 0000000..409aa72
--- /dev/null
@@ -0,0 +1,232 @@
+/*
+ * Copyright 2020 Rubicon Communications, LLC.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <sys/socket.h>
+#include <linux/if.h>
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vpp/app/version.h>
+#include <vnet/format_fns.h>
+
+#include <linux-cp/lcp_interface.h>
+#include <linux-cp/lcp.api_enum.h>
+#include <linux-cp/lcp.api_types.h>
+
+static u16 lcp_msg_id_base;
+#define REPLY_MSG_ID_BASE lcp_msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+/**
+ * Convert an API host interface type to the internal type; anything other
+ * than TUN is treated as TAP.
+ */
+static lip_host_type_t
+api_decode_host_type (vl_api_lcp_itf_host_type_t type)
+{
+  return (LCP_API_ITF_HOST_TUN == type) ? LCP_ITF_HOST_TUN : LCP_ITF_HOST_TAP;
+}
+
+/**
+ * Convert an internal host interface type to the API type; anything other
+ * than TUN is reported as TAP.
+ */
+static vl_api_lcp_itf_host_type_t
+api_encode_host_type (lip_host_type_t type)
+{
+  return (LCP_ITF_HOST_TUN == type) ? LCP_API_ITF_HOST_TUN :
+                                      LCP_API_ITF_HOST_TAP;
+}
+
+/**
+ * Set the auto_intf flag in lcp_main; any non-zero value is stored as 1.
+ */
+void
+lcp_set_auto_intf (u8 is_auto)
+{
+  lcp_main.auto_intf = is_auto ? 1 : 0;
+}
+
+/**
+ * Return the auto_intf flag held in lcp_main.
+ */
+int
+lcp_auto_intf (void)
+{
+  return lcp_main.auto_intf;
+}
+
+/**
+ * Handler for lcp_itf_pair_add_del: create or delete the interface pair
+ * for the given VPP interface and reply with the result.
+ *
+ * An invalid sw_if_index is answered with
+ * VNET_API_ERROR_INVALID_SW_IF_INDEX without attempting the operation.
+ */
+static void
+vl_api_lcp_itf_pair_add_del_t_handler (vl_api_lcp_itf_pair_add_del_t *mp)
+{
+  u32 phy_sw_if_index;
+  vl_api_lcp_itf_pair_add_del_reply_t *rmp;
+  lip_host_type_t lip_host_type;
+  int rv;
+
+  if (!vnet_sw_if_index_is_api_valid (mp->sw_if_index))
+    {
+      rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+      goto bad_sw_if_index;
+    }
+
+  phy_sw_if_index = mp->sw_if_index;
+  lip_host_type = api_decode_host_type (mp->host_if_type);
+  if (mp->is_add)
+    {
+      u8 *host_if_name, *netns;
+      int host_len, netns_len;
+
+      host_if_name = netns = 0;
+
+      /* lcp_itf_pair_create expects vec of u8 */
+      /* copy at most size - 1 bytes of each field, then NUL terminate */
+      host_len = clib_strnlen ((char *) mp->host_if_name,
+                              sizeof (mp->host_if_name) - 1);
+      vec_add (host_if_name, mp->host_if_name, host_len);
+      vec_add1 (host_if_name, 0);
+
+      netns_len =
+       clib_strnlen ((char *) mp->namespace, sizeof (mp->namespace) - 1);
+      vec_add (netns, mp->namespace, netns_len);
+      vec_add1 (netns, 0);
+
+      rv = lcp_itf_pair_create (phy_sw_if_index, host_if_name, lip_host_type,
+                               netns);
+
+      /* the temporary vectors are always released here */
+      vec_free (host_if_name);
+      vec_free (netns);
+    }
+  else
+    {
+      rv = lcp_itf_pair_delete (phy_sw_if_index);
+    }
+
+  /* target of the goto above; the reply carries rv in both cases */
+  BAD_SW_IF_INDEX_LABEL;
+  REPLY_MACRO (VL_API_LCP_ITF_PAIR_ADD_DEL_REPLY);
+}
+
+/**
+ * Send one lcp_itf_pair_details message describing an interface pair.
+ *
+ * @param lipi     index of the pair, as accepted by lcp_itf_pair_get ()
+ * @param rp       registration of the client to send to
+ * @param context  context of the request being answered
+ */
+static void
+send_lcp_itf_pair_details (index_t lipi, vl_api_registration_t *rp,
+                           u32 context)
+{
+  vl_api_lcp_itf_pair_details_t *rmp;
+  lcp_itf_pair_t *lcp_pair = lcp_itf_pair_get (lipi);
+
+  REPLY_MACRO_DETAILS4 (
+    VL_API_LCP_ITF_PAIR_DETAILS, rp, context, ({
+      rmp->phy_sw_if_index = lcp_pair->lip_phy_sw_if_index;
+      rmp->host_sw_if_index = lcp_pair->lip_host_sw_if_index;
+      rmp->vif_index = lcp_pair->lip_vif_index;
+      rmp->host_if_type = api_encode_host_type (lcp_pair->lip_host_type);
+
+      /*
+       * Bound each copy by the source vector length and by the fixed
+       * size of the API field, leaving room for the terminating NUL.
+       * This also copes with an empty (NULL) vector, for which
+       * vec_len () - 1 would wrap around.
+       */
+      clib_strncpy ((char *) rmp->host_if_name,
+                    (char *) lcp_pair->lip_host_name,
+                    clib_min (vec_len (lcp_pair->lip_host_name),
+                              sizeof (rmp->host_if_name) - 1));
+
+      clib_strncpy ((char *) rmp->namespace, (char *) lcp_pair->lip_namespace,
+                    clib_min (vec_len (lcp_pair->lip_namespace),
+                              sizeof (rmp->namespace) - 1));
+    }));
+}
+
+/**
+ * Handler for lcp_itf_pair_get: sends a details message for entries of
+ * lcp_itf_pair_pool followed by the lcp_itf_pair_get_reply.
+ *
+ * NOTE(review): 'cursor' and 'rp' are not declared in this function;
+ * presumably REPLY_AND_DETAILS_MACRO provides them, and uses rmp/rv -
+ * confirm against api_helper_macros.h.
+ */
+static void
+vl_api_lcp_itf_pair_get_t_handler (vl_api_lcp_itf_pair_get_t *mp)
+{
+  vl_api_lcp_itf_pair_get_reply_t *rmp;
+  i32 rv = 0;
+
+  REPLY_AND_DETAILS_MACRO (
+    VL_API_LCP_ITF_PAIR_GET_REPLY, lcp_itf_pair_pool,
+    ({ send_lcp_itf_pair_details (cursor, rp, mp->context); }));
+}
+
+/**
+ * Handler for lcp_default_ns_set: apply the requested default namespace
+ * and reply with the result of lcp_set_default_ns ().
+ */
+static void
+vl_api_lcp_default_ns_set_t_handler (vl_api_lcp_default_ns_set_t *mp)
+{
+  vl_api_lcp_default_ns_set_reply_t *rmp;
+  u8 *ns = mp->namespace;
+  int rv;
+
+  /* make sure the client-supplied name is NUL terminated */
+  ns[LCP_NS_LEN - 1] = 0;
+  rv = lcp_set_default_ns (ns);
+
+  REPLY_MACRO (VL_API_LCP_DEFAULT_NS_SET_REPLY);
+}
+
+/**
+ * Handler for lcp_default_ns_get: reply with the current default
+ * namespace, or an empty string when none is set.
+ * Nothing is sent if the requesting client can no longer be found.
+ */
+static void
+vl_api_lcp_default_ns_get_t_handler (vl_api_lcp_default_ns_get_t *mp)
+{
+  vl_api_lcp_default_ns_get_reply_t *rmp;
+  vl_api_registration_t *reg;
+  char *ns;
+
+  reg = vl_api_client_index_to_registration (mp->client_index);
+  if (!reg)
+    return;
+
+  rmp = vl_msg_api_alloc (sizeof (*rmp));
+  clib_memset (rmp, 0, sizeof (*rmp));
+  /*
+   * Use the base obtained in lcp_plugin_api_hookup () (the same one the
+   * REPLY_MACROs in this file use), in network byte order.
+   */
+  rmp->_vl_msg_id =
+    clib_host_to_net_u16 (VL_API_LCP_DEFAULT_NS_GET_REPLY + REPLY_MSG_ID_BASE);
+  rmp->context = mp->context;
+
+  ns = (char *) lcp_get_default_ns ();
+  if (ns)
+    clib_strncpy ((char *) rmp->namespace, ns, LCP_NS_LEN - 1);
+
+  vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+/**
+ * Handler for lcp_itf_pair_replace_begin: reply with the result of
+ * lcp_itf_pair_replace_begin ().
+ */
+static void
+vl_api_lcp_itf_pair_replace_begin_t_handler (
+  vl_api_lcp_itf_pair_replace_begin_t *mp)
+{
+  vl_api_lcp_itf_pair_replace_begin_reply_t *rmp;
+  int rv = lcp_itf_pair_replace_begin ();
+
+  REPLY_MACRO (VL_API_LCP_ITF_PAIR_REPLACE_BEGIN_REPLY);
+}
+
+/**
+ * Handler for lcp_itf_pair_replace_end: reply with the result of
+ * lcp_itf_pair_replace_end ().
+ */
+static void
+vl_api_lcp_itf_pair_replace_end_t_handler (
+  vl_api_lcp_itf_pair_replace_end_t *mp)
+{
+  vl_api_lcp_itf_pair_replace_end_reply_t *rmp;
+  int rv = lcp_itf_pair_replace_end ();
+
+  REPLY_MACRO (VL_API_LCP_ITF_PAIR_REPLACE_END_REPLY);
+}
+
+/*
+ * Set up the API message handling tables
+ */
+#include <linux-cp/lcp.api.c>
+
+/**
+ * Init function: register the plugin's API messages and record the
+ * message ID base they were assigned.
+ */
+static clib_error_t *
+lcp_plugin_api_hookup (vlib_main_t *vm)
+{
+  /* Ask for a correctly-sized block of API message decode slots */
+  lcp_msg_id_base = setup_message_id_table ();
+
+  /*
+   * Keep the copy in lcp_main in step; it is otherwise never assigned in
+   * this file and code reading it would see 0.
+   */
+  lcp_main.msg_id_base = lcp_msg_id_base;
+
+  return (NULL);
+}
+
+VLIB_INIT_FUNCTION (lcp_plugin_api_hookup);
+
+#include <vpp/app/version.h>
+/* Plugin registration; the plugin is disabled unless explicitly enabled */
+VLIB_PLUGIN_REGISTER () = {
+  .version = VPP_BUILD_VER,
+  .description = "Linux Control Plane - Interface Mirror",
+  .default_disabled = 1,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_cli.c b/src/plugins/linux-cp/lcp_cli.c
new file mode 100644 (file)
index 0000000..3b0598c
--- /dev/null
@@ -0,0 +1,236 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright 2020 Rubicon Communications, LLC.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sys/socket.h>
+#include <linux/if.h>
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vpp/app/version.h>
+#include <vnet/format_fns.h>
+
+#include <plugins/linux-cp/lcp_interface.h>
+
+/**
+ * CLI handler for "lcp create".
+ *
+ * Parses a phy interface (by sw_if_index or by name), the mandatory
+ * "host-if <name>", an optional "netns <namespace>" and an optional "tun"
+ * keyword (the default host type is tap), then calls lcp_itf_pair_create().
+ *
+ * @return NULL on success, otherwise an error describing the bad input or
+ *         carrying the return code of lcp_itf_pair_create().
+ */
+static clib_error_t *
+lcp_itf_pair_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                               vlib_cli_command_t *cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  vnet_main_t *vnm = vnet_get_main ();
+  lip_host_type_t host_if_type = LCP_ITF_HOST_TAP;
+  u8 *host_if_name = NULL, *ns = NULL;
+  clib_error_t *error = NULL;
+  u32 sw_if_index = ~0;
+  int r;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%d", &sw_if_index))
+        ;
+      else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+                         &sw_if_index))
+        ;
+      else if (unformat (line_input, "host-if %s", &host_if_name))
+        ;
+      else if (unformat (line_input, "netns %s", &ns))
+        ;
+      else if (unformat (line_input, "tun"))
+        host_if_type = LCP_ITF_HOST_TUN;
+      else
+        {
+          /* report the offending token while the line input still exists */
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          break;
+        }
+    }
+
+  unformat_free (line_input);
+
+  if (error)
+    goto done;
+
+  if (sw_if_index == ~0)
+    {
+      error = clib_error_return (0, "interface name or sw_if_index required");
+      goto done;
+    }
+
+  /* the host name is dereferenced during validation; it must be present */
+  if (!host_if_name)
+    {
+      error = clib_error_return (0, "host interface name required");
+      goto done;
+    }
+
+  if (vec_len (ns) >= LCP_NS_LEN)
+    {
+      error = clib_error_return (
+        0, "Namespace name should be fewer than %d characters", LCP_NS_LEN);
+      goto done;
+    }
+
+  r = lcp_itf_pair_create (sw_if_index, host_if_name, host_if_type, ns);
+  if (r)
+    error = clib_error_return (0, "linux-cp pair creation failed (%d)", r);
+
+done:
+  vec_free (host_if_name);
+  vec_free (ns);
+
+  return error;
+}
+
+/* CLI registration for "lcp create", served by lcp_itf_pair_create_command_fn */
+VLIB_CLI_COMMAND (lcp_itf_pair_create_command, static) = {
+  .path = "lcp create",
+  .short_help = "lcp create <sw_if_index>|<if-name> host-if <host-if-name> "
+               "netns <namespace> [tun]",
+  .function = lcp_itf_pair_create_command_fn,
+};
+
+/**
+ * CLI handler for "lcp default".
+ *
+ * Accepts "netns <namespace>" to set the default namespace, or "clear netns"
+ * which leaves the namespace NULL; the result is handed to
+ * lcp_set_default_ns().
+ *
+ * @return NULL on success, otherwise an error for unparsable input or for a
+ *         non-zero return from lcp_set_default_ns().
+ */
+static clib_error_t *
+lcp_default_netns_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                             vlib_cli_command_t *cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = NULL;
+  u8 *ns = NULL;
+  int r;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "netns %s", &ns))
+        ;
+      else if (unformat (line_input, "clear netns"))
+        ;
+      else
+        {
+          /*
+           * Without this branch an unrecognised token is never consumed and
+           * the loop would spin forever.
+           */
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          break;
+        }
+    }
+
+  unformat_free (line_input);
+
+  if (error)
+    goto done;
+
+  vlib_cli_output (vm, "lcp set default netns '%s'\n", (char *) ns);
+
+  r = lcp_set_default_ns (ns);
+
+  if (r)
+    error = clib_error_return (0, "linux-cp set default netns failed (%d)", r);
+
+done:
+  /*
+   * The startup-config path frees its vector after the same call, so the
+   * callee does not keep the pointer.
+   */
+  vec_free (ns);
+
+  return error;
+}
+
+/* CLI registration for "lcp default", served by lcp_default_netns_command_fn */
+VLIB_CLI_COMMAND (lcp_default_netns_command, static) = {
+  .path = "lcp default",
+  .short_help = "lcp default netns [<namespace>]",
+  .function = lcp_default_netns_command_fn,
+};
+
+/**
+ * CLI handler for "lcp delete".
+ *
+ * Parses a phy interface (by sw_if_index or by name) and deletes the
+ * interface pair attached to it through lcp_itf_pair_delete().
+ *
+ * @return NULL on success, otherwise an error for bad input or carrying the
+ *         return code of lcp_itf_pair_delete().
+ */
+static clib_error_t *
+lcp_itf_pair_delete_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                               vlib_cli_command_t *cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = NULL;
+  u32 sw_if_index = ~0;
+  int r;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%d", &sw_if_index))
+        ;
+      else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+                         &sw_if_index))
+        ;
+      else
+        {
+          /* format from the line being parsed, then fall through to free it */
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          break;
+        }
+    }
+
+  unformat_free (line_input);
+
+  if (error)
+    return error;
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0, "interface name or sw_if_index required");
+
+  r = lcp_itf_pair_delete (sw_if_index);
+
+  if (r)
+    return clib_error_return (0, "linux-cp pair deletion failed (%d)", r);
+  return 0;
+}
+
+/* CLI registration for "lcp delete", served by lcp_itf_pair_delete_command_fn */
+VLIB_CLI_COMMAND (lcp_itf_pair_delete_command, static) = {
+  .path = "lcp delete",
+  .short_help = "lcp delete <sw_if_index>|<if-name>",
+  .function = lcp_itf_pair_delete_command_fn,
+};
+
+/*
+ * CLI handler for "show lcp".
+ * With "phy <interface>" only the pair on that phy is shown; with no
+ * argument ~0 is passed down, which lcp_itf_pair_show() treats as "all".
+ */
+static clib_error_t *
+lcp_itf_pair_show_cmd (vlib_main_t *vm, unformat_input_t *input,
+                      vlib_cli_command_t *cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 phy_sw_if_index = ~0;
+
+  for (;;)
+    {
+      if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT)
+        break;
+
+      if (!unformat (input, "phy %U", unformat_vnet_sw_interface, vnm,
+                     &phy_sw_if_index))
+        return clib_error_return (0, "unknown input '%U'",
+                                  format_unformat_error, input);
+    }
+
+  lcp_itf_pair_show (phy_sw_if_index);
+
+  return 0;
+}
+
+/* CLI registration for "show lcp"; flagged is_mp_safe */
+VLIB_CLI_COMMAND (lcp_itf_pair_show_cmd_node, static) = {
+  .path = "show lcp",
+  .function = lcp_itf_pair_show_cmd,
+  .short_help = "show lcp [phy <interface>]",
+  .is_mp_safe = 1,
+};
+
+/* Init hook for the CLI module; it does no work and reports no error. */
+clib_error_t *
+lcp_cli_init (vlib_main_t *vm)
+{
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (lcp_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_interface.c b/src/plugins/linux-cp/lcp_interface.c
new file mode 100644 (file)
index 0000000..534d974
--- /dev/null
@@ -0,0 +1,1016 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <sys/socket.h>
+#include <net/if.h>
+
+#include <linux-cp/lcp_interface.h>
+#include <netlink/route/link/vlan.h>
+
+#include <vnet/plugin/plugin.h>
+#include <vnet/plugin/plugin.h>
+
+#include <vnet/ip/ip_punt_drop.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/udp/udp.h>
+#include <vnet/tcp/tcp.h>
+#include <vnet/devices/tap/tap.h>
+#include <vnet/devices/virtio/virtio.h>
+#include <vnet/devices/netlink.h>
+#include <vlibapi/api_helper_macros.h>
+#include <vnet/ipsec/ipsec_punt.h>
+
+static vlib_log_class_t lcp_itf_pair_logger;
+
+/**
+ * Pool of LIP objects
+ */
+lcp_itf_pair_t *lcp_itf_pair_pool;
+
+/* Number of interface pairs currently allocated in the pool. */
+u32
+lcp_itf_num_pairs (void)
+{
+  u32 n_pairs = pool_elts (lcp_itf_pair_pool);
+
+  return n_pairs;
+}
+
+/**
+ * DBs of interface-pair objects:
+ *  - key'd by VIF (linux ID)
+ *  - key'd by VPP's physical interface
+ *  - number of shared uses of VPP's tap/host interface
+ */
+static uword *lip_db_by_vif;
+index_t *lip_db_by_phy;
+u32 *lip_db_by_host;
+
+/*
+ * Logging helpers for the interface-pair code; both currently log at notice
+ * level. The expansions deliberately carry no trailing semicolon so that an
+ * invocation behaves like an ordinary statement, including inside an
+ * if/else without braces.
+ */
+#define LCP_ITF_PAIR_DBG(...)                                                 \
+  vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__)
+
+#define LCP_ITF_PAIR_INFO(...)                                                \
+  vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__)
+
+/**
+ * Format function for an interface pair.
+ *
+ * Expects one lcp_itf_pair_t pointer in the va_list. Emits the pool index,
+ * the phy and host interface names (or a placeholder when the sw interface
+ * no longer exists), the Linux host name, the Linux interface index, the
+ * host type and, when set, the namespace.
+ */
+u8 *
+format_lcp_itf_pair (u8 *s, va_list *args)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  lcp_itf_pair_t *lip = va_arg (*args, lcp_itf_pair_t *);
+  vnet_sw_interface_t *swif_phy;
+  vnet_sw_interface_t *swif_host;
+
+  /* a pointer difference is wider than the int that %d consumes */
+  s = format (s, "itf-pair: [%d]", (u32) (lip - lcp_itf_pair_pool));
+
+  swif_phy = vnet_get_sw_interface_or_null (vnm, lip->lip_phy_sw_if_index);
+  if (!swif_phy)
+    s = format (s, " <no-phy-if>");
+  else
+    s = format (s, " %U", format_vnet_sw_interface_name, vnm, swif_phy);
+
+  swif_host = vnet_get_sw_interface_or_null (vnm, lip->lip_host_sw_if_index);
+  if (!swif_host)
+    s = format (s, " <no-host-if>");
+  else
+    s = format (s, " %U", format_vnet_sw_interface_name, vnm, swif_host);
+
+  s = format (s, " %s %d type %s", lip->lip_host_name, lip->lip_vif_index,
+              (lip->lip_host_type == LCP_ITF_HOST_TAP) ? "tap" : "tun");
+
+  if (lip->lip_namespace)
+    s = format (s, " netns %s", lip->lip_namespace);
+
+  return s;
+}
+
+/*
+ * Walk callback: print one interface pair on the CLI.
+ * Stops the walk if the pair cannot be resolved.
+ */
+static walk_rc_t
+lcp_itf_pair_walk_show_cb (index_t api, void *ctx)
+{
+  lcp_itf_pair_t *pair = lcp_itf_pair_get (api);
+
+  if (pair == NULL)
+    return WALK_STOP;
+
+  vlib_cli_output (vlib_get_main (), "%U\n", format_lcp_itf_pair, pair);
+
+  return WALK_CONTINUE;
+}
+
+/*
+ * Print the default namespace followed by either every interface pair
+ * (phy_sw_if_index == ~0) or just the pair attached to the given phy.
+ */
+void
+lcp_itf_pair_show (u32 phy_sw_if_index)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  u8 *default_ns = lcp_get_default_ns ();
+  index_t lipi;
+
+  vlib_cli_output (vm, "lcp default netns '%s'\n",
+                   default_ns ? (char *) default_ns : "<unset>");
+
+  if (phy_sw_if_index == ~0)
+    {
+      lcp_itf_pair_walk (lcp_itf_pair_walk_show_cb, 0);
+      return;
+    }
+
+  lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
+  if (lipi != INDEX_INVALID)
+    lcp_itf_pair_walk_show_cb (lipi, 0);
+}
+
+/**
+ * Resolve a pool index to its interface pair.
+ *
+ * @param index pool index, typically the result of a lcp_itf_pair_find_by_*
+ *              lookup.
+ * @return the pair, or NULL when index is INDEX_INVALID or does not refer to
+ *         an allocated pool element.
+ */
+lcp_itf_pair_t *
+lcp_itf_pair_get (u32 index)
+{
+  /* lookups signal "not found" with INDEX_INVALID; never index the pool */
+  if (index == INDEX_INVALID ||
+      pool_is_free_index (lcp_itf_pair_pool, index))
+    return NULL;
+
+  return pool_elt_at_index (lcp_itf_pair_pool, index);
+}
+
+/*
+ * Look up a pair by the Linux interface index of its host interface.
+ * Returns INDEX_INVALID when the index is not in the hash.
+ */
+index_t
+lcp_itf_pair_find_by_vif (u32 vif_index)
+{
+  uword *entry = hash_get (lip_db_by_vif, vif_index);
+
+  if (!entry)
+    return INDEX_INVALID;
+
+  return entry[0];
+}
+
+/**
+ * Add a pair for a sub-interface, borrowing the host interface and host
+ * type of the pair that already exists on the parent phy.
+ *
+ * @param vif             Linux interface index of the host sub-interface.
+ * @param host_if_name    Linux name of the host sub-interface.
+ * @param sub_sw_if_index VPP sw_if_index recorded as the phy of the new pair.
+ * @param phy_sw_if_index VPP sw_if_index of the parent phy.
+ * @param ns              namespace recorded in the pair, may be NULL.
+ * @return VNET_API_ERROR_INVALID_SW_IF_INDEX when the parent phy has no
+ *         pair, otherwise the result of lcp_itf_pair_add().
+ */
+int
+lcp_itf_pair_add_sub (u32 vif, u8 *host_if_name, u32 sub_sw_if_index,
+                     u32 phy_sw_if_index, u8 *ns)
+{
+  lcp_itf_pair_t *lip;
+  index_t lipi;
+
+  lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
+
+  /* no pair on the parent: there is nothing to inherit from */
+  if (lipi == INDEX_INVALID)
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  lip = lcp_itf_pair_get (lipi);
+
+  return lcp_itf_pair_add (lip->lip_host_sw_if_index, sub_sw_if_index,
+                           host_if_name, vif, lip->lip_host_type, ns);
+}
+
+/*
+ * Name of the IP input feature enabled on the host interface, indexed by
+ * host interface type and then by address family.
+ */
+const char *lcp_itf_l3_feat_names[N_LCP_ITF_HOST][N_AF] = {
+  [LCP_ITF_HOST_TAP] = {
+    [AF_IP4] = "linux-cp-xc-ip4",
+    [AF_IP6] = "linux-cp-xc-ip6",
+  },
+  [LCP_ITF_HOST_TUN] = {
+    [AF_IP4] = "linux-cp-xc-l3-ip4",
+    [AF_IP6] = "linux-cp-xc-l3-ip6",
+  },
+};
+
+/*
+ * Route path flags used for the punt-redirect path towards the host
+ * interface, indexed by host interface type.
+ */
+const fib_route_path_flags_t lcp_itf_route_path_flags[N_LCP_ITF_HOST] = {
+  [LCP_ITF_HOST_TAP] = FIB_ROUTE_PATH_DVR,
+  [LCP_ITF_HOST_TUN] = FIB_ROUTE_PATH_FLAG_NONE,
+};
+
+/* Release the locks held on the pair's IPv4 and IPv6 phy adjacencies. */
+static void
+lcp_itf_unset_adjs (lcp_itf_pair_t *lip)
+{
+  const adj_index_t *ais = lip->lip_phy_adjs.adj_index;
+
+  adj_unlock (ais[AF_IP4]);
+  adj_unlock (ais[AF_IP6]);
+}
+
+/*
+ * Take a lock on one IPv4 and one IPv6 adjacency on the phy and remember
+ * them in the pair: neighbour adjacencies for the zero address when the host
+ * is a tun, mcast adjacencies otherwise. The rewrite length of the pair is
+ * then read from the IPv4 adjacency.
+ */
+static void
+lcp_itf_set_adjs (lcp_itf_pair_t *lip)
+{
+  adj_index_t *ais = lip->lip_phy_adjs.adj_index;
+
+  if (lip->lip_host_type != LCP_ITF_HOST_TUN)
+    {
+      ais[AF_IP4] = adj_mcast_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4,
+                                           lip->lip_phy_sw_if_index);
+      ais[AF_IP6] = adj_mcast_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6,
+                                           lip->lip_phy_sw_if_index);
+    }
+  else
+    {
+      ais[AF_IP4] =
+        adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4, &zero_addr,
+                             lip->lip_phy_sw_if_index);
+      ais[AF_IP6] =
+        adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6, &zero_addr,
+                             lip->lip_phy_sw_if_index);
+    }
+
+  lip->lip_rewrite_len = adj_get (ais[AF_IP4])->rewrite_header.data_bytes;
+}
+
+/*
+ * Weak default that does nothing and returns 0; being __clib_weak it can be
+ * replaced by a strong definition elsewhere. lcp_itf_pair_add() calls it
+ * before storing a new pair.
+ */
+int __clib_weak
+lcp_nl_drain_messages (void)
+{
+  return 0;
+}
+
+/**
+ * Record a new phy/host interface pair and wire up its data-plane hooks.
+ *
+ * @param host_sw_if_index VPP sw_if_index of the host (tap/tun) interface;
+ *                         when ~0 the pair is only recorded and no features
+ *                         are configured.
+ * @param phy_sw_if_index  VPP sw_if_index of the phy.
+ * @param host_name        Linux name of the host interface (copied).
+ * @param host_index       Linux interface index of the host interface.
+ * @param host_type        tap or tun.
+ * @param ns               namespace of the host interface (copied), or NULL.
+ * @return 0 on success, VNET_API_ERROR_VALUE_EXIST when the phy already has
+ *         a pair.
+ */
+int
+lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
+                 u32 host_index, lip_host_type_t host_type, u8 *ns)
+{
+  index_t lipi;
+  lcp_itf_pair_t *lip;
+
+  lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
+
+  LCP_ITF_PAIR_INFO ("add: host:%U phy:%U, host_if:%v vif:%d ns:%v",
+                     format_vnet_sw_if_index_name, vnet_get_main (),
+                     host_sw_if_index, format_vnet_sw_if_index_name,
+                     vnet_get_main (), phy_sw_if_index, host_name, host_index,
+                     ns);
+
+  if (lipi != INDEX_INVALID)
+    return VNET_API_ERROR_VALUE_EXIST;
+
+  /*
+   * Drain netlink messages before adding the new pair.
+   * This avoids unnecessarily applying messages that were generated by
+   * the creation of the tap/tun interface. By processing them before we
+   * store the pair data, we will ensure that they are ignored.
+   */
+  lcp_nl_drain_messages ();
+
+  /*
+   * Create a new pair.
+   * pool_get does not clear the element, so start from a known state:
+   * no flags (in particular not STALE), no rewrite and no adjacencies.
+   */
+  pool_get (lcp_itf_pair_pool, lip);
+  clib_memset (lip, 0, sizeof (*lip));
+  lip->lip_phy_adjs.adj_index[AF_IP4] = ADJ_INDEX_INVALID;
+  lip->lip_phy_adjs.adj_index[AF_IP6] = ADJ_INDEX_INVALID;
+
+  lipi = lip - lcp_itf_pair_pool;
+
+  vec_validate_init_empty (lip_db_by_phy, phy_sw_if_index, INDEX_INVALID);
+  vec_validate_init_empty (lip_db_by_host, host_sw_if_index, INDEX_INVALID);
+  lip_db_by_phy[phy_sw_if_index] = lipi;
+  lip_db_by_host[host_sw_if_index] = lipi;
+  hash_set (lip_db_by_vif, host_index, lipi);
+
+  lip->lip_host_sw_if_index = host_sw_if_index;
+  lip->lip_phy_sw_if_index = phy_sw_if_index;
+  lip->lip_host_name = vec_dup (host_name);
+  lip->lip_host_type = host_type;
+  lip->lip_vif_index = host_index;
+  lip->lip_namespace = vec_dup (ns);
+  lip->lip_create_ts = vlib_time_now (vlib_get_main ());
+
+  if (lip->lip_host_sw_if_index == ~0)
+    return 0;
+
+  /*
+   * First use of this host interface.
+   * Enable the x-connect feature on the host to send
+   * all packets to the phy.
+   */
+  ip_address_family_t af;
+
+  FOR_EACH_IP_ADDRESS_FAMILY (af)
+  ip_feature_enable_disable (af, N_SAFI, IP_FEATURE_INPUT,
+                             lcp_itf_l3_feat_names[lip->lip_host_type][af],
+                             lip->lip_host_sw_if_index, 1, NULL, 0);
+
+  /*
+   * Configure passive punt to the host interface.
+   */
+  fib_route_path_t *rpaths = NULL, rpath = {
+    .frp_flags = lcp_itf_route_path_flags[lip->lip_host_type],
+    .frp_proto = DPO_PROTO_IP4,
+    .frp_sw_if_index = lip->lip_host_sw_if_index,
+    .frp_weight = 1,
+    .frp_fib_index = ~0,
+  };
+
+  vec_add1 (rpaths, rpath);
+
+  ip4_punt_redirect_add_paths (lip->lip_phy_sw_if_index, rpaths);
+
+  rpaths[0].frp_proto = DPO_PROTO_IP6;
+
+  ip6_punt_redirect_add_paths (lip->lip_phy_sw_if_index, rpaths);
+
+  vec_free (rpaths);
+
+  lcp_itf_set_adjs (lip);
+
+  /* enable ARP feature node for broadcast interfaces */
+  if (lip->lip_host_type != LCP_ITF_HOST_TUN)
+    {
+      vnet_feature_enable_disable ("arp", "linux-cp-arp-phy",
+                                   lip->lip_phy_sw_if_index, 1, NULL, 0);
+      vnet_feature_enable_disable ("arp", "linux-cp-arp-host",
+                                   lip->lip_host_sw_if_index, 1, NULL, 0);
+    }
+  else
+    {
+      vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 1, NULL,
+                                   0);
+      vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 1, NULL,
+                                   0);
+    }
+
+  return 0;
+}
+
+/**
+ * Create a Linux VLAN link on top of an existing link via rtnetlink.
+ *
+ * @param parent Linux interface index of the parent link.
+ * @param vlan   VLAN id of the new link.
+ * @param name   name of the new link.
+ * @return NULL on success, otherwise an error carrying the libnl error code.
+ *         The netlink socket and link object are released on every path.
+ */
+static clib_error_t *
+lcp_netlink_add_link_vlan (int parent, u32 vlan, const char *name)
+{
+  struct rtnl_link *link = NULL;
+  clib_error_t *error = NULL;
+  struct nl_sock *sk;
+  int err;
+
+  sk = nl_socket_alloc ();
+  if (!sk)
+    return clib_error_return (NULL, "Unable to allocate netlink socket");
+
+  if ((err = nl_connect (sk, NETLINK_ROUTE)) < 0)
+    {
+      error = clib_error_return (NULL, "Unable to connect socket: %d", err);
+      goto done;
+    }
+
+  link = rtnl_link_vlan_alloc ();
+  if (!link)
+    {
+      error = clib_error_return (NULL, "Unable to allocate link %s", name);
+      goto done;
+    }
+
+  rtnl_link_set_link (link, parent);
+  rtnl_link_set_name (link, name);
+
+  rtnl_link_vlan_set_id (link, vlan);
+
+  if ((err = rtnl_link_add (sk, link, NLM_F_CREATE)) < 0)
+    error = clib_error_return (NULL, "Unable to add link %s: %d", name, err);
+
+done:
+  if (link)
+    rtnl_link_put (link);
+  nl_close (sk);
+  /* nl_close only disconnects; the socket object itself must be freed */
+  nl_socket_free (sk);
+
+  return error;
+}
+
+/**
+ * Delete a Linux link by name via rtnetlink.
+ *
+ * @param name name of the link to delete.
+ * @return NULL on success, otherwise an error carrying the libnl error code.
+ *         The netlink socket and link object are released on every path.
+ */
+static clib_error_t *
+lcp_netlink_del_link (const char *name)
+{
+  struct rtnl_link *link = NULL;
+  clib_error_t *error = NULL;
+  struct nl_sock *sk;
+  int err;
+
+  sk = nl_socket_alloc ();
+  if (!sk)
+    return clib_error_return (NULL, "Unable to allocate netlink socket");
+
+  if ((err = nl_connect (sk, NETLINK_ROUTE)) < 0)
+    {
+      error = clib_error_return (NULL, "Unable to connect socket: %d", err);
+      goto done;
+    }
+
+  link = rtnl_link_alloc ();
+  if (!link)
+    {
+      error = clib_error_return (NULL, "Unable to allocate link %s", name);
+      goto done;
+    }
+
+  rtnl_link_set_name (link, name);
+
+  if ((err = rtnl_link_delete (sk, link)) < 0)
+    error = clib_error_return (NULL, "Unable to del link %s: %d", name, err);
+
+done:
+  if (link)
+    rtnl_link_put (link);
+  nl_close (sk);
+  /* nl_close only disconnects; the socket object itself must be freed */
+  nl_socket_free (sk);
+
+  return error;
+}
+
+/**
+ * Remove the pair attached to a phy: undo the features, adjacencies and
+ * punt redirects installed by lcp_itf_pair_add(), drop the pair from every
+ * lookup DB and return it to the pool. The host interface itself is not
+ * deleted here.
+ *
+ * @param phy_sw_if_index VPP sw_if_index of the phy.
+ * @return 0 on success, VNET_API_ERROR_INVALID_SW_IF_INDEX when the phy has
+ *         no pair.
+ */
+int
+lcp_itf_pair_del (u32 phy_sw_if_index)
+{
+  ip_address_family_t af;
+  lcp_itf_pair_t *lip;
+  u32 lipi;
+
+  lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
+
+  if (lipi == INDEX_INVALID)
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  lip = lcp_itf_pair_get (lipi);
+
+  LCP_ITF_PAIR_INFO ("pair delete: {%U, %U, %s}", format_vnet_sw_if_index_name,
+                     vnet_get_main (), lip->lip_phy_sw_if_index,
+                     format_vnet_sw_if_index_name, vnet_get_main (),
+                     lip->lip_host_sw_if_index, lip->lip_host_name);
+
+  FOR_EACH_IP_ADDRESS_FAMILY (af)
+  ip_feature_enable_disable (af, N_SAFI, IP_FEATURE_INPUT,
+                             lcp_itf_l3_feat_names[lip->lip_host_type][af],
+                             lip->lip_host_sw_if_index, 0, NULL, 0);
+
+  lcp_itf_unset_adjs (lip);
+
+  ip4_punt_redirect_del (lip->lip_phy_sw_if_index);
+  ip6_punt_redirect_del (lip->lip_phy_sw_if_index);
+
+  /* disable ARP feature node for broadcast interfaces */
+  if (lip->lip_host_type != LCP_ITF_HOST_TUN)
+    {
+      vnet_feature_enable_disable ("arp", "linux-cp-arp-phy",
+                                   lip->lip_phy_sw_if_index, 0, NULL, 0);
+      vnet_feature_enable_disable ("arp", "linux-cp-arp-host",
+                                   lip->lip_host_sw_if_index, 0, NULL, 0);
+    }
+  else
+    {
+      vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 0, NULL,
+                                   0);
+      vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 0, NULL,
+                                   0);
+    }
+
+  /*
+   * Remove the pair from all three DBs populated by lcp_itf_pair_add():
+   * by phy, by host (its own vector, not the phy one) and by Linux vif.
+   */
+  lip_db_by_phy[phy_sw_if_index] = INDEX_INVALID;
+  if (lip->lip_host_sw_if_index < vec_len (lip_db_by_host))
+    lip_db_by_host[lip->lip_host_sw_if_index] = INDEX_INVALID;
+  hash_unset (lip_db_by_vif, lip->lip_vif_index);
+
+  vec_free (lip->lip_host_name);
+  vec_free (lip->lip_namespace);
+  pool_put (lcp_itf_pair_pool, lip);
+
+  return 0;
+}
+
+/*
+ * Delete a pair and the host interface behind it.
+ * The host name and sw_if_index are saved first because lcp_itf_pair_del()
+ * releases the pair. A host sub-interface is removed from both Linux and
+ * VPP; anything else is treated as a tap and deleted as such.
+ */
+static void
+lcp_itf_pair_delete_by_index (index_t lipi)
+{
+  lcp_itf_pair_t *lip = lcp_itf_pair_get (lipi);
+  u8 *name_copy = vec_dup (lip->lip_host_name);
+  u32 host_swi = lip->lip_host_sw_if_index;
+
+  lcp_itf_pair_del (lip->lip_phy_sw_if_index);
+
+  if (!vnet_sw_interface_is_sub (vnet_get_main (), host_swi))
+    {
+      tap_delete_if (vlib_get_main (), host_swi);
+    }
+  else
+    {
+      lcp_netlink_del_link ((const char *) name_copy);
+      vnet_delete_sub_interface (host_swi);
+    }
+
+  vec_free (name_copy);
+}
+
+/*
+ * Delete the pair on the given phy together with its host interface.
+ * Returns 0 on success or VNET_API_ERROR_INVALID_SW_IF_INDEX when the phy
+ * has no pair.
+ */
+int
+lcp_itf_pair_delete (u32 phy_sw_if_index)
+{
+  index_t lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
+
+  if (lipi != INDEX_INVALID)
+    {
+      lcp_itf_pair_delete_by_index (lipi);
+      return 0;
+    }
+
+  return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+}
+
+/*
+ * Invoke cb (index, ctx) for each allocated pair in the pool.
+ * The walk ends early as soon as the callback returns a zero value.
+ */
+void
+lcp_itf_pair_walk (lcp_itf_pair_walk_cb_t cb, void *ctx)
+{
+  u32 api;
+
+  pool_foreach_index (api, lcp_itf_pair_pool)
+    {
+      if (!cb (api, ctx))
+       break;
+    };
+}
+
+/*
+ * A pair requested by name in the startup configuration. The entry is
+ * matched against hardware interface names when interfaces are added.
+ */
+typedef struct lcp_itf_pair_names_t_
+{
+  u8 *lipn_host_name;        /* requested Linux host interface name */
+  u8 *lipn_phy_name;         /* VPP phy interface name to match */
+  u8 *lipn_namespace;        /* requested namespace, 0 if none was given */
+  u32 lipn_phy_sw_if_index;   /* filled in once the phy has been matched */
+} lcp_itf_pair_names_t;
+
+/* Vector of pairs requested in the startup configuration */
+static lcp_itf_pair_names_t *lipn_names;
+
+/**
+ * Startup configuration handler for the "linux-cp" section.
+ *
+ * Recognised statements:
+ *  - pair <phy> <host> <netns>  : queue a pair in a namespace
+ *  - pair <phy> <host>          : queue a pair without a namespace
+ *  - default netns <netns>      : set the default namespace
+ *  - interface-auto-create      : enable automatic interface creation
+ *
+ * Queued pairs are stored in lipn_names and created later, when the named
+ * phy interface appears.
+ *
+ * NOTE(review): the three-token form parses <phy> with %s while the
+ * two-token form uses %v, and the stored name is later compared with
+ * vec_cmp() against the hardware interface name - confirm that both forms
+ * yield a vector of the length that comparison expects.
+ *
+ * @return NULL on success, otherwise an error describing the bad statement.
+ */
+static clib_error_t *
+lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input)
+{
+  clib_error_t *error = NULL;
+  u8 *host, *phy;
+  u8 *ns;
+  u8 *default_ns;
+
+  host = phy = ns = default_ns = NULL;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      /*
+       * Everything kept from a previous statement was duplicated, so the
+       * parse temporaries can be released before they are assigned again.
+       */
+      vec_free (host);
+      vec_free (phy);
+      vec_free (ns);
+      vec_free (default_ns);
+
+      if (unformat (input, "pair %s %s %s", &phy, &host, &ns))
+        {
+          lcp_itf_pair_names_t *lipn;
+
+          if (vec_len (ns) > LCP_NS_LEN)
+            {
+              error = clib_error_return (0,
+                                         "linux-cp IF namespace must"
+                                         " be less than %d characters",
+                                         LCP_NS_LEN);
+              break;
+            }
+
+          vec_add2 (lipn_names, lipn, 1);
+
+          lipn->lipn_host_name = vec_dup (host);
+          lipn->lipn_phy_name = vec_dup (phy);
+          lipn->lipn_namespace = vec_dup (ns);
+        }
+      else if (unformat (input, "pair %v %v", &phy, &host))
+        {
+          lcp_itf_pair_names_t *lipn;
+
+          vec_add2 (lipn_names, lipn, 1);
+
+          /*
+           * %v yields a vector without a terminating NUL, but the host
+           * name is later consumed as a C string; terminate the copy.
+           */
+          lipn->lipn_host_name = format (0, "%v%c", host, 0);
+          lipn->lipn_phy_name = vec_dup (phy);
+          lipn->lipn_namespace = 0;
+        }
+      else if (unformat (input, "default netns %v", &default_ns))
+        {
+          vec_add1 (default_ns, 0);
+          if (lcp_set_default_ns (default_ns) < 0)
+            {
+              error = clib_error_return (0,
+                                         "linux-cp default namespace must"
+                                         " be less than %d characters",
+                                         LCP_NS_LEN);
+              break;
+            }
+        }
+      else if (unformat (input, "interface-auto-create"))
+        lcp_set_auto_intf (1 /* is_auto */);
+      else
+        {
+          error = clib_error_return (0, "interfaces not found");
+          break;
+        }
+    }
+
+  /* single exit: release the temporaries on success and on error alike */
+  vec_free (host);
+  vec_free (phy);
+  vec_free (ns);
+  vec_free (default_ns);
+
+  return error;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (lcp_itf_pair_config, "linux-cp");
+
+/*
+ * Check a candidate Linux host interface name.
+ *
+ * Returns 1 if the tap name is valid.
+ * Returns 0 if the tap name is invalid: a NULL name, a name of IFNAMSIZ or
+ * more characters, or one containing a character that is neither
+ * alphanumeric nor one of - _ % @ : .
+ */
+static int
+lcp_validate_if_name (u8 *name)
+{
+  int len;
+  char *p;
+
+  /* callers may arrive without a host name at all */
+  if (!name)
+    return 0;
+
+  p = (char *) name;
+  len = clib_strnlen (p, IFNAMSIZ);
+  if (len >= IFNAMSIZ)
+    return 0;
+
+  for (; *p; ++p)
+    {
+      /* <ctype.h> wants an unsigned char value; plain char may be negative */
+      if (isalnum ((unsigned char) *p))
+        continue;
+
+      switch (*p)
+        {
+        case '-':
+        case '_':
+        case '%':
+        case '@':
+        case ':':
+        case '.':
+          continue;
+        }
+
+      return 0;
+    }
+
+  return 1;
+}
+
+/*
+ * Open a network namespace for reading and return the file descriptor
+ * (or the failing result of open()).
+ * A NULL name selects the caller's current namespace; otherwise the named
+ * namespace under /var/run/netns is opened.
+ */
+static int
+lcp_itf_get_ns_fd (char *ns_name)
+{
+  char ns_path[256];
+
+  if (ns_name == NULL)
+    return open ("/proc/self/ns/net", O_RDONLY);
+
+  snprintf (ns_path, sizeof (ns_path) - 1, "/var/run/netns/%s", ns_name);
+
+  return open (ns_path, O_RDONLY);
+}
+
+/**
+ * Set the link state of a Linux interface, optionally inside a namespace.
+ *
+ * When ns is given the calling thread enters that namespace for the
+ * netlink request and afterwards returns to the namespace it started in.
+ * If the namespace cannot be opened the request is issued from the current
+ * namespace.
+ *
+ * @param vif_index Linux interface index.
+ * @param up        non-zero for up, zero for down.
+ * @param ns        namespace name, or NULL for the current namespace.
+ */
+static void
+lcp_itf_set_vif_link_state (u32 vif_index, u8 up, u8 *ns)
+{
+  int curr_ns_fd, vif_ns_fd;
+
+  curr_ns_fd = vif_ns_fd = -1;
+
+  if (ns)
+    {
+      u8 *ns_path = 0;
+
+      curr_ns_fd = open ("/proc/self/ns/net", O_RDONLY);
+      ns_path = format (0, "/var/run/netns/%s%c", (char *) ns, 0);
+      vif_ns_fd = open ((char *) ns_path, O_RDONLY);
+      /* the path is only needed for the open above */
+      vec_free (ns_path);
+      if (vif_ns_fd != -1)
+        setns (vif_ns_fd, CLONE_NEWNET);
+    }
+
+  vnet_netlink_set_link_state (vif_index, up);
+
+  if (vif_ns_fd != -1)
+    close (vif_ns_fd);
+
+  if (curr_ns_fd != -1)
+    {
+      setns (curr_ns_fd, CLONE_NEWNET);
+      close (curr_ns_fd);
+    }
+}
+
+/**
+ * Create the host side of an interface pair and register the pair.
+ *
+ * For a phy sub-interface, a Linux VLAN link is created on the parent's host
+ * interface (unless a link with that name already exists) together with a
+ * matching VPP sub-interface on the parent's tap. For any other phy a
+ * tap/tun is created. The pair is then stored with lcp_itf_pair_add().
+ *
+ * @param phy_sw_if_index VPP sw_if_index of the phy.
+ * @param host_if_name    Linux name for the host interface.
+ * @param host_if_type    tap or tun.
+ * @param ns              namespace for the host interface; NULL or empty
+ *                        selects the configured default namespace, if any.
+ * @return 0 on success; a VNET_API_ERROR_* or tap-creation error code, or
+ *         -1 when no Linux interface index could be obtained.
+ */
+int
+lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
+                    lip_host_type_t host_if_type, u8 *ns)
+{
+  vlib_main_t *vm;
+  vnet_main_t *vnm;
+  /* ~0 keeps the failure logs below well defined */
+  u32 vif_index = 0, host_sw_if_index = ~0;
+  const vnet_sw_interface_t *sw;
+  const vnet_hw_interface_t *hw;
+  int rv;
+
+  if (!vnet_sw_if_index_is_api_valid (phy_sw_if_index))
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  if (!lcp_validate_if_name (host_if_name))
+    return VNET_API_ERROR_INVALID_ARGUMENT;
+
+  vnm = vnet_get_main ();
+  sw = vnet_get_sw_interface (vnm, phy_sw_if_index);
+  hw = vnet_get_sup_hw_interface (vnm, phy_sw_if_index);
+
+  /*
+   * Use interface-specific netns if supplied.
+   * Otherwise, use default netns if defined.
+   * Otherwise ignore a netns and use the OS default.
+   */
+  if (ns == 0 || ns[0] == 0)
+    ns = lcp_get_default_ns ();
+
+  /* sub interfaces do not need a tap created */
+  if (vnet_sw_interface_is_sub (vnm, phy_sw_if_index))
+    {
+      const lcp_itf_pair_t *lip;
+      int orig_ns_fd, ns_fd;
+      clib_error_t *err;
+      index_t parent_lipi;
+      int sub_failed = 0;
+      u16 vlan;
+
+      /*
+       * Find the parent tap by finding the pair from the parent phy.
+       * Without a pair on the parent there is no host interface to build on.
+       */
+      parent_lipi = lcp_itf_pair_find_by_phy (sw->sup_sw_if_index);
+      if (parent_lipi == INDEX_INVALID)
+        return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+      lip = lcp_itf_pair_get (parent_lipi);
+      vlan = sw->sub.eth.outer_vlan_id;
+
+      /*
+       * see if the requested host interface has already been created
+       */
+      orig_ns_fd = ns_fd = -1;
+      err = NULL;
+
+      if (ns && ns[0] != 0)
+        {
+          orig_ns_fd = lcp_itf_get_ns_fd (NULL);
+          ns_fd = lcp_itf_get_ns_fd ((char *) ns);
+          if (orig_ns_fd == -1 || ns_fd == -1)
+            goto socket_close;
+
+          setns (ns_fd, CLONE_NEWNET);
+        }
+
+      vif_index = if_nametoindex ((const char *) host_if_name);
+
+      if (!vif_index)
+        {
+          /*
+           * no existing host interface, create it now
+           */
+          err = lcp_netlink_add_link_vlan (lip->lip_vif_index, vlan,
+                                           (const char *) host_if_name);
+
+          /*
+           * NOTE(review): vif_index is still 0 here because the name is
+           * only resolved below - confirm this call is intended.
+           */
+          if (!err && -1 != ns_fd)
+            err = vnet_netlink_set_link_netns (vif_index, ns_fd, NULL);
+
+          if (!err)
+            vif_index = if_nametoindex ((char *) host_if_name);
+        }
+
+      /*
+       * create a sub-interface on the tap
+       */
+      if (!err && vnet_create_sub_interface (lip->lip_host_sw_if_index,
+                                             sw->sub.id, sw->sub.eth.raw_flags,
+                                             sw->sub.eth.inner_vlan_id, vlan,
+                                             &host_sw_if_index))
+        {
+          LCP_ITF_PAIR_INFO ("failed create vlan: %d on %U", vlan,
+                             format_vnet_sw_if_index_name, vnet_get_main (),
+                             lip->lip_host_sw_if_index);
+          /* host_sw_if_index is not usable; do not carry on with it */
+          sub_failed = 1;
+        }
+
+    socket_close:
+      if (orig_ns_fd != -1)
+        {
+          setns (orig_ns_fd, CLONE_NEWNET);
+          close (orig_ns_fd);
+        }
+      if (ns_fd != -1)
+        close (ns_fd);
+
+      if (err)
+        {
+          LCP_ITF_PAIR_INFO ("failed to create host interface %s: %U",
+                             host_if_name, format_clib_error, err);
+          clib_error_free (err);
+          return VNET_API_ERROR_INVALID_ARGUMENT;
+        }
+
+      if (sub_failed)
+        return VNET_API_ERROR_INVALID_ARGUMENT;
+    }
+  else
+    {
+      tap_create_if_args_t args = {
+        .num_rx_queues = clib_max (1, vlib_num_workers ()),
+        .id = hw->hw_if_index,
+        .sw_if_index = ~0,
+        .rx_ring_sz = 256,
+        .tx_ring_sz = 256,
+        .host_if_name = host_if_name,
+        .host_namespace = 0,
+      };
+      ethernet_interface_t *ei;
+
+      if (host_if_type == LCP_ITF_HOST_TUN)
+        args.tap_flags |= TAP_FLAG_TUN;
+      else
+        {
+          /* a tap mirrors the MAC address of the phy */
+          ei = pool_elt_at_index (ethernet_main.interfaces, hw->hw_instance);
+          mac_address_copy (&args.host_mac_addr, &ei->address.mac);
+        }
+
+      if (sw->mtu[VNET_MTU_L3])
+        {
+          args.host_mtu_set = 1;
+          args.host_mtu_size = sw->mtu[VNET_MTU_L3];
+        }
+
+      if (ns && ns[0] != 0)
+        args.host_namespace = ns;
+
+      vm = vlib_get_main ();
+      tap_create_if (vm, &args);
+
+      if (args.rv < 0)
+        {
+          return args.rv;
+        }
+
+      /*
+       * get the hw and ethernet of the tap
+       */
+      hw = vnet_get_sup_hw_interface (vnm, args.sw_if_index);
+
+      /*
+       * Set the interface down on the host side.
+       * This controls whether the host can RX/TX.
+       */
+      virtio_main_t *mm = &virtio_main;
+      virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
+
+      lcp_itf_set_vif_link_state (vif->ifindex, 0 /* down */,
+                                  args.host_namespace);
+
+      /*
+       * Leave the TAP permanently up on the VPP side.
+       * This TAP will be shared by many sub-interfaces.
+       * Therefore we can't use it to manage admin state.
+       * Flag the tap's ethernet interface with STATUS_L3.
+       */
+      if (host_if_type == LCP_ITF_HOST_TAP)
+        {
+          ei = pool_elt_at_index (ethernet_main.interfaces, hw->hw_instance);
+          ei->flags |= ETHERNET_INTERFACE_FLAG_STATUS_L3;
+        }
+
+      vif_index = vif->ifindex;
+      host_sw_if_index = args.sw_if_index;
+    }
+
+  if (!vif_index)
+    {
+      LCP_ITF_PAIR_INFO ("failed pair add (no vif index): {%U, %U, %s}",
+                         format_vnet_sw_if_index_name, vnet_get_main (),
+                         phy_sw_if_index, format_vnet_sw_if_index_name,
+                         vnet_get_main (), host_sw_if_index, host_if_name);
+      return -1;
+    }
+
+  vnet_sw_interface_admin_up (vnm, host_sw_if_index);
+  rv = lcp_itf_pair_add (host_sw_if_index, phy_sw_if_index, host_if_name,
+                         vif_index, host_if_type, ns);
+  if (rv)
+    {
+      /* do not report success for a pair that was never stored */
+      LCP_ITF_PAIR_INFO ("failed pair add (%d): {%U, %U, %s}", rv,
+                         format_vnet_sw_if_index_name, vnet_get_main (),
+                         phy_sw_if_index, format_vnet_sw_if_index_name,
+                         vnet_get_main (), host_sw_if_index, host_if_name);
+      return rv;
+    }
+
+  LCP_ITF_PAIR_INFO ("pair create: {%U, %U, %s}", format_vnet_sw_if_index_name,
+                     vnet_get_main (), phy_sw_if_index,
+                     format_vnet_sw_if_index_name, vnet_get_main (),
+                     host_sw_if_index, host_if_name);
+
+  return 0;
+}
+
+/* Walk callback: flag the pair as stale and keep walking. */
+static walk_rc_t
+lcp_itf_pair_walk_mark (index_t lipi, void *ctx)
+{
+  lcp_itf_pair_t *pair = lcp_itf_pair_get (lipi);
+
+  pair->lip_flags |= LIP_FLAG_STALE;
+
+  return WALK_CONTINUE;
+}
+
+/*
+ * Begin a replace transaction: every existing pair is flagged stale.
+ * Always returns 0.
+ */
+int
+lcp_itf_pair_replace_begin (void)
+{
+  int rv = 0;
+
+  lcp_itf_pair_walk (lcp_itf_pair_walk_mark, NULL);
+
+  return rv;
+}
+
+/* Context for the sweep walk: collects the pool indices of stale pairs. */
+typedef struct lcp_itf_pair_sweep_ctx_t_
+{
+  index_t *indicies; /* vector of pair indices to delete after the walk */
+} lcp_itf_pair_sweep_ctx_t;
+
+/*
+ * Walk callback: remember the index of every pair still flagged stale.
+ * Nothing is deleted here; the indices are collected in the context.
+ */
+static walk_rc_t
+lcp_itf_pair_walk_sweep (index_t lipi, void *arg)
+{
+  lcp_itf_pair_sweep_ctx_t *sweep = arg;
+  const lcp_itf_pair_t *pair = lcp_itf_pair_get (lipi);
+
+  if ((pair->lip_flags & LIP_FLAG_STALE) != 0)
+    vec_add1 (sweep->indicies, lipi);
+
+  return WALK_CONTINUE;
+}
+
+/*
+ * End a replace transaction: delete every pair still flagged stale.
+ * The indices are gathered first and deleted afterwards so that the pool is
+ * not modified while it is being walked. Always returns 0.
+ */
+int
+lcp_itf_pair_replace_end (void)
+{
+  lcp_itf_pair_sweep_ctx_t sweep = { .indicies = NULL };
+  u32 i;
+
+  lcp_itf_pair_walk (lcp_itf_pair_walk_sweep, &sweep);
+
+  for (i = 0; i < vec_len (sweep.indicies); i++)
+    lcp_itf_pair_delete_by_index (sweep.indicies[i]);
+
+  vec_free (sweep.indicies);
+
+  return 0;
+}
+
+/*
+ * Process node: waits for events whose data is an index into lipn_names
+ * and creates the corresponding pair, always with a tap host interface.
+ */
+static uword
+lcp_itf_pair_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
+                     vlib_frame_t *f)
+{
+  uword *events = 0;
+
+  for (;;)
+    {
+      uword i;
+
+      vlib_process_wait_for_event (vm);
+
+      vlib_process_get_events (vm, &events);
+
+      for (i = 0; i < vec_len (events); i++)
+        {
+          lcp_itf_pair_names_t *lipn = &lipn_names[events[i]];
+
+          lcp_itf_pair_create (lipn->lipn_phy_sw_if_index,
+                               lipn->lipn_host_name, LCP_ITF_HOST_TAP,
+                               lipn->lipn_namespace);
+        }
+
+      /* keep the allocation for the next batch of events */
+      vec_reset_length (events);
+    }
+
+  return 0;
+}
+
+/* Process node that creates the pairs requested in the startup config */
+VLIB_REGISTER_NODE (lcp_itf_pair_process_node, static) = {
+  .function = lcp_itf_pair_process,
+  .name = "linux-cp-itf-process",
+  .type = VLIB_NODE_TYPE_PROCESS,
+};
+
+/*
+ * Interface add/del callback.
+ * On creation of a non-sub interface whose hardware name matches a pair
+ * requested in the startup config, record its sw_if_index and signal the
+ * process node to create the pair. Only the first match is used.
+ */
+static clib_error_t *
+lcp_itf_phy_add (vnet_main_t *vnm, u32 sw_if_index, u32 is_create)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_hw_interface_t *hw;
+  uword i;
+
+  if (!is_create)
+    return NULL;
+
+  if (vnet_sw_interface_is_sub (vnm, sw_if_index))
+    return NULL;
+
+  hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+  for (i = 0; i < vec_len (lipn_names); i++)
+    {
+      lcp_itf_pair_names_t *lipn = &lipn_names[i];
+
+      if (vec_cmp (hw->name, lipn->lipn_phy_name) != 0)
+        continue;
+
+      lipn->lipn_phy_sw_if_index = sw_if_index;
+
+      /* the event data is the index of the request in lipn_names */
+      vlib_process_signal_event (vm, lcp_itf_pair_process_node.index, 0, i);
+      break;
+    }
+
+  return NULL;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_phy_add);
+
+/*
+ * Hardware link up/down callback.
+ * Mirrors the phy's carrier state, and on link-up its speed, onto the tap
+ * paired with it. Nothing is done when the interface has no pair, when the
+ * host interface no longer exists, or in test mode.
+ */
+static clib_error_t *
+lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+  vnet_hw_interface_t *hi;
+  vnet_sw_interface_t *si;
+  lcp_itf_pair_t *lip;
+  index_t lipi;
+
+  hi = vnet_get_hw_interface_or_null (vnm, hw_if_index);
+  if (hi == NULL)
+    return 0;
+
+  lipi = lcp_itf_pair_find_by_phy (hi->sw_if_index);
+  if (lipi == INDEX_INVALID)
+    return 0;
+
+  lip = lcp_itf_pair_get (lipi);
+  si = vnet_get_sw_interface_or_null (vnm, lip->lip_host_sw_if_index);
+  if (si == NULL)
+    return 0;
+
+  if (lcp_main.test_mode)
+    return 0;
+
+  tap_set_carrier (si->hw_if_index, (flags & VNET_HW_INTERFACE_FLAG_LINK_UP));
+
+  if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP)
+    tap_set_speed (si->hw_if_index, hi->link_speed / 1000);
+
+  return 0;
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_itf_pair_link_up_down);
+
+/*
+ * Init function for the interface-pair module: registers a punt client,
+ * subscribes "linux-cp-punt" to the IPSEC_PUNT_IP4_SPI_UDP_0 punt reason,
+ * enables punting for unknown UDP and TCP ports and registers the log
+ * class. Ordered after the interface, tcp and udp init functions.
+ */
+static clib_error_t *
+lcp_itf_pair_init (vlib_main_t *vm)
+{
+  vlib_punt_hdl_t punt_hdl = vlib_punt_client_register ("linux-cp");
+
+  /* punt IKE */
+  vlib_punt_register (punt_hdl, ipsec_punt_reason[IPSEC_PUNT_IP4_SPI_UDP_0],
+                     "linux-cp-punt");
+
+  /* punt all unknown ports */
+  udp_punt_unknown (vm, 0, 1);
+  udp_punt_unknown (vm, 1, 1);
+  tcp_punt_unknown (vm, 0, 1);
+  tcp_punt_unknown (vm, 1, 1);
+
+  lcp_itf_pair_logger = vlib_log_register_class ("linux-cp", "itf");
+
+  return NULL;
+}
+
+VLIB_INIT_FUNCTION (lcp_itf_pair_init) = {
+  .runs_after = VLIB_INITS ("vnet_interface_init", "tcp_init", "udp_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_interface.h b/src/plugins/linux-cp/lcp_interface.h
new file mode 100644 (file)
index 0000000..d2f19e8
--- /dev/null
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __LCP_ITF_PAIR_H__
+#define __LCP_ITF_PAIR_H__
+
+#include <vnet/dpo/dpo.h>
+#include <vnet/adj/adj.h>
+#include <vnet/ip/ip_types.h>
+
+#include <plugins/linux-cp/lcp.h>
+
+/* X-macro list of interface-pair flags: _ (NAME, bit position, string) */
+#define foreach_lcp_itf_pair_flag _ (STALE, 0, "stale")
+
+/*
+ * Flag bits of an interface pair. Each entry of the list above becomes
+ * LIP_FLAG_<NAME> = (1 << bit position).
+ */
+typedef enum lip_flag_t_
+{
+#define _(a, b, c) LIP_FLAG_##a = (1 << b),
+  foreach_lcp_itf_pair_flag
+#undef _
+} lip_flag_t;
+
+/* The type of host interface used for a pair: a tap or a tun */
+typedef enum
+{
+  LCP_ITF_HOST_TAP = 0,
+  LCP_ITF_HOST_TUN = 1,
+} lip_host_type_t;
+
+/* Number of host interface types (one more than the last enumerator) */
+#define N_LCP_ITF_HOST (LCP_ITF_HOST_TUN + 1)
+
+/* A set of adjacency indices, one slot per address family (N_AF slots) */
+typedef struct lcp_itf_phy_adj
+{
+  adj_index_t adj_index[N_AF];
+} lcp_itf_phy_adj_t;
+
+/**
+ * A pair of interfaces: a VPP phy interface and the host interface
+ * (tap or tun, see lip_host_type) that is associated with it.
+ */
+typedef struct lcp_itf_pair_t_
+{
+  u32 lip_host_sw_if_index;      /* VPP's sw_if_index for the host tap */
+  u32 lip_phy_sw_if_index;       /* VPP's sw_if_index for the phy */
+  u8 *lip_host_name;             /* linux's name for the tap */
+  u32 lip_vif_index;             /* linux's index for the tap */
+  u8 *lip_namespace;             /* namespace in which the tap lives */
+  lip_host_type_t lip_host_type;  /* type of host interface */
+  lcp_itf_phy_adj_t lip_phy_adjs; /* adjacencies for phy l3 interface */
+  lip_flag_t lip_flags;                  /* Flags */
+  u8 lip_rewrite_len;            /* The length of an L2 MAC rewrite */
+  f64 lip_create_ts;             /* Timestamp of creation */
+} lcp_itf_pair_t;
+/* storage for the pair objects, defined elsewhere (presumably a pool) */
+extern lcp_itf_pair_t *lcp_itf_pair_pool;
+
+extern vlib_node_registration_t lcp_ethernet_node;
+
+u8 *format_lcp_itf_pair (u8 *s, va_list *args);
+void lcp_itf_pair_show (u32 phy_sw_if_index);
+u32 lcp_itf_num_pairs (void);
+
+/**
+ * Get an interface-pair object from its VPP index
+ */
+extern lcp_itf_pair_t *lcp_itf_pair_get (index_t index);
+
+/**
+ * Find an interface-pair object from the Linux index of its host interface
+ *
+ * @param vif_index linux's index for the host interface
+ * @return VPP's object index
+ */
+extern index_t lcp_itf_pair_find_by_vif (u32 vif_index);
+
+/**
+ * Create an interface-pair
+ *
+ * @return error code
+ */
+extern int lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index,
+                            u8 *host_name, u32 host_index,
+                            lip_host_type_t host_type, u8 *ns);
+extern int lcp_itf_pair_add_sub (u32 vif, u8 *host_name, u32 sub_sw_if_index,
+                                u32 phy_sw_if_index, u8 *ns);
+extern int lcp_itf_pair_del (u32 phy_sw_if_index);
+
+/**
+ * Create an interface-pair from PHY sw_if_index and tap name.
+ *
+ * @return error code
+ */
+extern int lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
+                               lip_host_type_t host_if_type, u8 *ns);
+
+/**
+ * Delete a LCP_ITF_PAIR
+ */
+extern int lcp_itf_pair_delete (u32 phy_sw_if_index);
+
+/**
+ * Callback function invoked during a walk of all interface-pairs
+ */
+typedef walk_rc_t (*lcp_itf_pair_walk_cb_t) (index_t index, void *ctx);
+
+/**
+ * Walk/visit each of the interface pairs
+ */
+extern void lcp_itf_pair_walk (lcp_itf_pair_walk_cb_t cb, void *ctx);
+
+/**
+ * Begin and End the replace process
+ */
+extern int lcp_itf_pair_replace_begin (void);
+extern int lcp_itf_pair_replace_end (void);
+
+/**
+ * Retrieve the pair in the DP
+ */
+extern index_t *lip_db_by_phy;
+extern u32 *lip_db_by_host;
+
+/**
+ * Look up the pair index registered for a phy interface.
+ *
+ * @param phy_sw_if_index VPP sw_if_index of the phy
+ * @return the entry of lip_db_by_phy for that interface, or INDEX_INVALID
+ *         when the interface index lies beyond the end of the vector
+ */
+always_inline index_t
+lcp_itf_pair_find_by_phy (u32 phy_sw_if_index)
+{
+  return (phy_sw_if_index < vec_len (lip_db_by_phy) ?
+            lip_db_by_phy[phy_sw_if_index] :
+            INDEX_INVALID);
+}
+
+/**
+ * Look up the pair index registered for a host interface.
+ *
+ * @param host_sw_if_index VPP sw_if_index of the host interface
+ * @return the entry of lip_db_by_host for that interface, or INDEX_INVALID
+ *         when the interface index lies beyond the end of the vector
+ */
+always_inline index_t
+lcp_itf_pair_find_by_host (u32 host_sw_if_index)
+{
+  return (host_sw_if_index < vec_len (lip_db_by_host) ?
+            lip_db_by_host[host_sw_if_index] :
+            INDEX_INVALID);
+}
+
+/**
+ * manage interface auto creation
+ */
+void lcp_set_auto_intf (u8 is_auto);
+int lcp_auto_intf (void);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
+#endif
diff --git a/src/plugins/linux-cp/lcp_node.c b/src/plugins/linux-cp/lcp_node.c
new file mode 100644 (file)
index 0000000..7f099ff
--- /dev/null
@@ -0,0 +1,919 @@
+/*
+ * lcp_node.c : linux control plane ethernet node
+ *
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sys/socket.h>
+#include <linux/if.h>
+
+#include <plugins/linux-cp/lcp_interface.h>
+#include <plugins/linux-cp/lcp_adj.h>
+#include <linux-cp/lcp.api_enum.h>
+
+#include <vnet/feature/feature.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip_types.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/ip/ip4.h>
+#include <vnet/ip/ip6.h>
+#include <vnet/l2/l2_input.h>
+
+/* X-macro list of the linux-cp-punt next nodes: _ (SYMBOL, description) */
+#define foreach_lip_punt                                                      \
+  _ (IO, "punt to host")                                                      \
+  _ (DROP, "unknown input interface")
+
+/*
+ * Next-node indices of linux-cp-punt. Only the symbol of each list entry is
+ * used here: LIP_PUNT_NEXT_IO = 0, LIP_PUNT_NEXT_DROP = 1.
+ */
+typedef enum
+{
+#define _(sym, str) LIP_PUNT_NEXT_##sym,
+  foreach_lip_punt
+#undef _
+    LIP_PUNT_N_NEXT,
+} lip_punt_next_t;
+
+/* Per-packet trace record of linux-cp-punt */
+typedef struct lip_punt_trace_t_
+{
+  /* interface the packet was received on */
+  u32 phy_sw_if_index;
+  /* host interface of the matching pair, ~0 when there is no pair */
+  u32 host_sw_if_index;
+} lip_punt_trace_t;
+
+/*
+ * Packet trace format function for linux-cp-punt.
+ * Takes (vlib_main_t *, vlib_node_t *, lip_punt_trace_t *) from args and
+ * appends "lip-punt: <phy> -> <host>" to s.
+ */
+static u8 *
+format_lip_punt_trace (u8 *s, va_list *args)
+{
+  const lip_punt_trace_t *rec;
+
+  /* the vm and node arguments are consumed but not needed */
+  (void) va_arg (*args, vlib_main_t *);
+  (void) va_arg (*args, vlib_node_t *);
+  rec = va_arg (*args, lip_punt_trace_t *);
+
+  return format (s, "lip-punt: %u -> %u", rec->phy_sw_if_index,
+                 rec->host_sw_if_index);
+}
+
+/**
+ * Pass punted packets from the PHY to the HOST.
+ *
+ * The RX interface of each packet selects an interface pair. A packet with
+ * a pair is sent to interface-output with the pair's host interface as TX
+ * interface; when the host is a TAP the buffer is first rewound to its
+ * ethernet header. A packet without a pair is dropped.
+ */
+VLIB_NODE_FN (lip_punt_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  u32 *from = vlib_frame_vector_args (frame);
+  u32 n_left_from = frame->n_vectors;
+  lip_punt_next_t next_index = node->cached_next_index;
+  u32 *to_next, n_left_to_next;
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          const lcp_itf_pair_t *pair = NULL;
+          u32 next0 = LIP_PUNT_NEXT_DROP;
+          u32 rx_sw_if_index, pair_index, bi0;
+          vlib_buffer_t *b0;
+
+          /* speculatively enqueue to the current next frame */
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          rx_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+          pair_index = lcp_itf_pair_find_by_phy (rx_sw_if_index);
+
+          if (PREDICT_TRUE (INDEX_INVALID != pair_index))
+            {
+              pair = lcp_itf_pair_get (pair_index);
+              next0 = LIP_PUNT_NEXT_IO;
+              vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+                pair->lip_host_sw_if_index;
+
+              if (PREDICT_TRUE (LCP_ITF_HOST_TAP == pair->lip_host_type))
+                {
+                  /*
+                   * rewind to ethernet header
+                   */
+                  u8 rewind = ((u8 *) vlib_buffer_get_current (b0) -
+                               (u8 *) ethernet_buffer_get_header (b0));
+                  vlib_buffer_advance (b0, -rewind);
+                }
+              /* Tun packets don't need any special treatment, just need to
+               * be escorted past the TTL decrement. If we still want to use
+               * ip[46]-punt-redirect with these, we could just set the
+               * VNET_BUFFER_F_LOCALLY_ORIGINATED in an 'else {}' here and
+               * then pass to the next node on the ip[46]-punt feature arc
+               */
+            }
+
+          if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              lip_punt_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->phy_sw_if_index = rx_sw_if_index;
+              t->host_sw_if_index = (INDEX_INVALID == pair_index) ?
+                                      ~0 :
+                                      pair->lip_host_sw_if_index;
+            }
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                           n_left_to_next, bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/*
+ * Registration of the "linux-cp-punt" node. The next nodes are assigned by
+ * enumerator, so their order here need not match the enum order.
+ */
+VLIB_REGISTER_NODE (lip_punt_node) = {
+  .name = "linux-cp-punt",
+  .vector_size = sizeof (u32),
+  .format_trace = format_lip_punt_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_next_nodes = LIP_PUNT_N_NEXT,
+  .next_nodes = {
+    [LIP_PUNT_NEXT_DROP] = "error-drop",
+    [LIP_PUNT_NEXT_IO] = "interface-output",
+  },
+};
+
+/* X-macro list of the linux-cp-punt-l3 next nodes: _ (SYMBOL, description) */
+#define foreach_lcp_punt_l3 _ (DROP, "unknown error")
+
+/* Next-node indices of linux-cp-punt-l3: LCP_LOCAL_NEXT_DROP = 0 */
+typedef enum
+{
+#define _(sym, str) LCP_LOCAL_NEXT_##sym,
+  foreach_lcp_punt_l3
+#undef _
+    LCP_LOCAL_N_NEXT,
+} lcp_punt_l3_next_t;
+
+/* Per-packet trace record of linux-cp-punt-l3 */
+typedef struct lcp_punt_l3_trace_t_
+{
+  /* interface the packet was received on */
+  u32 phy_sw_if_index;
+} lcp_punt_l3_trace_t;
+
+/*
+ * Packet trace format function for linux-cp-punt-l3.
+ * Takes (vlib_main_t *, vlib_node_t *, lcp_punt_l3_trace_t *) from args and
+ * appends "linux-cp-punt-l3: <phy>" to s.
+ */
+static u8 *
+format_lcp_punt_l3_trace (u8 *s, va_list *args)
+{
+  const lcp_punt_l3_trace_t *rec;
+
+  /* the vm and node arguments are consumed but not needed */
+  (void) va_arg (*args, vlib_main_t *);
+  (void) va_arg (*args, vlib_node_t *);
+  rec = va_arg (*args, lcp_punt_l3_trace_t *);
+
+  return format (s, "linux-cp-punt-l3: %u", rec->phy_sw_if_index);
+}
+
+/**
+ * Feature node on the ip[46]-punt arcs.
+ *
+ * Every packet continues to the next feature on the arc. Packets received
+ * on a phy whose pair has a TUN host interface are additionally flagged as
+ * locally originated so that the TTL check is avoided.
+ */
+VLIB_NODE_FN (lcp_punt_l3_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, *to_next, n_left_to_next;
+  /* use this node's own next-index type, not that of linux-cp-punt */
+  lcp_punt_l3_next_t next_index;
+
+  next_index = node->cached_next_index;
+  n_left_from = frame->n_vectors;
+  from = vlib_frame_vector_args (frame);
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          vlib_buffer_t *b0;
+          u32 next0 = LCP_LOCAL_NEXT_DROP;
+          u32 bi0;
+          index_t lipi0;
+          lcp_itf_pair_t *lip0;
+
+          bi0 = to_next[0] = from[0];
+
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          /* next0 is filled in with the next feature on the arc */
+          vnet_feature_next (&next0, b0);
+
+          lipi0 =
+            lcp_itf_pair_find_by_phy (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+          if (lipi0 != INDEX_INVALID)
+            {
+              /*
+               * Avoid TTL check for packets which arrived on a tunnel and
+               * are being punted to the local host.
+               */
+              lip0 = lcp_itf_pair_get (lipi0);
+              if (lip0->lip_host_type == LCP_ITF_HOST_TUN)
+                b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+            }
+
+          if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              lcp_punt_l3_trace_t *t =
+                vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->phy_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+            }
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                           n_left_to_next, bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/*
+ * Registration of the "linux-cp-punt-l3" node. The next-node count comes
+ * from the enum so it cannot drift from the list of next nodes.
+ */
+VLIB_REGISTER_NODE (lcp_punt_l3_node) = {
+  .name = "linux-cp-punt-l3",
+  .vector_size = sizeof (u32),
+  .format_trace = format_lcp_punt_l3_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_next_nodes = LCP_LOCAL_N_NEXT,
+  .next_nodes = {
+    [LCP_LOCAL_NEXT_DROP] = "error-drop",
+  },
+};
+
+/* run on the IPv4 punt arc, ahead of ip4-punt-redirect */
+VNET_FEATURE_INIT (lcp_punt_l3_ip4, static) = {
+  .arc_name = "ip4-punt",
+  .node_name = "linux-cp-punt-l3",
+  .runs_before = VNET_FEATURES ("ip4-punt-redirect"),
+};
+
+/* run on the IPv6 punt arc, ahead of ip6-punt-redirect */
+VNET_FEATURE_INIT (lcp_punt_l3_ip6, static) = {
+  .arc_name = "ip6-punt",
+  .node_name = "linux-cp-punt-l3",
+  .runs_before = VNET_FEATURES ("ip6-punt-redirect"),
+};
+
+/* X-macro list of the cross-connect next nodes: _ (SYMBOL, description) */
+#define foreach_lcp_xc                                                        \
+  _ (DROP, "drop")                                                            \
+  _ (XC_IP4, "x-connnect-ip4")                                                \
+  _ (XC_IP6, "x-connnect-ip6")
+
+/*
+ * Next-node indices of the cross-connect nodes. Only the symbol of each
+ * list entry is used here; LCP_XC_NEXT_DROP is 0.
+ */
+typedef enum
+{
+#define _(sym, str) LCP_XC_NEXT_##sym,
+  foreach_lcp_xc
+#undef _
+    LCP_XC_N_NEXT,
+} lcp_xc_next_t;
+
+/* Per-packet trace record shared by the cross-connect nodes */
+typedef struct lcp_xc_trace_t_
+{
+  /* phy interface of the pair the packet is sent to */
+  u32 phy_sw_if_index;
+  /* adjacency recorded for the packet */
+  adj_index_t adj_index;
+} lcp_xc_trace_t;
+
+/*
+ * Packet trace format function for the cross-connect nodes.
+ * Takes (vlib_main_t *, vlib_node_t *, lcp_xc_trace_t *) from args and
+ * appends "lcp-xc: itf:<phy> adj:<adj>" to s.
+ */
+static u8 *
+format_lcp_xc_trace (u8 *s, va_list *args)
+{
+  const lcp_xc_trace_t *rec;
+
+  /* the vm and node arguments are consumed but not needed */
+  (void) va_arg (*args, vlib_main_t *);
+  (void) va_arg (*args, vlib_node_t *);
+  rec = va_arg (*args, lcp_xc_trace_t *);
+
+  return format (s, "lcp-xc: itf:%d adj:%d", rec->phy_sw_if_index,
+                 rec->adj_index);
+}
+
+/**
+ * X-connect all packets from the HOST to the PHY.
+ *
+ * This runs in either the IP4 or IP6 path. The MAC rewrite on the received
+ * packet from the host is used as a key to find the adjacency used on the phy.
+ * This allows this code to start the feature arc on that adjacency.
+ * Consequently, all packets sent from the host are also subject to output
+ * features, which is symmetric w.r.t. input features.
+ *
+ * Packets whose destination MAC satisfies ethernet_address_cast() use the
+ * pair's per-address-family adjacency instead of a lookup. Packets for
+ * which no adjacency is found are dropped.
+ *
+ * @param af address family of the path this instance runs in
+ * @return the number of packets in the frame
+ */
+static_always_inline u32
+lcp_xc_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame,
+               ip_address_family_t af)
+{
+  u32 n_left_from, *from, *to_next, n_left_to_next;
+  lcp_xc_next_t next_index;
+  ip_lookup_main_t *lm;
+
+  next_index = 0;
+  n_left_from = frame->n_vectors;
+  from = vlib_frame_vector_args (frame);
+
+  if (AF_IP4 == af)
+    lm = &ip4_main.lookup_main;
+  else
+    lm = &ip6_main.lookup_main;
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          const ethernet_header_t *eth;
+          const lcp_itf_pair_t *lip;
+          u32 next0, bi0, lipi, ai;
+          vlib_buffer_t *b0;
+
+          bi0 = to_next[0] = from[0];
+
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          lipi =
+            lcp_itf_pair_find_by_host (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+          lip = lcp_itf_pair_get (lipi);
+
+          vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip->lip_phy_sw_if_index;
+
+          /* step back over the L2 rewrite so it can be used as lookup key */
+          vlib_buffer_advance (b0, -lip->lip_rewrite_len);
+          eth = vlib_buffer_get_current (b0);
+
+          if (ethernet_address_cast (eth->dst_address))
+            ai = lip->lip_phy_adjs.adj_index[af];
+          else
+            ai = lcp_adj_lkup ((u8 *) eth, lip->lip_rewrite_len,
+                               vnet_buffer (b0)->sw_if_index[VLIB_TX]);
+
+          if (ADJ_INDEX_INVALID != ai)
+            {
+              const ip_adjacency_t *adj;
+
+              adj = adj_get (ai);
+              vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai;
+              next0 = adj->rewrite_header.next_index;
+              vnet_buffer (b0)->ip.save_rewrite_length = lip->lip_rewrite_len;
+
+              if (PREDICT_FALSE (adj->rewrite_header.flags &
+                                 VNET_REWRITE_HAS_FEATURES))
+                vnet_feature_arc_start_w_cfg_index (
+                  lm->output_feature_arc_index,
+                  vnet_buffer (b0)->sw_if_index[VLIB_TX], &next0, b0,
+                  adj->ia_cfg_index);
+            }
+          else
+            next0 = LCP_XC_NEXT_DROP;
+
+          if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              lcp_xc_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->phy_sw_if_index = lip->lip_phy_sw_if_index;
+              /*
+               * Record the adjacency that was resolved. The buffer's
+               * adj_index is only written when the adjacency is valid, so
+               * reading it back on the drop path would trace a stale value.
+               */
+              t->adj_index = ai;
+            }
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                           n_left_to_next, bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/* IPv4 instance of the host-to-phy cross-connect */
+VLIB_NODE_FN (lcp_xc_ip4)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  const ip_address_family_t af = AF_IP4;
+
+  return lcp_xc_inline (vm, node, frame, af);
+}
+
+/* IPv6 instance of the host-to-phy cross-connect */
+VLIB_NODE_FN (lcp_xc_ip6)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  const ip_address_family_t af = AF_IP6;
+
+  return lcp_xc_inline (vm, node, frame, af);
+}
+
+/*
+ * "linux-cp-xc-ip4" declares no next nodes of its own; it is registered as
+ * a sibling of ip4-rewrite.
+ */
+VLIB_REGISTER_NODE (lcp_xc_ip4) = { .name = "linux-cp-xc-ip4",
+                                   .vector_size = sizeof (u32),
+                                   .format_trace = format_lcp_xc_trace,
+                                   .type = VLIB_NODE_TYPE_INTERNAL,
+                                   .sibling_of = "ip4-rewrite" };
+
+/* the IPv4 cross-connect is a feature on both IPv4 input arcs */
+VNET_FEATURE_INIT (lcp_xc_ip4_ucast_node, static) = {
+  .arc_name = "ip4-unicast",
+  .node_name = "linux-cp-xc-ip4",
+};
+VNET_FEATURE_INIT (lcp_xc_ip4_mcast_node, static) = {
+  .arc_name = "ip4-multicast",
+  .node_name = "linux-cp-xc-ip4",
+};
+
+/*
+ * "linux-cp-xc-ip6" declares no next nodes of its own; it is registered as
+ * a sibling of ip6-rewrite.
+ */
+VLIB_REGISTER_NODE (lcp_xc_ip6) = { .name = "linux-cp-xc-ip6",
+                                   .vector_size = sizeof (u32),
+                                   .format_trace = format_lcp_xc_trace,
+                                   .type = VLIB_NODE_TYPE_INTERNAL,
+                                   .sibling_of = "ip6-rewrite" };
+
+/* the IPv6 cross-connect is a feature on both IPv6 input arcs */
+VNET_FEATURE_INIT (lcp_xc_ip6_ucast_node, static) = {
+  .arc_name = "ip6-unicast",
+  .node_name = "linux-cp-xc-ip6",
+};
+VNET_FEATURE_INIT (lcp_xc_ip6_mcast_node, static) = {
+  .arc_name = "ip6-multicast",
+  .node_name = "linux-cp-xc-ip6",
+};
+
+/* Next-node indices of the L3 cross-connect nodes; there is a single one */
+typedef enum
+{
+  LCP_XC_L3_NEXT_XC,
+  LCP_XC_L3_N_NEXT,
+} lcp_xc_l3_next_t;
+
+/**
+ * X-connect all packets from the HOST to the PHY on L3 interfaces
+ *
+ * There's only one adjacency that can be used on these links: the pair's
+ * adjacency for the address family, which is written to the buffer before
+ * the packet is sent to the single next node.
+ *
+ * @param af address family of the path this instance runs in
+ * @return the number of packets in the frame
+ */
+static_always_inline u32
+lcp_xc_l3_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+                  vlib_frame_t *frame, ip_address_family_t af)
+{
+  u32 n_left_from, *from, *to_next, n_left_to_next;
+  /* use the L3 nodes' own next-index type, not that of the L2 x-connect */
+  lcp_xc_l3_next_t next_index;
+
+  next_index = 0;
+  n_left_from = frame->n_vectors;
+  from = vlib_frame_vector_args (frame);
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          vlib_buffer_t *b0;
+          const lcp_itf_pair_t *lip;
+          u32 next0 = ~0;
+          u32 bi0, lipi;
+
+          bi0 = to_next[0] = from[0];
+
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          /* Flag buffers as locally originated. Otherwise their TTL will
+           * be checked & decremented. That would break services like BGP
+           * which set a TTL of 1 by default.
+           */
+          b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+
+          lipi =
+            lcp_itf_pair_find_by_host (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+          lip = lcp_itf_pair_get (lipi);
+
+          vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip->lip_phy_sw_if_index;
+          next0 = LCP_XC_L3_NEXT_XC;
+          vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+            lip->lip_phy_adjs.adj_index[af];
+
+          if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              lcp_xc_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->phy_sw_if_index = lip->lip_phy_sw_if_index;
+              t->adj_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+            }
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                           n_left_to_next, bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/**
+ * X-connect all packets from the HOST to the PHY (IPv4, L3 interfaces).
+ */
+VLIB_NODE_FN (lcp_xc_l3_ip4_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  const ip_address_family_t af = AF_IP4;
+
+  return lcp_xc_l3_inline (vm, node, frame, af);
+}
+
+/* X-connect all packets from the HOST to the PHY (IPv6, L3 interfaces) */
+VLIB_NODE_FN (lcp_xc_l3_ip6_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  const ip_address_family_t af = AF_IP6;
+
+  return lcp_xc_l3_inline (vm, node, frame, af);
+}
+
+/* Registration of "linux-cp-xc-l3-ip4"; its only next node is ip4-midchain */
+VLIB_REGISTER_NODE (lcp_xc_l3_ip4_node) = {
+  .name = "linux-cp-xc-l3-ip4",
+  .vector_size = sizeof (u32),
+  .format_trace = format_lcp_xc_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_next_nodes = LCP_XC_L3_N_NEXT,
+  .next_nodes = {
+    [LCP_XC_L3_NEXT_XC] = "ip4-midchain",
+  },
+};
+
+/* the IPv4 L3 cross-connect is a feature on the IPv4 unicast input arc */
+VNET_FEATURE_INIT (lcp_xc_node_l3_ip4_unicast, static) = {
+  .arc_name = "ip4-unicast",
+  .node_name = "linux-cp-xc-l3-ip4",
+};
+
+/* ... and on the IPv4 multicast input arc */
+VNET_FEATURE_INIT (lcp_xc_node_l3_ip4_multicast, static) = {
+  .arc_name = "ip4-multicast",
+  .node_name = "linux-cp-xc-l3-ip4",
+};
+
+/* Registration of "linux-cp-xc-l3-ip6"; its only next node is ip6-midchain */
+VLIB_REGISTER_NODE (lcp_xc_l3_ip6_node) = {
+  .name = "linux-cp-xc-l3-ip6",
+  .vector_size = sizeof (u32),
+  .format_trace = format_lcp_xc_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_next_nodes = LCP_XC_L3_N_NEXT,
+  .next_nodes = {
+    [LCP_XC_L3_NEXT_XC] = "ip6-midchain",
+  },
+};
+
+/* the IPv6 L3 cross-connect is a feature on the IPv6 unicast input arc */
+VNET_FEATURE_INIT (lcp_xc_node_l3_ip6_unicast, static) = {
+  .arc_name = "ip6-unicast",
+  .node_name = "linux-cp-xc-l3-ip6",
+};
+
+/* ... and on the IPv6 multicast input arc */
+VNET_FEATURE_INIT (lcp_xc_node_l3_ip6_multicast, static) = {
+  .arc_name = "ip6-multicast",
+  .node_name = "linux-cp-xc-l3-ip6",
+};
+
+/* X-macro list of the ARP nodes' next nodes: _ (SYMBOL, description) */
+#define foreach_lcp_arp                                                       \
+  _ (DROP, "error-drop")                                                      \
+  _ (IO, "interface-output")
+
+/*
+ * Next-node indices of the ARP nodes. Only the symbol of each list entry
+ * is used here: LCP_ARP_NEXT_DROP = 0, LCP_ARP_NEXT_IO = 1.
+ */
+typedef enum
+{
+#define _(sym, str) LCP_ARP_NEXT_##sym,
+  foreach_lcp_arp
+#undef _
+    LCP_ARP_N_NEXT,
+} lcp_arp_next_t;
+
+/* Per-packet trace record shared by the ARP nodes */
+typedef struct lcp_arp_trace_t_
+{
+  /* interface the packet was received on */
+  u32 rx_sw_if_index;
+  /* ARP opcode of the packet; printed by format_lcp_arp_trace */
+  u16 arp_opcode;
+} lcp_arp_trace_t;
+
+/*
+ * Packet trace format function for the ARP nodes.
+ * Takes (vlib_main_t *, vlib_node_t *, lcp_arp_trace_t *) from args and
+ * appends "rx-sw-if-index: <index> opcode: <opcode>" to s.
+ */
+static u8 *
+format_lcp_arp_trace (u8 *s, va_list *args)
+{
+  const lcp_arp_trace_t *rec;
+
+  /* the vm and node arguments are consumed but not needed */
+  (void) va_arg (*args, vlib_main_t *);
+  (void) va_arg (*args, vlib_node_t *);
+  rec = va_arg (*args, lcp_arp_trace_t *);
+
+  return format (s, "rx-sw-if-index: %u opcode: %u", rec->rx_sw_if_index,
+                 rec->arp_opcode);
+}
+
+/**
+ * Make a copy of an ARP reply received on a phy, addressed to the host.
+ *
+ * The buffer is rewound to its ethernet header for the duration of the
+ * copy and advanced back afterwards, so it is left as it was found.
+ *
+ * @param vm      vlib main
+ * @param b       the received ARP reply; current data at the ARP header
+ * @param copy_bi where the buffer index of the copy is written
+ * @return 1 when a copy was made and *copy_bi written, 0 when the RX
+ *         interface has no interface pair or the copy could not be
+ *         allocated
+ */
+static_always_inline u32
+lcp_arp_phy_copy_reply (vlib_main_t *vm, vlib_buffer_t *b, u32 *copy_bi)
+{
+  const lcp_itf_pair_t *lip;
+  vlib_buffer_t *c;
+  u32 lipi;
+  u8 len;
+
+  lipi = lcp_itf_pair_find_by_phy (vnet_buffer (b)->sw_if_index[VLIB_RX]);
+  lip = lcp_itf_pair_get (lipi);
+
+  if (!lip)
+    return 0;
+
+  /*
+   * rewind to eth header, copy, advance back to current
+   */
+  len = ((u8 *) vlib_buffer_get_current (b) -
+         (u8 *) ethernet_buffer_get_header (b));
+  vlib_buffer_advance (b, -len);
+  c = vlib_buffer_copy (vm, b);
+  vlib_buffer_advance (b, len);
+
+  /* the copy fails when no buffer can be allocated; skip the host copy */
+  if (PREDICT_FALSE (!c))
+    return 0;
+
+  /* Send to the host */
+  vnet_buffer (c)->sw_if_index[VLIB_TX] = lip->lip_host_sw_if_index;
+  *copy_bi = vlib_get_buffer_index (vm, c);
+
+  return 1;
+}
+
+/**
+ * punt ARP replies to the host
+ *
+ * Every packet continues to the next feature on the arc. ARP replies
+ * received on an interface with a pair are additionally copied, and the
+ * copies are sent to interface-output on the pair's host interface. At
+ * most one copy is made per packet, so reply_copies cannot overflow.
+ */
+VLIB_NODE_FN (lcp_arp_phy_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, *to_next, n_left_to_next;
+  lcp_arp_next_t next_index;
+  u32 reply_copies[VLIB_FRAME_SIZE];
+  u32 n_copies = 0;
+
+  next_index = node->cached_next_index;
+  n_left_from = frame->n_vectors;
+  from = vlib_frame_vector_args (frame);
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from >= 2 && n_left_to_next >= 2)
+        {
+          u32 next0, next1, bi0, bi1;
+          vlib_buffer_t *b0, *b1;
+          ethernet_arp_header_t *arp0, *arp1;
+          u16 arp_opcode0, arp_opcode1;
+
+          bi0 = to_next[0] = from[0];
+          bi1 = to_next[1] = from[1];
+
+          from += 2;
+          n_left_from -= 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+
+          next0 = next1 = LCP_ARP_NEXT_DROP;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          b1 = vlib_get_buffer (vm, bi1);
+
+          arp0 = vlib_buffer_get_current (b0);
+          arp1 = vlib_buffer_get_current (b1);
+
+          vnet_feature_next (&next0, b0);
+          vnet_feature_next (&next1, b1);
+
+          arp_opcode0 = clib_net_to_host_u16 (arp0->opcode);
+          arp_opcode1 = clib_net_to_host_u16 (arp1->opcode);
+
+          /*
+           * Replies might need to be received by the host, so we
+           * make a copy of them.
+           */
+          if (arp_opcode0 == ETHERNET_ARP_OPCODE_reply)
+            n_copies +=
+              lcp_arp_phy_copy_reply (vm, b0, &reply_copies[n_copies]);
+          if (arp_opcode1 == ETHERNET_ARP_OPCODE_reply)
+            n_copies +=
+              lcp_arp_phy_copy_reply (vm, b1, &reply_copies[n_copies]);
+
+          if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              lcp_arp_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->rx_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+              t->arp_opcode = arp_opcode0;
+            }
+          if (PREDICT_FALSE ((b1->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              lcp_arp_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t));
+              t->rx_sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+              t->arp_opcode = arp_opcode1;
+            }
+
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+                                           n_left_to_next, bi0, bi1, next0,
+                                           next1);
+        }
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 next0, bi0;
+          vlib_buffer_t *b0;
+          ethernet_arp_header_t *arp0;
+          u16 arp_opcode0;
+
+          bi0 = to_next[0] = from[0];
+
+          from += 1;
+          n_left_from -= 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+          next0 = LCP_ARP_NEXT_DROP;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          arp0 = vlib_buffer_get_current (b0);
+
+          vnet_feature_next (&next0, b0);
+
+          arp_opcode0 = clib_net_to_host_u16 (arp0->opcode);
+
+          /*
+           * Replies might need to be received by the host, so we
+           * make a copy of them.
+           */
+          if (arp_opcode0 == ETHERNET_ARP_OPCODE_reply)
+            n_copies +=
+              lcp_arp_phy_copy_reply (vm, b0, &reply_copies[n_copies]);
+
+          if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              lcp_arp_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->rx_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+              t->arp_opcode = arp_opcode0;
+            }
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                           n_left_to_next, bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  /* the copies all go the same way: out of their host interface */
+  if (n_copies)
+    vlib_buffer_enqueue_to_single_next (vm, node, reply_copies,
+                                        LCP_ARP_NEXT_IO, n_copies);
+
+  return frame->n_vectors;
+}
+
+/* Registration of the "linux-cp-arp-phy" node */
+VLIB_REGISTER_NODE (lcp_arp_phy_node) = {
+  .name = "linux-cp-arp-phy",
+  .vector_size = sizeof (u32),
+  .format_trace = format_lcp_arp_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = LINUXCP_N_ERROR,
+  .error_counters = linuxcp_error_counters,
+
+  .n_next_nodes = LCP_ARP_N_NEXT,
+  .next_nodes = {
+    [LCP_ARP_NEXT_DROP] = "error-drop",
+    [LCP_ARP_NEXT_IO] = "interface-output",
+  },
+};
+
+/* run on the arp arc, ahead of arp-reply */
+VNET_FEATURE_INIT (lcp_arp_phy_arp_feat, static) = {
+  .arc_name = "arp",
+  .node_name = "linux-cp-arp-phy",
+  .runs_before = VNET_FEATURES ("arp-reply"),
+};
+
+/**
+ * x-connect ARP packets from the host to the phy
+ *
+ * The RX (host) interface of each packet selects the interface pair. The
+ * buffer is rewound to its ethernet header and sent to interface-output
+ * with the pair's phy as TX interface.
+ */
+VLIB_NODE_FN (lcp_arp_host_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, *to_next, n_left_to_next;
+  lcp_arp_next_t next_index;
+
+  next_index = node->cached_next_index;
+  n_left_from = frame->n_vectors;
+  from = vlib_frame_vector_args (frame);
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          const ethernet_arp_header_t *arp0;
+          const lcp_itf_pair_t *lip0;
+          lcp_arp_next_t next0;
+          vlib_buffer_t *b0;
+          u32 bi0, lipi0;
+          u8 len0;
+
+          bi0 = to_next[0] = from[0];
+
+          from += 1;
+          n_left_from -= 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+          next0 = LCP_ARP_NEXT_IO;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          /* remember where the ARP header is before the buffer is rewound */
+          arp0 = vlib_buffer_get_current (b0);
+
+          lipi0 =
+            lcp_itf_pair_find_by_host (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+          lip0 = lcp_itf_pair_get (lipi0);
+
+          /* Send to the phy */
+          vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip0->lip_phy_sw_if_index;
+
+          len0 = ((u8 *) vlib_buffer_get_current (b0) -
+                  (u8 *) ethernet_buffer_get_header (b0));
+          vlib_buffer_advance (b0, -len0);
+
+          if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              lcp_arp_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->rx_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+              /* the trace formatter prints the opcode, so it must be set */
+              t->arp_opcode = clib_net_to_host_u16 (arp0->opcode);
+            }
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                           n_left_to_next, bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/* Registration of the "linux-cp-arp-host" node */
+VLIB_REGISTER_NODE (lcp_arp_host_node) = {
+  .name = "linux-cp-arp-host",
+  .vector_size = sizeof (u32),
+  .format_trace = format_lcp_arp_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = LINUXCP_N_ERROR,
+  .error_counters = linuxcp_error_counters,
+
+  .n_next_nodes = LCP_ARP_N_NEXT,
+  .next_nodes = {
+    [LCP_ARP_NEXT_DROP] = "error-drop",
+    [LCP_ARP_NEXT_IO] = "interface-output",
+  },
+};
+
+/* run on the arp arc, ahead of arp-reply */
+VNET_FEATURE_INIT (lcp_arp_host_arp_feat, static) = {
+  .arc_name = "arp",
+  .node_name = "linux-cp-arp-host",
+  .runs_before = VNET_FEATURES ("arp-reply"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/test/lcp_unittest.c b/src/plugins/linux-cp/test/lcp_unittest.c
new file mode 100644 (file)
index 0000000..57858eb
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+
+#include <plugins/linux-cp/lcp_interface.h>
+
+/* Bumped on every "test lcp" invocation; handed to lcp_itf_pair_add() and
+ * substituted into host_template to name the host side of the pair. */
+static u32 host_vif;
+/* Format template the host name is generated from. */
+static const char *host_template = "tap%d";
+
+/**
+ * CLI handler for "test lcp".
+ *
+ * Parses the keywords "add" / "del" (add is the default), "phy <interface>"
+ * and "host <interface>", then adds or deletes the interface pair. A phy is
+ * always required; a host is required only when adding. Switches
+ * lcp_main.test_mode on as a side effect.
+ *
+ * @param vm    unused
+ * @param input CLI arguments to parse
+ * @param cmd   unused
+ *
+ * @return NULL on success; otherwise an error describing the rejected input
+ *         or carrying the non-zero return value of the add/del call.
+ */
+static clib_error_t *
+lcp_add_pair_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                        vlib_cli_command_t *cmd)
+{
+  u32 phy_sw_if_index, host_sw_if_index;
+  u8 is_add, *host_name;
+  vnet_main_t *vnm = vnet_get_main ();
+
+  /* every invocation consumes a host index, whatever its outcome */
+  ++host_vif;
+  phy_sw_if_index = host_sw_if_index = ~0;
+  is_add = 1;
+  lcp_main.test_mode = 1;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "add"))
+       is_add = 1;
+      else if (unformat (input, "del"))
+       is_add = 0;
+      else if (unformat (input, "phy %U", unformat_vnet_sw_interface, vnm,
+                        &phy_sw_if_index))
+       ;
+      else if (unformat (input, "host %U", unformat_vnet_sw_interface, vnm,
+                        &host_sw_if_index))
+       ;
+      else
+       return clib_error_return (0, "unknown input:%U", format_unformat_error,
+                                 input);
+    }
+
+  if (phy_sw_if_index == ~0)
+    return clib_error_return (0, "ERROR; no phy:%U", format_unformat_error,
+                             input);
+
+  lip_host_type_t host_type =
+    (vnet_sw_interface_is_p2p (vnm, phy_sw_if_index) ? LCP_ITF_HOST_TUN :
+                                                      LCP_ITF_HOST_TAP);
+
+  int rv;
+
+  if (is_add)
+    {
+      if (host_sw_if_index == ~0)
+       return clib_error_return (0, "ERROR no-host:%U", format_unformat_error,
+                                 input);
+
+      /*
+       * Build the name only once it is really needed, so that neither the
+       * error returns above nor the delete path can leak the vector.
+       */
+      host_name = format (NULL, host_template, host_vif);
+
+      rv = lcp_itf_pair_add (host_sw_if_index, phy_sw_if_index, host_name,
+                            host_vif, host_type, NULL);
+
+      /*
+       * NOTE(review): released on the understanding that lcp_itf_pair_add()
+       * stores its own copy of the name - confirm against lcp_interface.c.
+       */
+      vec_free (host_name);
+    }
+  else
+    rv = lcp_itf_pair_del (phy_sw_if_index);
+
+  if (rv)
+    return clib_error_return (0, "ERROR rv:%d", rv);
+
+  return (NULL);
+}
+
+/* Registers the "test lcp" debug CLI, served by lcp_add_pair_command_fn. */
+VLIB_CLI_COMMAND (test_lcp_command, static) = {
+  .path = "test lcp",
+  .short_help = "test lcp [add|del] phy <SW_IF_INDEX> host <SW_IF_INDEX>",
+  .function = lcp_add_pair_command_fn,
+};
+
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+/* Plugin descriptor; the plugin stays unloaded unless explicitly enabled. */
+VLIB_PLUGIN_REGISTER () = {
+  .description = "Linux Control Plane - Unit Test",
+  .version = VPP_BUILD_VER,
+  .default_disabled = 1,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/test/test_linux_cp.py b/src/plugins/linux-cp/test/test_linux_cp.py
new file mode 100644 (file)
index 0000000..df38681
--- /dev/null
@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+
+import unittest
+
+from scapy.layers.inet import IP, UDP
+from scapy.layers.inet6 import IPv6, Raw
+from scapy.layers.l2 import Ether, ARP, Dot1Q
+
+from vpp_object import VppObject
+from framework import VppTestCase, VppTestRunner
+
+
+class VppLcpPair(VppObject):
+    """A linux-cp interface pair (phy + host) driven via the "test lcp" CLI"""
+
+    def __init__(self, test, phy, host):
+        self._test = test
+        self.phy = phy
+        self.host = host
+
+    def add_vpp_config(self):
+        """Create the pair, record it in the test's registry, return self"""
+        cmd = "test lcp add phy %s host %s" % (self.phy, self.host)
+        self._test.vapi.cli(cmd)
+        self._test.registry.register(self, self._test.logger)
+        return self
+
+    def remove_vpp_config(self):
+        """Delete the pair"""
+        cmd = "test lcp del phy %s host %s" % (self.phy, self.host)
+        self._test.vapi.cli(cmd)
+
+    def object_id(self):
+        """Identify the pair by its phy and host sw_if_index"""
+        indices = (self.phy.sw_if_index, self.host.sw_if_index)
+        return "lcp:%d:%d" % indices
+
+    def query_vpp_config(self):
+        """True when lcp_itf_pair_get reports a pair with these interfaces"""
+        dump = list(self._test.vapi.vpp.details_iter(
+            self._test.vapi.lcp_itf_pair_get))
+
+        return any(p.phy_sw_if_index == self.phy.sw_if_index and
+                   p.host_sw_if_index == self.host.sw_if_index
+                   for p in dump)
+
+
+class TestLinuxCP(VppTestCase):
+    """ Linux Control Plane """
+
+    # load both the linux-cp plugin and its unit-test plugin
+    extra_vpp_plugin_config = ["plugin",
+                               "linux_cp_plugin.so",
+                               "{", "enable", "}",
+                               "plugin",
+                               "linux_cp_unittest_plugin.so",
+                               "{", "enable", "}"]
+
+    @classmethod
+    def setUpClass(cls):
+        super(TestLinuxCP, cls).setUpClass()
+
+    @classmethod
+    def tearDownClass(cls):
+        super(TestLinuxCP, cls).tearDownClass()
+
+    def setUp(self):
+        super(TestLinuxCP, self).setUp()
+
+        # create 4 pg interfaces so there are a few addresses
+        # in the FIB
+        self.create_pg_interfaces(range(4))
+
+        for i in self.pg_interfaces:
+            i.admin_up()
+
+    def tearDown(self):
+        for i in self.pg_interfaces:
+            i.admin_down()
+        super(TestLinuxCP, self).tearDown()
+
+    def _send_and_verify_unchanged(self, tx_itf, pkt, rx_itf):
+        """Send pkt on tx_itf and check that every packet captured on
+        rx_itf is identical to it"""
+        rxs = self.send_and_expect(tx_itf, [pkt], rx_itf)
+
+        for rx in rxs:
+            self.assertEqual(pkt.show2(True), rx.show2(True))
+
+    def test_linux_cp_tap(self):
+        """ Linux CP TAP """
+
+        #
+        # Setup
+        #
+
+        # create two pairs, with a bunch of hosts on the phys
+        hosts = [self.pg0, self.pg1]
+        phys = [self.pg2, self.pg3]
+        N_HOSTS = 4
+
+        for phy in phys:
+            phy.config_ip4()
+            # must match the range iterated by the traffic loops below
+            phy.generate_remote_hosts(N_HOSTS)
+            phy.configure_ipv4_neighbors()
+
+        for phy, host in zip(phys, hosts):
+            VppLcpPair(self, phy, host).add_vpp_config()
+
+        self.logger.info(self.vapi.cli("sh lcp adj verbose"))
+        self.logger.info(self.vapi.cli("sh lcp"))
+
+        #
+        # Traffic Tests
+        #
+
+        # hosts to phys
+        for phy, host in zip(phys, hosts):
+            for j in range(N_HOSTS):
+                p = (Ether(src=phy.local_mac,
+                           dst=phy.remote_hosts[j].mac) /
+                     IP(src=phy.local_ip4,
+                        dst=phy.remote_hosts[j].ip4) /
+                     UDP(sport=1234, dport=1234) /
+                     Raw())
+
+                # verify packet is unchanged
+                self._send_and_verify_unchanged(host, p, phy)
+
+                # ARPs x-connect to phy
+                p = (Ether(dst="ff:ff:ff:ff:ff:ff",
+                           src=phy.remote_hosts[j].mac) /
+                     ARP(op="who-has",
+                         hwdst=phy.remote_hosts[j].mac,
+                         hwsrc=phy.local_mac,
+                         psrc=phy.local_ip4,
+                         pdst=phy.remote_hosts[j].ip4))
+
+                # verify packet is unchanged
+                self._send_and_verify_unchanged(host, p, phy)
+
+        # phy to host
+        for phy, host in zip(phys, hosts):
+            for j in range(N_HOSTS):
+                p = (Ether(dst=phy.local_mac,
+                           src=phy.remote_hosts[j].mac) /
+                     IP(dst=phy.local_ip4,
+                        src=phy.remote_hosts[j].ip4) /
+                     UDP(sport=1234, dport=1234) /
+                     Raw())
+
+                # verify packet is unchanged
+                self._send_and_verify_unchanged(phy, p, host)
+
+                # ARPs rx'd on the phy are sent to the host
+                p = (Ether(dst="ff:ff:ff:ff:ff:ff",
+                           src=phy.remote_hosts[j].mac) /
+                     ARP(op="is-at",
+                         hwsrc=phy.remote_hosts[j].mac,
+                         hwdst=phy.local_mac,
+                         pdst=phy.local_ip4,
+                         psrc=phy.remote_hosts[j].ip4))
+
+                # verify packet is unchanged
+                self._send_and_verify_unchanged(phy, p, host)
+
+        # cleanup
+        for phy in phys:
+            phy.unconfig_ip4()
+
+
+# run the tests through the VPP runner when executed as a script
+if "__main__" == __name__:
+    unittest.main(testRunner=VppTestRunner)
index 8064d67..d49282e 100644 (file)
@@ -201,33 +201,45 @@ do {                                                                    \
     vl_api_send_msg (rp, (u8 *)rmp);                                    \
 } while(0);
 
-#define REPLY_AND_DETAILS_MACRO(t, p, body)                    \
-do {                                                           \
-  vl_api_registration_t *rp;                                   \
-  rp = vl_api_client_index_to_registration (mp->client_index); \
-  if (rp == 0)                                                 \
-    return;                                                    \
-  u32 cursor = clib_net_to_host_u32 (mp->cursor);              \
-  vlib_main_t *vm = vlib_get_main ();                          \
-  f64 start = vlib_time_now (vm);                              \
-  if (pool_is_free_index (p, cursor)) {                                \
-    cursor = pool_next_index (p, cursor);                      \
-    if (cursor == ~0)                                          \
-      rv = VNET_API_ERROR_INVALID_VALUE;                       \
-  }                                                            \
-  while (cursor != ~0) {                                       \
-    do {body;} while (0);                                      \
-    cursor = pool_next_index (p, cursor);                      \
-    if (vl_api_process_may_suspend (vm, rp, start)) {          \
-      if (cursor != ~0)                                                \
-        rv = VNET_API_ERROR_EAGAIN;                            \
-      break;                                                   \
-    }                                                          \
-  }                                                            \
-  REPLY_MACRO2 (t, ({                                          \
-    rmp->cursor = clib_host_to_net_u32 (cursor);               \
-  }));                                                         \
-} while(0);
+/*
+ * Answer a cursor-driven request by running 'body' once for each in-use
+ * element of pool 'p', then sending a reply of type 't' that carries the
+ * cursor to resume from.
+ *
+ * Uses 'mp' (the request), 'rmp' and 'rv' from the expanding scope.
+ *
+ *  - an empty pool is answered straight away with REPLY_MACRO (t);
+ *  - if the client has no registration, the macro returns from the
+ *    enclosing function without replying;
+ *  - the cursor is read from mp->cursor (network byte order); when it names
+ *    a free index it is moved on to the next in-use one, and rv is set to
+ *    VNET_API_ERROR_INVALID_VALUE if there is none;
+ *  - after each 'body' the cursor advances; once
+ *    vl_api_process_may_suspend () reports true the walk stops, with rv set
+ *    to VNET_API_ERROR_EAGAIN if elements remain;
+ *  - the final cursor (~0 when the walk is complete) is written to
+ *    rmp->cursor in network byte order.
+ */
+#define REPLY_AND_DETAILS_MACRO(t, p, body)                                   \
+  do                                                                          \
+    {                                                                         \
+      if (pool_elts (p) == 0)                                                 \
+       {                                                                     \
+         REPLY_MACRO (t);                                                    \
+         break;                                                              \
+       }                                                                     \
+      vl_api_registration_t *rp;                                              \
+      rp = vl_api_client_index_to_registration (mp->client_index);            \
+      if (rp == 0)                                                            \
+       return;                                                               \
+      u32 cursor = clib_net_to_host_u32 (mp->cursor);                         \
+      vlib_main_t *vm = vlib_get_main ();                                     \
+      f64 start = vlib_time_now (vm);                                         \
+      if (pool_is_free_index (p, cursor))                                     \
+       {                                                                     \
+         cursor = pool_next_index (p, cursor);                               \
+         if (cursor == ~0)                                                   \
+           rv = VNET_API_ERROR_INVALID_VALUE;                                \
+       }                                                                     \
+      while (cursor != ~0)                                                    \
+       {                                                                     \
+         do                                                                  \
+           {                                                                 \
+             body;                                                           \
+           }                                                                 \
+         while (0);                                                          \
+         cursor = pool_next_index (p, cursor);                               \
+         if (vl_api_process_may_suspend (vm, rp, start))                     \
+           {                                                                 \
+             if (cursor != ~0)                                               \
+               rv = VNET_API_ERROR_EAGAIN;                                   \
+             break;                                                          \
+           }                                                                 \
+       }                                                                     \
+      REPLY_MACRO2 (t, ({ rmp->cursor = clib_host_to_net_u32 (cursor); }));   \
+    }                                                                         \
+  while (0);
 
 #define REPLY_AND_DETAILS_VEC_MACRO(t, v, mp, rmp, rv, body)   \
 do {                                                           \
index 120d92c..dbb0549 100644 (file)
@@ -342,6 +342,14 @@ vnet_sw_interface_is_up (vnet_main_t * vnm, u32 sw_if_index)
          vnet_sw_interface_is_link_up (vnm, sw_if_index));
 }
 
+/**
+ * Report whether a software interface is a sub-interface, i.e. whether its
+ * own index differs from the index of its super interface.
+ *
+ * @return 1 for a sub-interface, 0 otherwise.
+ */
+always_inline uword
+vnet_sw_interface_is_sub (vnet_main_t *vnm, u32 sw_if_index)
+{
+  const vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+
+  if (si->sw_if_index == si->sup_sw_if_index)
+    return (0);
+
+  return (1);
+}
+
 always_inline vlib_frame_t *
 vnet_get_frame_to_sw_interface (vnet_main_t * vnm, u32 sw_if_index)
 {
index 96a0a1a..adc130e 100644 (file)
@@ -294,8 +294,8 @@ u32 set_int_l2_mode (vlib_main_t * vm,
                     u32 bd_index, l2_bd_port_type_t port_type,
                     u32 shg, u32 xc_sw_if_index);
 
-static inline void
-vnet_update_l2_len (vlib_buffer_t * b)
+static inline u16
+vnet_update_l2_len (vlib_buffer_t *b)
 {
   ethernet_header_t *eth;
   u16 ethertype;
@@ -326,6 +326,8 @@ vnet_update_l2_len (vlib_buffer_t * b)
        }
     }
   ethernet_buffer_set_vlan_count (b, vlan_count);
+
+  return (ethertype);
 }
 
 /*