Distributed Virtual Router Support 38/8638/2
authorNeale Ranns <nranns@cisco.com>
Tue, 3 Oct 2017 15:20:21 +0000 (08:20 -0700)
committerDamjan Marion <dmarion.lists@gmail.com>
Thu, 5 Oct 2017 09:50:26 +0000 (09:50 +0000)
A distributed virtual router works by attempting to switch a packet, but on failing to find a local consumer (i.e. a locally attached host to which the packet is destined) the packet is sent unmodified 'upstream' to where the rest of the 'distributed' router is present. When L3 switching a packet this means the L2 header must not be modified. This patch adds an 'l2-bridge' object to the L3 FIB which re-injects packets from the L3 path back into the L2 path - use with extreme caution.

Change-Id: I069724eb45956647d7980cbe40a80a788ee6ee82
Signed-off-by: Neale Ranns <nranns@cisco.com>
16 files changed:
src/vnet.am
src/vnet/dpo/dpo.c
src/vnet/dpo/dpo.h
src/vnet/dpo/l2_bridge_dpo.c [new file with mode: 0644]
src/vnet/dpo/l2_bridge_dpo.h [new file with mode: 0644]
src/vnet/fib/fib_api.h
src/vnet/fib/fib_path.c
src/vnet/fib/fib_table.c
src/vnet/fib/fib_test.c
src/vnet/fib/fib_test.h
src/vnet/ip/ip.api
src/vnet/ip/ip_api.c
src/vnet/mpls/mpls_api.c
test/test_dvr.py [new file with mode: 0644]
test/vpp_ip_route.py
test/vpp_papi_provider.py

index 055d3a7..aa3ada1 100644 (file)
@@ -1015,7 +1015,8 @@ libvnet_la_SOURCES +=                             \
   vnet/dpo/interface_rx_dpo.c                          \
   vnet/dpo/interface_tx_dpo.c                          \
   vnet/dpo/mpls_disposition.c                  \
-  vnet/dpo/mpls_label_dpo.c
+  vnet/dpo/mpls_label_dpo.c                    \
+  vnet/dpo/l2_bridge_dpo.c
 
 nobase_include_HEADERS +=                      \
   vnet/dpo/load_balance.h                      \
index bd18b66..e94f347 100644 (file)
@@ -40,6 +40,7 @@
 #include <vnet/dpo/interface_rx_dpo.h>
 #include <vnet/dpo/interface_tx_dpo.h>
 #include <vnet/dpo/mpls_disposition.h>
+#include <vnet/dpo/l2_bridge_dpo.h>
 
 /**
  * Array of char* names for the DPO types and protos
@@ -523,6 +524,7 @@ dpo_module_init (vlib_main_t * vm)
     interface_rx_dpo_module_init();
     interface_tx_dpo_module_init();
     mpls_disp_dpo_module_init();
+    l2_bridge_dpo_module_init();
 
     return (NULL);
 }
index 3356296..d1309c1 100644 (file)
@@ -114,6 +114,7 @@ typedef enum dpo_type_t_ {
     DPO_MFIB_ENTRY,
     DPO_INTERFACE_RX,
     DPO_INTERFACE_TX,
+    DPO_L2_BRIDGE,
     DPO_LAST,
 } __attribute__((packed)) dpo_type_t;
 
@@ -140,7 +141,8 @@ typedef enum dpo_type_t_ {
     [DPO_MPLS_DISPOSITION] = "dpo-mpls-diposition", \
     [DPO_MFIB_ENTRY] = "dpo-mfib_entry", \
     [DPO_INTERFACE_RX] = "dpo-interface-rx",   \
-    [DPO_INTERFACE_TX] = "dpo-interface-tx"    \
+    [DPO_INTERFACE_TX] = "dpo-interface-tx",   \
+    [DPO_L2_BRIDGE] = "dpo-l2-bridge"  \
 }
 
 /**
diff --git a/src/vnet/dpo/l2_bridge_dpo.c b/src/vnet/dpo/l2_bridge_dpo.c
new file mode 100644 (file)
index 0000000..1694781
--- /dev/null
@@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/dpo/l2_bridge_dpo.h>
+#include <vnet/fib/fib_node.h>
+#include <vnet/ethernet/ethernet.h>
+
+/*
+ * The 'DB' of L2 bridge DPOs.
+ * There is only one per-interface, so this is a per-interface vector
+ */
+static index_t *l2_bridge_dpo_db;
+
+static l2_bridge_dpo_t *
+l2_bridge_dpo_alloc (void)
+{
+    l2_bridge_dpo_t *l2b;
+
+    pool_get(l2_bridge_dpo_pool, l2b);
+
+    return (l2b);
+}
+
+static inline l2_bridge_dpo_t *
+l2_bridge_dpo_get_from_dpo (const dpo_id_t *dpo)
+{
+    ASSERT(DPO_L2_BRIDGE == dpo->dpoi_type);
+
+    return (l2_bridge_dpo_get(dpo->dpoi_index));
+}
+
+static inline index_t
+l2_bridge_dpo_get_index (l2_bridge_dpo_t *l2b)
+{
+    return (l2b - l2_bridge_dpo_pool);
+}
+
+static void
+l2_bridge_dpo_lock (dpo_id_t *dpo)
+{
+    l2_bridge_dpo_t *l2b;
+
+    l2b = l2_bridge_dpo_get_from_dpo(dpo);
+    l2b->l2b_locks++;
+}
+
+static void
+l2_bridge_dpo_unlock (dpo_id_t *dpo)
+{
+    l2_bridge_dpo_t *l2b;
+
+    l2b = l2_bridge_dpo_get_from_dpo(dpo);
+    l2b->l2b_locks--;
+
+    if (0 == l2b->l2b_locks)
+    {
+        l2_bridge_dpo_db[l2b->l2b_sw_if_index] = INDEX_INVALID;
+        pool_put(l2_bridge_dpo_pool, l2b);
+    }
+}
+
+/*
+ * l2_bridge_dpo_add_or_lock
+ *
+ * Add/create and lock a new L2 bridge DPO, or lock the existing one,
+ * for the interface given
+ */
+void
+l2_bridge_dpo_add_or_lock (u32 sw_if_index,
+                           dpo_id_t *dpo)
+{
+    l2_bridge_dpo_t *l2b;
+
+    vec_validate_init_empty(l2_bridge_dpo_db,
+                            sw_if_index,
+                            INDEX_INVALID);
+
+    if (INDEX_INVALID == l2_bridge_dpo_db[sw_if_index])
+    {
+        l2b = l2_bridge_dpo_alloc();
+
+        l2b->l2b_sw_if_index = sw_if_index;
+
+        l2_bridge_dpo_db[sw_if_index] =
+            l2_bridge_dpo_get_index(l2b);
+    }
+    else
+    {
+        l2b = l2_bridge_dpo_get(l2_bridge_dpo_db[sw_if_index]);
+    }
+
+    dpo_set(dpo, DPO_L2_BRIDGE, DPO_PROTO_ETHERNET, l2_bridge_dpo_get_index(l2b));
+}
+
+
+static clib_error_t *
+l2_bridge_dpo_interface_state_change (vnet_main_t * vnm,
+                                      u32 sw_if_index,
+                                      u32 flags)
+{
+    /*
+     */
+    return (NULL);
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(
+    l2_bridge_dpo_interface_state_change);
+
+/**
+ * @brief Registered callback for HW interface state changes
+ */
+static clib_error_t *
+l2_bridge_dpo_hw_interface_state_change (vnet_main_t * vnm,
+                                         u32 hw_if_index,
+                                         u32 flags)
+{
+    return (NULL);
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION(
+    l2_bridge_dpo_hw_interface_state_change);
+
+static clib_error_t *
+l2_bridge_dpo_interface_delete (vnet_main_t * vnm,
+                                u32 sw_if_index,
+                                u32 is_add)
+{
+    return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION(
+    l2_bridge_dpo_interface_delete);
+
+u8*
+format_l2_bridge_dpo (u8* s, va_list *ap)
+{
+    index_t index = va_arg(*ap, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+    vnet_main_t * vnm = vnet_get_main();
+    l2_bridge_dpo_t *l2b = l2_bridge_dpo_get(index);
+
+    return (format(s, "l2-bridge-%U-dpo",
+                   format_vnet_sw_interface_name,
+                   vnm,
+                   vnet_get_sw_interface(vnm, l2b->l2b_sw_if_index)));
+}
+
+static void
+l2_bridge_dpo_mem_show (void)
+{
+    fib_show_memory_usage("L2-bridge",
+                          pool_elts(l2_bridge_dpo_pool),
+                          pool_len(l2_bridge_dpo_pool),
+                          sizeof(l2_bridge_dpo_t));
+}
+
+
+const static dpo_vft_t l2_bridge_dpo_vft = {
+    .dv_lock = l2_bridge_dpo_lock,
+    .dv_unlock = l2_bridge_dpo_unlock,
+    .dv_format = format_l2_bridge_dpo,
+    .dv_mem_show = l2_bridge_dpo_mem_show,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to an L2
+ *        bridge object.
+ *
+ * this means that these graph nodes are ones from which an L2 bridge is
+ * the parent object in the DPO-graph.
+ */
+const static char* const l2_bridge_dpo_l2_nodes[] =
+{
+    "l2-bridge-dpo",
+    NULL,
+};
+
+const static char* const * const l2_bridge_dpo_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_ETHERNET]  = l2_bridge_dpo_l2_nodes,
+};
+
+void
+l2_bridge_dpo_module_init (void)
+{
+    dpo_register(DPO_L2_BRIDGE,
+                 &l2_bridge_dpo_vft,
+                 l2_bridge_dpo_nodes);
+}
+
+/**
+ * @brief L2 bridge DPO trace data
+ */
+typedef struct l2_bridge_dpo_trace_t_
+{
+    u32 sw_if_index;
+} l2_bridge_dpo_trace_t;
+
+typedef enum l2_bridge_dpo_next_t_
+{
+    L2_BRIDGE_DPO_DROP = 0,
+    L2_BRIDGE_DPO_OUTPUT = 1,
+} l2_bridge_dpo_next_t;
+
+always_inline uword
+l2_bridge_dpo_inline (vlib_main_t * vm,
+                      vlib_node_runtime_t * node,
+                      vlib_frame_t * from_frame)
+{
+    u32 n_left_from, next_index, * from, * to_next;
+
+    from = vlib_frame_vector_args (from_frame);
+    n_left_from = from_frame->n_vectors;
+
+    next_index = node->cached_next_index;
+
+    while (n_left_from > 0)
+    {
+        u32 n_left_to_next;
+
+        vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+        while (n_left_from >= 4 && n_left_to_next > 2)
+        {
+            const l2_bridge_dpo_t *l2b0, *l2b1;
+            u32 bi0, l2bi0, bi1, l2bi1;
+            vlib_buffer_t *b0, *b1;
+            u8 len0, len1;
+
+            bi0 = from[0];
+            to_next[0] = bi0;
+            bi1 = from[1];
+            to_next[1] = bi1;
+            from += 2;
+            to_next += 2;
+            n_left_from -= 2;
+            n_left_to_next -= 2;
+
+            b0 = vlib_get_buffer (vm, bi0);
+            b1 = vlib_get_buffer (vm, bi1);
+
+            l2bi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+            l2bi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+            l2b0 = l2_bridge_dpo_get(l2bi0);
+            l2b1 = l2_bridge_dpo_get(l2bi1);
+
+            vnet_buffer(b0)->sw_if_index[VLIB_TX] = l2b0->l2b_sw_if_index;
+            vnet_buffer(b1)->sw_if_index[VLIB_TX] = l2b1->l2b_sw_if_index;
+
+            len0 = ((u8*)vlib_buffer_get_current(b0) -
+                    (u8*)ethernet_buffer_get_header(b0));
+            len1 = ((u8*)vlib_buffer_get_current(b1) -
+                    (u8*)ethernet_buffer_get_header(b1));
+            vnet_buffer(b0)->l2.l2_len = len0;
+            vnet_buffer(b1)->l2.l2_len = len1;
+
+            vlib_buffer_advance(b0, -len0);
+            vlib_buffer_advance(b1, -len1);
+
+            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                l2_bridge_dpo_trace_t *tr0;
+
+                tr0 = vlib_add_trace (vm, node, b0, sizeof (*tr0));
+                tr0->sw_if_index = l2b0->l2b_sw_if_index;
+            }
+            if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                l2_bridge_dpo_trace_t *tr1;
+
+                tr1 = vlib_add_trace (vm, node, b1, sizeof (*tr1));
+                tr1->sw_if_index = l2b1->l2b_sw_if_index;
+            }
+
+            vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+                                            n_left_to_next, bi0, bi1,
+                                            L2_BRIDGE_DPO_OUTPUT,
+                                            L2_BRIDGE_DPO_OUTPUT);
+        }
+
+        while (n_left_from > 0 && n_left_to_next > 0)
+        {
+            const l2_bridge_dpo_t * l2b0;
+            vlib_buffer_t * b0;
+            u32 bi0, l2bi0;
+            u8 len0;
+
+            bi0 = from[0];
+            to_next[0] = bi0;
+            from += 1;
+            to_next += 1;
+            n_left_from -= 1;
+            n_left_to_next -= 1;
+
+            b0 = vlib_get_buffer (vm, bi0);
+
+            l2bi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+            l2b0 = l2_bridge_dpo_get(l2bi0);
+
+            vnet_buffer(b0)->sw_if_index[VLIB_TX] = l2b0->l2b_sw_if_index;
+
+            /*
+             * take that, and rewind it back...
+             */
+            len0 = ((u8*)vlib_buffer_get_current(b0) -
+                    (u8*)ethernet_buffer_get_header(b0));
+            vnet_buffer(b0)->l2.l2_len = len0;
+            vlib_buffer_advance(b0, -len0);
+
+            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                l2_bridge_dpo_trace_t *tr;
+
+                tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+                tr->sw_if_index = l2b0->l2b_sw_if_index;
+            }
+
+            vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+                                            n_left_to_next, bi0,
+                                            L2_BRIDGE_DPO_OUTPUT);
+        }
+        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+    return from_frame->n_vectors;
+}
+
+static u8 *
+format_l2_bridge_dpo_trace (u8 * s, va_list * args)
+{
+    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+    l2_bridge_dpo_trace_t * t = va_arg (*args, l2_bridge_dpo_trace_t *);
+    uword indent = format_get_indent (s);
+    s = format (s, "%U sw_if_index:%d",
+                format_white_space, indent,
+                t->sw_if_index);
+    return s;
+}
+
+static uword
+l2_bridge_dpo_l2 (vlib_main_t * vm,
+                  vlib_node_runtime_t * node,
+                  vlib_frame_t * from_frame)
+{
+    return (l2_bridge_dpo_inline(vm, node, from_frame));
+}
+
+
+VLIB_REGISTER_NODE (l2_bridge_dpo_l2_node) = {
+    .function = l2_bridge_dpo_l2,
+    .name = "l2-bridge-dpo",
+    .vector_size = sizeof (u32),
+    .format_trace = format_l2_bridge_dpo_trace,
+
+    .n_next_nodes = 2,
+    .next_nodes = {
+        [L2_BRIDGE_DPO_DROP] = "error-drop",
+        [L2_BRIDGE_DPO_OUTPUT] = "l2-output",
+    },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (l2_bridge_dpo_l2_node,
+                              l2_bridge_dpo_l2)
diff --git a/src/vnet/dpo/l2_bridge_dpo.h b/src/vnet/dpo/l2_bridge_dpo.h
new file mode 100644 (file)
index 0000000..0a20dd7
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __L2_BRIDGE_DPO_H__
+#define __L2_BRIDGE_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+
+/**
+ * @brief
+ * The data-path object representing an L2 bridge.
+ * If a packet encounters an object of this type in the L3 data-path, it
+ * is injected back into the L2 bridge.
+ */
+typedef struct l2_bridge_dpo_t_
+{
+    /**
+     * The Software interface index that the packets will output on
+     */
+    u32 l2b_sw_if_index;
+
+    /**
+     * number of locks.
+     */
+    u16 l2b_locks;
+} l2_bridge_dpo_t;
+
+extern void l2_bridge_dpo_add_or_lock (u32 sw_if_index,
+                                       dpo_id_t *dpo);
+
+extern void l2_bridge_dpo_module_init(void);
+
+/**
+ * @brief pool of all L2 bridge DPOs
+ */
+l2_bridge_dpo_t *l2_bridge_dpo_pool;
+
+static inline l2_bridge_dpo_t *
+l2_bridge_dpo_get (index_t index)
+{
+    return (pool_elt_at_index(l2_bridge_dpo_pool, index));
+}
+
+#endif
index f5a107c..c369e8f 100644 (file)
@@ -40,6 +40,7 @@ add_del_route_t_handler (u8 is_multipath,
                         u8 is_resolve_attached,
                         u8 is_interface_rx,
                          u8 is_rpf_id,
+                         u8 is_l2_bridged,
                         u32 fib_index,
                         const fib_prefix_t * prefix,
                         dpo_proto_t next_hop_proto,
index f126333..889d17d 100644 (file)
@@ -23,6 +23,7 @@
 #include <vnet/dpo/lookup_dpo.h>
 #include <vnet/dpo/interface_rx_dpo.h>
 #include <vnet/dpo/mpls_disposition.h>
+#include <vnet/dpo/l2_bridge_dpo.h>
 
 #include <vnet/adj/adj.h>
 #include <vnet/adj/adj_mcast.h>
@@ -771,11 +772,18 @@ fib_path_unresolve (fib_path_t *path)
        }
        break;
     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
-    case FIB_PATH_TYPE_ATTACHED:
        adj_child_remove(path->fp_dpo.dpoi_index,
                         path->fp_sibling);
         adj_unlock(path->fp_dpo.dpoi_index);
         break;
+    case FIB_PATH_TYPE_ATTACHED:
+        if (DPO_PROTO_ETHERNET != path->fp_nh_proto)
+        {
+            adj_child_remove(path->fp_dpo.dpoi_index,
+                             path->fp_sibling);
+            adj_unlock(path->fp_dpo.dpoi_index);
+        }
+        break;
     case FIB_PATH_TYPE_EXCLUSIVE:
        dpo_reset(&path->exclusive.fp_ex_dpo);
         break;
@@ -1594,28 +1602,35 @@ fib_path_resolve (fib_node_index_t path_index)
        fib_path_attached_next_hop_set(path);
        break;
     case FIB_PATH_TYPE_ATTACHED:
-       /*
-        * path->attached.fp_interface
-        */
-       if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
-                                          path->attached.fp_interface))
-       {
-           path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
-       }
-        dpo_set(&path->fp_dpo,
-                DPO_ADJACENCY,
-                path->fp_nh_proto,
-                fib_path_attached_get_adj(path,
-                                          dpo_proto_to_link(path->fp_nh_proto)));
-
-       /*
-        * become a child of the adjacency so we receive updates
-        * when the interface state changes
-        */
-       path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
-                                        FIB_NODE_TYPE_PATH,
-                                        fib_path_get_index(path));
+        if (DPO_PROTO_ETHERNET == path->fp_nh_proto)
+        {
+            l2_bridge_dpo_add_or_lock(path->attached.fp_interface,
+                                      &path->fp_dpo);
+        }
+        else
+        {
+            /*
+             * path->attached.fp_interface
+             */
+            if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
+                                               path->attached.fp_interface))
+            {
+                path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+            }
+            dpo_set(&path->fp_dpo,
+                    DPO_ADJACENCY,
+                    path->fp_nh_proto,
+                    fib_path_attached_get_adj(path,
+                                              dpo_proto_to_link(path->fp_nh_proto)));
 
+            /*
+             * become a child of the adjacency so we receive updates
+             * when the interface state changes
+             */
+            path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
+                                             FIB_NODE_TYPE_PATH,
+                                             fib_path_get_index(path));
+        }
        break;
     case FIB_PATH_TYPE_RECURSIVE:
     {
@@ -1996,6 +2011,11 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
            dpo_copy(dpo, &path->exclusive.fp_ex_dpo);
            break;
         case FIB_PATH_TYPE_ATTACHED:
+            if (DPO_PROTO_ETHERNET == path->fp_nh_proto)
+            {
+                dpo_copy(dpo, &path->fp_dpo);
+                break;
+            }
            switch (fct)
            {
            case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
index 6daa61c..d5625d8 100644 (file)
@@ -481,7 +481,8 @@ fib_table_route_path_fixup (const fib_prefix_t *prefix,
     }
     if (fib_prefix_is_host(prefix) &&
        ip46_address_is_zero(&path->frp_addr) &&
-       path->frp_sw_if_index != ~0)
+       path->frp_sw_if_index != ~0 &&
+        path->frp_proto != DPO_PROTO_ETHERNET)
     {
        path->frp_addr = prefix->fp_addr;
         path->frp_flags |= FIB_ROUTE_PATH_ATTACHED;
index 64d9047..03c9ee7 100644 (file)
 #include <vnet/bfd/bfd_main.h>
 #include <vnet/dpo/interface_rx_dpo.h>
 #include <vnet/dpo/replicate_dpo.h>
+#include <vnet/dpo/l2_bridge_dpo.h>
 
 #include <vnet/mpls/mpls.h>
 
+#include <vnet/fib/fib_test.h>
 #include <vnet/fib/fib_path_list.h>
 #include <vnet/fib/fib_entry_src.h>
 #include <vnet/fib/fib_walk.h>
@@ -266,83 +268,6 @@ fib_test_build_rewrite (u8 *eth_addr)
     return (rewrite);
 }
 
-typedef enum fib_test_lb_bucket_type_t_ {
-    FT_LB_LABEL_O_ADJ,
-    FT_LB_LABEL_STACK_O_ADJ,
-    FT_LB_LABEL_O_LB,
-    FT_LB_O_LB,
-    FT_LB_SPECIAL,
-    FT_LB_ADJ,
-    FT_LB_INTF,
-} fib_test_lb_bucket_type_t;
-
-typedef struct fib_test_lb_bucket_t_ {
-    fib_test_lb_bucket_type_t type;
-
-    union
-    {
-       struct
-       {
-           mpls_eos_bit_t eos;
-           mpls_label_t label;
-           u8 ttl;
-           adj_index_t adj;
-       } label_o_adj;
-       struct
-       {
-           mpls_eos_bit_t eos;
-           mpls_label_t label_stack[8];
-           u8 label_stack_size;
-           u8 ttl;
-           adj_index_t adj;
-       } label_stack_o_adj;
-       struct
-       {
-           mpls_eos_bit_t eos;
-           mpls_label_t label;
-           u8 ttl;
-           index_t lb;
-       } label_o_lb;
-       struct
-       {
-           index_t adj;
-       } adj;
-       struct
-       {
-           index_t lb;
-       } lb;
-       struct
-       {
-           index_t adj;
-       } special;
-    };
-} fib_test_lb_bucket_t;
-
-typedef enum fib_test_rep_bucket_type_t_ {
-    FT_REP_LABEL_O_ADJ,
-    FT_REP_DISP_MFIB_LOOKUP,
-    FT_REP_INTF,
-} fib_test_rep_bucket_type_t;
-
-typedef struct fib_test_rep_bucket_t_ {
-    fib_test_rep_bucket_type_t type;
-
-    union
-    {
-       struct
-       {
-           mpls_eos_bit_t eos;
-           mpls_label_t label;
-           u8 ttl;
-           adj_index_t adj;
-       } label_o_adj;
-       struct
-       {
-           adj_index_t adj;
-       } adj;
-   };
-} fib_test_rep_bucket_t;
-
 #define FIB_TEST_LB(_cond, _comment, _args...)                 \
 {                                                              \
     if (!FIB_TEST_I(_cond, _comment, ##_args)) {               \
@@ -598,6 +523,16 @@ fib_test_validate_lb_v (const load_balance_t *lb,
                        bucket,
                        exp->adj.adj);
            break;
+       case FT_LB_L2:
+           FIB_TEST_I((DPO_L2_BRIDGE == dpo->dpoi_type),
+                      "bucket %d stacks on %U",
+                      bucket,
+                      format_dpo_type, dpo->dpoi_type);
+           FIB_TEST_LB((exp->adj.adj == dpo->dpoi_index),
+                       "bucket %d stacks on adj %d",
+                       bucket,
+                       exp->adj.adj);
+           break;
        case FT_LB_O_LB:
            FIB_TEST_I((DPO_LOAD_BALANCE == dpo->dpoi_type),
                        "bucket %d stacks on %U",
@@ -4066,6 +4001,45 @@ fib_test_v4 (void)
              "Table and LB newhash config match: %U",
              format_ip_flow_hash_config, lb->lb_hash_config);
 
+    /*
+     * A route via an L2 Bridge
+     */
+    fei = fib_table_entry_path_add(fib_index,
+                                   &pfx_10_10_10_3_s_32,
+                                   FIB_SOURCE_API,
+                                   FIB_ENTRY_FLAG_NONE,
+                                   DPO_PROTO_ETHERNET,
+                                   &zero_addr,
+                                   tm->hw[0]->sw_if_index,
+                                   ~0,
+                                   1,
+                                   NULL,
+                                   FIB_ROUTE_PATH_FLAG_NONE);
+    dpo_id_t l2_dpo = DPO_INVALID;
+    l2_bridge_dpo_add_or_lock(tm->hw[0]->sw_if_index, &l2_dpo);
+    fib_test_lb_bucket_t ip_o_l2 = {
+        .type = FT_LB_L2,
+        .adj = {
+            .adj = l2_dpo.dpoi_index,
+        },
+    };
+
+    FIB_TEST(fib_test_validate_entry(fei,
+                                     FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+                                     1,
+                                     &ip_o_l2),
+             "10.10.10.3 via L2 on Eth0");
+    fib_table_entry_path_remove(fib_index,
+                                &pfx_10_10_10_3_s_32,
+                                FIB_SOURCE_API,
+                                DPO_PROTO_ETHERNET,
+                                &zero_addr,
+                                tm->hw[0]->sw_if_index,
+                                fib_index,
+                                1,
+                                FIB_ROUTE_PATH_FLAG_NONE);
+    dpo_reset(&l2_dpo);
+
     /*
      * CLEANUP
      *    remove adj-fibs: 
@@ -4165,6 +4139,8 @@ fib_test_v4 (void)
             pool_elts(load_balance_map_pool));
     FIB_TEST((lb_count == pool_elts(load_balance_pool)), "LB pool size is %d",
              pool_elts(load_balance_pool));
+    FIB_TEST((0 == pool_elts(l2_bridge_dpo_pool)), "L2 DPO pool size is %d",
+             pool_elts(l2_bridge_dpo_pool));
 
     return 0;
 }
index 3692f57..5adc52e 100644 (file)
@@ -26,6 +26,7 @@ typedef enum fib_test_lb_bucket_type_t_ {
     FT_LB_SPECIAL,
     FT_LB_ADJ,
     FT_LB_INTF,
+    FT_LB_L2,
 } fib_test_lb_bucket_type_t;
 
 typedef struct fib_test_lb_bucket_t_ {
@@ -72,6 +73,7 @@ typedef struct fib_test_lb_bucket_t_ {
 
 typedef enum fib_test_rep_bucket_type_t_ {
     FT_REP_LABEL_O_ADJ,
+    FT_REP_DISP_MFIB_LOOKUP,
     FT_REP_INTF,
 } fib_test_rep_bucket_type_t;
 
index f26d794..4b7019f 100644 (file)
@@ -397,6 +397,7 @@ autoreply define ip_add_del_route
   u8 is_multipath;
   u8 is_resolve_host;
   u8 is_resolve_attached;
+  u8 is_l2_bridged;
   /* Is last/not-last message in group of multiple add/del messages. */
   u8 not_last;
   u8 next_hop_weight;
index 6981c84..c34ec57 100644 (file)
@@ -769,6 +769,7 @@ add_del_route_t_handler (u8 is_multipath,
                         u8 is_resolve_attached,
                         u8 is_interface_rx,
                         u8 is_rpf_id,
+                        u8 is_l2_bridged,
                         u32 fib_index,
                         const fib_prefix_t * prefix,
                         dpo_proto_t next_hop_proto,
@@ -806,6 +807,8 @@ add_del_route_t_handler (u8 is_multipath,
       path.frp_local_label = next_hop_via_label;
       path.frp_eos = MPLS_NON_EOS;
     }
+  if (is_l2_bridged)
+    path.frp_proto = DPO_PROTO_ETHERNET;
   if (is_resolve_host)
     path_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_HOST;
   if (is_resolve_attached)
@@ -1043,6 +1046,7 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
                                   mp->classify_table_index,
                                   mp->is_resolve_host,
                                   mp->is_resolve_attached, 0, 0,
+                                  mp->is_l2_bridged,
                                   fib_index, &pfx, DPO_PROTO_IP4,
                                   &nh,
                                   ntohl (mp->next_hop_sw_if_index),
@@ -1102,6 +1106,7 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
                                   mp->classify_table_index,
                                   mp->is_resolve_host,
                                   mp->is_resolve_attached, 0, 0,
+                                  mp->is_l2_bridged,
                                   fib_index, &pfx, DPO_PROTO_IP6,
                                   &nh, ntohl (mp->next_hop_sw_if_index),
                                   next_hop_fib_index,
index 762c40f..e41466e 100644 (file)
@@ -223,13 +223,7 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm,
                                   0,   // mp->is_unreach,
                                   0,   // mp->is_prohibit,
                                   0,   // mp->is_local,
-                                  mp->mr_is_multicast,
-                                  mp->mr_is_classify,
-                                  mp->mr_classify_table_index,
-                                  mp->mr_is_resolve_host,
-                                  mp->mr_is_resolve_attached,
-                                  mp->mr_is_interface_rx,
-                                  mp->mr_is_rpf_id,
+                                  mp->mr_is_multicast, mp->mr_is_classify, mp->mr_classify_table_index, mp->mr_is_resolve_host, mp->mr_is_resolve_attached, mp->mr_is_interface_rx, mp->mr_is_rpf_id, 0,       // l2_bridged
                                   fib_index, &pfx,
                                   mp->mr_next_hop_proto,
                                   &nh, ntohl (mp->mr_next_hop_sw_if_index),
diff --git a/test/test_dvr.py b/test/test_dvr.py
new file mode 100644 (file)
index 0000000..27522a5
--- /dev/null
@@ -0,0 +1,178 @@
+#!/usr/bin/env python
+import random
+import socket
+import unittest
+
+from framework import VppTestCase, VppTestRunner
+from vpp_sub_interface import VppSubInterface, VppDot1QSubint
+from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto
+from vpp_papi_provider import L2_VTR_OP
+
+from scapy.packet import Raw
+from scapy.layers.l2 import Ether, Dot1Q, ARP
+from scapy.layers.inet import IP, UDP
+from util import ppp
+
+
+class TestDVR(VppTestCase):
+    """ IPv4 Load-Balancing """
+
+    def setUp(self):
+        super(TestDVR, self).setUp()
+
+        self.create_pg_interfaces(range(4))
+        self.create_loopback_interfaces(range(1))
+
+        for i in self.pg_interfaces:
+            i.admin_up()
+
+        self.loop0.config_ip4()
+
+    def tearDown(self):
+        for i in self.pg_interfaces:
+            i.admin_down()
+        self.loop0.unconfig_ip4()
+
+        super(TestDVR, self).tearDown()
+
+    def test_dvr(self):
+        """ Distributed Virtual Router """
+
+        #
+        # A packet destined to an IP address that is L2 bridged via
+        # a non-tag interface
+        #
+        ip_non_tag_bridged = "10.10.10.10"
+        ip_tag_bridged = "10.10.10.11"
+        any_src_addr = "1.1.1.1"
+
+        pkt_no_tag = (Ether(src=self.pg0.remote_mac,
+                            dst=self.loop0.local_mac) /
+                      IP(src=any_src_addr,
+                         dst=ip_non_tag_bridged) /
+                      UDP(sport=1234, dport=1234) /
+                      Raw('\xa5' * 100))
+        pkt_tag = (Ether(src=self.pg0.remote_mac,
+                         dst=self.loop0.local_mac) /
+                   IP(src=any_src_addr,
+                      dst=ip_tag_bridged) /
+                   UDP(sport=1234, dport=1234) /
+                   Raw('\xa5' * 100))
+
+        #
+        # Two sub-interfaces so we can test VLAN tag push/pop
+        #
+        sub_if_on_pg2 = VppDot1QSubint(self, self.pg2, 92)
+        sub_if_on_pg3 = VppDot1QSubint(self, self.pg3, 93)
+        sub_if_on_pg2.admin_up()
+        sub_if_on_pg3.admin_up()
+
+        #
+        # Put all the interfaces into a new bridge domain
+        #
+        self.vapi.sw_interface_set_l2_bridge(self.pg0.sw_if_index, 1)
+        self.vapi.sw_interface_set_l2_bridge(self.pg1.sw_if_index, 1)
+        self.vapi.sw_interface_set_l2_bridge(sub_if_on_pg2.sw_if_index, 1)
+        self.vapi.sw_interface_set_l2_bridge(sub_if_on_pg3.sw_if_index, 1)
+        self.vapi.sw_interface_set_l2_bridge(self.loop0.sw_if_index, 1, bvi=1)
+
+        self.vapi.sw_interface_set_l2_tag_rewrite(sub_if_on_pg2.sw_if_index,
+                                                  L2_VTR_OP.L2_POP_1,
+                                                  92)
+        self.vapi.sw_interface_set_l2_tag_rewrite(sub_if_on_pg3.sw_if_index,
+                                                  L2_VTR_OP.L2_POP_1,
+                                                  93)
+
+        self.logger.error(self.vapi.ppcli("show bridge-domain 1 detail"))
+
+        #
+        # Add a route to bridge the traffic via a non-tagged interface
+        #
+        route_no_tag = VppIpRoute(
+            self, ip_non_tag_bridged, 32,
+            [VppRoutePath("0.0.0.0",
+                          self.pg1.sw_if_index,
+                          proto=DpoProto.DPO_PROTO_ETHERNET)])
+        route_no_tag.add_vpp_config()
+
+        #
+        # Inject the packet that arrives and leaves on a non-tagged interface
+        # Since it's 'bridged' expect that the MAC header is unchanged.
+        #
+        self.pg0.add_stream(pkt_no_tag)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx = self.pg1.get_capture(1)
+
+        self.assertEqual(rx[0][Ether].dst, pkt_no_tag[Ether].dst)
+        self.assertEqual(rx[0][Ether].src, pkt_no_tag[Ether].src)
+
+        #
+        # Add routes to bridge the traffic via a tagged interface
+        #
+        route_no_tag = VppIpRoute(
+            self, ip_tag_bridged, 32,
+            [VppRoutePath("0.0.0.0",
+                          sub_if_on_pg3.sw_if_index,
+                          proto=DpoProto.DPO_PROTO_ETHERNET)])
+        route_no_tag.add_vpp_config()
+
+        #
+        # Inject the packet that arrives on a non-tagged interface and leaves
+        # on a tagged one; since it's 'bridged' the MAC header is unchanged.
+        #
+        self.pg0.add_stream(pkt_tag)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx = self.pg3.get_capture(1)
+
+        self.assertEqual(rx[0][Ether].dst, pkt_tag[Ether].dst)
+        self.assertEqual(rx[0][Ether].src, pkt_tag[Ether].src)
+        self.assertEqual(rx[0][Dot1Q].vlan, 93)
+
+        #
+        # Tag to tag
+        #
+        pkt_tag_to_tag = (Ether(src=self.pg2.remote_mac,
+                                dst=self.loop0.local_mac) /
+                          Dot1Q(vlan=92) /
+                          IP(src=any_src_addr,
+                             dst=ip_tag_bridged) /
+                          UDP(sport=1234, dport=1234) /
+                          Raw('\xa5' * 100))
+
+        self.pg2.add_stream(pkt_tag_to_tag)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        rx = self.pg3.get_capture(1)
+
+        self.assertEqual(rx[0][Ether].dst, pkt_tag_to_tag[Ether].dst)
+        self.assertEqual(rx[0][Ether].src, pkt_tag_to_tag[Ether].src)
+        self.assertEqual(rx[0][Dot1Q].vlan, 93)
+
+        #
+        # Tag to non-Tag
+        #
+        pkt_tag_to_non_tag = (Ether(src=self.pg2.remote_mac,
+                                    dst=self.loop0.local_mac) /
+                              Dot1Q(vlan=92) /
+                              IP(src=any_src_addr,
+                                 dst=ip_non_tag_bridged) /
+                              UDP(sport=1234, dport=1234) /
+                              Raw('\xa5' * 100))
+
+        self.pg2.add_stream(pkt_tag_to_non_tag)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        rx = self.pg1.get_capture(1)
+
+        self.assertEqual(rx[0][Ether].dst, pkt_tag_to_tag[Ether].dst)
+        self.assertEqual(rx[0][Ether].src, pkt_tag_to_tag[Ether].src)
+        self.assertFalse(rx[0].haslayer(Dot1Q))
+
+if __name__ == '__main__':
+    unittest.main(testRunner=VppTestRunner)
index b799379..e670230 100644 (file)
@@ -193,6 +193,8 @@ class VppIpRoute(VppObject):
                     next_hop_via_label=path.nh_via_label,
                     next_hop_table_id=path.nh_table_id,
                     is_ipv6=self.is_ip6,
+                    is_l2_bridged=1
+                    if path.proto == DpoProto.DPO_PROTO_ETHERNET else 0,
                     is_resolve_host=path.is_resolve_host,
                     is_resolve_attached=path.is_resolve_attached,
                     is_multipath=1 if len(self.paths) > 1 else 0)
index b6759ec..f9f54da 100644 (file)
@@ -714,6 +714,7 @@ class VppPapiProvider(object):
             is_local=0,
             is_classify=0,
             is_multipath=0,
+            is_l2_bridged=0,
             not_last=0):
         """
 
@@ -754,6 +755,7 @@ class VppPapiProvider(object):
              'is_multipath': is_multipath,
              'is_resolve_host': is_resolve_host,
              'is_resolve_attached': is_resolve_attached,
+             'is_l2_bridged': is_l2_bridged,
              'not_last': not_last,
              'next_hop_weight': next_hop_weight,
              'dst_address_length': dst_address_length,