MAP pre-resolve - use FIB to track pre-resolved next-hop (patch set 23/5623/4)
author Neale Ranns <nranns@cisco.com>
Tue, 21 Feb 2017 02:23:41 +0000 (18:23 -0800)
committer Ole Trøan <otroan@employees.org>
Thu, 9 Mar 2017 13:24:11 +0000 (13:24 +0000)
Change-Id: I9ea16881caf7aee57f0daf4ac2e8b82c672f87e9
Signed-off-by: Neale Ranns <nranns@cisco.com>
src/vnet/fib/fib_node.h
src/vnet/map/ip4_map.c
src/vnet/map/ip6_map.c
src/vnet/map/map.c
src/vnet/map/map.h
test/test_map.py [new file with mode: 0644]
test/vpp_papi_provider.py

index 457dfb7..496929a 100644 (file)
@@ -41,6 +41,7 @@ typedef enum fib_node_type_t_ {
     FIB_NODE_TYPE_LISP_ADJ,
     FIB_NODE_TYPE_GRE_TUNNEL,
     FIB_NODE_TYPE_VXLAN_TUNNEL,
+    FIB_NODE_TYPE_MAP_E,
     /**
      * Marker. New types before this one. leave the test last.
      */
@@ -63,6 +64,7 @@ typedef enum fib_node_type_t_ {
     [FIB_NODE_TYPE_LISP_ADJ] = "lisp-adj", \
     [FIB_NODE_TYPE_GRE_TUNNEL] = "gre-tunnel", \
     [FIB_NODE_TYPE_VXLAN_TUNNEL] = "vxlan-tunnel", \
+    [FIB_NODE_TYPE_MAP_E] = "map-e", \
 }
 
 /**
index 9fd10f6..2be9ad3 100644 (file)
@@ -173,18 +173,10 @@ static_always_inline bool
 ip4_map_ip6_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip)
 {
 #ifdef MAP_SKIP_IP6_LOOKUP
-  map_main_t *mm = &map_main;
-  u32 adj_index0 = mm->adj6_index;
-  if (adj_index0 > 0)
+  if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP6].fei)
     {
-      ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
-      ip_adjacency_t *adj = ip_get_adjacency (lm6, mm->adj6_index);
-      if (adj->n_adj > 1)
-       {
-         u32 hash_c0 = ip4_compute_flow_hash (ip, IP_FLOW_HASH_DEFAULT);
-         adj_index0 += (hash_c0 & (adj->n_adj - 1));
-       }
-      vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
+      vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
+       pre_resolved[FIB_PROTOCOL_IP6].dpo.dpoi_index;
       return (true);
     }
 #endif
@@ -773,7 +765,7 @@ VLIB_REGISTER_NODE(ip4_map_node) = {
   .next_nodes = {
     [IP4_MAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
 #ifdef MAP_SKIP_IP6_LOOKUP
-    [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-rewrite",
+    [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-load-balance",
 #endif
     [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
     [IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag",
index d294505..f7eb768 100644 (file)
@@ -151,18 +151,10 @@ static_always_inline bool
 ip6_map_ip4_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip)
 {
 #ifdef MAP_SKIP_IP6_LOOKUP
-  map_main_t *mm = &map_main;
-  u32 adj_index0 = mm->adj4_index;
-  if (adj_index0 > 0)
+  if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP4].fei)
     {
-      ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
-      ip_adjacency_t *adj = ip_get_adjacency (lm4, mm->adj4_index);
-      if (adj->n_adj > 1)
-       {
-         u32 hash_c0 = ip4_compute_flow_hash (ip, IP_FLOW_HASH_DEFAULT);
-         adj_index0 += (hash_c0 & (adj->n_adj - 1));
-       }
-      vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
+      vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
+       pre_resolved[FIB_PROTOCOL_IP4].dpo.dpoi_index;
       return (true);
     }
 #endif
@@ -1195,7 +1187,7 @@ VLIB_REGISTER_NODE(ip6_map_node) = {
   .next_nodes = {
     [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
 #ifdef MAP_SKIP_IP6_LOOKUP
-    [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-rewrite",
+    [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-load-balance",
 #endif
     [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass",
     [IP6_MAP_NEXT_IP4_REASS] = "ip6-map-ip4-reass",
index a2d2811..6823a46 100644 (file)
@@ -41,6 +41,7 @@ crc_u32 (u32 data, u32 value)
 }
 #endif
 
+
 /*
  * This code supports the following MAP modes:
  *
@@ -437,23 +438,141 @@ map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t * tep,
 }
 
 #ifdef MAP_SKIP_IP6_LOOKUP
+/**
+ * Pre-resolved per-protocol global next-hops
+ */
+map_main_pre_resolved_t pre_resolved[FIB_PROTOCOL_MAX];
+
 static void
-map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6)
+map_pre_resolve_init (map_main_pre_resolved_t * pr)
 {
-  map_main_t *mm = &map_main;
-  ip6_main_t *im6 = &ip6_main;
+  pr->fei = FIB_NODE_INDEX_INVALID;
+  fib_node_init (&pr->node, FIB_NODE_TYPE_MAP_E);
+}
+
+static u8 *
+format_map_pre_resolve (u8 * s, va_list ap)
+{
+  map_main_pre_resolved_t *pr = va_arg (ap, map_main_pre_resolved_t *);
+
+  if (FIB_NODE_INDEX_INVALID != pr->fei)
+    {
+      fib_prefix_t pfx;
+
+      fib_entry_get_prefix (pr->fei, &pfx);
+
+      return (format (s, "%U (%u)",
+                     format_ip46_address, &pfx.fp_addr, IP46_TYPE_ANY,
+                     pr->dpo.dpoi_index));
+    }
+  else
+    {
+      return (format (s, "un-set"));
+    }
+}
+
+
+/**
+ * Function definition to inform the FIB node that its last lock has gone.
+ */
+static void
+map_last_lock_gone (fib_node_t * node)
+{
+  /*
+   * The MAP is a root of the graph. As such
+   * it never has children and thus is never locked.
+   */
+  ASSERT (0);
+}
+
+static map_main_pre_resolved_t *
+map_from_fib_node (fib_node_t * node)
+{
+#if (CLIB_DEBUG > 0)
+  ASSERT (FIB_NODE_TYPE_MAP_E == node->fn_type);
+#endif
+  return ((map_main_pre_resolved_t *)
+         (((char *) node) -
+          STRUCT_OFFSET_OF (map_main_pre_resolved_t, node)));
+}
+
+static void
+map_stack (map_main_pre_resolved_t * pr)
+{
+  const dpo_id_t *dpo;
 
-  if (ip6->as_u64[0] != 0 || ip6->as_u64[1] != 0)
+  dpo = fib_entry_contribute_ip_forwarding (pr->fei);
+
+  dpo_copy (&pr->dpo, dpo);
+}
+
+/**
+ * Function definition to backwalk a FIB node
+ */
+static fib_node_back_walk_rc_t
+map_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx)
+{
+  map_stack (map_from_fib_node (node));
+
+  return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/**
+ * Function definition to get a FIB node from its index
+ */
+static fib_node_t *
+map_fib_node_get (fib_node_index_t index)
+{
+  return (&pre_resolved[index].node);
+}
+
+/*
+ * Virtual function table registered by MAP-E pre-resolved next-hops
+ * for participation in the FIB object graph.
+ */
+const static fib_node_vft_t map_vft = {
+  .fnv_get = map_fib_node_get,
+  .fnv_last_lock = map_last_lock_gone,
+  .fnv_back_walk = map_back_walk,
+};
+
+static void
+map_fib_resolve (map_main_pre_resolved_t * pr,
+                fib_protocol_t proto, u8 len, const ip46_address_t * addr)
+{
+  fib_prefix_t pfx = {
+    .fp_proto = proto,
+    .fp_len = len,
+    .fp_addr = *addr,
+  };
+
+  pr->fei = fib_table_entry_special_add (0,    // default fib
+                                        &pfx,
+                                        FIB_SOURCE_RR,
+                                        FIB_ENTRY_FLAG_NONE,
+                                        ADJ_INDEX_INVALID);
+  pr->sibling = fib_entry_child_add (pr->fei, FIB_NODE_TYPE_MAP_E, proto);
+  map_stack (pr);
+}
+
+static void
+map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6)
+{
+  if (ip6 && (ip6->as_u64[0] != 0 || ip6->as_u64[1] != 0))
     {
-      // FIXME NOT an ADJ
-      mm->adj6_index = ip6_fib_table_fwding_lookup (im6, 0, ip6);
-      clib_warning ("FIB lookup results in: %u", mm->adj6_index);
+      ip46_address_t addr = {
+       .ip6 = *ip6,
+      };
+      map_fib_resolve (&pre_resolved[FIB_PROTOCOL_IP6],
+                      FIB_PROTOCOL_IP6, 128, &addr);
     }
-  if (ip4->as_u32 != 0)
+  if (ip4 && (ip4->as_u32 != 0))
     {
-      // FIXME NOT an ADJ
-      mm->adj4_index = ip4_fib_table_lookup_lb (0, ip4);
-      clib_warning ("FIB lookup results in: %u", mm->adj4_index);
+      ip46_address_t addr = {
+       .ip4 = *ip4,
+      };
+      map_fib_resolve (&pre_resolved[FIB_PROTOCOL_IP4],
+                      FIB_PROTOCOL_IP4, 32, &addr);
     }
 }
 #endif
@@ -695,9 +814,8 @@ map_pre_resolve_command_fn (vlib_main_t * vm,
                            vlib_cli_command_t * cmd)
 {
   unformat_input_t _line_input, *line_input = &_line_input;
-  ip4_address_t ip4nh;
-  ip6_address_t ip6nh;
-  map_main_t *mm = &map_main;
+  ip4_address_t ip4nh, *p_v4 = NULL;
+  ip6_address_t ip6nh, *p_v6 = NULL;
   clib_error_t *error = NULL;
 
   memset (&ip4nh, 0, sizeof (ip4nh));
@@ -710,10 +828,10 @@ map_pre_resolve_command_fn (vlib_main_t * vm,
   while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
     {
       if (unformat (line_input, "ip4-nh %U", unformat_ip4_address, &ip4nh))
-       mm->preresolve_ip4 = ip4nh;
+       p_v4 = &ip4nh;
       else
        if (unformat (line_input, "ip6-nh %U", unformat_ip6_address, &ip6nh))
-       mm->preresolve_ip6 = ip6nh;
+       p_v6 = &ip6nh;
       else
        {
          error = clib_error_return (0, "unknown input `%U'",
@@ -722,7 +840,7 @@ map_pre_resolve_command_fn (vlib_main_t * vm,
        }
     }
 
-  map_pre_resolve (&ip4nh, &ip6nh);
+  map_pre_resolve (p_v4, p_v6);
 
 done:
   unformat_free (line_input);
@@ -1113,9 +1231,10 @@ show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input,
 
 #if MAP_SKIP_IP6_LOOKUP
   vlib_cli_output (vm,
-                  "MAP pre-resolve: IP6 next-hop: %U (%u), IP4 next-hop: %U (%u)\n",
-                  format_ip6_address, &mm->preresolve_ip6, mm->adj6_index,
-                  format_ip4_address, &mm->preresolve_ip4, mm->adj4_index);
+                  "MAP pre-resolve: IP6 next-hop: %U, IP4 next-hop: %U\n",
+                  format_map_pre_resolve, &pre_resolved[FIB_PROTOCOL_IP6],
+                  format_map_pre_resolve, &pre_resolved[FIB_PROTOCOL_IP4]);
+
 #endif
 
   if (mm->tc_copy)
@@ -2180,10 +2299,12 @@ map_init (vlib_main_t * vm)
   mm->vlib_main = vm;
 
 #ifdef MAP_SKIP_IP6_LOOKUP
-  memset (&mm->preresolve_ip4, 0, sizeof (mm->preresolve_ip4));
-  memset (&mm->preresolve_ip6, 0, sizeof (mm->preresolve_ip6));
-  mm->adj4_index = 0;
-  mm->adj6_index = 0;
+  fib_protocol_t proto;
+
+  FOR_EACH_FIB_PROTOCOL (proto)
+  {
+    map_pre_resolve_init (&pre_resolved[proto]);
+  }
 #endif
 
   /* traffic class */
@@ -2238,6 +2359,9 @@ map_init (vlib_main_t * vm)
   mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE;
   map_ip6_reass_reinit (NULL, NULL);
 
+#ifdef MAP_SKIP_IP6_LOOKUP
+  fib_node_register_type (FIB_NODE_TYPE_MAP_E, &map_vft);
+#endif
   map_dpo_module_init ();
 
   return 0;
index f446b73..616d42c 100644 (file)
@@ -198,6 +198,40 @@ typedef struct {
   map_ip6_fragment_t fragments[MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY];
 } map_ip6_reass_t;
 
+#ifdef MAP_SKIP_IP6_LOOKUP
+/**
+ * A pre-resolved next-hop
+ */
+typedef struct map_main_pre_resolved_t_
+{
+  /**
+   * Linkage into the FIB graph
+   */
+  fib_node_t node;
+
+  /**
+   * The FIB entry index of the next-hop
+   */
+  fib_node_index_t fei;
+
+  /**
+   * This object sibling index on the FIB entry's child dependency list
+   */
+  u32 sibling;
+
+  /**
+   * The Load-balance object index to use to forward
+   */
+  dpo_id_t dpo;
+} map_main_pre_resolved_t;
+
+/**
+ * Pre-resolved next hops for v4 and v6. Why these are global and not
+ * per-domain is beyond me.
+ */
+extern map_main_pre_resolved_t pre_resolved[FIB_PROTOCOL_MAX];
+#endif
+
 typedef struct {
   /* pool of MAP domains */
   map_domain_t *domains;
@@ -207,13 +241,6 @@ typedef struct {
   vlib_combined_counter_main_t *domain_counters;
   volatile u32 *counter_lock;
 
-#ifdef MAP_SKIP_IP6_LOOKUP
-  /* pre-presolve */
-  u32 adj6_index, adj4_index;
-  ip4_address_t preresolve_ip4;
-  ip6_address_t preresolve_ip6;
-#endif
-
   /* Traffic class: zero, copy (~0) or fixed value */
   u8 tc;
   bool tc_copy;
diff --git a/test/test_map.py b/test/test_map.py
new file mode 100644 (file)
index 0000000..bc6cd81
--- /dev/null
@@ -0,0 +1,171 @@
+#!/usr/bin/env python
+
+import unittest
+import socket
+
+from framework import VppTestCase, VppTestRunner
+from vpp_ip_route import VppIpRoute, VppRoutePath
+
+from scapy.layers.l2 import Ether, Raw
+from scapy.layers.inet import IP, UDP, ICMP
+from scapy.layers.inet6 import IPv6
+
+
+class TestMAP(VppTestCase):
+    """ MAP Test Case """
+
+    def setUp(self):
+        super(TestMAP, self).setUp()
+
+        # create 2 pg interfaces
+        self.create_pg_interfaces(range(4))
+
+        # pg0 is 'inside' IPv4
+        self.pg0.admin_up()
+        self.pg0.config_ip4()
+        self.pg0.resolve_arp()
+
+        # pg1 is 'outside' IPv6
+        self.pg1.admin_up()
+        self.pg1.config_ip6()
+        self.pg1.generate_remote_hosts(4)
+        self.pg1.configure_ipv6_neighbors()
+
+    def tearDown(self):
+        super(TestMAP, self).tearDown()
+        for i in self.pg_interfaces:
+            i.unconfig_ip4()
+            i.unconfig_ip6()
+            i.admin_down()
+
+    def send_and_assert_no_replies(self, intf, pkts, remark):
+        intf.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        for i in self.pg_interfaces:
+            i.assert_nothing_captured(remark=remark)
+
+    def send_and_assert_encapped(self, tx, ip6_src, ip6_dst, dmac=None):
+        if not dmac:
+            dmac = self.pg1.remote_mac
+
+        self.pg0.add_stream(tx)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx = self.pg1.get_capture(1)
+        rx = rx[0]
+
+        self.assertEqual(rx[Ether].dst, dmac)
+        self.assertEqual(rx[IP].src, tx[IP].src)
+        self.assertEqual(rx[IPv6].src, ip6_src)
+        self.assertEqual(rx[IPv6].dst, ip6_dst)
+
+    def test_map_e(self):
+        """ MAP-E """
+
+        #
+        # Add a route to the MAP-BR
+        #
+        map_br_pfx = "2001::"
+        map_br_pfx_len = 64
+        map_route = VppIpRoute(self,
+                               map_br_pfx,
+                               map_br_pfx_len,
+                               [VppRoutePath(self.pg1.remote_ip6,
+                                             self.pg1.sw_if_index,
+                                             is_ip6=1)],
+                               is_ip6=1)
+        map_route.add_vpp_config()
+
+        #
+        # Add a domain that maps from pg0 to pg1
+        #
+        map_dst = socket.inet_pton(socket.AF_INET6, map_br_pfx)
+        map_src = "3001::1"
+        map_src_n = socket.inet_pton(socket.AF_INET6, map_src)
+        client_pfx = socket.inet_pton(socket.AF_INET, "192.168.0.0")
+
+        self.vapi.map_add_domain(map_dst,
+                                 map_br_pfx_len,
+                                 map_src_n,
+                                 128,
+                                 client_pfx,
+                                 16)
+
+        #
+        # Fire in a v4 packet that will be encapped to the BR
+        #
+        v4 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
+              IP(src=self.pg0.remote_ip4, dst='192.168.1.1') /
+              UDP(sport=20000, dport=10000) /
+              Raw('\xa5' * 100))
+
+        self.send_and_assert_encapped(v4, map_src, "2001::c0a8:0:0")
+
+        #
+        # Fire in a V6 encapped packet.
+        #  expect a decapped packet on the inside ip4 link
+        #
+        p = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) /
+             IPv6(dst=map_src, src="2001::1") /
+             IP(dst=self.pg0.remote_ip4, src='192.168.1.1') /
+             UDP(sport=20000, dport=10000) /
+             Raw('\xa5' * 100))
+
+        self.pg1.add_stream(p)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx = self.pg0.get_capture(1)
+        rx = rx[0]
+
+        self.assertFalse(rx.haslayer(IPv6))
+        self.assertEqual(rx[IP].src, p[IP].src)
+        self.assertEqual(rx[IP].dst, p[IP].dst)
+
+        #
+        # Pre-resolve. No API for this!!
+        #
+        self.vapi.ppcli("map params pre-resolve ip6-nh 4001::1")
+
+        self.send_and_assert_no_replies(self.pg0, v4,
+                                        "resolved via default route")
+
+        #
+        # Add a route to 4001::1. Expect the encapped traffic to be
+        # sent via that routes next-hop
+        #
+        pre_res_route = VppIpRoute(self,
+                                   "4001::1",
+                                   128,
+                                   [VppRoutePath(self.pg1.remote_hosts[2].ip6,
+                                                 self.pg1.sw_if_index,
+                                                 is_ip6=1)],
+                                   is_ip6=1)
+        pre_res_route.add_vpp_config()
+
+        self.send_and_assert_encapped(v4, map_src,
+                                      "2001::c0a8:0:0",
+                                      dmac=self.pg1.remote_hosts[2].mac)
+
+        #
+        # change the route to the pre-resolved next-hop
+        #
+        pre_res_route1 = VppIpRoute(self,
+                                    "4001::1",
+                                    128,
+                                    [VppRoutePath(self.pg1.remote_hosts[3].ip6,
+                                                  self.pg1.sw_if_index,
+                                                  is_ip6=1)],
+                                    is_ip6=1)
+        pre_res_route1.add_vpp_config()
+
+        self.send_and_assert_encapped(v4, map_src,
+                                      "2001::c0a8:0:0",
+                                      dmac=self.pg1.remote_hosts[3].mac)
+
+if __name__ == '__main__':
+    unittest.main(testRunner=VppTestRunner)
index 0062b72..9207042 100644 (file)
@@ -1624,3 +1624,31 @@ class VppPapiProvider(object):
             {
                 'vni': vni
             })
+
+    def map_add_domain(self,
+                       ip6_prefix,
+                       ip6_prefix_len,
+                       ip6_src,
+                       ip6_src_prefix_len,
+                       ip4_prefix,
+                       ip4_prefix_len,
+                       ea_bits_len=0,
+                       psid_offset=0,
+                       psid_length=0,
+                       is_translation=0,
+                       mtu=1280):
+        return self.api(
+            self.papi.map_add_domain,
+            {
+                'ip6_prefix': ip6_prefix,
+                'ip6_prefix_len': ip6_prefix_len,
+                'ip4_prefix': ip4_prefix,
+                'ip4_prefix_len': ip4_prefix_len,
+                'ip6_src': ip6_src,
+                'ip6_src_prefix_len': ip6_src_prefix_len,
+                'ea_bits_len': ea_bits_len,
+                'psid_offset': psid_offset,
+                'psid_length': psid_length,
+                'is_translation': is_translation,
+                'mtu': mtu
+            })