MTRIE Optimisations 2 89/5889/6
author: Neale Ranns <nranns@cisco.com>
Tue, 28 Mar 2017 10:49:52 +0000 (03:49 -0700)
committer: Damjan Marion <dmarion.lists@gmail.com>
Sat, 1 Apr 2017 16:48:31 +0000 (16:48 +0000)
1) 16-8-8 stride. Reduces the depth of the trie walk, traded against increased memory in the top PLY.
2) Separate the vector of protocol-independent (PI) fib_table_t from the vector of protocol-dependent (PD) FIBs. PD FIBs are large structures; we don't want to burn the memory for each PD type.
3) Go straight to the PD FIB in the data-path thus avoiding an indirection through, e.g., a PLY pool.

Change-Id: I800d1ed0b2049040d5da95213f3ed6b12bdd78b7
Signed-off-by: Neale Ranns <nranns@cisco.com>
28 files changed:
src/vnet/cop/ip4_whitelist.c
src/vnet/dpo/load_balance.c
src/vnet/dpo/lookup_dpo.c
src/vnet/fib/fib.c
src/vnet/fib/fib_entry.c
src/vnet/fib/fib_path.c
src/vnet/fib/fib_table.c
src/vnet/fib/fib_table.h
src/vnet/fib/fib_test.c
src/vnet/fib/ip4_fib.c
src/vnet/fib/ip4_fib.h
src/vnet/fib/ip6_fib.c
src/vnet/fib/ip6_fib.h
src/vnet/fib/mpls_fib.c
src/vnet/fib/mpls_fib.h
src/vnet/ip/ip4.h
src/vnet/ip/ip4_forward.c
src/vnet/ip/ip4_mtrie.c
src/vnet/ip/ip4_mtrie.h
src/vnet/ip/ip4_packet.h
src/vnet/ip/ip4_source_check.c
src/vnet/ip/ip6.h
src/vnet/ip/ip_api.c
src/vnet/mpls/interface.c
src/vnet/mpls/mpls.h
src/vnet/mpls/mpls_api.c
src/vpp/api/api.c
src/vpp/stats/stats.c

index ccb9dc0..6ef3d7d 100644 (file)
@@ -127,9 +127,6 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
 
           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
 
-         leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
-                                             &ip0->src_address, 1);
-
          leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
                                              &ip0->src_address, 2);
 
@@ -166,9 +163,6 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
 
           leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
 
-         leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
-                                             &ip1->src_address, 1);
-
          leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
                                              &ip1->src_address, 2);
 
@@ -263,9 +257,6 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
 
           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
 
-         leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, 
-                                             &ip0->src_address, 1);
-
          leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, 
                                              &ip0->src_address, 2);
 
index d5e98e4..6b0eda0 100644 (file)
@@ -827,14 +827,18 @@ const static char* const * const load_balance_nodes[DPO_PROTO_NUM] =
 void
 load_balance_module_init (void)
 {
+    index_t lbi;
+
     dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes);
 
     /*
      * Special LB with index zero. we need to define this since the v4 mtrie
      * assumes an index of 0 implies the ply is empty. therefore all 'real'
      * adjs need a non-zero index.
+     * This should never be used, but just in case, stack it on a drop.
      */
-    load_balance_create(0, DPO_PROTO_IP4, 0);
+    lbi = load_balance_create(1, DPO_PROTO_IP4, 0);
+    load_balance_set_bucket(lbi, 0, drop_dpo_get(DPO_PROTO_IP4));
 
     load_balance_map_module_init();
 }
index 3726c8f..e94e871 100644 (file)
@@ -211,7 +211,6 @@ ip4_src_fib_lookup_one (u32 src_fib_index0,
     mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie;
 
     leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0);
-    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1);
     leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
     leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
 
@@ -235,9 +234,6 @@ ip4_src_fib_lookup_two (u32 src_fib_index0,
     leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0);
     leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, addr1);
 
-    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1);
-    leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 1);
-
     leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
     leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 2);
 
index 413f93e..b430e11 100644 (file)
@@ -28,6 +28,8 @@ fib_module_init (vlib_main_t * vm)
        return (error);
     if ((error = vlib_call_init_function (vm, adj_module_init)))
        return (error);
+    if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
+       return (error);
 
     fib_entry_module_init();
     fib_entry_src_module_init();
index 25005e1..6ac5461 100644 (file)
@@ -924,10 +924,10 @@ fib_entry_path_remove (fib_node_index_t fib_entry_index,
                /*
                 * no more sources left. this entry is toast.
                 */
-               fib_entry_src_action_uninstall(fib_entry);
                fib_entry = fib_entry_post_flag_update_actions(fib_entry,
                                                                source,
                                                                bflags);
+               fib_entry_src_action_uninstall(fib_entry);
 
                return (FIB_ENTRY_SRC_FLAG_NONE);
            }
@@ -1014,10 +1014,10 @@ fib_entry_special_remove (fib_node_index_t fib_entry_index,
                /*
                 * no more sources left. this entry is toast.
                 */
-               fib_entry_src_action_uninstall(fib_entry);
                fib_entry = fib_entry_post_flag_update_actions(fib_entry,
                                                                source,
                                                                bflags);
+               fib_entry_src_action_uninstall(fib_entry);
 
                return (FIB_ENTRY_SRC_FLAG_NONE);
            }
index 3ed309f..928a9d4 100644 (file)
@@ -32,6 +32,7 @@
 #include <vnet/fib/fib_path_list.h>
 #include <vnet/fib/fib_internal.h>
 #include <vnet/fib/fib_urpf_list.h>
+#include <vnet/fib/mpls_fib.h>
 
 /**
  * Enurmeration of path types
index 7818d02..6c3162e 100644 (file)
@@ -47,7 +47,7 @@ fib_table_lookup_i (fib_table_t *fib_table,
     switch (prefix->fp_proto)
     {
     case FIB_PROTOCOL_IP4:
-       return (ip4_fib_table_lookup(&fib_table->v4,
+       return (ip4_fib_table_lookup(ip4_fib_get(fib_table->ft_index),
                                     &prefix->fp_addr.ip4,
                                     prefix->fp_len));
     case FIB_PROTOCOL_IP6:
@@ -55,7 +55,7 @@ fib_table_lookup_i (fib_table_t *fib_table,
                                     &prefix->fp_addr.ip6,
                                     prefix->fp_len));
     case FIB_PROTOCOL_MPLS:
-       return (mpls_fib_table_lookup(&fib_table->mpls,
+       return (mpls_fib_table_lookup(mpls_fib_get(fib_table->ft_index),
                                      prefix->fp_label,
                                      prefix->fp_eos));
     }
@@ -76,7 +76,7 @@ fib_table_lookup_exact_match_i (const fib_table_t *fib_table,
     switch (prefix->fp_proto)
     {
     case FIB_PROTOCOL_IP4:
-       return (ip4_fib_table_lookup_exact_match(&fib_table->v4,
+       return (ip4_fib_table_lookup_exact_match(ip4_fib_get(fib_table->ft_index),
                                                 &prefix->fp_addr.ip4,
                                                 prefix->fp_len));
     case FIB_PROTOCOL_IP6:
@@ -84,7 +84,7 @@ fib_table_lookup_exact_match_i (const fib_table_t *fib_table,
                                                 &prefix->fp_addr.ip6,
                                                 prefix->fp_len));
     case FIB_PROTOCOL_MPLS:
-       return (mpls_fib_table_lookup(&fib_table->mpls,
+       return (mpls_fib_table_lookup(mpls_fib_get(fib_table->ft_index),
                                      prefix->fp_label,
                                      prefix->fp_eos));
     }
@@ -148,7 +148,7 @@ fib_table_entry_remove (fib_table_t *fib_table,
     switch (prefix->fp_proto)
     {
     case FIB_PROTOCOL_IP4:
-       ip4_fib_table_entry_remove(&fib_table->v4,
+       ip4_fib_table_entry_remove(ip4_fib_get(fib_table->ft_index),
                                   &prefix->fp_addr.ip4,
                                   prefix->fp_len);
        break;
@@ -158,7 +158,7 @@ fib_table_entry_remove (fib_table_t *fib_table,
                                   prefix->fp_len);
        break;
     case FIB_PROTOCOL_MPLS:
-       mpls_fib_table_entry_remove(&fib_table->mpls,
+       mpls_fib_table_entry_remove(mpls_fib_get(fib_table->ft_index),
                                    prefix->fp_label,
                                    prefix->fp_eos);
        break;
@@ -208,7 +208,7 @@ fib_table_entry_insert (fib_table_t *fib_table,
     switch (prefix->fp_proto)
     {
     case FIB_PROTOCOL_IP4:
-       ip4_fib_table_entry_insert(&fib_table->v4,
+       ip4_fib_table_entry_insert(ip4_fib_get(fib_table->ft_index),
                                   &prefix->fp_addr.ip4,
                                   prefix->fp_len,
                                   fib_entry_index);
@@ -220,7 +220,7 @@ fib_table_entry_insert (fib_table_t *fib_table,
                                   fib_entry_index);
        break;
     case FIB_PROTOCOL_MPLS:
-       mpls_fib_table_entry_insert(&fib_table->mpls,
+       mpls_fib_table_entry_insert(mpls_fib_get(fib_table->ft_index),
                                    prefix->fp_label,
                                    prefix->fp_eos,
                                    fib_entry_index);
@@ -270,7 +270,9 @@ fib_table_fwding_dpo_remove (u32 fib_index,
        return (ip4_fib_table_fwding_dpo_remove(ip4_fib_get(fib_index),
                                                &prefix->fp_addr.ip4,
                                                prefix->fp_len,
-                                               dpo));
+                                               dpo,
+                                                fib_table_get_less_specific(fib_index,
+                                                                            prefix)));
     case FIB_PROTOCOL_IP6:
        return (ip6_fib_table_fwding_dpo_remove(fib_index,
                                                &prefix->fp_addr.ip6,
@@ -1034,13 +1036,13 @@ fib_table_destroy (fib_table_t *fib_table)
     switch (fib_table->ft_proto)
     {
     case FIB_PROTOCOL_IP4:
-       ip4_fib_table_destroy(&fib_table->v4);
+       ip4_fib_table_destroy(fib_table->ft_index);
        break;
     case FIB_PROTOCOL_IP6:
        ip6_fib_table_destroy(fib_table->ft_index);
        break;
     case FIB_PROTOCOL_MPLS:
-       mpls_fib_table_destroy(&fib_table->mpls);
+       mpls_fib_table_destroy(fib_table->ft_index);
        break;
     }
 }
index e7e66ac..b310aea 100644 (file)
  */
 typedef struct fib_table_t_
 {
-    /**
-     * A union of the protocol specific FIBs that provide the
-     * underlying LPM mechanism.
-     * This element is first in the struct so that it is in the
-     * first cache line.
-     */
-    union {
-       ip4_fib_t v4;
-       ip6_fib_t v6;
-       mpls_fib_t mpls;
-    };
-
     /**
      * Which protocol this table serves. Used to switch on the union above.
      */
index 1a9cce2..92141dd 100644 (file)
@@ -40,8 +40,6 @@
        fformat(stderr, "FAIL:%d: " _comment "\n",              \
                __LINE__, ##_args);                             \
     } else {                                                   \
-       fformat(stderr, "PASS:%d: " _comment "\n",              \
-               __LINE__, ##_args);                             \
     }                                                          \
     _evald;                                                    \
 })
@@ -5727,7 +5725,7 @@ fib_test_label (void)
                                     &a_o_10_10_11_1,
                                     &adj_o_10_10_11_2),
             "1.1.1.1/32 LB 2 buckets via: "
-            "adj over 10.10.11.1",
+            "adj over 10.10.11.1, "
             "adj-v4 over 10.10.11.2");
 
     fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
@@ -5738,7 +5736,7 @@ fib_test_label (void)
                                     &a_o_10_10_11_1,
                                     &adj_o_10_10_11_2),
             "24001/eos LB 2 buckets via: "
-            "adj over 10.10.11.1",
+            "adj over 10.10.11.1, "
             "adj-v4 over 10.10.11.2");
 
     fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
index a791562..98d4e52 100644 (file)
@@ -104,29 +104,35 @@ static u32
 ip4_create_fib_with_table_id (u32 table_id)
 {
     fib_table_t *fib_table;
+    ip4_fib_t *v4_fib;
 
     pool_get_aligned(ip4_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES);
     memset(fib_table, 0, sizeof(*fib_table));
 
+    pool_get_aligned(ip4_main.v4_fibs, v4_fib, CLIB_CACHE_LINE_BYTES);
+
+    ASSERT((fib_table - ip4_main.fibs) ==
+           (v4_fib - ip4_main.v4_fibs));
+
     fib_table->ft_proto = FIB_PROTOCOL_IP4;
     fib_table->ft_index =
-       fib_table->v4.index =
+       v4_fib->index =
            (fib_table - ip4_main.fibs);
 
     hash_set (ip4_main.fib_index_by_table_id, table_id, fib_table->ft_index);
 
     fib_table->ft_table_id =
-       fib_table->v4.table_id =
+       v4_fib->table_id =
            table_id;
     fib_table->ft_flow_hash_config = 
-       fib_table->v4.flow_hash_config =
+       v4_fib->flow_hash_config =
            IP_FLOW_HASH_DEFAULT;
-    fib_table->v4.fwd_classify_table_index = ~0;
-    fib_table->v4.rev_classify_table_index = ~0;
+    v4_fib->fwd_classify_table_index = ~0;
+    v4_fib->rev_classify_table_index = ~0;
     
     fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4);
 
-    ip4_mtrie_init(&fib_table->v4.mtrie);
+    ip4_mtrie_init(&v4_fib->mtrie);
 
     /*
      * add the special entries into the new FIB
@@ -151,9 +157,10 @@ ip4_create_fib_with_table_id (u32 table_id)
 }
 
 void
-ip4_fib_table_destroy (ip4_fib_t *fib)
+ip4_fib_table_destroy (u32 fib_index)
 {
-    fib_table_t *fib_table = (fib_table_t*)fib;
+    fib_table_t *fib_table = pool_elt_at_index(ip4_main.fibs, fib_index);
+    ip4_fib_t *v4_fib = pool_elt_at_index(ip4_main.v4_fibs, fib_index);
     int ii;
 
     /*
@@ -185,6 +192,10 @@ ip4_fib_table_destroy (ip4_fib_t *fib)
     {
        hash_unset (ip4_main.fib_index_by_table_id, fib_table->ft_table_id);
     }
+
+    ip4_mtrie_free(&v4_fib->mtrie);
+
+    pool_put(ip4_main.v4_fibs, v4_fib);
     pool_put(ip4_main.fibs, fib_table);
 }
 
@@ -367,16 +378,33 @@ ip4_fib_table_fwding_dpo_update (ip4_fib_t *fib,
                                 u32 len,
                                 const dpo_id_t *dpo)
 {
-    ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 0); // ADD
+    ip4_fib_mtrie_route_add(&fib->mtrie, addr, len, dpo->dpoi_index);
 }
 
 void
 ip4_fib_table_fwding_dpo_remove (ip4_fib_t *fib,
                                 const ip4_address_t *addr,
                                 u32 len,
-                                const dpo_id_t *dpo)
+                                const dpo_id_t *dpo,
+                                 u32 cover_index)
 {
-    ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 1); // DELETE
+    fib_prefix_t cover_prefix = {
+        .fp_len = 0,
+    };
+    const dpo_id_t *cover_dpo;
+
+    /*
+     * We need to pass the MTRIE the LB index and address length of the
+     * covering prefix, so it can fill the plys with the correct replacement
+     * for the entry being removed
+     */
+    fib_entry_get_prefix(cover_index, &cover_prefix);
+    cover_dpo = fib_entry_contribute_ip_forwarding(cover_index);
+
+    ip4_fib_mtrie_route_del(&fib->mtrie,
+                            addr, len, dpo->dpoi_index,
+                            cover_prefix.fp_len,
+                            cover_dpo->dpoi_index);
 }
 
 void
@@ -498,7 +526,7 @@ ip4_show_fib (vlib_main_t * vm,
 
     pool_foreach (fib_table, im4->fibs,
     ({
-       ip4_fib_t *fib = &fib_table->v4;
+       ip4_fib_t *fib = pool_elt_at_index(im4->v4_fibs, fib_table->ft_index);
 
        if (table_id >= 0 && table_id != (int)fib->table_id)
            continue;
@@ -523,6 +551,11 @@ ip4_show_fib (vlib_main_t * vm,
            }
            continue;
        }
+       if (mtrie)
+        {
+           vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie);
+            continue;
+        }
 
        if (!matching)
        {
@@ -532,9 +565,6 @@ ip4_show_fib (vlib_main_t * vm,
        {
            ip4_fib_table_show_one(fib, vm, &matching_address, matching_mask);
        }
-
-       if (mtrie)
-           vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie);
     }));
 
     return 0;
index 243fd77..4cf9e58 100644 (file)
 #include <vnet/ip/ip.h>
 #include <vnet/fib/fib_entry.h>
 #include <vnet/fib/fib_table.h>
+#include <vnet/ip/ip4_mtrie.h>
+
+typedef struct ip4_fib_t_
+{
+  /**
+   * Mtrie for fast lookups. Hash is used to maintain overlapping prefixes.
+   * First member so it's in the first cacheline.
+   */
+  ip4_fib_mtrie_t mtrie;
+
+  /* Hash table for each prefix length mapping. */
+  uword *fib_entry_by_dst_address[33];
+
+  /* Table ID (hash key) for this FIB. */
+  u32 table_id;
+
+  /* Index into FIB vector. */
+  u32 index;
+
+  /* flow hash configuration */
+  flow_hash_config_t flow_hash_config;
+
+  /* N-tuple classifier indices */
+  u32 fwd_classify_table_index;
+  u32 rev_classify_table_index;
+
+} ip4_fib_t;
 
 extern fib_node_index_t ip4_fib_table_lookup(const ip4_fib_t *fib,
                                             const ip4_address_t *addr,
@@ -50,7 +77,7 @@ extern void ip4_fib_table_entry_insert(ip4_fib_t *fib,
                                       const ip4_address_t *addr,
                                       u32 len,
                                       fib_node_index_t fib_entry_index);
-extern void ip4_fib_table_destroy(ip4_fib_t *fib);
+extern void ip4_fib_table_destroy(u32 fib_index);
 
 extern void ip4_fib_table_fwding_dpo_update(ip4_fib_t *fib,
                                            const ip4_address_t *addr,
@@ -60,7 +87,8 @@ extern void ip4_fib_table_fwding_dpo_update(ip4_fib_t *fib,
 extern void ip4_fib_table_fwding_dpo_remove(ip4_fib_t *fib,
                                            const ip4_address_t *addr,
                                            u32 len,
-                                           const dpo_id_t *dpo);
+                                           const dpo_id_t *dpo,
+                                            fib_node_index_t cover_index);
 extern u32 ip4_fib_table_lookup_lb (ip4_fib_t *fib,
                                    const ip4_address_t * dst);
 
@@ -79,7 +107,7 @@ extern void ip4_fib_table_walk(ip4_fib_t *fib,
 static inline ip4_fib_t *
 ip4_fib_get (u32 index)
 {
-    return (&(pool_elt_at_index(ip4_main.fibs, index)->v4));
+    return (pool_elt_at_index(ip4_main.v4_fibs, index));
 }
 
 always_inline u32
@@ -134,7 +162,6 @@ ip4_fib_forwarding_lookup (u32 fib_index,
     mtrie = &ip4_fib_get(fib_index)->mtrie;
 
     leaf = ip4_fib_mtrie_lookup_step_one (mtrie, addr);
-    leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 1);
     leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 2);
     leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 3);
 
index 343ff55..0ee029d 100644 (file)
@@ -55,22 +55,29 @@ static u32
 create_fib_with_table_id (u32 table_id)
 {
     fib_table_t *fib_table;
+    ip6_fib_t *v6_fib;
 
     pool_get_aligned(ip6_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES);
+    pool_get_aligned(ip6_main.v6_fibs, v6_fib, CLIB_CACHE_LINE_BYTES);
+
     memset(fib_table, 0, sizeof(*fib_table));
+    memset(v6_fib, 0, sizeof(*v6_fib));
 
+    ASSERT((fib_table - ip6_main.fibs) ==
+           (v6_fib - ip6_main.v6_fibs));
+    
     fib_table->ft_proto = FIB_PROTOCOL_IP6;
     fib_table->ft_index =
-       fib_table->v6.index =
-           (fib_table - ip6_main.fibs);
+           v6_fib->index =
+                (fib_table - ip6_main.fibs);
 
     hash_set(ip6_main.fib_index_by_table_id, table_id, fib_table->ft_index);
 
     fib_table->ft_table_id =
-       fib_table->v6.table_id =
+       v6_fib->table_id =
            table_id;
     fib_table->ft_flow_hash_config = 
-       fib_table->v6.flow_hash_config =
+       v6_fib->flow_hash_config =
            IP_FLOW_HASH_DEFAULT;
 
     vnet_ip6_fib_init(fib_table->ft_index);
@@ -188,6 +195,7 @@ ip6_fib_table_destroy (u32 fib_index)
     {
        hash_unset (ip6_main.fib_index_by_table_id, fib_table->ft_table_id);
     }
+    pool_put_index(ip6_main.v6_fibs, fib_table->ft_index);
     pool_put(ip6_main.fibs, fib_table);
 }
 
@@ -620,7 +628,7 @@ ip6_show_fib (vlib_main_t * vm,
 
     pool_foreach (fib_table, im6->fibs,
     ({
-       fib = &(fib_table->v6);
+       fib = pool_elt_at_index(im6->v6_fibs, fib_table->ft_index);
        if (table_id >= 0 && table_id != (int)fib->table_id)
            continue;
        if (fib_index != ~0 && fib_index != (int)fib->index)
index af864a7..e2f2845 100644 (file)
@@ -115,7 +115,7 @@ static inline ip6_fib_t *
 ip6_fib_get (fib_node_index_t index)
 {
     ASSERT(!pool_is_free_index(ip6_main.fibs, index));
-    return (&pool_elt_at_index (ip6_main.fibs, index)->v6);
+    return (pool_elt_at_index (ip6_main.v6_fibs, index));
 }
 
 static inline 
index 5cd0fd2..4b2b76e 100644 (file)
@@ -97,11 +97,15 @@ mpls_fib_create_with_table_id (u32 table_id)
     int i;
 
     pool_get_aligned(mpls_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES);
+    pool_get_aligned(mpls_main.mpls_fibs, mf, CLIB_CACHE_LINE_BYTES);
+
+    ASSERT((fib_table - mpls_main.fibs) ==
+           (mf - mpls_main.mpls_fibs));
+
     memset(fib_table, 0, sizeof(*fib_table));
 
     fib_table->ft_proto = FIB_PROTOCOL_MPLS;
-    fib_table->ft_index =
-       (fib_table - mpls_main.fibs);
+    fib_table->ft_index = (fib_table - mpls_main.fibs);
 
     hash_set (mpls_main.fib_index_by_table_id, table_id, fib_table->ft_index);
 
@@ -109,8 +113,6 @@ mpls_fib_create_with_table_id (u32 table_id)
        table_id;
     fib_table->ft_flow_hash_config = 
        MPLS_FLOW_HASH_DEFAULT;
-    fib_table->v4.fwd_classify_table_index = ~0;
-    fib_table->v4.rev_classify_table_index = ~0;
     
     fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS);
 
@@ -122,7 +124,6 @@ mpls_fib_create_with_table_id (u32 table_id)
                                 drop_dpo_get(DPO_PROTO_MPLS));
     }
 
-    mf = &fib_table->mpls;
     mf->mf_entries = hash_create(0, sizeof(fib_node_index_t));
     for (i = 0; i < MPLS_FIB_DB_SIZE; i++)
     {
@@ -241,9 +242,10 @@ mpls_fib_table_create_and_lock (void)
 }
 
 void
-mpls_fib_table_destroy (mpls_fib_t *mf)
+mpls_fib_table_destroy (u32 fib_index)
 {
-    fib_table_t *fib_table = (fib_table_t*)mf;
+    fib_table_t *fib_table = pool_elt_at_index(mpls_main.fibs, fib_index);
+    mpls_fib_t *mf = pool_elt_at_index(mpls_main.mpls_fibs, fib_index);
     fib_prefix_t prefix = {
        .fp_proto = FIB_PROTOCOL_MPLS,
     };
@@ -274,6 +276,7 @@ mpls_fib_table_destroy (mpls_fib_t *mf)
     }
     hash_free(mf->mf_entries);
 
+    pool_put(mpls_main.mpls_fibs, mf);
     pool_put(mpls_main.fibs, fib_table);
 }
 
@@ -436,11 +439,11 @@ mpls_fib_show (vlib_main_t * vm,
 
        if (MPLS_LABEL_INVALID == label)
        {
-           mpls_fib_table_show_all(&(fib_table->mpls), vm);
+           mpls_fib_table_show_all(mpls_fib_get(fib_table->ft_index), vm);
        }
        else
        {
-           mpls_fib_table_show_one(&(fib_table->mpls), label, vm);
+           mpls_fib_table_show_one(mpls_fib_get(fib_table->ft_index), label, vm);
        }
     }));
 
index 779deca..78a61a1 100644 (file)
 #include <vnet/mpls/mpls.h>
 #include <vnet/fib/fib_table.h>
 
+#define MPLS_FIB_DEFAULT_TABLE_ID 0
+
+/**
+ * Type exposure is to allow the DP fast/inlined access
+ */
+#define MPLS_FIB_KEY_SIZE 21
+#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1))
+
+typedef struct mpls_fib_t_
+{
+  /**
+   * A hash table of entries. 21 bit key
+   * Hash table for reduced memory footprint
+   */
+  uword * mf_entries;
+
+  /**
+   * The load-balance indices keyed by 21 bit label+eos bit.
+   * A flat array for maximum lookup performace.
+   */
+  index_t mf_lbs[MPLS_FIB_DB_SIZE];
+} mpls_fib_t;
+
 static inline mpls_fib_t*
 mpls_fib_get (fib_node_index_t index)
 {
-    return (&(pool_elt_at_index(mpls_main.fibs, index)->mpls));
+    return (pool_elt_at_index(mpls_main.mpls_fibs, index));
 }
 
 extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id);
@@ -56,8 +79,7 @@ extern void mpls_fib_table_entry_insert(mpls_fib_t *mf,
                                        mpls_label_t label,
                                        mpls_eos_bit_t eos,
                                        fib_node_index_t fei);
-extern void mpls_fib_table_destroy(mpls_fib_t *mf);
-
+extern void mpls_fib_table_destroy(u32 fib_index);
 
 
 extern void mpls_fib_forwarding_table_update(mpls_fib_t *mf,
index 4e075d0..71640de 100644 (file)
 #ifndef included_ip_ip4_h
 #define included_ip_ip4_h
 
-#include <vnet/ip/ip4_mtrie.h>
 #include <vnet/ip/ip4_packet.h>
 #include <vnet/ip/lookup.h>
 #include <vnet/feature/feature.h>
 
-typedef struct ip4_fib_t
-{
-  /* Hash table for each prefix length mapping. */
-  uword *fib_entry_by_dst_address[33];
-
-  /* Mtrie for fast lookups.  Hash is used to maintain overlapping prefixes. */
-  ip4_fib_mtrie_t mtrie;
-
-  /* Table ID (hash key) for this FIB. */
-  u32 table_id;
-
-  /* Index into FIB vector. */
-  u32 index;
-
-  /* flow hash configuration */
-  flow_hash_config_t flow_hash_config;
-
-  /* N-tuple classifier indices */
-  u32 fwd_classify_table_index;
-  u32 rev_classify_table_index;
-
-} ip4_fib_t;
-
 typedef struct ip4_mfib_t
 {
   /* Hash table for each prefix length mapping. */
@@ -111,6 +87,9 @@ typedef struct ip4_main_t
   /** Vector of FIBs. */
   struct fib_table_t_ *fibs;
 
+  /** Vector of MTries. */
+  struct ip4_fib_t_ *v4_fibs;
+
   /** Vector of MFIBs. */
   struct mfib_table_t_ *mfibs;
 
@@ -284,8 +263,6 @@ serialize_function_t serialize_vnet_ip4_main, unserialize_vnet_ip4_main;
 int vnet_set_ip4_flow_hash (u32 table_id,
                            flow_hash_config_t flow_hash_config);
 
-void ip4_mtrie_init (ip4_fib_mtrie_t * m);
-
 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
                                 u32 table_index);
 
index ef6dded..ee1703e 100644 (file)
@@ -182,7 +182,6 @@ ip4_lookup_inline (vlib_main_t * vm,
              mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
              mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
 
-
              leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
              leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
              leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
@@ -194,14 +193,6 @@ ip4_lookup_inline (vlib_main_t * vm,
          tcp2 = (void *) (ip2 + 1);
          tcp3 = (void *) (ip3 + 1);
 
-         if (!lookup_for_responses_to_locally_received_packets)
-           {
-             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
-             leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
-             leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
-             leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
-           }
-
          if (!lookup_for_responses_to_locally_received_packets)
            {
              leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
@@ -363,9 +354,6 @@ ip4_lookup_inline (vlib_main_t * vm,
 
          tcp0 = (void *) (ip0 + 1);
 
-         if (!lookup_for_responses_to_locally_received_packets)
-           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
-
          if (!lookup_for_responses_to_locally_received_packets)
            leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
 
@@ -1622,11 +1610,6 @@ ip4_local_inline (vlib_main_t * vm,
          good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
          good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
 
-         leaf0 =
-           ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
-         leaf1 =
-           ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
-
          /* Verify UDP length. */
          ip_len0 = clib_net_to_host_u16 (ip0->length);
          ip_len1 = clib_net_to_host_u16 (ip1->length);
@@ -1812,9 +1795,6 @@ ip4_local_inline (vlib_main_t * vm,
          /* Don't verify UDP checksum for packets with explicit zero checksum. */
          good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
 
-         leaf0 =
-           ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
-
          /* Verify UDP length. */
          ip_len0 = clib_net_to_host_u16 (ip0->length);
          udp_len0 = clib_net_to_host_u16 (udp0->length);
@@ -2913,7 +2893,6 @@ ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
 
   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
-  leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
 
index 317d8f1..adc9512 100644 (file)
  */
 
 #include <vnet/ip/ip.h>
-#include <vnet/fib/fib_entry.h>
+#include <vnet/ip/ip4_mtrie.h>
+#include <vnet/fib/ip4_fib.h>
+
+
+/**
+ * Global pool of IPv4 8bit PLYs
+ */
+ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
 
 always_inline u32
-ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_ply_t * p, u8 dst_byte)
+ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_8_ply_t * p, u8 dst_byte)
 {
   /*
    * It's 'non-empty' if the length of the leaf stored is greater than the
@@ -84,61 +91,83 @@ ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
   return l;
 }
 
-static void
-ply_init (ip4_fib_mtrie_ply_t * p,
-         ip4_fib_mtrie_leaf_t init, u32 prefix_len, u32 ply_base_len)
-{
-  /*
-   * A leaf is 'empty' if it represents a leaf from the covering PLY
-   * i.e. if the prefix length of the leaf is less than or equal to
-   * the prefix length of the PLY
-   */
-  p->n_non_empty_leafs = (prefix_len > ply_base_len ?
-                         ARRAY_LEN (p->leaves) : 0);
-  memset (p->dst_address_bits_of_leaves, prefix_len,
-         sizeof (p->dst_address_bits_of_leaves));
-  p->dst_address_bits_base = ply_base_len;
-
-  /* Initialize leaves. */
-#ifdef CLIB_HAVE_VEC128
-  {
-    u32x4 *l, init_x4;
-
 #ifndef __ALTIVEC__
-    init_x4 = u32x4_splat (init);
+#define PLY_X4_SPLAT_INIT(init_x4, init) \
+  init_x4 = u32x4_splat (init);
 #else
-    {
-      u32x4_union_t y;
-      y.as_u32[0] = init;
-      y.as_u32[1] = init;
-      y.as_u32[2] = init;
-      y.as_u32[3] = init;
-      init_x4 = y.as_u32x4;
-    }
+#define PLY_X4_SPLAT_INIT(init_x4, init)                                \
+{                                                                       \
+  u32x4_union_t y;                                                      \
+  y.as_u32[0] = init;                                                   \
+  y.as_u32[1] = init;                                                   \
+  y.as_u32[2] = init;                                                   \
+  y.as_u32[3] = init;                                                   \
+  init_x4 = y.as_u32x4;                                                 \
+}
 #endif
 
-    for (l = p->leaves_as_u32x4;
-        l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); l += 4)
-      {
-       l[0] = init_x4;
-       l[1] = init_x4;
-       l[2] = init_x4;
-       l[3] = init_x4;
-      }
-  }
+#ifdef CLIB_HAVE_VEC128
+#define PLY_INIT_LEAVES(p)                                              \
+{                                                                       \
+    u32x4 *l, init_x4;                                                  \
+                                                                        \
+    PLY_X4_SPLAT_INIT(init_x4, init);                                   \
+    for (l = p->leaves_as_u32x4;                                        \
+        l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4);       \
+         l += 4)                                                        \
+      {                                                                 \
+       l[0] = init_x4;                                                 \
+       l[1] = init_x4;                                                 \
+       l[2] = init_x4;                                                 \
+       l[3] = init_x4;                                                 \
+      }                                                                 \
+}
 #else
-  {
-    u32 *l;
-
-    for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4)
-      {
-       l[0] = init;
-       l[1] = init;
-       l[2] = init;
-       l[3] = init;
-      }
-  }
+#define PLY_INIT_LEAVES(p)                                              \
+{                                                                       \
+  u32 *l;                                                               \
+                                                                        \
+  for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4)    \
+    {                                                                   \
+      l[0] = init;                                                      \
+      l[1] = init;                                                      \
+      l[2] = init;                                                      \
+      l[3] = init;                                                      \
+      }                                                                 \
+}
 #endif
+
+#define PLY_INIT(p, init, prefix_len, ply_base_len)                     \
+{                                                                       \
+  /*                                                                    \
+   * A leaf is 'empty' if it represents a leaf from the covering PLY    \
+   * i.e. if the prefix length of the leaf is less than or equal to     \
+   * the prefix length of the PLY                                       \
+   */                                                                   \
+  p->n_non_empty_leafs = (prefix_len > ply_base_len ?                   \
+                         ARRAY_LEN (p->leaves) : 0);                   \
+  memset (p->dst_address_bits_of_leaves, prefix_len,                    \
+         sizeof (p->dst_address_bits_of_leaves));                      \
+  p->dst_address_bits_base = ply_base_len;                              \
+                                                                        \
+  /* Initialize leaves. */                                              \
+  PLY_INIT_LEAVES(p);                                                   \
+}
+
+static void
+ply_8_init (ip4_fib_mtrie_8_ply_t * p,
+           ip4_fib_mtrie_leaf_t init, uword prefix_len, u32 ply_base_len)
+{
+  PLY_INIT (p, init, prefix_len, ply_base_len);
+}
+
+static void
+ply_16_init (ip4_fib_mtrie_16_ply_t * p,
+            ip4_fib_mtrie_leaf_t init, uword prefix_len)
+{
+  memset (p->dst_address_bits_of_leaves, prefix_len,
+         sizeof (p->dst_address_bits_of_leaves));
+  PLY_INIT_LEAVES (p);
 }
 
 static ip4_fib_mtrie_leaf_t
@@ -146,49 +175,43 @@ ply_create (ip4_fib_mtrie_t * m,
            ip4_fib_mtrie_leaf_t init_leaf,
            u32 leaf_prefix_len, u32 ply_base_len)
 {
-  ip4_fib_mtrie_ply_t *p;
+  ip4_fib_mtrie_8_ply_t *p;
 
   /* Get cache aligned ply. */
-  pool_get_aligned (m->ply_pool, p, sizeof (p[0]));
+  pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES);
 
-  ply_init (p, init_leaf, leaf_prefix_len, ply_base_len);
-  return ip4_fib_mtrie_leaf_set_next_ply_index (p - m->ply_pool);
+  ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len);
+  return ip4_fib_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool);
 }
 
-always_inline ip4_fib_mtrie_ply_t *
+always_inline ip4_fib_mtrie_8_ply_t *
 get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l)
 {
   uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l);
-  /* It better not be the root ply. */
-  ASSERT (n != 0);
-  return pool_elt_at_index (m->ply_pool, n);
+
+  return pool_elt_at_index (ip4_ply_pool, n);
 }
 
-static void
-ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
+void
+ip4_mtrie_free (ip4_fib_mtrie_t * m)
 {
-  uword i, is_root;
-
-  is_root = p - m->ply_pool == 0;
-
-  for (i = 0; i < ARRAY_LEN (p->leaves); i++)
+  /* the root ply is embedded so there is nothing to do,
+   * the assumption being that the IP4 FIB table has emptied the trie
+   * before deletion.
+   */
+#if CLIB_DEBUG > 0
+  int i;
+  for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
     {
-      ip4_fib_mtrie_leaf_t l = p->leaves[i];
-      if (ip4_fib_mtrie_leaf_is_next_ply (l))
-       ply_free (m, get_next_ply_for_leaf (m, l));
+      ASSERT (!ip4_fib_mtrie_leaf_is_next_ply (m->root_ply.leaves[i]));
     }
-
-  if (is_root)
-    ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0, 0);
-  else
-    pool_put (m->ply_pool, p);
+#endif
 }
 
 void
-ip4_fib_free (ip4_fib_mtrie_t * m)
+ip4_mtrie_init (ip4_fib_mtrie_t * m)
 {
-  ip4_fib_mtrie_ply_t *root_ply = pool_elt_at_index (m->ply_pool, 0);
-  ply_free (m, root_ply);
+  ply_16_init (&m->root_ply, IP4_FIB_MTRIE_LEAF_EMPTY, 0);
 }
 
 typedef struct
@@ -202,7 +225,7 @@ typedef struct
 
 static void
 set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
-                                ip4_fib_mtrie_ply_t * ply,
+                                ip4_fib_mtrie_8_ply_t * ply,
                                 ip4_fib_mtrie_leaf_t new_leaf,
                                 uword new_leaf_dst_address_bits)
 {
@@ -218,7 +241,8 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
       /* Recurse into sub plies. */
       if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf))
        {
-         ip4_fib_mtrie_ply_t *sub_ply = get_next_ply_for_leaf (m, old_leaf);
+         ip4_fib_mtrie_8_ply_t *sub_ply =
+           get_next_ply_for_leaf (m, old_leaf);
          set_ply_with_more_specific_leaf (m, sub_ply, new_leaf,
                                           new_leaf_dst_address_bits);
        }
@@ -237,16 +261,20 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
 
 static void
 set_leaf (ip4_fib_mtrie_t * m,
-         ip4_fib_mtrie_set_unset_leaf_args_t * a,
+         const ip4_fib_mtrie_set_unset_leaf_args_t * a,
          u32 old_ply_index, u32 dst_address_byte_index)
 {
   ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
   i32 n_dst_bits_next_plies;
   u8 dst_byte;
+  ip4_fib_mtrie_8_ply_t *old_ply;
+
+  old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
 
   ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
   ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
 
+  /* how many bits of the destination address are in the next PLY */
   n_dst_bits_next_plies =
     a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
 
@@ -255,30 +283,36 @@ set_leaf (ip4_fib_mtrie_t * m,
   /* Number of bits next plies <= 0 => insert leaves this ply. */
   if (n_dst_bits_next_plies <= 0)
     {
+      /* The mask length of the address to insert maps to this ply */
       uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
 
+      /* The number of bits, and hence slots/buckets, we will fill */
       n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies);
       ASSERT ((a->dst_address.as_u8[dst_address_byte_index] &
               pow2_mask (n_dst_bits_this_ply)) == 0);
 
+      /* Starting at the value of the byte at this section of the v4 address
+       * fill the buckets/slots of the ply */
       for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
        {
-         ip4_fib_mtrie_ply_t *old_ply, *new_ply;
-
-         old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+         ip4_fib_mtrie_8_ply_t *new_ply;
 
          old_leaf = old_ply->leaves[i];
          old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
 
-         /* Is leaf to be inserted more specific? */
          if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
            {
+             /* The new leaf is more or equally specific than the one currently
+              * occupying the slot */
              new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
 
              if (old_leaf_is_terminal)
                {
+                 /* The current leaf is terminal, we can replace it with
+                  * the new one */
                  old_ply->n_non_empty_leafs -=
                    ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
+
                  old_ply->dst_address_bits_of_leaves[i] =
                    a->dst_address_length;
                  __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf,
@@ -292,32 +326,42 @@ set_leaf (ip4_fib_mtrie_t * m,
                }
              else
                {
-                 /* Existing leaf points to another ply.  We need to place new_leaf into all
-                    more specific slots. */
+                 /* Existing leaf points to another ply.  We need to place
+                  * new_leaf into all more specific slots. */
                  new_ply = get_next_ply_for_leaf (m, old_leaf);
                  set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
                                                   a->dst_address_length);
                }
            }
-
          else if (!old_leaf_is_terminal)
            {
+             /* The current leaf is less specific and not terminal (i.e. a ply),
+              * recurse on down the trie */
              new_ply = get_next_ply_for_leaf (m, old_leaf);
-             set_leaf (m, a, new_ply - m->ply_pool,
+             set_leaf (m, a, new_ply - ip4_ply_pool,
                        dst_address_byte_index + 1);
            }
+         /*
+          * else
+          *  the route we are adding is less specific than the leaf currently
+          *  occupying this slot. leave it there
+          */
        }
     }
   else
     {
-      ip4_fib_mtrie_ply_t *old_ply, *new_ply;
+      /* The address to insert requires us to move down to a lower level of
+       * the trie - recurse on down */
+      ip4_fib_mtrie_8_ply_t *new_ply;
       u8 ply_base_len;
 
       ply_base_len = 8 * (dst_address_byte_index + 1);
-      old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+
       old_leaf = old_ply->leaves[dst_byte];
+
       if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
        {
+         /* There is a leaf occupying the slot. Replace it with a new ply */
          old_ply->n_non_empty_leafs -=
            ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
 
@@ -328,28 +372,143 @@ set_leaf (ip4_fib_mtrie_t * m,
          new_ply = get_next_ply_for_leaf (m, new_leaf);
 
          /* Refetch since ply_create may move pool. */
-         old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+         old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
 
          __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
                                       new_leaf);
          ASSERT (old_ply->leaves[dst_byte] == new_leaf);
          old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
 
-         /* Account for the ply we just created. */
-         old_ply->n_non_empty_leafs += 1;
+         old_ply->n_non_empty_leafs +=
+           ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
          ASSERT (old_ply->n_non_empty_leafs >= 0);
        }
       else
        new_ply = get_next_ply_for_leaf (m, old_leaf);
 
-      set_leaf (m, a, new_ply - m->ply_pool, dst_address_byte_index + 1);
+      set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
+    }
+}
+
+static void
+set_root_leaf (ip4_fib_mtrie_t * m,
+              const ip4_fib_mtrie_set_unset_leaf_args_t * a)
+{
+  ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
+  ip4_fib_mtrie_16_ply_t *old_ply;
+  i32 n_dst_bits_next_plies;
+  u16 dst_byte;
+
+  old_ply = &m->root_ply;
+
+  ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
+
+  /* how many bits of the destination address are in the next PLY */
+  n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
+
+  dst_byte = a->dst_address.as_u16[0];
+
+  /* Number of bits next plies <= 0 => insert leaves this ply. */
+  if (n_dst_bits_next_plies <= 0)
+    {
+      /* The mask length of the address to insert maps to this ply */
+      uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
+
+      /* The number of bits, and hence slots/buckets, we will fill */
+      n_dst_bits_this_ply = 16 - a->dst_address_length;
+      ASSERT ((clib_host_to_net_u16 (a->dst_address.as_u16[0]) &
+              pow2_mask (n_dst_bits_this_ply)) == 0);
+
+      /* Starting at the value of the byte at this section of the v4 address
+       * fill the buckets/slots of the ply */
+      for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
+       {
+         ip4_fib_mtrie_8_ply_t *new_ply;
+         u16 slot;
+
+         slot = clib_net_to_host_u16 (dst_byte);
+         slot += i;
+         slot = clib_host_to_net_u16 (slot);
+
+         old_leaf = old_ply->leaves[slot];
+         old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+         if (a->dst_address_length >=
+             old_ply->dst_address_bits_of_leaves[slot])
+           {
+             /* The new leaf is more or equally specific than the one currently
+              * occupying the slot */
+             new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+             if (old_leaf_is_terminal)
+               {
+                 /* The current leaf is terminal, we can replace it with
+                  * the new one */
+                 old_ply->dst_address_bits_of_leaves[slot] =
+                   a->dst_address_length;
+                 __sync_val_compare_and_swap (&old_ply->leaves[slot],
+                                              old_leaf, new_leaf);
+                 ASSERT (old_ply->leaves[slot] == new_leaf);
+               }
+             else
+               {
+                 /* Existing leaf points to another ply.  We need to place
+                  * new_leaf into all more specific slots. */
+                 new_ply = get_next_ply_for_leaf (m, old_leaf);
+                 set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
+                                                  a->dst_address_length);
+               }
+           }
+         else if (!old_leaf_is_terminal)
+           {
+             /* The current leaf is less specific and not terminal (i.e. a ply),
+              * recurse on down the trie */
+             new_ply = get_next_ply_for_leaf (m, old_leaf);
+             set_leaf (m, a, new_ply - ip4_ply_pool, 2);
+           }
+         /*
+          * else
+          *  the route we are adding is less specific than the leaf currently
+          *  occupying this slot. leave it there
+          */
+       }
+    }
+  else
+    {
+      /* The address to insert requires us to move down to a lower level of
+       * the trie - recurse on down */
+      ip4_fib_mtrie_8_ply_t *new_ply;
+      u8 ply_base_len;
+
+      ply_base_len = 16;
+
+      old_leaf = old_ply->leaves[dst_byte];
+
+      if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
+       {
+         /* There is a leaf occupying the slot. Replace it with a new ply */
+         new_leaf = ply_create (m, old_leaf,
+                                clib_max (old_ply->dst_address_bits_of_leaves
+                                          [dst_byte], ply_base_len),
+                                ply_base_len);
+         new_ply = get_next_ply_for_leaf (m, new_leaf);
+
+         __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
+                                      new_leaf);
+         ASSERT (old_ply->leaves[dst_byte] == new_leaf);
+         old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
+       }
+      else
+       new_ply = get_next_ply_for_leaf (m, old_leaf);
+
+      set_leaf (m, a, new_ply - ip4_ply_pool, 2);
     }
 }
 
 static uword
 unset_leaf (ip4_fib_mtrie_t * m,
-           ip4_fib_mtrie_set_unset_leaf_args_t * a,
-           ip4_fib_mtrie_ply_t * old_ply, u32 dst_address_byte_index)
+           const ip4_fib_mtrie_set_unset_leaf_args_t * a,
+           ip4_fib_mtrie_8_ply_t * old_ply, u32 dst_address_byte_index)
 {
   ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
   i32 n_dst_bits_next_plies;
@@ -397,7 +556,7 @@ unset_leaf (ip4_fib_mtrie_t * m,
          ASSERT (old_ply->n_non_empty_leafs >= 0);
          if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
            {
-             pool_put (m->ply_pool, old_ply);
+             pool_put (ip4_ply_pool, old_ply);
              /* Old ply was deleted. */
              return 1;
            }
@@ -419,106 +578,120 @@ unset_leaf (ip4_fib_mtrie_t * m,
   return 0;
 }
 
-void
-ip4_mtrie_init (ip4_fib_mtrie_t * m)
+static void
+unset_root_leaf (ip4_fib_mtrie_t * m,
+                const ip4_fib_mtrie_set_unset_leaf_args_t * a)
 {
-  ip4_fib_mtrie_leaf_t root;
-  memset (m, 0, sizeof (m[0]));
-  root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, 0, 0);
-  ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (root) == 0);
-}
+  ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
+  i32 n_dst_bits_next_plies;
+  i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
+  u16 dst_byte;
+  ip4_fib_mtrie_16_ply_t *old_ply;
 
-void
-ip4_fib_mtrie_add_del_route (ip4_fib_t * fib,
-                            ip4_address_t dst_address,
-                            u32 dst_address_length,
-                            u32 adj_index, u32 is_del)
-{
-  ip4_fib_mtrie_t *m = &fib->mtrie;
-  ip4_fib_mtrie_ply_t *root_ply;
-  ip4_fib_mtrie_set_unset_leaf_args_t a;
-  ip4_main_t *im = &ip4_main;
+  ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32);
 
-  ASSERT (m->ply_pool != 0);
+  old_ply = &m->root_ply;
+  n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
 
-  root_ply = pool_elt_at_index (m->ply_pool, 0);
+  dst_byte = a->dst_address.as_u16[0];
 
-  /* Honor dst_address_length. Fib masks are in network byte order */
-  dst_address.as_u32 &= im->fib_masks[dst_address_length];
-  a.dst_address = dst_address;
-  a.dst_address_length = dst_address_length;
-  a.adj_index = adj_index;
+  n_dst_bits_this_ply = (n_dst_bits_next_plies <= 0 ?
+                        (16 - a->dst_address_length) : 0);
 
-  if (!is_del)
-    {
-      set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0);
-    }
-  else
+  del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+  /* Starting at the slot for the first 16 bits of the v4 address, visit
+   * each bucket/slot of the ply covered by the prefix being removed */
+  for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
     {
-      ip4_main_t *im = &ip4_main;
+      u16 slot;
+
+      slot = clib_net_to_host_u16 (dst_byte);
+      slot += i;
+      slot = clib_host_to_net_u16 (slot);
 
-      if (dst_address_length)
+      old_leaf = old_ply->leaves[slot];
+      old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+      if (old_leaf == del_leaf
+         || (!old_leaf_is_terminal
+             && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), 2)))
        {
-         word i;
+         old_ply->leaves[slot] =
+           ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
+         old_ply->dst_address_bits_of_leaves[slot] = a->cover_address_length;
+       }
+    }
+}
 
-         /* If the ply was not deleted, then we need to fill the
-          * bucket just reset will the leaf from the less specfic
-          * cover.
-          * Find next less specific route and insert into mtrie. */
-         for (i = dst_address_length - 1; i >= 0; i--)
-           {
-             uword *p;
-             index_t lbi;
-             ip4_address_t key;
+void
+ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
+                        const ip4_address_t * dst_address,
+                        u32 dst_address_length, u32 adj_index)
+{
+  ip4_fib_mtrie_set_unset_leaf_args_t a;
+  ip4_main_t *im = &ip4_main;
 
-             if (!fib->fib_entry_by_dst_address[i])
-               continue;
+  /* Honor dst_address_length. Fib masks are in network byte order */
+  a.dst_address.as_u32 = (dst_address->as_u32 &
+                         im->fib_masks[dst_address_length]);
+  a.dst_address_length = dst_address_length;
+  a.adj_index = adj_index;
 
-             key.as_u32 = dst_address.as_u32 & im->fib_masks[i];
-             p = hash_get (fib->fib_entry_by_dst_address[i], key.as_u32);
-             if (p)
-               {
-                 lbi = fib_entry_contribute_ip_forwarding (p[0])->dpoi_index;
-                 if (INDEX_INVALID == lbi)
-                   continue;
+  set_root_leaf (m, &a);
+}
 
-                 a.cover_adj_index = lbi;
-                 a.cover_address_length = i;
+void
+ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
+                        const ip4_address_t * dst_address,
+                        u32 dst_address_length,
+                        u32 adj_index,
+                        u32 cover_address_length, u32 cover_adj_index)
+{
+  ip4_fib_mtrie_set_unset_leaf_args_t a;
+  ip4_main_t *im = &ip4_main;
 
-                 break;
-               }
-           }
-       }
-      else
-       {
-         a.cover_adj_index = 0;
-         a.cover_address_length = 0;
-       }
+  /* Honor dst_address_length. Fib masks are in network byte order */
+  a.dst_address.as_u32 = (dst_address->as_u32 &
+                         im->fib_masks[dst_address_length]);
+  a.dst_address_length = dst_address_length;
+  a.adj_index = adj_index;
+  a.cover_adj_index = cover_adj_index;
+  a.cover_address_length = cover_address_length;
 
-      /* the top level ply is never removed, so we can ignore the return code */
-      unset_leaf (m, &a, root_ply, 0);
-    }
+  /* the top level ply is never removed */
+  unset_root_leaf (m, &a);
 }
 
 /* Returns number of bytes of memory used by mtrie. */
 static uword
-mtrie_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
+mtrie_ply_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_8_ply_t * p)
 {
   uword bytes, i;
 
-  if (!p)
-    {
-      if (pool_is_free_index (m->ply_pool, 0))
-       return 0;
-      p = pool_elt_at_index (m->ply_pool, 0);
-    }
-
   bytes = sizeof (p[0]);
   for (i = 0; i < ARRAY_LEN (p->leaves); i++)
     {
       ip4_fib_mtrie_leaf_t l = p->leaves[i];
       if (ip4_fib_mtrie_leaf_is_next_ply (l))
-       bytes += mtrie_memory_usage (m, get_next_ply_for_leaf (m, l));
+       bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
+    }
+
+  return bytes;
+}
+
+/* Returns number of bytes of memory used by mtrie. */
+static uword
+mtrie_memory_usage (ip4_fib_mtrie_t * m)
+{
+  uword bytes, i;
+
+  bytes = sizeof (*m);
+  for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
+    {
+      ip4_fib_mtrie_leaf_t l = m->root_ply.leaves[i];
+      if (ip4_fib_mtrie_leaf_is_next_ply (l))
+       bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
     }
 
   return bytes;
@@ -536,47 +709,49 @@ format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
   return s;
 }
 
+#define FORMAT_PLY(s, _p, _i, _base_address, _ply_max_len, _indent)     \
+({                                                                      \
+  u32 a, ia_length;                                                     \
+  ip4_address_t ia;                                                     \
+  ip4_fib_mtrie_leaf_t _l = p->leaves[(_i)];                            \
+                                                                        \
+  a = (_base_address) + ((_i) << (32 - (_ply_max_len)));                \
+  ia.as_u32 = clib_host_to_net_u32 (a);                                 \
+  ia_length = (_p)->dst_address_bits_of_leaves[(_i)];                   \
+  s = format (s, "\n%U%20U %U",                                         \
+              format_white_space, (_indent) + 2,                        \
+              format_ip4_address_and_length, &ia, ia_length,            \
+              format_ip4_fib_mtrie_leaf, _l);                           \
+                                                                        \
+  if (ip4_fib_mtrie_leaf_is_next_ply (_l))                              \
+    s = format (s, "\n%U%U",                                            \
+                format_white_space, (_indent) + 2,                      \
+                format_ip4_fib_mtrie_ply, m, a,                         \
+                ip4_fib_mtrie_leaf_get_next_ply_index (_l));            \
+  s;                                                                    \
+})
+
 static u8 *
 format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
 {
   ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
   u32 base_address = va_arg (*va, u32);
   u32 ply_index = va_arg (*va, u32);
-  u32 dst_address_byte_index = va_arg (*va, u32);
-  ip4_fib_mtrie_ply_t *p;
-  uword i, indent;
+  ip4_fib_mtrie_8_ply_t *p;
+  uword indent;
+  int i;
 
-  p = pool_elt_at_index (m->ply_pool, ply_index);
+  p = pool_elt_at_index (ip4_ply_pool, ply_index);
   indent = format_get_indent (s);
-  s =
-    format (s, "ply index %d, %d non-empty leaves", ply_index,
-           p->n_non_empty_leafs);
+  s = format (s, "ply index %d, %d non-empty leaves", ply_index,
+             p->n_non_empty_leafs);
+
   for (i = 0; i < ARRAY_LEN (p->leaves); i++)
     {
-      ip4_fib_mtrie_leaf_t l = p->leaves[i];
-
       if (ip4_fib_mtrie_leaf_is_non_empty (p, i))
        {
-         u32 a, ia_length;
-         ip4_address_t ia;
-
-         a = base_address + (i << (24 - 8 * dst_address_byte_index));
-         ia.as_u32 = clib_host_to_net_u32 (a);
-         if (ip4_fib_mtrie_leaf_is_terminal (l))
-           ia_length = p->dst_address_bits_of_leaves[i];
-         else
-           ia_length = 8 * (1 + dst_address_byte_index);
-         s = format (s, "\n%U%20U %U",
-                     format_white_space, indent + 2,
-                     format_ip4_address_and_length, &ia, ia_length,
-                     format_ip4_fib_mtrie_leaf, l);
-
-         if (ip4_fib_mtrie_leaf_is_next_ply (l))
-           s = format (s, "\n%U%U",
-                       format_white_space, indent + 2,
-                       format_ip4_fib_mtrie_ply, m, a,
-                       ip4_fib_mtrie_leaf_get_next_ply_index (l),
-                       dst_address_byte_index + 1);
+         FORMAT_PLY (s, p, i, base_address,
+                     p->dst_address_bits_base + 8, indent);
        }
     }
 
@@ -587,22 +762,44 @@ u8 *
 format_ip4_fib_mtrie (u8 * s, va_list * va)
 {
   ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
+  ip4_fib_mtrie_16_ply_t *p;
+  u32 base_address = 0;
+  int i;
 
-  s = format (s, "%d plies, memory usage %U",
-             pool_elts (m->ply_pool),
-             format_memory_size, mtrie_memory_usage (m, 0));
+  s = format (s, "%d plies, memory usage %U\n",
+             pool_elts (ip4_ply_pool),
+             format_memory_size, mtrie_memory_usage (m));
+  s = format (s, "root-ply");
+  p = &m->root_ply;
 
-  if (pool_elts (m->ply_pool) > 0)
+  for (i = 0; i < ARRAY_LEN (p->leaves); i++)
     {
-      ip4_address_t base_address;
-      base_address.as_u32 = 0;
-      s =
-       format (s, "\n  %U", format_ip4_fib_mtrie_ply, m, base_address, 0, 0);
+      u16 slot;
+
+      slot = clib_host_to_net_u16 (i);
+
+      if (p->dst_address_bits_of_leaves[slot] > 0)
+       {
+         FORMAT_PLY (s, p, slot, base_address, 16, 2);
+       }
     }
 
   return s;
 }
 
+static clib_error_t *
+ip4_mtrie_module_init (vlib_main_t * vm)
+{
+  /* Burn one ply so index 0 is taken */
+  CLIB_UNUSED (ip4_fib_mtrie_8_ply_t * p);
+
+  pool_get (ip4_ply_pool, p);
+
+  return (NULL);
+}
+
+VLIB_INIT_FUNCTION (ip4_mtrie_module_init);
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
index 128195d..be262c2 100644 (file)
 
 /* ip4 fib leafs: 4 ply 8-8-8-8 mtrie.
    1 + 2*adj_index for terminal leaves.
-   0 + 2*next_ply_index for non-terminals.
+   0 + 2*next_ply_index for non-terminals, i.e. PLYs
    1 => empty (adjacency index of zero is special miss adjacency). */
 typedef u32 ip4_fib_mtrie_leaf_t;
 
 #define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0)
 
+/**
+ * @brief The 16-bit stride (2^16 slots) that is the top PLY of the mtrie
+ * We do not maintain the count of 'real' leaves in this PLY, since
+ * it is never removed. The FIB will destroy the mtrie and the ply once
+ * the FIB is destroyed.
+ */
+#define PLY_16_SIZE (1<<16)
+typedef struct ip4_fib_mtrie_16_ply_t_
+{
+  /**
+   * The leaves/slots/buckets to be filled with leaves
+   */
+  union
+  {
+    ip4_fib_mtrie_leaf_t leaves[PLY_16_SIZE];
+
+#ifdef CLIB_HAVE_VEC128
+    u32x4 leaves_as_u32x4[PLY_16_SIZE / 4];
+#endif
+  };
+
+  /**
+   * Prefix length for terminal leaves.
+   */
+  u8 dst_address_bits_of_leaves[PLY_16_SIZE];
+} ip4_fib_mtrie_16_ply_t;
+
 /**
  * @brief One ply of the 4 ply mtrie fib.
  */
-typedef struct
+typedef struct ip4_fib_mtrie_8_ply_t_
 {
   /**
    * The leaves/slots/buckets to be filed with leafs
@@ -90,34 +117,72 @@ typedef struct
   /* Pad to cache line boundary. */
   u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)];
 }
-ip4_fib_mtrie_ply_t;
+ip4_fib_mtrie_8_ply_t;
 
-STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_ply_t) % CLIB_CACHE_LINE_BYTES,
+STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_8_ply_t) % CLIB_CACHE_LINE_BYTES,
               "IP4 Mtrie ply cache line");
 
+/**
+ * @brief The multiway-TRIE.
+ * There is no data associated with the mtrie apart from the top PLY
+ */
 typedef struct
 {
-  /* Pool of plies.  Index zero is root ply. */
-  ip4_fib_mtrie_ply_t *ply_pool;
+  /**
+   * Embed the PLY with the mtrie struct. This means that the Data-plane
+   * 'get me the mtrie' returns the first ply, and not an indirect 'pointer'
+   * to it. Therefore no cacheline misses in the data-path.
+   */
+  ip4_fib_mtrie_16_ply_t root_ply;
 } ip4_fib_mtrie_t;
 
-void ip4_fib_mtrie_init (ip4_fib_mtrie_t * m);
+/**
+ * @brief Initialise an mtrie
+ */
+void ip4_mtrie_init (ip4_fib_mtrie_t * m);
 
-struct ip4_fib_t;
+/**
+ * @brief Free an mtrie. It must be empty when freed.
+ */
+void ip4_mtrie_free (ip4_fib_mtrie_t * m);
 
-void ip4_fib_mtrie_add_del_route (struct ip4_fib_t *f,
-                                 ip4_address_t dst_address,
-                                 u32 dst_address_length,
-                                 u32 adj_index, u32 is_del);
+/**
+ * @brief Add a route/entry to the mtrie
+ */
+void ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
+                             const ip4_address_t * dst_address,
+                             u32 dst_address_length, u32 adj_index);
+/**
+ * @brief Remove a route/entry from the mtrie
+ */
+void ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
+                             const ip4_address_t * dst_address,
+                             u32 dst_address_length,
+                             u32 adj_index,
+                             u32 cover_address_length, u32 cover_adj_index);
 
+/**
+ * @brief Format/display the contents of the mtrie
+ */
 format_function_t format_ip4_fib_mtrie;
 
+/**
+ * @brief A global pool of 8bit stride plys
+ */
+extern ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
+
+/**
+ * Is the leaf terminal (i.e. an LB index) or non-terminal (i.e. a PLY index)
+ */
 always_inline u32
 ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n)
 {
   return n & 1;
 }
 
+/**
+ * From the stored slot value extract the LB index value
+ */
 always_inline u32
 ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n)
 {
@@ -125,35 +190,38 @@ ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n)
   return n >> 1;
 }
 
-/* Lookup step.  Processes 1 byte of 4 byte ip4 address. */
+/**
+ * @brief Lookup step.  Processes 1 byte of 4 byte ip4 address.
+ */
 always_inline ip4_fib_mtrie_leaf_t
 ip4_fib_mtrie_lookup_step (const ip4_fib_mtrie_t * m,
                           ip4_fib_mtrie_leaf_t current_leaf,
                           const ip4_address_t * dst_address,
                           u32 dst_address_byte_index)
 {
-  ip4_fib_mtrie_ply_t *ply;
+  ip4_fib_mtrie_8_ply_t *ply;
+
   uword current_is_terminal = ip4_fib_mtrie_leaf_is_terminal (current_leaf);
 
   if (!current_is_terminal)
     {
-      ply = m->ply_pool + (current_leaf >> 1);
+      ply = ip4_ply_pool + (current_leaf >> 1);
       return (ply->leaves[dst_address->as_u8[dst_address_byte_index]]);
     }
 
   return current_leaf;
 }
 
-/* Lookup step.  Processes 1 byte of 4 byte ip4 address. */
+/**
+ * @brief Lookup step number 1.  Processes 2 bytes of 4 byte ip4 address.
+ */
 always_inline ip4_fib_mtrie_leaf_t
 ip4_fib_mtrie_lookup_step_one (const ip4_fib_mtrie_t * m,
                               const ip4_address_t * dst_address)
 {
   ip4_fib_mtrie_leaf_t next_leaf;
-  ip4_fib_mtrie_ply_t *ply;
 
-  ply = m->ply_pool;
-  next_leaf = ply->leaves[dst_address->as_u8[0]];
+  next_leaf = m->root_ply.leaves[dst_address->as_u16[0]];
 
   return next_leaf;
 }
index b2c1fcd..1ff9fbd 100644 (file)
@@ -52,6 +52,7 @@ typedef union
   u32 data_u32;
   /* Aliases. */
   u8 as_u8[4];
+  u16 as_u16[2];
   u32 as_u32;
 } ip4_address_t;
 
index 7c2b7be..6831066 100644 (file)
@@ -165,11 +165,6 @@ ip4_source_check_inline (vlib_main_t * vm,
          leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
          leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
 
-         leaf0 =
-           ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
-         leaf1 =
-           ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
-
          leaf0 =
            ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
          leaf1 =
@@ -248,9 +243,6 @@ ip4_source_check_inline (vlib_main_t * vm,
 
          leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
 
-         leaf0 =
-           ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
-
          leaf0 =
            ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
 
index 8fa9a47..bf7ec7d 100644 (file)
@@ -153,6 +153,9 @@ typedef struct ip6_main_t
   /* Pool of FIBs. */
   struct fib_table_t_ *fibs;
 
+  /* Pool of V6 FIBs. */
+  ip6_fib_t *v6_fibs;
+
   /** Vector of MFIBs. */
   struct mfib_table_t_ *mfibs;
 
index e3a1fee..b9f1782 100644 (file)
@@ -240,6 +240,21 @@ send_ip_fib_details (vpe_api_main_t * am,
   vl_msg_api_send_shmem (q, (u8 *) & mp);
 }
 
+typedef struct vl_api_ip_fib_dump_walk_ctx_t_
+{
+  fib_node_index_t *feis;
+} vl_api_ip_fib_dump_walk_ctx_t;
+
+static int
+vl_api_ip_fib_dump_walk (fib_node_index_t fei, void *arg)
+{
+  vl_api_ip_fib_dump_walk_ctx_t *ctx = arg;
+
+  vec_add1 (ctx->feis, fei);
+
+  return (1);
+}
+
 static void
 vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp)
 {
@@ -247,12 +262,13 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp)
   unix_shared_memory_queue_t *q;
   ip4_main_t *im = &ip4_main;
   fib_table_t *fib_table;
-  fib_node_index_t lfei, *lfeip, *lfeis = NULL;
-  mpls_label_t key;
+  fib_node_index_t *lfeip;
   fib_prefix_t pfx;
   u32 fib_index;
   fib_route_path_encode_t *api_rpaths;
-  int i;
+  vl_api_ip_fib_dump_walk_ctx_t ctx = {
+    .feis = NULL,
+  };
 
   q = vl_api_client_index_to_input_queue (mp->client_index);
   if (q == 0)
@@ -261,19 +277,16 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp)
   /* *INDENT-OFF* */
   pool_foreach (fib_table, im->fibs,
   ({
-    for (i = 0; i < ARRAY_LEN (fib_table->v4.fib_entry_by_dst_address); i++)
-      {
-        hash_foreach(key, lfei, fib_table->v4.fib_entry_by_dst_address[i],
-        ({
-          vec_add1(lfeis, lfei);
-        }));
-      }
+    fib_table_walk(fib_table->ft_index,
+                   FIB_PROTOCOL_IP4,
+                   vl_api_ip_fib_dump_walk,
+                   &ctx);
   }));
   /* *INDENT-ON* */
 
-  vec_sort_with_function (lfeis, fib_entry_cmp_for_sort);
+  vec_sort_with_function (ctx.feis, fib_entry_cmp_for_sort);
 
-  vec_foreach (lfeip, lfeis)
+  vec_foreach (lfeip, ctx.feis)
   {
     fib_entry_get_prefix (*lfeip, &pfx);
     fib_index = fib_entry_get_fib_index (*lfeip);
@@ -286,7 +299,7 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp)
     vec_free (api_rpaths);
   }
 
-  vec_free (lfeis);
+  vec_free (ctx.feis);
 }
 
 static void
@@ -377,10 +390,10 @@ api_ip6_fib_table_get_all (unix_shared_memory_queue_t * q,
 {
   vpe_api_main_t *am = &vpe_api_main;
   ip6_main_t *im6 = &ip6_main;
-  ip6_fib_t *fib = &fib_table->v6;
   fib_node_index_t *fib_entry_index;
   api_ip6_fib_show_ctx_t ctx = {
-    .fib_index = fib->index,.entries = NULL,
+    .fib_index = fib_table->ft_index,
+    .entries = NULL,
   };
   fib_route_path_encode_t *api_rpaths;
   fib_prefix_t pfx;
index f631dc7..a085aaa 100644 (file)
@@ -18,6 +18,7 @@
 #include <vnet/vnet.h>
 #include <vnet/pg/pg.h>
 #include <vnet/mpls/mpls.h>
+#include <vnet/fib/mpls_fib.h>
 #include <vnet/fib/ip4_fib.h>
 #include <vnet/adj/adj_midchain.h>
 #include <vnet/dpo/classify_dpo.h>
index 300f2cf..b0125e6 100644 (file)
@@ -30,29 +30,6 @@ typedef enum {
   MPLS_N_ERROR,
 } mpls_error_t;
 
-#define MPLS_FIB_DEFAULT_TABLE_ID 0
-
-/**
- * Type exposure is to allow the DP fast/inlined access
- */
-#define MPLS_FIB_KEY_SIZE 21
-#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1))
-
-typedef struct mpls_fib_t_
-{
-  /**
-   * A hash table of entries. 21 bit key
-   * Hash table for reduced memory footprint
-   */
-  uword * mf_entries;
-
-  /**
-   * The load-balance indeices keyed by 21 bit label+eos bit.
-   * A flat array for maximum lookup performace.
-   */
-  index_t mf_lbs[MPLS_FIB_DB_SIZE];
-} mpls_fib_t;
-
 /**
  * @brief Definition of a callback for receiving MPLS interface state change
  * notifications
@@ -67,6 +44,9 @@ typedef struct {
   /**  A pool of all the MPLS FIBs */
   struct fib_table_t_ *fibs;
 
+  /**  A pool of all the protocol-dependent MPLS FIBs */
+  struct mpls_fib_t_ *mpls_fibs;
+
   /** A hash table to lookup the mpls_fib by table ID */
   uword *fib_index_by_table_id;
 
index a36a504..f1aef6c 100644 (file)
@@ -26,6 +26,7 @@
 #include <vnet/mpls/mpls_tunnel.h>
 #include <vnet/fib/fib_table.h>
 #include <vnet/fib/fib_api.h>
+#include <vnet/fib/mpls_fib.h>
 
 #include <vnet/vnet_msg_enum.h>
 
@@ -369,6 +370,21 @@ send_mpls_fib_details (vpe_api_main_t * am,
   vl_msg_api_send_shmem (q, (u8 *) & mp);
 }
 
+typedef struct vl_api_mpls_fib_dump_table_walk_ctx_t_
+{
+  fib_node_index_t *lfeis;
+} vl_api_mpls_fib_dump_table_walk_ctx_t;
+
+static int
+vl_api_mpls_fib_dump_table_walk (fib_node_index_t fei, void *arg)
+{
+  vl_api_mpls_fib_dump_table_walk_ctx_t *ctx = arg;
+
+  vec_add1 (ctx->lfeis, fei);
+
+  return (1);
+}
+
 static void
 vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp)
 {
@@ -376,28 +392,30 @@ vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp)
   unix_shared_memory_queue_t *q;
   mpls_main_t *mm = &mpls_main;
   fib_table_t *fib_table;
-  fib_node_index_t lfei, *lfeip, *lfeis = NULL;
-  mpls_label_t key;
+  mpls_fib_t *mpls_fib;
+  fib_node_index_t *lfeip = NULL;
   fib_prefix_t pfx;
   u32 fib_index;
   fib_route_path_encode_t *api_rpaths;
+  vl_api_mpls_fib_dump_table_walk_ctx_t ctx = {
+    .lfeis = NULL,
+  };
 
   q = vl_api_client_index_to_input_queue (mp->client_index);
   if (q == 0)
     return;
 
   /* *INDENT-OFF* */
-  pool_foreach (fib_table, mm->fibs,
+  pool_foreach (mpls_fib, mm->mpls_fibs,
   ({
-    hash_foreach(key, lfei, fib_table->mpls.mf_entries,
-    ({
-  vec_add1(lfeis, lfei);
-    }));
+    mpls_fib_table_walk (mpls_fib,
+                         vl_api_mpls_fib_dump_table_walk,
+                         &ctx);
   }));
   /* *INDENT-ON* */
-  vec_sort_with_function (lfeis, fib_entry_cmp_for_sort);
+  vec_sort_with_function (ctx.lfeis, fib_entry_cmp_for_sort);
 
-  vec_foreach (lfeip, lfeis)
+  vec_foreach (lfeip, ctx.lfeis)
   {
     fib_entry_get_prefix (*lfeip, &pfx);
     fib_index = fib_entry_get_fib_index (*lfeip);
@@ -410,7 +428,7 @@ vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp)
     vec_free (api_rpaths);
   }
 
-  vec_free (lfeis);
+  vec_free (ctx.lfeis);
 }
 
 /*
index 14ccd86..09ae8b8 100644 (file)
@@ -896,9 +896,10 @@ ip4_reset_fib_t_handler (vl_api_reset_fib_t * mp)
   /* *INDENT-OFF* */
   pool_foreach (fib_table, im4->fibs,
   ({
-    fib = &fib_table->v4;
     vnet_sw_interface_t * si;
 
+    fib = pool_elt_at_index (im4->v4_fibs, fib_table->ft_index);
+
     if (fib->table_id != target_fib_id)
       continue;
 
@@ -964,7 +965,8 @@ ip6_reset_fib_t_handler (vl_api_reset_fib_t * mp)
   pool_foreach (fib_table, im6->fibs,
   ({
     vnet_sw_interface_t * si;
-    fib = &(fib_table->v6);
+
+    fib = pool_elt_at_index (im6->v6_fibs, fib_table->ft_index);
 
     if (fib->table_id != target_fib_id)
       continue;
index 1927da0..042d02e 100644 (file)
@@ -17,6 +17,7 @@
 #include <vlib/threads.h>
 #include <vnet/fib/fib_entry.h>
 #include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip4_fib.h>
 #include <vnet/dpo/load_balance.h>
 
 #define STATS_DEBUG 0
@@ -576,6 +577,7 @@ do_ip4_fibs (stats_main_t * sm)
   static ip4_route_t *routes;
   ip4_route_t *r;
   fib_table_t *fib;
+  ip4_fib_t *v4_fib;
   ip_lookup_main_t *lm = &im4->lookup_main;
   static uword *results;
   vl_api_vnet_ip4_fib_counters_t *mp = 0;
@@ -592,6 +594,8 @@ again:
     while ((fib - im4->fibs) < start_at_fib_index)
       continue;
 
+    v4_fib = pool_elt_at_index (im4->v4_fibs, fib->ft_index);
+
     if (mp == 0)
       {
        items_this_message = IP4_FIB_COUNTER_BATCH_SIZE;
@@ -615,9 +619,9 @@ again:
     vec_reset_length (routes);
     vec_reset_length (results);
 
-    for (i = 0; i < ARRAY_LEN (fib->v4.fib_entry_by_dst_address); i++)
+    for (i = 0; i < ARRAY_LEN (v4_fib->fib_entry_by_dst_address); i++)
       {
-       uword *hash = fib->v4.fib_entry_by_dst_address[i];
+       uword *hash = v4_fib->fib_entry_by_dst_address[i];
        hash_pair_t *p;
        ip4_route_t x;