IP6-MFIB: replace the radix tree with bihash (VPP-1526)
[vpp.git] / src / vnet / mfib / ip6_mfib.c
index e98ac42..554a932 100644 (file)
 #include <vnet/mfib/mfib_entry.h>
 #include <vnet/fib/ip6_fib.h>
 
-/**
- * The number of bytes in an address/ask key in the radix tree
- * First byte is the length in bytes.
- */
-#define IP6_MFIB_KEY_LEN 33
-
 /**
  * Key and mask for radix
  */
-typedef struct ip6_mfib_key_t_
-{
-    u8 key[IP6_MFIB_KEY_LEN];
-    u8 mask[IP6_MFIB_KEY_LEN];
-} ip6_mfib_key_t;
-
-/**
- * An object that is inserted into the radix tree.
- * Since it's in the tree and has pointers, it cannot realloc and so cannot
- * come from a vlib pool.
- */
-typedef struct ip6_mfib_node_t_
-{
-    struct radix_node i6mn_nodes[2];
-    ip6_mfib_key_t i6mn_key;
-    index_t i6mn_entry;
-} ip6_mfib_node_t;
+typedef clib_bihash_kv_40_8_t ip6_mfib_key_t;
 
 static const mfib_prefix_t all_zeros = {
     /* (*,*) */
@@ -185,11 +163,6 @@ ip6_create_mfib_with_table_id (u32 table_id,
 
     mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP6, src);
 
-    mfib_table->v6.rhead =
-        clib_mem_alloc_aligned (sizeof(*mfib_table->v6.rhead),
-                                CLIB_CACHE_LINE_BYTES);
-    rn_inithead0(mfib_table->v6.rhead, 8);
-
     /*
      * add the special entries into the new FIB
      */
@@ -252,7 +225,6 @@ ip6_mfib_table_destroy (ip6_mfib_t *mfib)
     ASSERT(~0 != mfib_table->mft_table_id);
 
     hash_unset (ip6_main.mfib_index_by_table_id, mfib_table->mft_table_id);
-    clib_mem_free(mfib_table->v6.rhead);
     pool_put(ip6_main.mfibs, mfib_table);
 }
 
@@ -325,29 +297,24 @@ ip6_mfib_table_get_index_for_sw_if_index (u32 sw_if_index)
     return (ip6_main.mfib_index_by_sw_if_index[sw_if_index]);
 }
 
-#define IP6_MFIB_MK_KEY(_grp, _src, _key)                           \
-{                                                                   \
-    (_key)->key[0] = 33;                                            \
-    memcpy((_key)->key+1, _grp, 16);                                \
-    memcpy((_key)->key+17, _src, 16);                               \
-}
+#define IPV6_MFIB_GRP_LEN(_len)                 \
+    (_len > 128 ? 128 : _len)
 
-#define IP6_MFIB_MK_KEY_MASK(_grp, _src, _len, _key)                \
-{                                                                   \
-    IP6_MFIB_MK_KEY(_grp, _src, _key);                              \
-                                                                    \
-    (_key)->mask[0] = 33;                                           \
-    if (_len <= 128)                                                \
-    {                                                               \
-        memcpy((_key)->mask+1, &ip6_main.fib_masks[_len], 16);      \
-        clib_memset((_key)->mask+17, 0, 16);                             \
-    }                                                               \
-    else                                                            \
-    {                                                               \
-        ASSERT(_len == 256);                                        \
-        memcpy((_key)->mask+1, &ip6_main.fib_masks[128], 16);       \
-        memcpy((_key)->mask+17, &ip6_main.fib_masks[128], 16);      \
-    }                                                               \
+#define IP6_MFIB_MK_KEY(_mfib, _grp, _src, _len, _key)                  \
+{                                                                       \
+    _key.key[0] = (_grp->as_u64[0] &                                    \
+                   ip6_main.fib_masks[IPV6_MFIB_GRP_LEN(_len)].as_u64[0]); \
+    _key.key[1] = (_grp->as_u64[1] &                                    \
+                   ip6_main.fib_masks[IPV6_MFIB_GRP_LEN(_len)].as_u64[1]); \
+    if (_len == 256) {                                                  \
+        _key.key[2] = _src->as_u64[0];                                  \
+        _key.key[3] = _src->as_u64[1];                                  \
+    } else {                                                            \
+        _key.key[2] = 0;                                                \
+        _key.key[3] = 0;                                                \
+    }                                                                   \
+    _key.key[4] = _mfib->index;                                         \
+    _key.key[4] = (_key.key[4] << 32) | len;                            \
 }
 
 /*
@@ -361,68 +328,105 @@ ip6_mfib_table_lookup_exact_match (const ip6_mfib_t *mfib,
                                    const ip6_address_t *src,
                                    u32 len)
 {
-    ip6_mfib_node_t *i6mn;
-    ip6_mfib_key_t key;
-
-    IP6_MFIB_MK_KEY_MASK(grp, src, len, &key);
+    ip6_mfib_key_t key, value;
+    int rv;
 
-    i6mn = (ip6_mfib_node_t*) rn_lookup(key.key, key.mask,
-                                        (struct radix_node_head *)mfib->rhead);
+    IP6_MFIB_MK_KEY(mfib, grp, src, len, key);
 
-    if (NULL == i6mn)
-    {
-        return (INDEX_INVALID);
-    }
+    rv = clib_bihash_search_inline_2_40_8(&ip6_main.ip6_mtable.ip6_mhash,
+                                          &key, &value);
+    if (rv == 0)
+       return value.value;
 
-    return (i6mn->i6mn_entry);
+    return (FIB_NODE_INDEX_INVALID);
 }
 
 /*
  * ip6_fib_table_lookup
  *
- * Longest prefix match
+ * Longest prefix match for the forwarding plane (no mask given)
  */
 fib_node_index_t
-ip6_mfib_table_lookup (const ip6_mfib_t *mfib,
-                       const ip6_address_t *src,
-                       const ip6_address_t *grp,
-                       u32 len)
+ip6_mfib_table_fwd_lookup (const ip6_mfib_t *mfib,
+                           const ip6_address_t *src,
+                           const ip6_address_t *grp)
 {
-    ip6_mfib_node_t *i6mn;
-    ip6_mfib_key_t key;
+    ip6_mfib_table_instance_t *table;
+    ip6_mfib_key_t key, value;
+    int i, n, len;
+    int rv;
+
+    table = &ip6_main.ip6_mtable;
+    n = vec_len (table->prefix_lengths_in_search_order);
 
-    IP6_MFIB_MK_KEY_MASK(grp, src, len, &key);
+    for (i = 0; i < n; i++)
+    {
+       len = table->prefix_lengths_in_search_order[i];
 
-    i6mn = (ip6_mfib_node_t*) rn_search_m(key.key,
-                                          mfib->rhead->rnh_treetop,
-                                          key.mask);
+       ASSERT(len >= 0 && len <= 256);
+        IP6_MFIB_MK_KEY(mfib, grp, src, len, key);
 
-    ASSERT(NULL != i6mn);
+       rv = clib_bihash_search_inline_2_40_8(&table->ip6_mhash, &key, &value);
+       if (rv == 0)
+           return value.value;
+    }
 
-    return (i6mn->i6mn_entry);
+    return (FIB_NODE_INDEX_INVALID);
 }
 
 /*
  * ip6_fib_table_lookup
  *
- * Longest prefix match no mask
+ * Longest prefix match
  */
 fib_node_index_t
-ip6_mfib_table_lookup2 (const ip6_mfib_t *mfib,
-                        const ip6_address_t *src,
-                        const ip6_address_t *grp)
+ip6_mfib_table_lookup (const ip6_mfib_t *mfib,
+                       const ip6_address_t *src,
+                       const ip6_address_t *grp,
+                       u32 len)
 {
-    ip6_mfib_node_t *i6mn;
-    ip6_mfib_key_t key;
+    ip6_mfib_table_instance_t *table;
+    ip6_mfib_key_t key, value;
+    int i, n, rv;
+
+    table = &ip6_main.ip6_mtable;
+    n = vec_len (table->prefix_lengths_in_search_order);
+
+    /*
+     * start search from a mask length same length or shorter.
+     * we don't want matches longer than the mask passed
+     */
+    i = 0;
+    while (i < n && table->prefix_lengths_in_search_order[i] > len)
+    {
+        i++;
+    }
 
-    IP6_MFIB_MK_KEY(grp, src, &key);
+    for (; i < n; i++)
+    {
+       len = table->prefix_lengths_in_search_order[i];
 
-    i6mn = (ip6_mfib_node_t*) rn_match(key.key,
-                                       (struct radix_node_head *)mfib->rhead); // const cast
+       ASSERT(len >= 0 && len <= 256);
+        IP6_MFIB_MK_KEY(mfib, grp, src, len, key);
 
-    ASSERT(NULL != i6mn);
+       rv = clib_bihash_search_inline_2_40_8(&table->ip6_mhash, &key, &value);
+       if (rv == 0)
+           return value.value;
+    }
 
-    return (i6mn->i6mn_entry);
+    return (FIB_NODE_INDEX_INVALID);
+}
+
+static void
+compute_prefix_lengths_in_search_order (ip6_mfib_table_instance_t *table)
+{
+    int i;
+    vec_reset_length (table->prefix_lengths_in_search_order);
+    /* Note: bitmap reversed so this is in fact a longest prefix match */
+    clib_bitmap_foreach (i, table->non_empty_dst_address_length_bitmap,
+    ({
+       vec_add1(table->prefix_lengths_in_search_order, (256 - i));
+    }));
 }
 
 void
@@ -432,19 +436,21 @@ ip6_mfib_table_entry_insert (ip6_mfib_t *mfib,
                              u32 len,
                              fib_node_index_t mfib_entry_index)
 {
-    ip6_mfib_node_t *i6mn = clib_mem_alloc(sizeof(*i6mn));
+    ip6_mfib_table_instance_t *table;
+    ip6_mfib_key_t key;
 
-    clib_memset(i6mn->i6mn_nodes, 0, sizeof(i6mn->i6mn_nodes));
+    table = &ip6_main.ip6_mtable;
+    IP6_MFIB_MK_KEY(mfib, grp, src, len, key);
+    key.value = mfib_entry_index;
 
-    IP6_MFIB_MK_KEY_MASK(grp, src, len, &i6mn->i6mn_key);
-    i6mn->i6mn_entry = mfib_entry_index;
+    clib_bihash_add_del_40_8(&table->ip6_mhash, &key, 1);
 
-    if (NULL == rn_addroute(i6mn->i6mn_key.key,
-                            i6mn->i6mn_key.mask,
-                            mfib->rhead,
-                            i6mn->i6mn_nodes))
+    if (0 == table->dst_address_length_refcounts[len]++)
     {
-        ASSERT(0);
+        table->non_empty_dst_address_length_bitmap =
+            clib_bitmap_set (table->non_empty_dst_address_length_bitmap, 
+                             256 - len, 1);
+        compute_prefix_lengths_in_search_order (table);
     }
 }
 
@@ -454,14 +460,22 @@ ip6_mfib_table_entry_remove (ip6_mfib_t *mfib,
                              const ip6_address_t *src,
                              u32 len)
 {
-    ip6_mfib_node_t *i6mn;
+    ip6_mfib_table_instance_t *table;
     ip6_mfib_key_t key;
 
-    IP6_MFIB_MK_KEY_MASK(grp, src, len, &key);
+    IP6_MFIB_MK_KEY(mfib, grp, src, len, key);
 
-    i6mn = (ip6_mfib_node_t*) rn_delete(key.key, key.mask, mfib->rhead);
+    table = &ip6_main.ip6_mtable;
+    clib_bihash_add_del_40_8(&table->ip6_mhash, &key, 0);
 
-    clib_mem_free(i6mn);
+    ASSERT (table->dst_address_length_refcounts[len] > 0);
+    if (--table->dst_address_length_refcounts[len] == 0)
+    {
+       table->non_empty_dst_address_length_bitmap =
+            clib_bitmap_set (table->non_empty_dst_address_length_bitmap, 
+                             256 - len, 0);
+       compute_prefix_lengths_in_search_order (table);
+    }
 }
 
 static clib_error_t *
@@ -536,39 +550,45 @@ ip6_mfib_table_show_all (ip6_mfib_t *mfib,
     vec_free(ctx.entries);
 }
 
-typedef struct ip6_mfib_radix_walk_ctx_t_
+/**
+ * @brief Context when walking the IPv6 table. Since all VRFs are in the
+ * same hash table, we need to filter only those we need as we walk
+ */
+typedef struct ip6_mfib_walk_ctx_t_
 {
-    mfib_table_walk_fn_t user_fn;
-    void *user_ctx;
-} ip6_mfib_radix_walk_ctx_t;
+    u32 i6w_mfib_index;
+    mfib_table_walk_fn_t i6w_fn;
+    void *i6w_ctx;
+} ip6_mfib_walk_ctx_t;
 
 static int
-ip6_mfib_table_radix_walk (struct radix_node *rn,
-                           void *arg)
+ip6_mfib_walk_cb (clib_bihash_kv_40_8_t * kvp,
+                 void *arg)
 {
-    ip6_mfib_radix_walk_ctx_t *ctx = arg;
-    ip6_mfib_node_t *i6mn;
-
-    i6mn = (ip6_mfib_node_t*) rn;
-
-    ctx->user_fn(i6mn->i6mn_entry, ctx->user_ctx);
+    ip6_mfib_walk_ctx_t *ctx = arg;
 
-    return (0);
+    if ((kvp->key[4] >> 32) == ctx->i6w_mfib_index)
+    {
+        return (ctx->i6w_fn(kvp->value, ctx->i6w_ctx));
+    }
+    return (FIB_TABLE_WALK_CONTINUE);
 }
 
 void
 ip6_mfib_table_walk (ip6_mfib_t *mfib,
                      mfib_table_walk_fn_t fn,
-                     void *ctx)
+                     void *arg)
 {
-    ip6_mfib_radix_walk_ctx_t rn_ctx = {
-        .user_fn = fn,
-        .user_ctx = ctx,
+    ip6_mfib_walk_ctx_t ctx = {
+        .i6w_mfib_index = mfib->index,
+        .i6w_fn = fn,
+        .i6w_ctx = arg,
     };
 
-    rn_walktree(mfib->rhead,
-                ip6_mfib_table_radix_walk,
-                &rn_ctx);
+    clib_bihash_foreach_key_value_pair_40_8(
+        &ip6_main.ip6_mtable.ip6_mhash,
+        ip6_mfib_walk_cb,
+        &ctx);
 }
 
 static clib_error_t *