Improve Load-Balance MAPs 05/6305/2
authorNeale Ranns <nranns@cisco.com>
Tue, 18 Apr 2017 16:09:40 +0000 (09:09 -0700)
committerDave Barach <openvpp@barachs.net>
Mon, 24 Apr 2017 12:06:48 +0000 (12:06 +0000)
- only build them for popular path-lists (where popular means more than 64 children)
   the reason to have a map is to improve convergence speed for recursive prefixes - if there are only a few this technique is not needed
- only build them when there is at least one path that has recursive constraints, i.e. a path that can 'fail' in a PIC scenario.
- Use the MAPS in the switch path.
- PIC test cases for functionality (not convergence performance)

Change-Id: I70705444c8469d22b07ae34be82cfb6a01358e10
Signed-off-by: Neale Ranns <nranns@cisco.com>
18 files changed:
src/vnet/dpo/load_balance.c
src/vnet/dpo/load_balance_map.h
src/vnet/fib/fib_entry_src.c
src/vnet/fib/fib_entry_src_rr.c
src/vnet/fib/fib_path.c
src/vnet/fib/fib_path.h
src/vnet/fib/fib_path_list.c
src/vnet/fib/fib_path_list.h
src/vnet/fib/fib_table.c
src/vnet/fib/fib_test.c
src/vnet/fib/fib_walk.c
src/vnet/fib/fib_walk.h
src/vnet/ip/ip4_forward.c
src/vnet/ip/ip6_forward.c
src/vnet/ip/ip6_neighbor.c
src/vnet/mpls/mpls_lookup.c
test/test_mpls.py
test/vpp_ip_route.py

index 6b0eda0..af054f1 100644 (file)
@@ -118,7 +118,8 @@ load_balance_format (index_t lbi,
     buckets = load_balance_get_buckets(lb);
 
     s = format(s, "%U: ", format_dpo_type, DPO_LOAD_BALANCE);
-    s = format(s, "[index:%d buckets:%d ", lbi, lb->lb_n_buckets);
+    s = format(s, "[proto:%U ", format_dpo_proto, lb->lb_proto);
+    s = format(s, "index:%d buckets:%d ", lbi, lb->lb_n_buckets);
     s = format(s, "uRPF:%d ", lb->lb_urpf);
     s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes);
     if (0 != via.packets)
index 454bf4b..237f24b 100644 (file)
@@ -73,6 +73,37 @@ load_balance_map_get (index_t lbmi)
     return (pool_elt_at_index(load_balance_map_pool, lbmi));
 }
 
+static inline u16
+load_balance_map_translate (index_t lbmi,
+                            u16 bucket)
+{
+    load_balance_map_t*lbm;
+
+    lbm = load_balance_map_get(lbmi);
+
+    return (lbm->lbm_buckets[bucket]);
+}
+
+static inline const dpo_id_t *
+load_balance_get_fwd_bucket (const load_balance_t *lb,
+                             u16 bucket)
+{
+    ASSERT(bucket < lb->lb_n_buckets);
+
+    if (INDEX_INVALID != lb->lb_map)
+    {
+        bucket = load_balance_map_translate(lb->lb_map, bucket);
+    }
+
+    if (PREDICT_TRUE(LB_HAS_INLINE_BUCKETS(lb)))
+    {
+       return (&lb->lb_buckets_inline[bucket]);
+    }
+    else
+    {
+       return (&lb->lb_buckets[bucket]);
+    }
+}
 
 extern void load_balance_map_module_init(void);
 
index a700282..fd80497 100644 (file)
@@ -192,7 +192,7 @@ typedef struct fib_entry_src_collect_forwarding_ctx_t_
     const fib_entry_t *fib_entry;
     const fib_entry_src_t *esrc;
     fib_forward_chain_type_t fct;
-    int is_recursive;
+    int n_recursive_constrained;
 } fib_entry_src_collect_forwarding_ctx_t;
 
 /**
@@ -203,10 +203,11 @@ load_balance_flags_t
 fib_entry_calc_lb_flags (fib_entry_src_collect_forwarding_ctx_t *ctx)
 {
     /**
-     * We'll use a LB map is the path-list has recursive paths.
+     * We'll use a LB map if the path-list has multiple recursive paths.
      * recursive paths implies BGP, and hence scale.
      */
-    if (ctx->is_recursive)
+    if (ctx->n_recursive_constrained > 1 &&
+        fib_path_list_is_popular(ctx->esrc->fes_pl))
     {
         return (LOAD_BALANCE_FLAG_USES_MAP);
     }
@@ -282,9 +283,9 @@ fib_entry_src_collect_forwarding (fib_node_index_t pl_index,
         return (!0);
     }
 
-    if (fib_path_is_recursive(path_index))
+    if (fib_path_is_recursive_constrained(path_index))
     {
-        ctx->is_recursive = 1;
+        ctx->n_recursive_constrained += 1;
     }
 
     /*
@@ -397,7 +398,7 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry,
         .esrc = esrc,
         .fib_entry = fib_entry,
         .next_hops = NULL,
-        .is_recursive = 0,
+        .n_recursive_constrained = 0,
         .fct = fct,
     };
 
@@ -409,7 +410,7 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry,
     vec_validate(ctx.next_hops, fib_path_list_get_n_paths(esrc->fes_pl));
     vec_reset_length(ctx.next_hops);
 
-    lb_proto = fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto);
+    lb_proto = fib_forw_chain_type_to_dpo_proto(fct);
 
     fib_path_list_walk(esrc->fes_pl,
                        fib_entry_src_collect_forwarding,
index ff15c54..c145aaa 100644 (file)
@@ -103,7 +103,7 @@ fib_entry_src_rr_activate (fib_entry_src_t *src,
        fib_entry_cover_track(cover, fib_entry_get_index(fib_entry));
 
     /*
-     * if the ocver is attached then install an attached-host path
+     * if the cover is attached then install an attached-host path
      * (like an adj-fib). Otherwise inherit the forwarding from the cover
      */
     if (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover))
index 70c8790..889317f 100644 (file)
@@ -2025,13 +2025,15 @@ fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index,
 }
 
 int
-fib_path_is_recursive (fib_node_index_t path_index)
+fib_path_is_recursive_constrained (fib_node_index_t path_index)
 {
     fib_path_t *path;
 
     path = fib_path_get(path_index);
 
-    return (FIB_PATH_TYPE_RECURSIVE == path->fp_type);
+    return ((FIB_PATH_TYPE_RECURSIVE == path->fp_type) &&
+            ((path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED) ||
+             (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_HOST)));
 }
 
 int
index 334be6f..b6bf1e4 100644 (file)
@@ -144,7 +144,7 @@ extern fib_node_index_t fib_path_copy(fib_node_index_t path_index,
                                      fib_node_index_t path_list_index);
 extern int fib_path_resolve(fib_node_index_t path_index);
 extern int fib_path_is_resolved(fib_node_index_t path_index);
-extern int fib_path_is_recursive(fib_node_index_t path_index);
+extern int fib_path_is_recursive_constrained(fib_node_index_t path_index);
 extern int fib_path_is_exclusive(fib_node_index_t path_index);
 extern int fib_path_is_deag(fib_node_index_t path_index);
 extern int fib_path_is_looped(fib_node_index_t path_index);
index ea6565d..64917f9 100644 (file)
 #include <vnet/fib/fib_walk.h>
 #include <vnet/fib/fib_urpf_list.h>
 
+/**
+ * The magic number of child entries that make a path-list popular.
+ * There's a trade-off here between convergence and forwarding speed.
+ * Popular path-lists generate load-balance maps for the entries that
+ * use them. If the map is present there is a switch path cost to indirect
+ * through the map - this indirection provides the fast convergence - so
+ * without the map convergence is slower.
+ */
+#define FIB_PATH_LIST_POPULAR 64
+
 /**
  * FIB path-list
  * A representation of the list/set of path trough which a prefix is reachable
@@ -454,14 +464,7 @@ fib_path_list_back_walk (fib_node_index_t path_list_index,
     /*
      * propagate the backwalk further
      */
-    if (32 >= fib_node_list_get_size(path_list->fpl_node.fn_children))
-    {
-        /*
-         * only a few children. continue the walk synchronously
-         */
-       fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, ctx);
-    }
-    else
+    if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_POPULAR)
     {
         /*
          * many children. schedule a async walk
@@ -471,6 +474,13 @@ fib_path_list_back_walk (fib_node_index_t path_list_index,
                        FIB_WALK_PRIORITY_LOW,
                        ctx);
     }
+    else
+    {
+        /*
+         * only a few children. continue the walk synchronously
+         */
+       fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, ctx);
+    }
 }
 
 /*
@@ -625,6 +635,16 @@ fib_path_list_is_looped (fib_node_index_t path_list_index)
     return (path_list->fpl_flags & FIB_PATH_LIST_FLAG_LOOPED);
 }
 
+int
+fib_path_list_is_popular (fib_node_index_t path_list_index)
+{
+    fib_path_list_t *path_list;
+
+    path_list = fib_path_list_get(path_list_index);
+
+    return (path_list->fpl_flags & FIB_PATH_LIST_FLAG_POPULAR);
+}
+
 static fib_path_list_flags_t
 fib_path_list_flags_fixup (fib_path_list_flags_t flags)
 {
@@ -807,6 +827,7 @@ fib_path_list_path_add (fib_node_index_t path_list_index,
          */
        if (0 == fib_path_cmp(new_path_index, *orig_path_index))
         {
+            fib_path_destroy(new_path_index);
             return (*orig_path_index);
         }
     }
@@ -1173,10 +1194,38 @@ fib_path_list_child_add (fib_node_index_t path_list_index,
                         fib_node_type_t child_type,
                         fib_node_index_t child_index)
 {
-    return (fib_node_child_add(FIB_NODE_TYPE_PATH_LIST,
-                               path_list_index,
-                               child_type,
-                               child_index));
+    u32 sibling;
+
+    sibling = fib_node_child_add(FIB_NODE_TYPE_PATH_LIST,
+                                 path_list_index,
+                                 child_type,
+                                 child_index);
+
+    if (FIB_PATH_LIST_POPULAR == fib_node_get_n_children(FIB_NODE_TYPE_PATH_LIST,
+                                                         path_list_index))
+    {
+        /*
+         * Set the popular flag on the path-list once we pass the magic
+         * threshold. then walk children to update.
+         * We don't undo this action. The rationale being that the number
+         * of entries using this prefix is large enough such that it is a
+         * non-trivial amount of effort to converge them. If we get into the
+         * situation where we are adding and removing entries such that we
+         * flip-flop over the threshold, then this non-trivial work is added
+         * to each of those routes adds/deletes - not a situation we want.
+         */
+        fib_node_back_walk_ctx_t ctx = {
+            .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE,
+        };
+        fib_path_list_t *path_list;
+
+        path_list = fib_path_list_get(path_list_index);
+        path_list->fpl_flags |= FIB_PATH_LIST_FLAG_POPULAR;
+
+       fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, &ctx);
+    }
+
+    return (sibling);
 }
 
 void
index 9d24621..376cb72 100644 (file)
@@ -38,11 +38,6 @@ typedef enum fib_path_list_attribute_t_ {
      * be searched for each route update.
      */
     FIB_PATH_LIST_ATTRIBUTE_SHARED = FIB_PATH_LIST_ATTRIBUTE_FIRST,
-    /**
-     * Indexed means the path-list keeps a hash table of all paths for
-     * fast lookup. The lookup result is the fib_node_index of the path.
-     */
-    FIB_PATH_LIST_ATTRIBUTE_INDEXED,
     /**
      * explicit drop path-list. Used when the entry source needs to 
      * force a drop, despite the fact the path info is present.
@@ -65,6 +60,12 @@ typedef enum fib_path_list_attribute_t_ {
      * looped path-list. one path looped implies the whole list is
      */
     FIB_PATH_LIST_ATTRIBUTE_LOOPED,
+    /**
+     * a popular path-list is one that is shared amongst many entries.
+     * Path lists become popular as they gain more children, but they
+     * don't become unpopular as they lose them.
+     */
+    FIB_PATH_LIST_ATTRIBUTE_POPULAR,
     /**
      * no uRPF - do not generate unicast RPF list for this path-list
      */
@@ -72,30 +73,30 @@ typedef enum fib_path_list_attribute_t_ {
     /**
      * Marher. Add new flags before this one, and then update it.
      */
-    FIB_PATH_LIST_ATTRIBUTE_LAST = FIB_PATH_LIST_ATTRIBUTE_LOOPED,
+    FIB_PATH_LIST_ATTRIBUTE_LAST = FIB_PATH_LIST_ATTRIBUTE_NO_URPF,
 } fib_path_list_attribute_t;
 
 typedef enum fib_path_list_flags_t_ {
     FIB_PATH_LIST_FLAG_NONE      = 0,
     FIB_PATH_LIST_FLAG_SHARED    = (1 << FIB_PATH_LIST_ATTRIBUTE_SHARED),
-    FIB_PATH_LIST_FLAG_INDEXED    = (1 << FIB_PATH_LIST_ATTRIBUTE_INDEXED),
     FIB_PATH_LIST_FLAG_DROP      = (1 << FIB_PATH_LIST_ATTRIBUTE_DROP),
     FIB_PATH_LIST_FLAG_LOCAL     = (1 << FIB_PATH_LIST_ATTRIBUTE_LOCAL),
     FIB_PATH_LIST_FLAG_EXCLUSIVE = (1 << FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE),
     FIB_PATH_LIST_FLAG_RESOLVED  = (1 << FIB_PATH_LIST_ATTRIBUTE_RESOLVED),
     FIB_PATH_LIST_FLAG_LOOPED    = (1 << FIB_PATH_LIST_ATTRIBUTE_LOOPED),
+    FIB_PATH_LIST_FLAG_POPULAR   = (1 << FIB_PATH_LIST_ATTRIBUTE_POPULAR),
     FIB_PATH_LIST_FLAG_NO_URPF   = (1 << FIB_PATH_LIST_ATTRIBUTE_NO_URPF),
 } fib_path_list_flags_t;
 
 #define FIB_PATH_LIST_ATTRIBUTES {                      \
     [FIB_PATH_LIST_ATTRIBUTE_SHARED]    = "shared",     \
-    [FIB_PATH_LIST_ATTRIBUTE_INDEXED]    = "indexed",   \
     [FIB_PATH_LIST_ATTRIBUTE_RESOLVED]  = "resolved",   \
     [FIB_PATH_LIST_ATTRIBUTE_DROP]      = "drop",       \
     [FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE] = "exclusive",   \
-    [FIB_PATH_LIST_ATTRIBUTE_LOCAL]     = "local",      \
-    [FIB_PATH_LIST_ATTRIBUTE_LOOPED]     = "looped",    \
-    [FIB_PATH_LIST_ATTRIBUTE_NO_URPF]     = "no-uRPF",  \
+    [FIB_PATH_LIST_ATTRIBUTE_LOCAL]     = "local",       \
+    [FIB_PATH_LIST_ATTRIBUTE_LOOPED]    = "looped",     \
+    [FIB_PATH_LIST_ATTRIBUTE_POPULAR]   = "popular",    \
+    [FIB_PATH_LIST_ATTRIBUTE_NO_URPF]   = "no-uRPF",    \
 }
 
 #define FOR_EACH_PATH_LIST_ATTRIBUTE(_item)            \
@@ -148,6 +149,7 @@ extern int fib_path_list_recursive_loop_detect(fib_node_index_t path_list_index,
                                               fib_node_index_t **entry_indicies);
 extern u32 fib_path_list_get_resolving_interface(fib_node_index_t path_list_index);
 extern int fib_path_list_is_looped(fib_node_index_t path_list_index);
+extern int fib_path_list_is_popular(fib_node_index_t path_list_index);
 extern fib_protocol_t fib_path_list_get_proto(fib_node_index_t path_list_index);
 extern u8 * fib_path_list_format(fib_node_index_t pl_index,
                                 u8 * s);
index 0938ce9..ff42804 100644 (file)
@@ -608,11 +608,19 @@ fib_table_entry_path_remove2 (u32 fib_index,
        fib_entry_src_flag_t src_flag;
         int was_sourced;
 
-       /*
+        /*
+         * if it's not sourced, then there's nowt to remove
+         */
+        was_sourced = fib_entry_is_sourced(fib_entry_index, source);
+        if (!was_sourced)
+        {
+            return;
+        }
+
+        /*
         * don't nobody go nowhere
         */
        fib_entry_lock(fib_entry_index);
-        was_sourced = fib_entry_is_sourced(fib_entry_index, source);
 
         for (ii = 0; ii < vec_len(rpath); ii++)
         {
index cbb5640..d3bdfa3 100644 (file)
@@ -729,6 +729,9 @@ fib_test_v4 (void)
        .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02),
     };
 
+    FIB_TEST((0 == pool_elts(load_balance_map_pool)), "LB-map pool size is %d",
+            pool_elts(load_balance_map_pool));
+
     tm = &test_main;
 
     /* record the nubmer of load-balances in use before we start */
@@ -3090,6 +3093,43 @@ fib_test_v4 (void)
                             NULL,
                             FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
 
+    /*
+     * add a bunch load more entries using this path combo so that we get
+     * an LB-map created.
+     */
+#define N_P 128
+    fib_prefix_t bgp_78s[N_P];
+    for (ii = 0; ii < N_P; ii++)
+    {
+        bgp_78s[ii].fp_len = 32;
+        bgp_78s[ii].fp_proto = FIB_PROTOCOL_IP4;
+        bgp_78s[ii].fp_addr.ip4.as_u32 = clib_host_to_net_u32(0x4e000000+ii);
+
+        
+        fib_table_entry_path_add(fib_index,
+                                 &bgp_78s[ii],
+                                 FIB_SOURCE_API,
+                                 FIB_ENTRY_FLAG_NONE,
+                                 FIB_PROTOCOL_IP4,
+                                 &pfx_1_1_1_3_s_32.fp_addr,
+                                 ~0,
+                                 fib_index,
+                                 1,
+                                 NULL,
+                                 FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+        fib_table_entry_path_add(fib_index,
+                                 &bgp_78s[ii],
+                                 FIB_SOURCE_API,
+                                 FIB_ENTRY_FLAG_NONE,
+                                 FIB_PROTOCOL_IP4,
+                                 &nh_1_1_1_1,
+                                 ~0,
+                                 fib_index,
+                                 1,
+                                 NULL,
+                                 FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+    }
+
     fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
     dpo = fib_entry_contribute_ip_forwarding(fei);
 
@@ -3138,6 +3178,9 @@ fib_test_v4 (void)
                                1,
                                FIB_ROUTE_PATH_FLAG_NONE);
 
+    /* suspend so the update walk kicks in */
+    vlib_process_suspend(vlib_get_main(), 1e-5);
+
     fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
     FIB_TEST(!dpo_cmp(dpo, fib_entry_contribute_ip_forwarding(fei)),
             "post PIC 200.200.200.200/32 was inplace modified");
@@ -3175,6 +3218,9 @@ fib_test_v4 (void)
                             NULL,
                             FIB_ROUTE_PATH_FLAG_NONE);
 
+    /* suspend so the update walk kicks in */
+    vlib_process_suspend(vlib_get_main(), 1e-5);
+
     FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket_i(lb, 0)),
             "post PIC recovery adj for 200.200.200.200/32 is recursive "
             "via adj for 1.1.1.1");
@@ -3201,6 +3247,20 @@ fib_test_v4 (void)
                             1,
                             NULL,
                             FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+    for (ii = 0; ii < N_P; ii++)
+    {
+        fib_table_entry_path_add(fib_index,
+                                 &bgp_78s[ii],
+                            FIB_SOURCE_API,
+                            FIB_ENTRY_FLAG_NONE,
+                            FIB_PROTOCOL_IP4,
+                            &pfx_1_1_1_2_s_32.fp_addr,
+                            ~0,
+                            fib_index,
+                            1,
+                            NULL,
+                            FIB_ROUTE_PATH_RESOLVE_VIA_HOST);
+    }
 
     fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
     dpo = fib_entry_contribute_ip_forwarding(fei);
@@ -3233,6 +3293,8 @@ fib_test_v4 (void)
                                ~0,
                                1,
                                FIB_ROUTE_PATH_FLAG_NONE);
+    /* suspend so the update walk kicks in */
+    vlib_process_suspend(vlib_get_main(), 1e-5);
 
     fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
     dpo = fib_entry_contribute_ip_forwarding(fei);
@@ -3270,6 +3332,16 @@ fib_test_v4 (void)
                              NULL,
                              FIB_ROUTE_PATH_FLAG_NONE);
 
+    for (ii = 0; ii < N_P; ii++)
+    {
+        fib_table_entry_delete(fib_index,
+                               &bgp_78s[ii],
+                               FIB_SOURCE_API);
+        FIB_TEST((FIB_NODE_INDEX_INVALID ==
+                  fib_table_lookup_exact_match(fib_index, &bgp_78s[ii])),
+                 "%U removed",
+                 format_fib_prefix, &bgp_78s[ii]);
+    }
     fib_table_entry_path_remove(fib_index,
                                 &bgp_200_pfx,
                                 FIB_SOURCE_API,
@@ -3303,6 +3375,8 @@ fib_test_v4 (void)
     fib_table_entry_delete(fib_index,
                           &pfx_1_1_1_0_s_28,
                           FIB_SOURCE_API);
+    /* suspend so the update walk kicks in */
+    vlib_process_suspend(vlib_get_main(), 1e-5);
     FIB_TEST((FIB_NODE_INDEX_INVALID ==
              fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_28)),
             "1.1.1.1/28 removed");
@@ -3821,7 +3895,7 @@ fib_test_v4 (void)
     /*
      * -2 entries and -2 non-shared path-list
      */
-    FIB_TEST((0  == fib_path_list_db_size()),   "path list DB population:%d",
+    FIB_TEST((0 == fib_path_list_db_size()),   "path list DB population:%d",
             fib_path_list_db_size());
     FIB_TEST((PNBR == fib_path_list_pool_size()), "path list pool size is %d",
             fib_path_list_pool_size());
@@ -3855,7 +3929,7 @@ fib_test_v4 (void)
     FIB_TEST((ENBR-5 == pool_elts(fib_urpf_list_pool)), "uRPF pool size is %d",
             pool_elts(fib_urpf_list_pool));
     FIB_TEST((0 == pool_elts(load_balance_map_pool)), "LB-map pool size is %d",
-             pool_elts(load_balance_map_pool));
+            pool_elts(load_balance_map_pool));
     FIB_TEST((lb_count == pool_elts(load_balance_pool)), "LB pool size is %d",
              pool_elts(load_balance_pool));
 
@@ -5900,6 +5974,12 @@ fib_test_label (void)
            .adj = DPO_PROTO_IP4,
        },
     };
+    fib_test_lb_bucket_t mpls_bucket_drop = {
+       .type = FT_LB_SPECIAL,
+       .special = {
+           .adj = DPO_PROTO_MPLS,
+       },
+    };
 
     fib_table_entry_path_remove(fib_index,
                                &pfx_1_1_1_1_s_32,
@@ -5932,9 +6012,9 @@ fib_test_label (void)
                           &pfx_24001_neos);
     FIB_TEST(fib_test_validate_entry(fei, 
                                     FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
-                                     1,
-                                     &bucket_drop),
-            "24001/eos LB 1 buckets via: DROP");
+                                     1,
+                                     &mpls_bucket_drop),
+            "24001/neos LB 1 buckets via: DROP");
 
     /*
      * add back the path with the valid label
@@ -7707,6 +7787,12 @@ lfib_test (void)
      * A recursive via a label that does not exist
      */
     fib_test_lb_bucket_t bucket_drop = {
+       .type = FT_LB_SPECIAL,
+       .special = {
+           .adj = DPO_PROTO_IP4,
+       },
+    };
+    fib_test_lb_bucket_t mpls_bucket_drop = {
        .type = FT_LB_SPECIAL,
        .special = {
            .adj = DPO_PROTO_MPLS,
@@ -7735,7 +7821,12 @@ lfib_test (void)
                                     FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
                                     1,
                                     &bucket_drop),
-            "2.2.2.4/32 LB 1 buckets via: ip4-DROP");
+            "1200/neos LB 1 buckets via: ip4-DROP");
+    FIB_TEST(fib_test_validate_entry(lfe,
+                                    FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+                                    1,
+                                    &mpls_bucket_drop),
+            "1200/neos LB 1 buckets via: mpls-DROP");
 
     fib_table_entry_delete(fib_index, &pfx_2_2_2_4_s_32, FIB_SOURCE_API);
 
@@ -7940,18 +8031,19 @@ fib_test (vlib_main_t * vm,
     }
     else
     {
-        /*
-         * These walk UT aren't run as part of the full suite, since the
-         * fib-walk process must be disabled in order for the tests to work
-         *
-         * fib_test_walk();
-         */
        res += fib_test_v4();
        res += fib_test_v6();
        res += fib_test_ae();
        res += fib_test_bfd();
        res += fib_test_label();
        res += lfib_test();
+
+        /*
+         * fib-walk process must be disabled in order for the walk tests to work
+         */
+        fib_walk_process_disable();
+        res += fib_test_walk();
+        fib_walk_process_enable();
     }
 
     if (res)
index 938f7b8..c570476 100644 (file)
@@ -95,11 +95,6 @@ typedef struct fib_walk_t_
  */
 static fib_walk_t *fib_walk_pool;
 
-/**
- * @brief There's only one event type sent to the walk process
- */
-#define FIB_WALK_EVENT 0
-
 /**
  * Statistics maintained per-walk queue
  */
@@ -240,10 +235,13 @@ fib_walk_queue_get_front (fib_walk_priority_t prio)
 }
 
 static void
-fib_walk_destroy (fib_walk_t *fwalk)
+fib_walk_destroy (index_t fwi)
 {
+    fib_walk_t *fwalk;
     u32 bucket, ii;
 
+    fwalk = fib_walk_get(fwi);
+
     if (FIB_NODE_INDEX_INVALID != fwalk->fw_prio_sibling)
     {
        fib_node_list_elt_remove(fwalk->fw_prio_sibling);
@@ -252,6 +250,12 @@ fib_walk_destroy (fib_walk_t *fwalk)
                          fwalk->fw_parent.fnp_index,
                          fwalk->fw_dep_sibling);
 
+    /*
+     * refetch the walk object. More walks could have been spawned as a result
+     * of releasing the lock on the parent.
+     */
+    fwalk = fib_walk_get(fwi);
+
     /*
      * add the stats to the continuous histogram collection.
      */
@@ -466,8 +470,7 @@ fib_walk_process_queues (vlib_main_t * vm,
             */
            if (FIB_WALK_ADVANCE_MORE != rc)
            {
-               fwalk = fib_walk_get(fwi);
-               fib_walk_destroy(fwalk);
+                fib_walk_destroy(fwi);
                fib_walk_queues.fwqs_queues[prio].fwq_stats[FIB_WALK_COMPLETED]++;
            }
            else
@@ -510,6 +513,16 @@ that_will_do_for_now:
     return (fib_walk_sleep_duration[sleep]);
 }
 
+/**
+ * Events sent to the FIB walk process
+ */
+typedef enum fib_walk_process_event_t_
+{
+    FIB_WALK_PROCESS_EVENT_DATA,
+    FIB_WALK_PROCESS_EVENT_ENABLE,
+    FIB_WALK_PROCESS_EVENT_DISABLE,
+} fib_walk_process_event;
+
 /**
  * @brief The 'fib-walk' process's main loop.
  */
@@ -518,22 +531,47 @@ fib_walk_process (vlib_main_t * vm,
                  vlib_node_runtime_t * node,
                  vlib_frame_t * f)
 {
+    uword event_type, *event_data = 0;
     f64 sleep_time;
+    int enabled;
 
+    enabled = 1;
     sleep_time = fib_walk_sleep_duration[FIB_WALK_SHORT_SLEEP];
 
     while (1)
     {
-       vlib_process_wait_for_event_or_clock(vm, sleep_time);
+        /*
+         * the feature to disable/enable this walk process is only
+         * for testing purposes
+         */
+        if (enabled)
+        {
+            vlib_process_wait_for_event_or_clock(vm, sleep_time);
+        }
+        else
+        {
+            vlib_process_wait_for_event(vm);
+        }
 
-       /*
-        * there may be lots of event queued between the processes,
-        * but the walks we want to schedule are in the priority queues,
-        * so we ignore the process events.
-        */
-       vlib_process_get_events(vm, NULL);
+        event_type = vlib_process_get_events(vm, &event_data);
+        vec_reset_length(event_data);
+
+        switch (event_type)
+       {
+       case FIB_WALK_PROCESS_EVENT_ENABLE:
+            enabled = 1;
+            break;
+       case FIB_WALK_PROCESS_EVENT_DISABLE:
+            enabled = 0;
+            break;
+       default:
+            break;
+       }
 
-       sleep_time = fib_walk_process_queues(vm, quota);
+        if (enabled)
+        {
+            sleep_time = fib_walk_process_queues(vm, quota);
+        }
     }
 
     /*
@@ -610,8 +648,8 @@ fib_walk_prio_queue_enquue (fib_walk_priority_t prio,
      */
     vlib_process_signal_event(vlib_get_main(),
                              fib_walk_process_node.index,
-                             FIB_WALK_EVENT,
-                             FIB_WALK_EVENT);
+                             FIB_WALK_PROCESS_EVENT_DATA,
+                             0);
 
     return (sibling);
 }
@@ -742,7 +780,7 @@ fib_walk_sync (fib_node_type_t parent_type,
            ASSERT(FIB_NODE_INDEX_INVALID != merged_walk.fnp_index);
            ASSERT(FIB_NODE_TYPE_WALK == merged_walk.fnp_type);
 
-           fib_walk_destroy(fwalk);
+           fib_walk_destroy(fwi);
 
            fwi = merged_walk.fnp_index;
            fwalk = fib_walk_get(fwi);
@@ -774,7 +812,7 @@ fib_walk_sync (fib_node_type_t parent_type,
 
     if (NULL != fwalk)
     {
-       fib_walk_destroy(fwalk);
+       fib_walk_destroy(fwi);
     }
 }
 
@@ -1106,3 +1144,47 @@ VLIB_CLI_COMMAND (fib_walk_clear_command, static) = {
     .short_help = "clear fib walk",
     .function = fib_walk_clear,
 };
+
+void
+fib_walk_process_enable (void)
+{
+    vlib_process_signal_event(vlib_get_main(),
+                              fib_walk_process_node.index,
+                              FIB_WALK_PROCESS_EVENT_ENABLE,
+                              0);
+}
+
+void
+fib_walk_process_disable (void)
+{
+    vlib_process_signal_event(vlib_get_main(),
+                              fib_walk_process_node.index,
+                              FIB_WALK_PROCESS_EVENT_DISABLE,
+                              0);
+}
+
+static clib_error_t *
+fib_walk_process_enable_disable (vlib_main_t * vm,
+                                 unformat_input_t * input,
+                                 vlib_cli_command_t * cmd)
+{
+    if (unformat (input, "enable"))
+    {
+        fib_walk_process_enable();
+    }
+    else if (unformat (input, "disable"))
+    {
+        fib_walk_process_disable();
+    }
+    else
+    {
+        return clib_error_return(0, "choose enable or disable");
+    }
+    return (NULL);
+}
+
+VLIB_CLI_COMMAND (fib_walk_process_command, static) = {
+    .path = "test fib-walk-process",
+    .short_help = "test fib-walk-process [enable|disable]",
+    .function = fib_walk_process_enable_disable,
+};
index 7413d8a..fdf2f10 100644 (file)
@@ -54,5 +54,8 @@ extern void fib_walk_sync(fib_node_type_t parent_type,
 
 extern u8* format_fib_walk_priority(u8 *s, va_list ap);
 
+extern void fib_walk_process_enable(void);
+extern void fib_walk_process_disable(void);
+
 #endif
 
index 0f56203..697d216 100644 (file)
@@ -49,6 +49,7 @@
 #include <vnet/fib/fib_urpf_list.h>    /* for FIB uRPF check */
 #include <vnet/fib/ip4_fib.h>
 #include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
 #include <vnet/dpo/classify_dpo.h>
 #include <vnet/mfib/mfib_table.h>      /* for mFIB table and entry creation */
 
@@ -89,7 +90,6 @@ ip4_lookup_inline (vlib_main_t * vm,
        {
          vlib_buffer_t *p0, *p1, *p2, *p3;
          ip4_header_t *ip0, *ip1, *ip2, *ip3;
-         __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
          ip_lookup_next_t next0, next1, next2, next3;
          const load_balance_t *lb0, *lb1, *lb2, *lb3;
          ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
@@ -188,11 +188,6 @@ ip4_lookup_inline (vlib_main_t * vm,
              leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
            }
 
-         tcp0 = (void *) (ip0 + 1);
-         tcp1 = (void *) (ip1 + 1);
-         tcp2 = (void *) (ip2 + 1);
-         tcp3 = (void *) (ip3 + 1);
-
          if (!lookup_for_responses_to_locally_received_packets)
            {
              leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
@@ -230,6 +225,15 @@ ip4_lookup_inline (vlib_main_t * vm,
          lb2 = load_balance_get (lb_index2);
          lb3 = load_balance_get (lb_index3);
 
+         ASSERT (lb0->lb_n_buckets > 0);
+         ASSERT (is_pow2 (lb0->lb_n_buckets));
+         ASSERT (lb1->lb_n_buckets > 0);
+         ASSERT (is_pow2 (lb1->lb_n_buckets));
+         ASSERT (lb2->lb_n_buckets > 0);
+         ASSERT (is_pow2 (lb2->lb_n_buckets));
+         ASSERT (lb3->lb_n_buckets > 0);
+         ASSERT (is_pow2 (lb3->lb_n_buckets));
+
          /* Use flow hash to compute multipath adjacency. */
          hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
          hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
@@ -240,47 +244,57 @@ ip4_lookup_inline (vlib_main_t * vm,
              flow_hash_config0 = lb0->lb_hash_config;
              hash_c0 = vnet_buffer (p0)->ip.flow_hash =
                ip4_compute_flow_hash (ip0, flow_hash_config0);
+             dpo0 =
+               load_balance_get_fwd_bucket (lb0,
+                                            (hash_c0 &
+                                             (lb0->lb_n_buckets_minus_1)));
+           }
+         else
+           {
+             dpo0 = load_balance_get_bucket_i (lb0, 0);
            }
          if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
            {
              flow_hash_config1 = lb1->lb_hash_config;
              hash_c1 = vnet_buffer (p1)->ip.flow_hash =
                ip4_compute_flow_hash (ip1, flow_hash_config1);
+             dpo1 =
+               load_balance_get_fwd_bucket (lb1,
+                                            (hash_c1 &
+                                             (lb1->lb_n_buckets_minus_1)));
+           }
+         else
+           {
+             dpo1 = load_balance_get_bucket_i (lb1, 0);
            }
          if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
            {
              flow_hash_config2 = lb2->lb_hash_config;
              hash_c2 = vnet_buffer (p2)->ip.flow_hash =
                ip4_compute_flow_hash (ip2, flow_hash_config2);
+             dpo2 =
+               load_balance_get_fwd_bucket (lb2,
+                                            (hash_c2 &
+                                             (lb2->lb_n_buckets_minus_1)));
+           }
+         else
+           {
+             dpo2 = load_balance_get_bucket_i (lb2, 0);
            }
          if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
            {
              flow_hash_config3 = lb3->lb_hash_config;
              hash_c3 = vnet_buffer (p3)->ip.flow_hash =
                ip4_compute_flow_hash (ip3, flow_hash_config3);
+             dpo3 =
+               load_balance_get_fwd_bucket (lb3,
+                                            (hash_c3 &
+                                             (lb3->lb_n_buckets_minus_1)));
+           }
+         else
+           {
+             dpo3 = load_balance_get_bucket_i (lb3, 0);
            }
-
-         ASSERT (lb0->lb_n_buckets > 0);
-         ASSERT (is_pow2 (lb0->lb_n_buckets));
-         ASSERT (lb1->lb_n_buckets > 0);
-         ASSERT (is_pow2 (lb1->lb_n_buckets));
-         ASSERT (lb2->lb_n_buckets > 0);
-         ASSERT (is_pow2 (lb2->lb_n_buckets));
-         ASSERT (lb3->lb_n_buckets > 0);
-         ASSERT (is_pow2 (lb3->lb_n_buckets));
-
-         dpo0 = load_balance_get_bucket_i (lb0,
-                                           (hash_c0 &
-                                            (lb0->lb_n_buckets_minus_1)));
-         dpo1 = load_balance_get_bucket_i (lb1,
-                                           (hash_c1 &
-                                            (lb1->lb_n_buckets_minus_1)));
-         dpo2 = load_balance_get_bucket_i (lb2,
-                                           (hash_c2 &
-                                            (lb2->lb_n_buckets_minus_1)));
-         dpo3 = load_balance_get_bucket_i (lb3,
-                                           (hash_c3 &
-                                            (lb3->lb_n_buckets_minus_1)));
 
          next0 = dpo0->dpoi_next_node;
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
@@ -293,20 +307,16 @@ ip4_lookup_inline (vlib_main_t * vm,
 
          vlib_increment_combined_counter
            (cm, thread_index, lb_index0, 1,
-            vlib_buffer_length_in_chain (vm, p0)
-            + sizeof (ethernet_header_t));
+            vlib_buffer_length_in_chain (vm, p0));
          vlib_increment_combined_counter
            (cm, thread_index, lb_index1, 1,
-            vlib_buffer_length_in_chain (vm, p1)
-            + sizeof (ethernet_header_t));
+            vlib_buffer_length_in_chain (vm, p1));
          vlib_increment_combined_counter
            (cm, thread_index, lb_index2, 1,
-            vlib_buffer_length_in_chain (vm, p2)
-            + sizeof (ethernet_header_t));
+            vlib_buffer_length_in_chain (vm, p2));
          vlib_increment_combined_counter
            (cm, thread_index, lb_index3, 1,
-            vlib_buffer_length_in_chain (vm, p3)
-            + sizeof (ethernet_header_t));
+            vlib_buffer_length_in_chain (vm, p3));
 
          vlib_validate_buffer_enqueue_x4 (vm, node, next,
                                           to_next, n_left_to_next,
@@ -318,7 +328,6 @@ ip4_lookup_inline (vlib_main_t * vm,
        {
          vlib_buffer_t *p0;
          ip4_header_t *ip0;
-         __attribute__ ((unused)) tcp_header_t *tcp0;
          ip_lookup_next_t next0;
          const load_balance_t *lb0;
          ip4_fib_mtrie_t *mtrie0;
@@ -352,8 +361,6 @@ ip4_lookup_inline (vlib_main_t * vm,
              leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
            }
 
-         tcp0 = (void *) (ip0 + 1);
-
          if (!lookup_for_responses_to_locally_received_packets)
            leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
 
@@ -371,6 +378,9 @@ ip4_lookup_inline (vlib_main_t * vm,
          ASSERT (lbi0);
          lb0 = load_balance_get (lbi0);
 
+         ASSERT (lb0->lb_n_buckets > 0);
+         ASSERT (is_pow2 (lb0->lb_n_buckets));
+
          /* Use flow hash to compute multipath adjacency. */
          hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
          if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
@@ -379,20 +389,22 @@ ip4_lookup_inline (vlib_main_t * vm,
 
              hash_c0 = vnet_buffer (p0)->ip.flow_hash =
                ip4_compute_flow_hash (ip0, flow_hash_config0);
+             dpo0 =
+               load_balance_get_fwd_bucket (lb0,
+                                            (hash_c0 &
+                                             (lb0->lb_n_buckets_minus_1)));
+           }
+         else
+           {
+             dpo0 = load_balance_get_bucket_i (lb0, 0);
            }
-
-         ASSERT (lb0->lb_n_buckets > 0);
-         ASSERT (is_pow2 (lb0->lb_n_buckets));
-
-         dpo0 = load_balance_get_bucket_i (lb0,
-                                           (hash_c0 &
-                                            (lb0->lb_n_buckets_minus_1)));
 
          next0 = dpo0->dpoi_next_node;
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
-         vlib_increment_combined_counter
-           (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+         vlib_increment_combined_counter (cm, thread_index, lbi0, 1,
+                                          vlib_buffer_length_in_chain (vm,
+                                                                       p0));
 
          from += 1;
          to_next += 1;
@@ -555,6 +567,12 @@ ip4_load_balance (vlib_main_t * vm,
                  hc0 = vnet_buffer (p0)->ip.flow_hash =
                    ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
                }
+             dpo0 = load_balance_get_fwd_bucket
+               (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
+           }
+         else
+           {
+             dpo0 = load_balance_get_bucket_i (lb0, 0);
            }
          if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
            {
@@ -568,14 +586,13 @@ ip4_load_balance (vlib_main_t * vm,
                  hc1 = vnet_buffer (p1)->ip.flow_hash =
                    ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
                }
+             dpo1 = load_balance_get_fwd_bucket
+               (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
+           }
+         else
+           {
+             dpo1 = load_balance_get_bucket_i (lb1, 0);
            }
-
-         dpo0 =
-           load_balance_get_bucket_i (lb0,
-                                      hc0 & (lb0->lb_n_buckets_minus_1));
-         dpo1 =
-           load_balance_get_bucket_i (lb1,
-                                      hc1 & (lb1->lb_n_buckets_minus_1));
 
          next0 = dpo0->dpoi_next_node;
          next1 = dpo1->dpoi_next_node;
@@ -629,11 +646,13 @@ ip4_load_balance (vlib_main_t * vm,
                  hc0 = vnet_buffer (p0)->ip.flow_hash =
                    ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
                }
+             dpo0 = load_balance_get_fwd_bucket
+               (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
+           }
+         else
+           {
+             dpo0 = load_balance_get_bucket_i (lb0, 0);
            }
-
-         dpo0 =
-           load_balance_get_bucket_i (lb0,
-                                      hc0 & (lb0->lb_n_buckets_minus_1));
 
          next0 = dpo0->dpoi_next_node;
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
index 98bfd4d..3bc07d0 100644 (file)
@@ -45,7 +45,7 @@
 #include <vnet/fib/fib_urpf_list.h>    /* for FIB uRPF check */
 #include <vnet/fib/ip6_fib.h>
 #include <vnet/mfib/ip6_mfib.h>
-#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
 #include <vnet/dpo/classify_dpo.h>
 
 #include <vppinfra/bihash_template.c>
@@ -138,6 +138,10 @@ ip6_lookup_inline (vlib_main_t * vm,
 
          lb0 = load_balance_get (lbi0);
          lb1 = load_balance_get (lbi1);
+         ASSERT (lb0->lb_n_buckets > 0);
+         ASSERT (lb1->lb_n_buckets > 0);
+         ASSERT (is_pow2 (lb0->lb_n_buckets));
+         ASSERT (is_pow2 (lb1->lb_n_buckets));
 
          vnet_buffer (p0)->ip.flow_hash = vnet_buffer (p1)->ip.flow_hash = 0;
 
@@ -146,25 +150,29 @@ ip6_lookup_inline (vlib_main_t * vm,
              flow_hash_config0 = lb0->lb_hash_config;
              vnet_buffer (p0)->ip.flow_hash =
                ip6_compute_flow_hash (ip0, flow_hash_config0);
+             dpo0 =
+               load_balance_get_fwd_bucket (lb0,
+                                            (vnet_buffer (p0)->ip.flow_hash &
+                                             (lb0->lb_n_buckets_minus_1)));
+           }
+         else
+           {
+             dpo0 = load_balance_get_bucket_i (lb0, 0);
            }
          if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
            {
              flow_hash_config1 = lb1->lb_hash_config;
              vnet_buffer (p1)->ip.flow_hash =
                ip6_compute_flow_hash (ip1, flow_hash_config1);
+             dpo1 =
+               load_balance_get_fwd_bucket (lb1,
+                                            (vnet_buffer (p1)->ip.flow_hash &
+                                             (lb1->lb_n_buckets_minus_1)));
+           }
+         else
+           {
+             dpo1 = load_balance_get_bucket_i (lb1, 0);
            }
-
-         ASSERT (lb0->lb_n_buckets > 0);
-         ASSERT (lb1->lb_n_buckets > 0);
-         ASSERT (is_pow2 (lb0->lb_n_buckets));
-         ASSERT (is_pow2 (lb1->lb_n_buckets));
-         dpo0 = load_balance_get_bucket_i (lb0,
-                                           (vnet_buffer (p0)->ip.flow_hash &
-                                            lb0->lb_n_buckets_minus_1));
-         dpo1 = load_balance_get_bucket_i (lb1,
-                                           (vnet_buffer (p1)->ip.flow_hash &
-                                            lb1->lb_n_buckets_minus_1));
-
          next0 = dpo0->dpoi_next_node;
          next1 = dpo1->dpoi_next_node;
 
@@ -266,16 +274,24 @@ ip6_lookup_inline (vlib_main_t * vm,
          lb0 = load_balance_get (lbi0);
 
          vnet_buffer (p0)->ip.flow_hash = 0;
+         ASSERT (lb0->lb_n_buckets > 0);
+         ASSERT (is_pow2 (lb0->lb_n_buckets));
 
          if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
            {
              flow_hash_config0 = lb0->lb_hash_config;
              vnet_buffer (p0)->ip.flow_hash =
                ip6_compute_flow_hash (ip0, flow_hash_config0);
+             dpo0 =
+               load_balance_get_fwd_bucket (lb0,
+                                            (vnet_buffer (p0)->ip.flow_hash &
+                                             (lb0->lb_n_buckets_minus_1)));
+           }
+         else
+           {
+             dpo0 = load_balance_get_bucket_i (lb0, 0);
            }
 
-         ASSERT (lb0->lb_n_buckets > 0);
-         ASSERT (is_pow2 (lb0->lb_n_buckets));
          dpo0 = load_balance_get_bucket_i (lb0,
                                            (vnet_buffer (p0)->ip.flow_hash &
                                             lb0->lb_n_buckets_minus_1));
@@ -337,10 +353,18 @@ ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
     {
       fib_node_index_t fei;
 
-      fei = fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), FIB_PROTOCOL_IP6, NULL,      /* No next-hop address */
-                                            sw_if_index, ~0,   // invalid FIB index
-                                            1, NULL,   // no label stack
-                                            FIB_ROUTE_PATH_FLAG_NONE);
+      fei = fib_table_entry_update_one_path (fib_index,
+                                            &pfx,
+                                            FIB_SOURCE_INTERFACE,
+                                            (FIB_ENTRY_FLAG_CONNECTED |
+                                             FIB_ENTRY_FLAG_ATTACHED),
+                                            FIB_PROTOCOL_IP6,
+                                            /* No next-hop address */
+                                            NULL, sw_if_index,
+                                            /* invalid FIB index */
+                                            ~0, 1,
+                                            /* no label stack */
+                                            NULL, FIB_ROUTE_PATH_FLAG_NONE);
       a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
     }
 
@@ -366,7 +390,13 @@ ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
        }
     }
 
-  fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), FIB_PROTOCOL_IP6, &pfx.fp_addr, sw_if_index, ~0,  // invalid FIB index
+  fib_table_entry_update_one_path (fib_index, &pfx,
+                                  FIB_SOURCE_INTERFACE,
+                                  (FIB_ENTRY_FLAG_CONNECTED |
+                                   FIB_ENTRY_FLAG_LOCAL),
+                                  FIB_PROTOCOL_IP6,
+                                  &pfx.fp_addr,
+                                  sw_if_index, ~0,
                                   1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
 }
 
@@ -780,6 +810,14 @@ ip6_load_balance (vlib_main_t * vm,
                  hc0 = vnet_buffer (p0)->ip.flow_hash =
                    ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
                }
+             dpo0 =
+               load_balance_get_fwd_bucket (lb0,
+                                            (hc0 &
+                                             lb0->lb_n_buckets_minus_1));
+           }
+         else
+           {
+             dpo0 = load_balance_get_bucket_i (lb0, 0);
            }
          if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
            {
@@ -793,14 +831,15 @@ ip6_load_balance (vlib_main_t * vm,
                  hc1 = vnet_buffer (p1)->ip.flow_hash =
                    ip6_compute_flow_hash (ip1, lb1->lb_hash_config);
                }
+             dpo1 =
+               load_balance_get_fwd_bucket (lb1,
+                                            (hc1 &
+                                             lb1->lb_n_buckets_minus_1));
+           }
+         else
+           {
+             dpo1 = load_balance_get_bucket_i (lb1, 0);
            }
-
-         dpo0 =
-           load_balance_get_bucket_i (lb0,
-                                      hc0 & (lb0->lb_n_buckets_minus_1));
-         dpo1 =
-           load_balance_get_bucket_i (lb1,
-                                      hc1 & (lb1->lb_n_buckets_minus_1));
 
          next0 = dpo0->dpoi_next_node;
          next1 = dpo1->dpoi_next_node;
@@ -869,10 +908,15 @@ ip6_load_balance (vlib_main_t * vm,
                  hc0 = vnet_buffer (p0)->ip.flow_hash =
                    ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
                }
+             dpo0 =
+               load_balance_get_fwd_bucket (lb0,
+                                            (hc0 &
+                                             lb0->lb_n_buckets_minus_1));
+           }
+         else
+           {
+             dpo0 = load_balance_get_bucket_i (lb0, 0);
            }
-         dpo0 =
-           load_balance_get_bucket_i (lb0,
-                                      hc0 & (lb0->lb_n_buckets_minus_1));
 
          next0 = dpo0->dpoi_next_node;
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
index 3118277..ee80ee3 100644 (file)
@@ -630,7 +630,7 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
          n->fib_entry_index =
            fib_table_entry_update_one_path (fib_index, &pfx,
                                             FIB_SOURCE_ADJ,
-                                            FIB_ENTRY_FLAG_NONE,
+                                            FIB_ENTRY_FLAG_ATTACHED,
                                             FIB_PROTOCOL_IP6, &pfx.fp_addr,
                                             n->key.sw_if_index, ~0, 1, NULL,
                                             FIB_ROUTE_PATH_FLAG_NONE);
index 3c6be7e..4b8a3ee 100644 (file)
@@ -19,7 +19,7 @@
 #include <vnet/pg/pg.h>
 #include <vnet/mpls/mpls.h>
 #include <vnet/fib/mpls_fib.h>
-#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
 #include <vnet/dpo/replicate_dpo.h>
 
 /**
@@ -47,7 +47,7 @@ format_mpls_lookup_trace (u8 * s, va_list * args)
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   mpls_lookup_trace_t * t = va_arg (*args, mpls_lookup_trace_t *);
 
-  s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d hash %d"
+  s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d hash %#x "
               "label %d eos %d", 
               t->next_index, t->lfib_index, t->lb_index, t->hash,
               vnet_mpls_uc_get_label(
@@ -64,8 +64,15 @@ always_inline u32
 mpls_compute_flow_hash (const mpls_unicast_header_t * hdr,
                         flow_hash_config_t flow_hash_config)
 {
-    // FIXME
-    return (vnet_mpls_uc_get_label(hdr->label_exp_s_ttl));
+    /*
+     * TODO: improve this hash to include:
+     *  - all labels in the stack.
+     *  - recognise entropy labels.
+     *
+     * We need to byte swap so we use the numerical value, i.e. an odd label
+     * leads to an odd bucket, as opposed to a label above or below some value X.
+     */
+    return (vnet_mpls_uc_get_label(clib_net_to_host_u32(hdr->label_exp_s_ttl)));
 }
 
 static inline uword
@@ -179,17 +186,21 @@ mpls_lookup (vlib_main_t * vm,
           else
           {
               lb0 = load_balance_get(lbi0);
+              ASSERT (lb0->lb_n_buckets > 0);
+              ASSERT (is_pow2 (lb0->lb_n_buckets));
 
               if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
               {
                   hash_c0 = vnet_buffer (b0)->ip.flow_hash =
                       mpls_compute_flow_hash(h0, lb0->lb_hash_config);
+                  dpo0 = load_balance_get_fwd_bucket
+                      (lb0,
+                       (hash_c0 & (lb0->lb_n_buckets_minus_1)));
+              }
+              else
+              {
+                  dpo0 = load_balance_get_bucket_i (lb0, 0);
               }
-              ASSERT (lb0->lb_n_buckets > 0);
-              ASSERT (is_pow2 (lb0->lb_n_buckets));
-              dpo0 = load_balance_get_bucket_i(lb0,
-                                               (hash_c0 &
-                                                (lb0->lb_n_buckets_minus_1)));
               next0 = dpo0->dpoi_next_node;
 
               vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
@@ -207,17 +218,21 @@ mpls_lookup (vlib_main_t * vm,
           else
           {
               lb1 = load_balance_get(lbi1);
+              ASSERT (lb1->lb_n_buckets > 0);
+              ASSERT (is_pow2 (lb1->lb_n_buckets));
 
               if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
               {
                   hash_c1 = vnet_buffer (b1)->ip.flow_hash =
                       mpls_compute_flow_hash(h1, lb1->lb_hash_config);
+                  dpo1 = load_balance_get_fwd_bucket
+                      (lb1,
+                       (hash_c1 & (lb1->lb_n_buckets_minus_1)));
+              }
+              else
+              {
+                  dpo1 = load_balance_get_bucket_i (lb1, 0);
               }
-              ASSERT (lb1->lb_n_buckets > 0);
-              ASSERT (is_pow2 (lb1->lb_n_buckets));
-              dpo1 = load_balance_get_bucket_i(lb1,
-                                               (hash_c1 &
-                                                (lb1->lb_n_buckets_minus_1)));
               next1 = dpo1->dpoi_next_node;
 
               vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
@@ -235,17 +250,21 @@ mpls_lookup (vlib_main_t * vm,
           else
           {
               lb2 = load_balance_get(lbi2);
+              ASSERT (lb2->lb_n_buckets > 0);
+              ASSERT (is_pow2 (lb2->lb_n_buckets));
 
               if (PREDICT_FALSE(lb2->lb_n_buckets > 1))
               {
                   hash_c2 = vnet_buffer (b2)->ip.flow_hash =
                       mpls_compute_flow_hash(h2, lb2->lb_hash_config);
+                  dpo2 = load_balance_get_fwd_bucket
+                      (lb2,
+                       (hash_c2 & (lb2->lb_n_buckets_minus_1)));
+              }
+              else
+              {
+                  dpo2 = load_balance_get_bucket_i (lb2, 0);
               }
-              ASSERT (lb2->lb_n_buckets > 0);
-              ASSERT (is_pow2 (lb2->lb_n_buckets));
-              dpo2 = load_balance_get_bucket_i(lb2,
-                                               (hash_c2 &
-                                                (lb2->lb_n_buckets_minus_1)));
               next2 = dpo2->dpoi_next_node;
 
               vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
@@ -263,17 +282,21 @@ mpls_lookup (vlib_main_t * vm,
           else
           {
               lb3 = load_balance_get(lbi3);
+              ASSERT (lb3->lb_n_buckets > 0);
+              ASSERT (is_pow2 (lb3->lb_n_buckets));
 
               if (PREDICT_FALSE(lb3->lb_n_buckets > 1))
               {
                   hash_c3 = vnet_buffer (b3)->ip.flow_hash =
                       mpls_compute_flow_hash(h3, lb3->lb_hash_config);
+                  dpo3 = load_balance_get_fwd_bucket
+                      (lb3,
+                       (hash_c3 & (lb3->lb_n_buckets_minus_1)));
+              }
+              else
+              {
+                  dpo3 = load_balance_get_bucket_i (lb3, 0);
               }
-              ASSERT (lb3->lb_n_buckets > 0);
-              ASSERT (is_pow2 (lb3->lb_n_buckets));
-              dpo3 = load_balance_get_bucket_i(lb3,
-                                               (hash_c3 &
-                                                (lb3->lb_n_buckets_minus_1)));
               next3 = dpo3->dpoi_next_node;
 
               vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
@@ -393,20 +416,21 @@ mpls_lookup (vlib_main_t * vm,
           else
           {
               lb0 = load_balance_get(lbi0);
+              ASSERT (lb0->lb_n_buckets > 0);
+              ASSERT (is_pow2 (lb0->lb_n_buckets));
 
               if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
               {
                   hash_c0 = vnet_buffer (b0)->ip.flow_hash =
                       mpls_compute_flow_hash(h0, lb0->lb_hash_config);
+                  dpo0 = load_balance_get_fwd_bucket
+                      (lb0,
+                       (hash_c0 & (lb0->lb_n_buckets_minus_1)));
+              }
+              else
+              {
+                  dpo0 = load_balance_get_bucket_i (lb0, 0);
               }
-
-              ASSERT (lb0->lb_n_buckets > 0);
-              ASSERT (is_pow2 (lb0->lb_n_buckets));
-
-              dpo0 = load_balance_get_bucket_i(lb0,
-                                               (hash_c0 &
-                                                (lb0->lb_n_buckets_minus_1)));
-
               next0 = dpo0->dpoi_next_node;
               vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
@@ -467,7 +491,7 @@ VLIB_REGISTER_NODE (mpls_lookup_node, static) = {
   .n_errors = MPLS_N_ERROR,
   .error_strings = mpls_error_strings,
 
-  .sibling_of = "ip4-lookup",
+  .sibling_of = "mpls-load-balance",
 
   .format_buffer = format_mpls_header,
   .format_trace = format_mpls_lookup_trace,
@@ -574,6 +598,11 @@ mpls_load_balance (vlib_main_t * vm,
               {
                   hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
               }
+              dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1));
+          }
+          else
+          {
+              dpo0 = load_balance_get_bucket_i (lb0, 0);
           }
           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
           {
@@ -585,10 +614,12 @@ mpls_load_balance (vlib_main_t * vm,
               {
                   hc1 = vnet_buffer(p1)->ip.flow_hash = mpls_compute_flow_hash(mpls1, hc1);
               }
+              dpo1 = load_balance_get_fwd_bucket(lb1, (hc1 & lb1->lb_n_buckets_minus_1));
+          }
+          else
+          {
+              dpo1 = load_balance_get_bucket_i (lb1, 0);
           }
-
-          dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1));
-          dpo1 = load_balance_get_bucket_i(lb1, hc1 & (lb1->lb_n_buckets_minus_1));
 
           next0 = dpo0->dpoi_next_node;
           next1 = dpo1->dpoi_next_node;
@@ -650,9 +681,12 @@ mpls_load_balance (vlib_main_t * vm,
               {
                   hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
               }
+               dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1));
+          }
+          else
+          {
+              dpo0 = load_balance_get_bucket_i (lb0, 0);
           }
-
-          dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1));
 
           next0 = dpo0->dpoi_next_node;
           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
@@ -676,9 +710,13 @@ VLIB_REGISTER_NODE (mpls_load_balance_node) = {
   .function = mpls_load_balance,
   .name = "mpls-load-balance",
   .vector_size = sizeof (u32),
-  .sibling_of = "mpls-lookup",
-
   .format_trace = format_mpls_load_balance_trace,
+  .n_next_nodes = 1,
+  .next_nodes =
+  {
+      [0] = "mpls-drop",
+  },
+
 };
 
 VLIB_NODE_FUNCTION_MULTIARCH (mpls_load_balance_node, mpls_load_balance)
index 700b709..0ad1ee6 100644 (file)
@@ -1054,5 +1054,364 @@ class TestMPLSDisabled(VppTestCase):
         self.send_and_assert_no_replies(self.pg1, tx, "IPv6 disabled")
 
 
+class TestMPLSPIC(VppTestCase):
+    """ MPLS PIC edge convergence """
+
+    def setUp(self):
+        super(TestMPLSPIC, self).setUp()
+
+        # create 2 pg interfaces
+        self.create_pg_interfaces(range(4))
+
+        # core links
+        self.pg0.admin_up()
+        self.pg0.config_ip4()
+        self.pg0.resolve_arp()
+        self.pg0.enable_mpls()
+        self.pg1.admin_up()
+        self.pg1.config_ip4()
+        self.pg1.resolve_arp()
+        self.pg1.enable_mpls()
+
+        # VRF (customer facing) link
+        self.pg2.admin_up()
+        self.pg2.set_table_ip4(1)
+        self.pg2.config_ip4()
+        self.pg2.resolve_arp()
+        self.pg2.set_table_ip6(1)
+        self.pg2.config_ip6()
+        self.pg2.resolve_ndp()
+        self.pg3.admin_up()
+        self.pg3.set_table_ip4(1)
+        self.pg3.config_ip4()
+        self.pg3.resolve_arp()
+        self.pg3.set_table_ip6(1)
+        self.pg3.config_ip6()
+        self.pg3.resolve_ndp()
+
+    def tearDown(self):
+        super(TestMPLSPIC, self).tearDown()
+        self.pg0.disable_mpls()
+        for i in self.pg_interfaces:
+            i.unconfig_ip4()
+            i.unconfig_ip6()
+            i.set_table_ip4(0)
+            i.set_table_ip6(0)
+            i.admin_down()
+
+    def test_mpls_ibgp_pic(self):
+        """ MPLS iBGP PIC edge convergence
+
+        1) setup many iBGP VPN routes via a pair of iBGP peers.
+        2) Check ECMP forwarding to these peers
+        3) withdraw the IGP route to one of these peers.
+        4) check forwarding continues to the remaining peer
+        """
+
+        #
+        # IGP+LDP core routes
+        #
+        core_10_0_0_45 = VppIpRoute(self, "10.0.0.45", 32,
+                                    [VppRoutePath(self.pg0.remote_ip4,
+                                                  self.pg0.sw_if_index,
+                                                  labels=[45])])
+        core_10_0_0_45.add_vpp_config()
+
+        core_10_0_0_46 = VppIpRoute(self, "10.0.0.46", 32,
+                                    [VppRoutePath(self.pg1.remote_ip4,
+                                                  self.pg1.sw_if_index,
+                                                  labels=[46])])
+        core_10_0_0_46.add_vpp_config()
+
+        #
+        # Lots of VPN routes. We need more than 64 so VPP will build
+        # the fast convergence indirection
+        #
+        vpn_routes = []
+        pkts = []
+        for ii in range(64):
+            dst = "192.168.1.%d" % ii
+            vpn_routes.append(VppIpRoute(self, dst, 32,
+                                         [VppRoutePath("10.0.0.45",
+                                                       0xffffffff,
+                                                       labels=[145],
+                                                       is_resolve_host=1),
+                                          VppRoutePath("10.0.0.46",
+                                                       0xffffffff,
+                                                       labels=[146],
+                                                       is_resolve_host=1)],
+                                         table_id=1))
+            vpn_routes[ii].add_vpp_config()
+
+            pkts.append(Ether(dst=self.pg2.local_mac,
+                              src=self.pg2.remote_mac) /
+                        IP(src=self.pg2.remote_ip4, dst=dst) /
+                        UDP(sport=1234, dport=1234) /
+                        Raw('\xa5' * 100))
+
+        #
+        # Send the packet stream (one pkt to each VPN route)
+        #  - expect a 50-50 split of the traffic
+        #
+        self.pg2.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg0._get_capture(1)
+        rx1 = self.pg1._get_capture(1)
+
+        # not testing the LB hashing algorithm so we're not concerned
+        # with the split ratio, just as long as neither is 0
+        self.assertNotEqual(0, len(rx0))
+        self.assertNotEqual(0, len(rx1))
+
+        #
+        # use a test CLI command to stop the FIB walk process, this
+        # will prevent the FIB converging the VPN routes and thus allow
+        # us to probe the interim (post-fail, pre-converge) state
+        #
+        self.vapi.ppcli("test fib-walk-process disable")
+
+        #
+        # Withdraw one of the IGP routes
+        #
+        core_10_0_0_46.remove_vpp_config()
+
+        #
+        # now all packets should be forwarded through the remaining peer
+        #
+        self.vapi.ppcli("clear trace")
+        self.pg2.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg0.get_capture(len(pkts))
+
+        #
+        # enable the FIB walk process to converge the FIB
+        #
+        self.vapi.ppcli("test fib-walk-process enable")
+
+        #
+        # packets should still be forwarded through the remaining peer
+        #
+        self.pg2.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg0.get_capture(64)
+
+        #
+        # Add the IGP route back and we return to load-balancing
+        #
+        core_10_0_0_46.add_vpp_config()
+
+        self.pg2.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg0._get_capture(1)
+        rx1 = self.pg1._get_capture(1)
+        self.assertNotEqual(0, len(rx0))
+        self.assertNotEqual(0, len(rx1))
+
+    def test_mpls_ebgp_pic(self):
+        """ MPLS eBGP PIC edge convergence
+
+        1) setup many eBGP VPN routes via a pair of eBGP peers
+        2) Check ECMP forwarding to these peers
+        3) withdraw one eBGP path - expect LB across remaining eBGP
+        """
+
+        #
+        # Lots of VPN routes. We need more than 64 so VPP will build
+        # the fast convergence indirection
+        #
+        vpn_routes = []
+        vpn_bindings = []
+        pkts = []
+        for ii in range(64):
+            dst = "192.168.1.%d" % ii
+            local_label = 1600 + ii
+            vpn_routes.append(VppIpRoute(self, dst, 32,
+                                         [VppRoutePath(self.pg2.remote_ip4,
+                                                       0xffffffff,
+                                                       nh_table_id=1,
+                                                       is_resolve_attached=1),
+                                          VppRoutePath(self.pg3.remote_ip4,
+                                                       0xffffffff,
+                                                       nh_table_id=1,
+                                                       is_resolve_attached=1)],
+                                         table_id=1))
+            vpn_routes[ii].add_vpp_config()
+
+            vpn_bindings.append(VppMplsIpBind(self, local_label, dst, 32,
+                                              ip_table_id=1))
+            vpn_bindings[ii].add_vpp_config()
+
+            pkts.append(Ether(dst=self.pg0.local_mac,
+                              src=self.pg0.remote_mac) /
+                        MPLS(label=local_label, ttl=64) /
+                        IP(src=self.pg0.remote_ip4, dst=dst) /
+                        UDP(sport=1234, dport=1234) /
+                        Raw('\xa5' * 100))
+
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg2._get_capture(1)
+        rx1 = self.pg3._get_capture(1)
+        self.assertNotEqual(0, len(rx0))
+        self.assertNotEqual(0, len(rx1))
+
+        #
+        # use a test CLI command to stop the FIB walk process, this
+        # will prevent the FIB converging the VPN routes and thus allow
+        # us to probe the interim (post-fail, pre-converge) state
+        #
+        self.vapi.ppcli("test fib-walk-process disable")
+
+        #
+        # withdraw the connected prefix on the interface.
+        #
+        self.pg2.unconfig_ip4()
+
+        #
+        # now all packets should be forwarded through the remaining peer
+        #
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg3.get_capture(len(pkts))
+
+        #
+        # enable the FIB walk process to converge the FIB
+        #
+        self.vapi.ppcli("test fib-walk-process enable")
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg3.get_capture(len(pkts))
+
+        #
+        # put the connecteds back
+        #
+        self.pg2.config_ip4()
+
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg2._get_capture(1)
+        rx1 = self.pg3._get_capture(1)
+        self.assertNotEqual(0, len(rx0))
+        self.assertNotEqual(0, len(rx1))
+
+    def test_mpls_v6_ebgp_pic(self):
+        """ MPLSv6 eBGP PIC edge convergence
+
+        1) setup many eBGP VPNv6 routes via a pair of eBGP peers
+        2) Check EMCP forwarding to these peers
+        3) withdraw one eBGP path - expect LB across remaining eBGP
+        """
+
+        #
+        # Lots of VPN routes. We need more than 64 so VPP will build
+        # the fast convergence indirection
+        #
+        vpn_routes = []
+        vpn_bindings = []
+        pkts = []
+        for ii in range(64):
+            dst = "3000::%d" % ii
+            local_label = 1600 + ii
+            vpn_routes.append(VppIpRoute(self, dst, 128,
+                                         [VppRoutePath(self.pg2.remote_ip6,
+                                                       0xffffffff,
+                                                       nh_table_id=1,
+                                                       is_resolve_attached=1,
+                                                       is_ip6=1),
+                                          VppRoutePath(self.pg3.remote_ip6,
+                                                       0xffffffff,
+                                                       nh_table_id=1,
+                                                       is_ip6=1,
+                                                       is_resolve_attached=1)],
+                                         table_id=1,
+                                         is_ip6=1))
+            vpn_routes[ii].add_vpp_config()
+
+            vpn_bindings.append(VppMplsIpBind(self, local_label, dst, 128,
+                                              ip_table_id=1,
+                                              is_ip6=1))
+            vpn_bindings[ii].add_vpp_config()
+
+            pkts.append(Ether(dst=self.pg0.local_mac,
+                              src=self.pg0.remote_mac) /
+                        MPLS(label=local_label, ttl=64) /
+                        IPv6(src=self.pg0.remote_ip6, dst=dst) /
+                        UDP(sport=1234, dport=1234) /
+                        Raw('\xa5' * 100))
+
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg2._get_capture(1)
+        rx1 = self.pg3._get_capture(1)
+        self.assertNotEqual(0, len(rx0))
+        self.assertNotEqual(0, len(rx1))
+
+        #
+        # use a test CLI command to stop the FIB walk process, this
+        # will prevent the FIB converging the VPN routes and thus allow
+        # us to probe the interim (post-fail, pre-converge) state
+        #
+        self.vapi.ppcli("test fib-walk-process disable")
+
+        #
+        # withdraw the connected prefix on the interface.
+        # and shutdown the interface so the ND cache is flushed.
+        #
+        self.pg2.unconfig_ip6()
+        self.pg2.admin_down()
+
+        #
+        # now all packets should be forwarded through the remaining peer
+        #
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg3.get_capture(len(pkts))
+
+        #
+        # enable the FIB walk process to converge the FIB
+        #
+        self.vapi.ppcli("test fib-walk-process enable")
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg3.get_capture(len(pkts))
+
+        #
+        # put the connecteds back
+        #
+        self.pg2.admin_up()
+        self.pg2.config_ip6()
+
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx0 = self.pg2._get_capture(1)
+        rx1 = self.pg3._get_capture(1)
+        self.assertNotEqual(0, len(rx0))
+        self.assertNotEqual(0, len(rx1))
+
+
 if __name__ == '__main__':
     unittest.main(testRunner=VppTestRunner)
index d6146f2..b68e210 100644 (file)
@@ -57,7 +57,9 @@ class VppRoutePath(object):
             nh_via_label=MPLS_LABEL_INVALID,
             is_ip6=0,
             rpf_id=0,
-            is_interface_rx=0):
+            is_interface_rx=0,
+            is_resolve_host=0,
+            is_resolve_attached=0):
         self.nh_itf = nh_sw_if_index
         self.nh_table_id = nh_table_id
         self.nh_via_label = nh_via_label
@@ -68,6 +70,8 @@ class VppRoutePath(object):
             self.nh_addr = inet_pton(AF_INET6, nh_addr)
         else:
             self.nh_addr = inet_pton(AF_INET, nh_addr)
+        self.is_resolve_host = is_resolve_host
+        self.is_resolve_attached = is_resolve_attached
         self.is_interface_rx = is_interface_rx
         self.is_rpf_id = 0
         if rpf_id != 0:
@@ -136,7 +140,11 @@ class VppIpRoute(VppObject):
                     next_hop_n_out_labels=len(
                         path.nh_labels),
                     next_hop_via_label=path.nh_via_label,
-                    is_ipv6=self.is_ip6)
+                    next_hop_table_id=path.nh_table_id,
+                    is_ipv6=self.is_ip6,
+                    is_resolve_host=path.is_resolve_host,
+                    is_resolve_attached=path.is_resolve_attached,
+                    is_multipath=1 if len(self.paths) > 1 else 0)
         self._test.registry.register(self, self._test.logger)
 
     def remove_vpp_config(self):
@@ -154,13 +162,16 @@ class VppIpRoute(VppObject):
                 is_ipv6=self.is_ip6)
         else:
             for path in self.paths:
-                self._test.vapi.ip_add_del_route(self.dest_addr,
-                                                 self.dest_addr_len,
-                                                 path.nh_addr,
-                                                 path.nh_itf,
-                                                 table_id=self.table_id,
-                                                 is_add=0,
-                                                 is_ipv6=self.is_ip6)
+                self._test.vapi.ip_add_del_route(
+                    self.dest_addr,
+                    self.dest_addr_len,
+                    path.nh_addr,
+                    path.nh_itf,
+                    table_id=self.table_id,
+                    next_hop_table_id=path.nh_table_id,
+                    next_hop_via_label=path.nh_via_label,
+                    is_add=0,
+                    is_ipv6=self.is_ip6)
 
     def query_vpp_config(self):
         return find_route(self._test,
@@ -318,33 +329,41 @@ class VppMplsIpBind(VppObject):
     """
 
     def __init__(self, test, local_label, dest_addr, dest_addr_len,
-                 table_id=0, ip_table_id=0):
+                 table_id=0, ip_table_id=0, is_ip6=0):
         self._test = test
-        self.dest_addr = inet_pton(AF_INET, dest_addr)
         self.dest_addr_len = dest_addr_len
+        self.dest_addr = dest_addr
         self.local_label = local_label
         self.table_id = table_id
         self.ip_table_id = ip_table_id
+        self.is_ip6 = is_ip6
+        if is_ip6:
+            self.dest_addrn = inet_pton(AF_INET6, dest_addr)
+        else:
+            self.dest_addrn = inet_pton(AF_INET, dest_addr)
 
     def add_vpp_config(self):
         self._test.vapi.mpls_ip_bind_unbind(self.local_label,
-                                            self.dest_addr,
+                                            self.dest_addrn,
                                             self.dest_addr_len,
                                             table_id=self.table_id,
-                                            ip_table_id=self.ip_table_id)
+                                            ip_table_id=self.ip_table_id,
+                                            is_ip4=(self.is_ip6 == 0))
         self._test.registry.register(self, self._test.logger)
 
     def remove_vpp_config(self):
         self._test.vapi.mpls_ip_bind_unbind(self.local_label,
-                                            self.dest_addr,
+                                            self.dest_addrn,
                                             self.dest_addr_len,
-                                            is_bind=0)
+                                            table_id=self.table_id,
+                                            ip_table_id=self.ip_table_id,
+                                            is_bind=0,
+                                            is_ip4=(self.is_ip6 == 0))
 
     def query_vpp_config(self):
         dump = self._test.vapi.mpls_fib_dump()
         for e in dump:
             if self.local_label == e.label \
-               and self.eos_bit == e.eos_bit \
                and self.table_id == e.table_id:
                 return True
         return False
@@ -357,7 +376,7 @@ class VppMplsIpBind(VppObject):
                 % (self.table_id,
                    self.local_label,
                    self.ip_table_id,
-                   inet_ntop(AF_INET, self.dest_addr),
+                   self.dest_addr,
                    self.dest_addr_len))