fib: doc nitfixes
[vpp.git] / src / vnet / fib / fib_path_list.c
index ce11cf4..df08bb2 100644 (file)
 #include <vnet/fib/fib_node_list.h>
 #include <vnet/fib/fib_walk.h>
 #include <vnet/fib/fib_urpf_list.h>
+#include <vnet/fib/fib_path_ext.h>
+#include <vnet/fib/fib_table.h>
+
+/**
+ * The magic number of child entries that make a path-list popular.
+ * There's a trade-off here between convergence and forwarding speed.
+ * Popular path-lists generate load-balance maps for the entries that
+ * use them. If the map is present there is a switch path cost to indirect
+ * through the map - this indirection provides the fast convergence - so
+ * without the map convergence is slower.
+ */
+#define FIB_PATH_LIST_POPULAR 64
 
 /**
  * FIB path-list
@@ -40,13 +52,6 @@ typedef struct fib_path_list_t_ {
      */
     fib_path_list_flags_t fpl_flags;
 
-    /**
-     * The next-hop protocol for the paths in this path list.
-     * Note that fixing the proto here means we don't support a mix of
-     * v4 and v6 paths. ho hum.
-     */
-    fib_protocol_t fpl_nh_proto;
-
     /**
      * Vector of paths indicies for all configured paths.
      * For shareable path-lists this list MUST not change.
@@ -74,24 +79,22 @@ static fib_path_list_t * fib_path_list_pool;
  */
 static uword *fib_path_list_db;
 
+/**
+ * the logger
+ */
+vlib_log_class_t fib_path_list_logger;
+
 /*
  * Debug macro
  */
-#ifdef FIB_DEBUG
-#define FIB_PATH_LIST_DBG(_pl, _fmt, _args...)           \
-{                                                        \
-    u8 *_tmp = 0;                                        \
-    _tmp = fib_path_list_format(                         \
-       fib_path_list_get_index(_pl), _tmp);              \
-    clib_warning("pl:[%d:%p:%p:%s]:" _fmt,               \
-                fib_path_list_get_index(_pl),            \
-                _pl, _pl->fpl_paths, _tmp,               \
-                ##_args);                                \
-    vec_free(_tmp);                                      \
+#define FIB_PATH_LIST_DBG(_pl, _fmt, _args...)                  \
+{                                                               \
+    vlib_log_debug(fib_path_list_logger,                        \
+                   "[%U]:" _fmt,                                \
+                   format_fib_path_list,                        \
+                   fib_path_list_get_index(_pl), 0,             \
+                   ##_args);                                    \
 }
-#else
-#define FIB_PATH_LIST_DBG(_pl, _fmt, _args...)
-#endif
 
 static fib_path_list_t *
 fib_path_list_get (fib_node_index_t index)
@@ -108,9 +111,7 @@ fib_path_list_get_node (fib_node_index_t index)
 static fib_path_list_t*
 fib_path_list_from_fib_node (fib_node_t *node)
 {
-#if CLIB_DEBUG > 0
     ASSERT(FIB_NODE_TYPE_PATH_LIST == node->fn_type);
-#endif
     return ((fib_path_list_t*)node);
 }
 
@@ -120,18 +121,22 @@ fib_path_list_get_index (fib_path_list_t *path_list)
     return (path_list - fib_path_list_pool);
 }
 
-static u8 *
+u8 *
 format_fib_path_list (u8 * s, va_list * args)
 {
+    fib_node_index_t *path_index, path_list_index;
     fib_path_list_attribute_t attr;
-    fib_node_index_t *path_index;
     fib_path_list_t *path_list;
+    u32 indent;
 
-    path_list = va_arg (*args, fib_path_list_t *);
-    
-    s = format (s, "    index:%u", fib_path_list_get_index(path_list));
+    path_list_index = va_arg (*args, fib_node_index_t);
+    indent = va_arg (*args, u32);
+    path_list = fib_path_list_get(path_list_index);
+
+    s = format (s, "%Upath-list:[%d]",
+                format_white_space, indent,
+                fib_path_list_get_index(path_list));
     s = format (s, " locks:%u", path_list->fpl_node.fn_locks);
-    s = format (s, " proto:%U", format_fib_protocol, path_list->fpl_nh_proto);
 
     if (FIB_PATH_LIST_FLAG_NONE != path_list->fpl_flags)
     {
@@ -148,42 +153,19 @@ format_fib_path_list (u8 * s, va_list * args)
 
     vec_foreach (path_index, path_list->fpl_paths)
     {
-       s = fib_path_format(*path_index, s);
+       s = format(s, "%U", format_fib_path, *path_index, indent+2,
+                   FIB_PATH_FORMAT_FLAGS_NONE);
        s = format(s, "\n");
     }
 
     return (s);
 }
 
-u8 *
-fib_path_list_adjs_format (fib_node_index_t path_list_index,
-                          u32 indent,
-                          u8 * s)
-{
-    fib_path_list_t *path_list;
-    u32 i;
-
-    path_list = fib_path_list_get(path_list_index);
-
-    vec_foreach_index (i, path_list->fpl_paths)
-    {
-       s = fib_path_adj_format(path_list->fpl_paths[i],
-                               indent, s);
-    }
-
-    return (s);
-}
-
-
 u8 *
 fib_path_list_format (fib_node_index_t path_list_index,
                      u8 * s)
 {
-    fib_path_list_t *path_list;
-
-    path_list = fib_path_list_get(path_list_index);
-
-    return (format(s, "%U", format_fib_path_list, path_list));
+    return (format(s, "%U", format_fib_path_list, path_list_index, 4));
 }
 
 static uword
@@ -194,7 +176,10 @@ fib_path_list_hash (fib_path_list_t *path_list)
 
     ASSERT(path_list);
 
-    new_path_list_hash = old_path_list_hash = vec_len(path_list->fpl_paths);
+    new_path_list_hash =
+        old_path_list_hash =
+            (vec_len(path_list->fpl_paths) << 16 |
+             (path_list->fpl_flags & FIB_PATH_LIST_KEY_FLAGS));
 
     vec_foreach (path_index, path_list->fpl_paths)
     {
@@ -354,6 +339,18 @@ fib_path_list_last_lock_gone (fib_node_t *node)
     fib_path_list_destroy(path_list);
 }
 
+static load_balance_flags_t
+fib_path_list_fwd_flags_2_load_balance (fib_path_list_fwd_flags_t pl_flags)
+{
+    load_balance_flags_t lb_flags = LOAD_BALANCE_FLAG_NONE;
+
+    if (pl_flags & FIB_PATH_LIST_FWD_FLAG_STICKY)
+    {
+        lb_flags |= LOAD_BALANCE_ATTR_STICKY;
+    }
+    return (lb_flags);
+}
+
 /*
  * fib_path_mk_lb
  *
@@ -363,41 +360,41 @@ fib_path_list_last_lock_gone (fib_node_t *node)
 static void
 fib_path_list_mk_lb (fib_path_list_t *path_list,
                     fib_forward_chain_type_t fct,
-                    dpo_id_t *dpo)
+                    dpo_id_t *dpo,
+                     fib_path_list_fwd_flags_t flags)
 {
-    load_balance_path_t *nhs;
     fib_node_index_t *path_index;
+    load_balance_path_t *nhs;
+    dpo_proto_t dproto;
 
-    nhs  = NULL;
-
-    if (!dpo_id_is_valid(dpo))
-    {
-        /*
-         * first time create
-         */
-        dpo_set(dpo,
-                DPO_LOAD_BALANCE,
-                fib_forw_chain_type_to_dpo_proto(fct),
-                load_balance_create(0,
-                                   fib_forw_chain_type_to_dpo_proto(fct),
-                                   0 /* FIXME FLOW HASH */));
-    }
+    nhs = NULL;
+    dproto = fib_forw_chain_type_to_dpo_proto(fct);
 
     /*
      * We gather the DPOs from resolved paths.
      */
     vec_foreach (path_index, path_list->fpl_paths)
     {
-       nhs = fib_path_append_nh_for_multipath_hash(*path_index,
-                                                    fct,
-                                                    nhs);
+        if ((flags & FIB_PATH_LIST_FWD_FLAG_STICKY) ||
+            fib_path_is_resolved(*path_index))
+        {
+            nhs = fib_path_append_nh_for_multipath_hash(*path_index,
+                                                        fct, nhs);
+        }
     }
 
     /*
      * Path-list load-balances, which if used, would be shared and hence
      * never need a load-balance map.
      */
-    load_balance_multipath_update(dpo, nhs, LOAD_BALANCE_FLAG_NONE);
+    dpo_set(dpo,
+            DPO_LOAD_BALANCE,
+            dproto,
+            load_balance_create(vec_len(nhs),
+                                dproto,
+                                load_balance_get_default_flow_hash(dproto)));
+    load_balance_multipath_update(dpo, nhs,
+                                  fib_path_list_fwd_flags_2_load_balance(flags));
 
     FIB_PATH_LIST_DBG(path_list, "mk lb: %d", dpo->dpoi_index);
 
@@ -474,17 +471,13 @@ fib_path_list_back_walk (fib_node_index_t path_list_index,
 
     fib_path_list_mk_urpf(path_list);
 
+    FIB_PATH_LIST_DBG(path_list, "bw:%U",
+                      format_fib_node_bw_reason, ctx->fnbw_reason);
+
     /*
      * propagate the backwalk further
      */
-    if (32 >= fib_node_list_get_size(path_list->fpl_node.fn_children))
-    {
-        /*
-         * only a few children. continue the walk synchronously
-         */
-       fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, ctx);
-    }
-    else
+    if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_POPULAR)
     {
         /*
          * many children. schedule a async walk
@@ -494,6 +487,13 @@ fib_path_list_back_walk (fib_node_index_t path_list_index,
                        FIB_WALK_PRIORITY_LOW,
                        ctx);
     }
+    else
+    {
+        /*
+         * only a few children. continue the walk synchronously
+         */
+       fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, ctx);
+    }
 }
 
 /*
@@ -507,7 +507,7 @@ fib_path_list_back_walk_notify (fib_node_t *node,
 {
     /*
      * the path-list is not a direct child of any other node type
-     * paths, which do not change thier to-list-mapping, save the
+     * paths, which do not change their to-list-mapping, save the
      * list they are a member of, and invoke the BW function directly.
      */
     ASSERT(0);
@@ -538,22 +538,20 @@ static const fib_node_vft_t fib_path_list_vft = {
     .fnv_mem_show = fib_path_list_memory_show,
 };
 
-static fib_path_list_t *
+static inline fib_path_list_t *
 fib_path_list_alloc (fib_node_index_t *path_list_index)
 {
     fib_path_list_t *path_list;
 
     pool_get(fib_path_list_pool, path_list);
-    memset(path_list, 0, sizeof(*path_list));
+    clib_memset(path_list, 0, sizeof(*path_list));
 
     fib_node_init(&path_list->fpl_node,
                  FIB_NODE_TYPE_PATH_LIST);
     path_list->fpl_urpf = INDEX_INVALID;
+    path_list->fpl_paths = NULL;
 
-    if (NULL != path_list_index)
-    {
-       *path_list_index = fib_path_list_get_index(path_list);
-    }
+    *path_list_index = fib_path_list_get_index(path_list);
 
     FIB_PATH_LIST_DBG(path_list, "alloc");
 
@@ -598,6 +596,11 @@ fib_path_list_get_n_paths (fib_node_index_t path_list_index)
 {
     fib_path_list_t *path_list;
 
+    if (FIB_NODE_INDEX_INVALID == path_list_index)
+    {
+        return (0);
+    }
+
     path_list = fib_path_list_get(path_list_index);
 
     return (vec_len(path_list->fpl_paths));
@@ -626,7 +629,7 @@ fib_path_list_get_resolving_interface (fib_node_index_t path_list_index)
     return (sw_if_index);
 }
 
-fib_protocol_t
+dpo_proto_t
 fib_path_list_get_proto (fib_node_index_t path_list_index)
 {
     fib_path_list_t *path_list;
@@ -650,25 +653,14 @@ fib_path_list_is_looped (fib_node_index_t path_list_index)
     return (path_list->fpl_flags & FIB_PATH_LIST_FLAG_LOOPED);
 }
 
-static fib_path_cfg_flags_t 
-fib_path_list_flags_2_path_flags (fib_path_list_flags_t plf)
+int
+fib_path_list_is_popular (fib_node_index_t path_list_index)
 {
-    fib_path_cfg_flags_t pf = FIB_PATH_CFG_FLAG_NONE;
+    fib_path_list_t *path_list;
 
-    if (plf & FIB_PATH_LIST_FLAG_LOCAL)
-    {
-       pf |= FIB_PATH_CFG_FLAG_LOCAL;
-    }
-    if (plf & FIB_PATH_LIST_FLAG_DROP)
-    {
-       pf |= FIB_PATH_CFG_FLAG_DROP;
-    }
-    if (plf & FIB_PATH_LIST_FLAG_EXCLUSIVE)
-    {
-       pf |= FIB_PATH_CFG_FLAG_EXCLUSIVE;
-    }
+    path_list = fib_path_list_get(path_list_index);
 
-    return (pf);
+    return (path_list->fpl_flags & FIB_PATH_LIST_FLAG_POPULAR);
 }
 
 static fib_path_list_flags_t
@@ -697,18 +689,25 @@ fib_path_list_create (fib_path_list_flags_t flags,
     flags = fib_path_list_flags_fixup(flags);
     path_list = fib_path_list_alloc(&path_list_index);
     path_list->fpl_flags = flags;
-    /*
-     * we'll assume for now all paths are the same next-hop protocol
-     */
-    path_list->fpl_nh_proto = rpaths[0].frp_proto;
 
-    vec_foreach_index(i, rpaths)
+    if (NULL != rpaths)
     {
-       vec_add1(path_list->fpl_paths,
-                fib_path_create(path_list_index,
-                                path_list->fpl_nh_proto,
-                                fib_path_list_flags_2_path_flags(flags),
-                                &rpaths[i]));
+        vec_foreach_index(i, rpaths)
+        {
+            vec_add1(path_list->fpl_paths,
+                     fib_path_create(path_list_index,
+                                     &rpaths[i]));
+        }
+        /*
+         * we sort the paths since the key for the path-list is
+         * the description of the paths it contains. The paths need to
+         * be sorted else this description will differ.
+         */
+        if (vec_len(path_list->fpl_paths) > 1)
+        {
+            vec_sort_with_function(path_list->fpl_paths,
+                                   fib_path_cmp_for_sort);
+        }
     }
 
     /*
@@ -725,7 +724,7 @@ fib_path_list_create (fib_path_list_flags_t flags,
        if (FIB_NODE_INDEX_INVALID != old_path_list_index)
        {
            fib_path_list_destroy(path_list);
-       
+
            path_list_index = old_path_list_index;
        }
        else
@@ -750,8 +749,29 @@ fib_path_list_create (fib_path_list_flags_t flags,
     return (path_list_index);
 }
 
+static fib_path_cfg_flags_t
+fib_path_list_flags_2_path_flags (fib_path_list_flags_t plf)
+{
+    fib_path_cfg_flags_t pf = FIB_PATH_CFG_FLAG_NONE;
+
+    if (plf & FIB_PATH_LIST_FLAG_DROP)
+    {
+       pf |= FIB_PATH_CFG_FLAG_DROP;
+    }
+    if (plf & FIB_PATH_LIST_FLAG_EXCLUSIVE)
+    {
+       pf |= FIB_PATH_CFG_FLAG_EXCLUSIVE;
+    }
+    if (plf & FIB_PATH_LIST_FLAG_LOCAL)
+    {
+        pf |= FIB_PATH_CFG_FLAG_LOCAL;
+    }
+
+    return (pf);
+}
+
 fib_node_index_t
-fib_path_list_create_special (fib_protocol_t nh_proto,
+fib_path_list_create_special (dpo_proto_t nh_proto,
                              fib_path_list_flags_t flags,
                              const dpo_id_t *dpo)
 {
@@ -760,11 +780,10 @@ fib_path_list_create_special (fib_protocol_t nh_proto,
 
     path_list = fib_path_list_alloc(&path_list_index);
     path_list->fpl_flags = flags;
-    path_list->fpl_nh_proto = nh_proto;
 
     path_index =
        fib_path_create_special(path_list_index,
-                               path_list->fpl_nh_proto,
+                                nh_proto,
                                fib_path_list_flags_2_path_flags(flags),
                                dpo);
     vec_add1(path_list->fpl_paths, path_index);
@@ -777,6 +796,30 @@ fib_path_list_create_special (fib_protocol_t nh_proto,
     return (path_list_index);
 }
 
+/*
+ * return the index info the path-lists's vector of paths, of the matching path.
+ * ~0 if not found
+ */
+u32
+fib_path_list_find_rpath (fib_node_index_t path_list_index,
+                          const fib_route_path_t *rpath)
+{
+    fib_path_list_t *path_list;
+    u32 ii;
+
+    path_list = fib_path_list_get(path_list_index);
+
+    vec_foreach_index (ii, path_list->fpl_paths)
+    {
+        if (!fib_path_cmp_w_route_path(path_list->fpl_paths[ii], rpath))
+        {
+            return (ii);
+        }
+    }
+    return (~0);
+}
+
+
 /*
  * fib_path_list_copy_and_path_add
  *
@@ -784,18 +827,94 @@ fib_path_list_create_special (fib_protocol_t nh_proto,
  * The path-list returned could either have been newly created, or
  * can be a shared path-list from the data-base.
  */
+fib_node_index_t*
+fib_path_list_paths_add (fib_node_index_t path_list_index,
+                         const fib_route_path_t *rpaths)
+{
+    fib_node_index_t *new_path_indices, *path_index;
+    const fib_route_path_t *rpath;
+    fib_path_list_t *path_list;
+    u32 ii;
+
+    /*
+     * alloc the new list before we retrieve the old one, lest
+     * the alloc result in a realloc
+     */
+    path_list = fib_path_list_get(path_list_index);
+
+    ASSERT(!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED));
+
+    FIB_PATH_LIST_DBG(path_list, "paths-add");
+
+    new_path_indices = NULL;
+    vec_validate_init_empty(new_path_indices,
+                            vec_len(rpaths) - 1,
+                            FIB_NODE_INDEX_INVALID);
+
+    vec_foreach (path_index, path_list->fpl_paths)
+    {
+        /*
+         * don't add duplicate paths
+         */
+        int found = 0;
+
+        vec_foreach_index(ii, rpaths)
+        {
+            rpath = &rpaths[ii];
+            if (0 == fib_path_cmp_w_route_path(*path_index, rpath))
+            {
+                found = 1;
+                break;
+            }
+        }
+        if (found)
+        {
+            new_path_indices[ii] = *path_index;
+        }
+    }
+
+    /*
+     * new_path_indices array contains INVALID for each path not found
+     * and something valid for matches
+     */
+    vec_foreach_index (ii, new_path_indices)
+    {
+        path_index = &new_path_indices[ii];
+        rpath = &rpaths[ii];
+
+        if (FIB_NODE_INDEX_INVALID == *path_index)
+        {
+            *path_index = fib_path_create(path_list_index, rpath);
+            /*
+             * Add the new path - no sort, no sharing, no key..
+             */
+            vec_add1(path_list->fpl_paths, *path_index);
+
+            /*
+             * no shared path list requested. resolve and use the one
+             * just created.
+             */
+            fib_path_resolve(*path_index);
+        }
+    }
+
+    FIB_PATH_LIST_DBG(path_list, "paths-added");
+
+    return (new_path_indices);
+}
+
 fib_node_index_t
 fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index,
-                                fib_path_list_flags_t flags,
-                                const fib_route_path_t *rpaths)
+                                 fib_path_list_flags_t flags,
+                                 const fib_route_path_t *rpaths)
 {
-    fib_node_index_t path_index, new_path_index, *orig_path_index;
+    fib_node_index_t new_path_index, *orig_path_index;
     fib_path_list_t *path_list, *orig_path_list;
+    fib_node_index_t exist_path_list_index;
     fib_node_index_t path_list_index;
+    const fib_route_path_t *rpath;
     fib_node_index_t pi;
 
-    ASSERT(1 == vec_len(rpaths));
-
     /*
      * alloc the new list before we retrieve the old one, lest
      * the alloc result in a realloc
@@ -808,35 +927,51 @@ fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index,
 
     flags = fib_path_list_flags_fixup(flags);
     path_list->fpl_flags = flags;
-    path_list->fpl_nh_proto = orig_path_list->fpl_nh_proto;
-    vec_validate(path_list->fpl_paths, vec_len(orig_path_list->fpl_paths));
-    pi = 0;
 
-    new_path_index = fib_path_create(path_list_index,
-                                     path_list->fpl_nh_proto,
-                                     fib_path_list_flags_2_path_flags(flags),
-                                     rpaths);
+    vec_validate(path_list->fpl_paths,
+                 (vec_len(orig_path_list->fpl_paths) +
+                  vec_len(rpaths) - 1));
+    pi = 0;
 
-    vec_foreach (orig_path_index, orig_path_list->fpl_paths)
+    vec_foreach(orig_path_index, orig_path_list->fpl_paths)
     {
         /*
-         * don't add duplicate paths
-         * In the unlikely event the path is a duplicate, then we'll
-         * find a matching path-list later and this one will be toast.
+         * copy the original paths over to the new list
          */
-       if (0 != fib_path_cmp(new_path_index, *orig_path_index))
+        path_list->fpl_paths[pi++] = fib_path_copy(*orig_path_index,
+                                                   path_list_index);
+    }
+    vec_foreach(rpath, rpaths)
+    {
+        int duplicate = 0;
+
+        new_path_index = fib_path_create(path_list_index, rpath);
+
+        vec_foreach(orig_path_index, orig_path_list->fpl_paths)
+        {
+            /*
+             * don't add duplicate paths
+             * In the unlikely event the path is a duplicate, then we'll
+             * find a matching path-list later and this one will be toast.
+             */
+            if (0 == fib_path_cmp(new_path_index, *orig_path_index))
+            {
+                duplicate = 1;
+                break;
+            }
+        }
+        if (duplicate)
         {
-            path_index = fib_path_copy(*orig_path_index, path_list_index);
-            path_list->fpl_paths[pi++] = path_index;
+            _vec_len(path_list->fpl_paths) =
+                vec_len(path_list->fpl_paths) - 1;
+            fib_path_destroy(new_path_index);
         }
         else
         {
-            _vec_len(path_list->fpl_paths) = vec_len(orig_path_list->fpl_paths);
+            path_list->fpl_paths[pi++] = new_path_index;
         }
     }
 
-    path_list->fpl_paths[pi] = new_path_index;
-
     /*
      * we sort the paths since the key for the path-list is
      * the description of the paths it contains. The paths need to
@@ -847,46 +982,99 @@ fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index,
     FIB_PATH_LIST_DBG(path_list, "path-added");
 
     /*
-     * If a shared path list is requested, consult the DB for a match
+     * check for a matching path-list in the DB.
+     * If we find one then we can return the existing one and destroy the
+     * new one just created.
      */
     if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED)
     {
-       fib_node_index_t exist_path_list_index;
-       /*
-        * check for a matching path-list in the DB.
-        * If we find one then we can return the existing one and destroy the
-        * new one just created.
-        */
-       exist_path_list_index = fib_path_list_db_find(path_list);
-       if (FIB_NODE_INDEX_INVALID != exist_path_list_index)
-       {
-           fib_path_list_destroy(path_list);
+        exist_path_list_index = fib_path_list_db_find(path_list);
+        if (FIB_NODE_INDEX_INVALID != exist_path_list_index)
+        {
+            fib_path_list_destroy(path_list);
        
-           path_list_index = exist_path_list_index;
-       }
-       else
-       {
-           /*
-            * if there was not a matching path-list, then this
-            * new one will need inserting into the DB and resolving.
-            */
-           fib_path_list_db_insert(path_list_index);
+            path_list_index = exist_path_list_index;
+        }
+        else
+        {
+            /*
+             * if there was not a matching path-list, then this
+             * new one will need inserting into the DB and resolving.
+             */
+            fib_path_list_db_insert(path_list_index);
 
-           path_list = fib_path_list_resolve(path_list);
-       }
+            path_list = fib_path_list_resolve(path_list);
+        }
     }
     else
     {
-       /*
-        * no shared path list requested. resolve and use the one
-        * just created.
-        */
-       path_list = fib_path_list_resolve(path_list);
+        /*
+         * no shared path list requested. resolve and use the one
+         * just created.
+         */
+        path_list = fib_path_list_resolve(path_list);
     }
 
     return (path_list_index);
 }
 
+/*
+ * fib_path_list_paths_remove
+ */
+fib_node_index_t*
+fib_path_list_paths_remove (fib_node_index_t path_list_index,
+                           const fib_route_path_t *rpaths)
+{
+    fib_node_index_t *match_path_indices;
+    fib_path_list_t *path_list;
+    i32 ii, jj;
+
+    path_list = fib_path_list_get(path_list_index);
+    match_path_indices = NULL;
+    vec_validate_init_empty(match_path_indices,
+                            vec_len(rpaths) - 1,
+                            FIB_NODE_INDEX_INVALID);
+
+    ASSERT(!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED));
+
+    FIB_PATH_LIST_DBG(path_list, "path-remove");
+
+    /*
+     * the number of existing paths is likely to be larger than the
+     * number of paths being added.
+     * walk in reverse so the vec_del is ok
+     */
+    vec_foreach_index_backwards(ii, path_list->fpl_paths)
+    {
+        int found = ~0;
+
+        vec_foreach_index(jj, rpaths)
+        {
+            if (0 == fib_path_cmp_w_route_path(path_list->fpl_paths[ii],
+                                               &rpaths[jj]))
+            {
+                found = jj;
+                break;
+            }
+        }
+        if (~0 != found)
+        {
+            fib_node_index_t match_path_index;
+            /*
+             * match - remove it
+             */
+            match_path_index = path_list->fpl_paths[ii];
+            vec_del1(path_list->fpl_paths, ii);
+            fib_path_destroy(match_path_index);
+            match_path_indices[jj] = match_path_index;
+        }
+    }
+
+    FIB_PATH_LIST_DBG(path_list, "paths-removed");
+
+    return (match_path_indices);
+}
+
 /*
  * fib_path_list_copy_and_path_remove
  *
@@ -899,12 +1087,11 @@ fib_path_list_copy_and_path_remove (fib_node_index_t orig_path_list_index,
                                    fib_path_list_flags_t flags,
                                    const fib_route_path_t *rpaths)
 {
-    fib_node_index_t path_index, *orig_path_index, path_list_index, tmp_path_index;
+    fib_node_index_t *orig_path_index, path_list_index, tmp_path_index;
     fib_path_list_t *path_list,  *orig_path_list;
+    const fib_route_path_t *rpath;
     fib_node_index_t pi;
 
-    ASSERT(1 == vec_len(rpaths));
-
     path_list = fib_path_list_alloc(&path_list_index);
 
     flags = fib_path_list_flags_fixup(flags);
@@ -913,52 +1100,46 @@ fib_path_list_copy_and_path_remove (fib_node_index_t orig_path_list_index,
     FIB_PATH_LIST_DBG(orig_path_list, "copy-remove");
 
     path_list->fpl_flags = flags;
-    path_list->fpl_nh_proto = orig_path_list->fpl_nh_proto;
     /*
      * allocate as many paths as we might need in one go, rather than
      * using vec_add to do a few at a time.
      */
-    if (vec_len(orig_path_list->fpl_paths) > 1)
-    {
-       vec_validate(path_list->fpl_paths, vec_len(orig_path_list->fpl_paths) - 2);
-    }
+    vec_validate(path_list->fpl_paths,
+                 vec_len(orig_path_list->fpl_paths) - 1);
     pi = 0;
 
     /*
      * create a representation of the path to be removed, so it
      * can be used as a comparison object during the copy.
      */
-    tmp_path_index = fib_path_create(path_list_index,
-                                    path_list->fpl_nh_proto,
-                                    fib_path_list_flags_2_path_flags(flags),
-                                    rpaths);
-
-    vec_foreach (orig_path_index, orig_path_list->fpl_paths)
+    vec_foreach(orig_path_index, orig_path_list->fpl_paths)
     {
-       if (0 != fib_path_cmp(tmp_path_index, *orig_path_index)) {
-           path_index = fib_path_copy(*orig_path_index, path_list_index);
-           if (pi < vec_len(path_list->fpl_paths))
-           {
-               path_list->fpl_paths[pi++] = path_index;
-           }
-           else
-           {
-               /*
-                * this is the unlikely case that the path being
-                * removed does not match one in the path-list, so
-                * we end up with as many paths as we started with.
-                * the paths vector was sized above with the expectation
-                * that we would have 1 less.
-                */
-               vec_add1(path_list->fpl_paths, path_index);
-           }
-       }
+        /*
+         * copy the original paths over to the new list
+         */
+        path_list->fpl_paths[pi++] = fib_path_copy(*orig_path_index,
+                                                   path_list_index);
     }
+    vec_foreach(rpath, rpaths)
+    {
+        int found = 0;
+        tmp_path_index = fib_path_create(path_list_index, rpath);
 
-    /*
-     * done with the temporary now
-     */
-    fib_path_destroy(tmp_path_index);
+        vec_foreach_index(pi, path_list->fpl_paths)
+        {
+            if (0 == fib_path_cmp(tmp_path_index, path_list->fpl_paths[pi]))
+            {
+                found = 1;
+                break;
+            }
+        }
+        if (found)
+        {
+            fib_path_destroy(path_list->fpl_paths[pi]);
+            vec_del1(path_list->fpl_paths, pi);
+        }
+        fib_path_destroy(tmp_path_index);
+    }
 
     /*
      * if there are no paths, then the new path-list is aborted
@@ -1029,13 +1210,26 @@ fib_path_list_copy_and_path_remove (fib_node_index_t orig_path_list_index,
 void
 fib_path_list_contribute_forwarding (fib_node_index_t path_list_index,
                                     fib_forward_chain_type_t fct,
+                                     fib_path_list_fwd_flags_t flags,
                                     dpo_id_t *dpo)
 {
     fib_path_list_t *path_list;
 
     path_list = fib_path_list_get(path_list_index);
 
-    fib_path_list_mk_lb(path_list, fct, dpo);
+    fib_path_list_mk_lb(path_list, fct, dpo, flags);
+
+    ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type);
+
+    /*
+     * If there's only one bucket in the load-balance then we can
+     * squash it out.
+     */
+    if ((1 == load_balance_n_buckets(dpo->dpoi_index)) &&
+        (FIB_PATH_LIST_FWD_FLAG_COLLAPSE & flags))
+    {
+        dpo_copy(dpo, load_balance_get_bucket(dpo->dpoi_index, 0));
+    }
 }
 
 /*
@@ -1080,9 +1274,11 @@ fib_path_list_recursive_loop_detect (fib_node_index_t path_list_index,
 
        is_looped  = fib_path_recursive_loop_detect(*path_index, copy_ptr);
        list_looped += is_looped;
+
+        vec_free(copy);
     }
 
-    FIB_PATH_LIST_DBG(path_list, "loop-detect: eval:%d", eval);
+    FIB_PATH_LIST_DBG(path_list, "loop-detect: eval:%d", list_looped);
 
     if (list_looped)
     {
@@ -1101,10 +1297,38 @@ fib_path_list_child_add (fib_node_index_t path_list_index,
                         fib_node_type_t child_type,
                         fib_node_index_t child_index)
 {
-    return (fib_node_child_add(FIB_NODE_TYPE_PATH_LIST,
-                               path_list_index,
-                               child_type,
-                               child_index));
+    u32 sibling;
+
+    sibling = fib_node_child_add(FIB_NODE_TYPE_PATH_LIST,
+                                 path_list_index,
+                                 child_type,
+                                 child_index);
+
+    if (FIB_PATH_LIST_POPULAR == fib_node_get_n_children(FIB_NODE_TYPE_PATH_LIST,
+                                                         path_list_index))
+    {
+        /*
+         * Set the popular flag on the path-list once we pass the magic
+         * threshold. then walk children to update.
+         * We don't undo this action. The rational being that the number
+         * of entries using this prefix is large enough such that it is a
+         * non-trivial amount of effort to converge them. If we get into the
+         * situation where we are adding and removing entries such that we
+         * flip-flop over the threshold, then this non-trivial work is added
+         * to each of those routes adds/deletes - not a situation we want.
+         */
+        fib_node_back_walk_ctx_t ctx = {
+            .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE,
+        };
+        fib_path_list_t *path_list;
+
+        path_list = fib_path_list_get(path_list_index);
+        path_list->fpl_flags |= FIB_PATH_LIST_FLAG_POPULAR;
+
+       fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, &ctx);
+    }
+
+    return (sibling);
 }
 
 void
@@ -1126,7 +1350,6 @@ fib_path_list_lock(fib_node_index_t path_list_index)
        path_list = fib_path_list_get(path_list_index);
 
        fib_node_lock(&path_list->fpl_node);
-       FIB_PATH_LIST_DBG(path_list, "lock");
     }
 }
 
@@ -1138,7 +1361,6 @@ fib_path_list_unlock (fib_node_index_t path_list_index)
     if (FIB_NODE_INDEX_INVALID != path_list_index)
     {
        path_list = fib_path_list_get(path_list_index);
-       FIB_PATH_LIST_DBG(path_list, "unlock");
     
        fib_node_unlock(&path_list->fpl_node);
     }
@@ -1168,11 +1390,36 @@ fib_path_list_walk (fib_node_index_t path_list_index,
 
     vec_foreach(path_index, path_list->fpl_paths)
     {
-       if (!func(path_list_index, *path_index, ctx))
+       if (FIB_PATH_LIST_WALK_STOP == func(path_list_index,
+                                            *path_index,
+                                            ctx))
            break;
     }
 }
 
+void
+fib_path_list_walk_w_ext (fib_node_index_t path_list_index,
+                          const fib_path_ext_list_t *ext_list,
+                          fib_path_list_walk_w_ext_fn_t func,
+                          void *ctx)
+{
+    fib_node_index_t *path_index;
+    fib_path_list_t *path_list;
+    fib_path_ext_t *path_ext;
+
+    path_list = fib_path_list_get(path_list_index);
+
+    vec_foreach(path_index, path_list->fpl_paths)
+    {
+        path_ext = fib_path_ext_list_find_by_path_index(ext_list, *path_index);
+
+        if (FIB_PATH_LIST_WALK_STOP == func(path_list_index,
+                                            *path_index,
+                                            path_ext,
+                                            ctx))
+            break;
+    }
+}
 
 void
 fib_path_list_module_init (void)
@@ -1186,6 +1433,7 @@ fib_path_list_module_init (void)
                                     fib_path_list_db_hash_key_equal,
                                     /* format pair/arg */
                                     0, 0);
+    fib_path_list_logger = vlib_log_register_class("fib", "path-list");
 }
 
 static clib_error_t *
@@ -1207,7 +1455,7 @@ show_fib_path_list_command (vlib_main_t * vm,
            u8 *s = fib_path_list_format(pli, NULL);
            s = format(s, "children:");
            s = fib_node_children_format(path_list->fpl_node.fn_children, s);
-           vlib_cli_output (vm, "%s", s);
+           vlib_cli_output (vm, "%v", s);
            vec_free(s);
        }
        else
@@ -1221,10 +1469,10 @@ show_fib_path_list_command (vlib_main_t * vm,
         * show all
         */
        vlib_cli_output (vm, "FIB Path Lists");
-       pool_foreach(path_list, fib_path_list_pool,
-       ({
-           vlib_cli_output (vm, "%U", format_fib_path_list, path_list);
-       }));
+       pool_foreach_index (pli, fib_path_list_pool)
+        {
+           vlib_cli_output (vm, "%U", format_fib_path_list, pli, 0);
+       }
     }
     return (NULL);
 }