FIB path preference
[vpp.git] / src / vnet / fib / fib_entry_src.c
index aa1d5a2..ff73cbf 100644 (file)
@@ -17,6 +17,7 @@
 #include <vnet/dpo/load_balance.h>
 #include <vnet/dpo/mpls_label_dpo.h>
 #include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
 
 #include <vnet/fib/fib_entry_src.h>
 #include <vnet/fib/fib_table.h>
@@ -145,7 +146,7 @@ fib_entry_src_action_deinit (fib_entry_t *fib_entry,
        fib_entry_src_vft[source].fesv_deinit(esrc);
     }
 
-    vec_free(esrc->fes_path_exts);
+    fib_path_ext_list_flush(&esrc->fes_path_exts);
     vec_del1(fib_entry->fe_srcs, index);
 }
 
@@ -187,11 +188,12 @@ fib_entry_src_action_cover_update (fib_entry_t *fib_entry,
 
 typedef struct fib_entry_src_collect_forwarding_ctx_t_
 {
-    load_balance_path_t * next_hops;
+    load_balance_path_t *next_hops;
     const fib_entry_t *fib_entry;
     const fib_entry_src_t *esrc;
     fib_forward_chain_type_t fct;
-    int is_recursive;
+    int n_recursive_constrained;
+    u16 preference;
 } fib_entry_src_collect_forwarding_ctx_t;
 
 /**
@@ -202,10 +204,11 @@ load_balance_flags_t
 fib_entry_calc_lb_flags (fib_entry_src_collect_forwarding_ctx_t *ctx)
 {
     /**
-     * We'll use a LB map is the path-list has recursive paths.
+     * We'll use a LB map if the path-list has multiple recursive paths.
      * recursive paths implies BGP, and hence scale.
      */
-    if (ctx->is_recursive)
+    if (ctx->n_recursive_constrained > 1 &&
+        fib_path_list_is_popular(ctx->esrc->fes_pl))
     {
         return (LOAD_BALANCE_FLAG_USES_MAP);
     }
@@ -229,8 +232,6 @@ fib_forward_chain_type_t
 fib_entry_chain_type_fixup (const fib_entry_t *entry,
                            fib_forward_chain_type_t fct)
 {
-    ASSERT(FIB_FORW_CHAIN_TYPE_MPLS_EOS == fct);
-
     /*
      * The EOS chain is a tricky since one cannot know the adjacency
      * to link to without knowing what the packets payload protocol
@@ -238,6 +239,11 @@ fib_entry_chain_type_fixup (const fib_entry_t *entry,
      */
     fib_forward_chain_type_t dfct;
 
+    if (FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct)
+    {
+        return (fct);
+    }
+
     dfct = fib_entry_get_default_chain_type(entry);
 
     if (FIB_FORW_CHAIN_TYPE_MPLS_EOS == dfct)
@@ -259,14 +265,78 @@ fib_entry_chain_type_fixup (const fib_entry_t *entry,
     return (dfct);
 }
 
-static int
+static void
+fib_entry_src_get_path_forwarding (fib_node_index_t path_index,
+                                   fib_entry_src_collect_forwarding_ctx_t *ctx)
+{
+    load_balance_path_t *nh;
+
+    /*
+     * no extension => no out-going label for this path. that's OK
+     * in the case of an IP or EOS chain, but not for non-EOS
+     */
+    switch (ctx->fct)
+    {
+    case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+    case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+    case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
+    case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
+        /*
+         * EOS traffic with no label to stack, we need the IP Adj
+         */
+        vec_add2(ctx->next_hops, nh, 1);
+
+        nh->path_index = path_index;
+        nh->path_weight = fib_path_get_weight(path_index);
+        fib_path_contribute_forwarding(path_index, ctx->fct, &nh->path_dpo);
+
+        break;
+    case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+        if (fib_path_is_exclusive(path_index) ||
+            fib_path_is_deag(path_index))
+        {
+            vec_add2(ctx->next_hops, nh, 1);
+
+            nh->path_index = path_index;
+            nh->path_weight = fib_path_get_weight(path_index);
+            fib_path_contribute_forwarding(path_index,
+                                           FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+                                           &nh->path_dpo);
+        }
+        break;
+    case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+        {
+            /*
+             * no label. we need a chain based on the payload. fixup.
+             */
+            vec_add2(ctx->next_hops, nh, 1);
+
+            nh->path_index = path_index;
+            nh->path_weight = fib_path_get_weight(path_index);
+            fib_path_contribute_forwarding(path_index,
+                                           fib_entry_chain_type_fixup(ctx->fib_entry,
+                                                                      ctx->fct),
+                                           &nh->path_dpo);
+            fib_path_stack_mpls_disp(path_index,
+                                     ctx->fib_entry->fe_prefix.fp_payload_proto,
+                                     &nh->path_dpo);
+
+            break;
+        }
+    case FIB_FORW_CHAIN_TYPE_ETHERNET:
+    case FIB_FORW_CHAIN_TYPE_NSH:
+        ASSERT(0);
+        break;
+    }
+}
+
+static fib_path_list_walk_rc_t
 fib_entry_src_collect_forwarding (fib_node_index_t pl_index,
                                   fib_node_index_t path_index,
                                   void *arg)
 {
     fib_entry_src_collect_forwarding_ctx_t *ctx;
     fib_path_ext_t *path_ext;
-    int have_path_ext;
 
     ctx = arg;
 
@@ -275,97 +345,79 @@ fib_entry_src_collect_forwarding (fib_node_index_t pl_index,
      */
     if (!fib_path_is_resolved(path_index))
     {
-        return (!0);
+        return (FIB_PATH_LIST_WALK_CONTINUE);
     }
 
-    if (fib_path_is_recursive(path_index))
+    if (fib_path_is_recursive_constrained(path_index))
     {
-        ctx->is_recursive = 1;
+        ctx->n_recursive_constrained += 1;
     }
-
-    /*
-     * get the matching path-extension for the path being visited.
-     */
-    have_path_ext = 0;
-    vec_foreach(path_ext, ctx->esrc->fes_path_exts)
-    {
-        if (path_ext->fpe_path_index == path_index)
-        {
-            have_path_ext = 1;
-            break;
-        }
-    }
-    
-    if (have_path_ext &&
-        fib_entry_src_valid_out_label(path_ext->fpe_label_stack[0]))
+    if (0xffff == ctx->preference)
     {
         /*
-         * found a matching extension. stack it to obtain the forwarding
-         * info for this path.
+         * no preference has been set yet, so the first path we encounter
+         * sets the preference we are collecting.
          */
-        ctx->next_hops = fib_path_ext_stack(path_ext, ctx->fib_entry, ctx->fct, ctx->next_hops);
+        ctx->preference = fib_path_get_preference(path_index);
     }
-    else
+    else if (ctx->preference != fib_path_get_preference(path_index))
     {
-        load_balance_path_t *nh;
-
         /*
-         * no extension => no out-going label for this path. that's OK
-         * in the case of an IP or EOS chain, but not for non-EOS
+         * this path does not belong to the same preference as the
+         * previous paths encountered. we are done now.
          */
-        switch (ctx->fct)
-        {
-        case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
-        case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
-        case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
-        case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
-            /*
-             * EOS traffic with no label to stack, we need the IP Adj
-             */
-            vec_add2(ctx->next_hops, nh, 1);
-
-            nh->path_index = path_index;
-            nh->path_weight = fib_path_get_weight(path_index);
-            fib_path_contribute_forwarding(path_index, ctx->fct, &nh->path_dpo);
+        return (FIB_PATH_LIST_WALK_STOP);
+    }
 
-            break;
-        case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
-           if (fib_path_is_exclusive(path_index) ||
-               fib_path_is_deag(path_index))
-           {
-               vec_add2(ctx->next_hops, nh, 1);
+    /*
+     * get the matching path-extension for the path being visited.
+     */
+    path_ext = fib_path_ext_list_find_by_path_index(&ctx->esrc->fes_path_exts,
+                                                    path_index);
 
-               nh->path_index = path_index;
-               nh->path_weight = fib_path_get_weight(path_index);
-               fib_path_contribute_forwarding(path_index,
-                                              FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
-                                              &nh->path_dpo);
-           }
-            break;
-        case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+    if (NULL != path_ext)
+    {
+        switch (path_ext->fpe_type)
         {
+        case FIB_PATH_EXT_MPLS:
+            if (fib_entry_src_valid_out_label(path_ext->fpe_label_stack[0]))
+            {
+                /*
+                 * found a matching extension. stack it to obtain the forwarding
+                 * info for this path.
+                 */
+                ctx->next_hops =
+                    fib_path_ext_stack(path_ext,
+                                       ctx->fct,
+                                       fib_entry_chain_type_fixup(ctx->fib_entry,
+                                                                  ctx->fct),
+                                       ctx->next_hops);
+            }
+            else
+            {
+                fib_entry_src_get_path_forwarding(path_index, ctx);
+            }
+            break;
+        case FIB_PATH_EXT_ADJ:
+            if (FIB_PATH_EXT_ADJ_FLAG_REFINES_COVER & path_ext->fpe_adj_flags)
+            {
+                fib_entry_src_get_path_forwarding(path_index, ctx);
+            }
             /*
-             * no label. we need a chain based on the payload. fixup.
+             * else
+             *  the path does not refine the cover, meaning that
+             *  the adjacency does not match the sub-net on the link.
+             *  So this path does not contribute forwarding.
              */
-            vec_add2(ctx->next_hops, nh, 1);
-
-            nh->path_index = path_index;
-            nh->path_weight = fib_path_get_weight(path_index);
-            fib_path_contribute_forwarding(path_index,
-                                           fib_entry_chain_type_fixup(ctx->fib_entry,
-                                                                      ctx->fct),
-                                           &nh->path_dpo);
-
             break;
         }
-        case FIB_FORW_CHAIN_TYPE_ETHERNET:
-        case FIB_FORW_CHAIN_TYPE_NSH:
-           ASSERT(0);
-           break;
-        }
+    }
+    else
+    {
+        fib_entry_src_get_path_forwarding(path_index, ctx);
     }
 
-    return (!0);
+    return (FIB_PATH_LIST_WALK_CONTINUE);
 }
 
 void
@@ -385,8 +437,9 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry,
         .esrc = esrc,
         .fib_entry = fib_entry,
         .next_hops = NULL,
-        .is_recursive = 0,
+        .n_recursive_constrained = 0,
         .fct = fct,
+        .preference = 0xffff,
     };
 
     /*
@@ -397,7 +450,7 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry,
     vec_validate(ctx.next_hops, fib_path_list_get_n_paths(esrc->fes_pl));
     vec_reset_length(ctx.next_hops);
 
-    lb_proto = fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto);
+    lb_proto = fib_forw_chain_type_to_dpo_proto(fct);
 
     fib_path_list_walk(esrc->fes_pl,
                        fib_entry_src_collect_forwarding,
@@ -424,50 +477,85 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry,
         /*
          * first time create
          */
-        flow_hash_config_t fhc;
-
-        fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index,
-                                             dpo_proto_to_fib(lb_proto));
-        dpo_set(dpo_lb,
-                DPO_LOAD_BALANCE,
-                lb_proto,
-                load_balance_create(0, lb_proto, fhc));
-    }
-
-    load_balance_multipath_update(dpo_lb,
-                                  ctx.next_hops,
-                                  fib_entry_calc_lb_flags(&ctx));
-    vec_free(ctx.next_hops);
+        if (esrc->fes_entry_flags & FIB_ENTRY_FLAG_MULTICAST)
+        {
+            dpo_set(dpo_lb,
+                    DPO_REPLICATE,
+                    lb_proto,
+                    MPLS_IS_REPLICATE | replicate_create(0, lb_proto));
+        }
+        else
+        {
+            flow_hash_config_t fhc;
+            fib_protocol_t fp;
 
-    /*
-     * if this entry is sourced by the uRPF-exempt source then we
-     * append the always present local0 interface (index 0) to the
-     * uRPF list so it is not empty. that way packets pass the loose check.
-     */
-    index_t ui = fib_path_list_get_urpf(esrc->fes_pl);
+            /*
+             * if the protocol for the LB we are building does not match that
+             * of the fib_entry (i.e. we are building the [n]EOS LB for an IPv[46]
+             * entry) then the fib_index is not an index that relates to the table
+             * type we need. So get the default flow-hash config instead.
+             */
+            fp = dpo_proto_to_fib(lb_proto);
+
+            if (fib_entry->fe_prefix.fp_proto != fp)
+            {
+                fhc = fib_table_get_default_flow_hash_config(fp);
+            }
+            else
+            {
+                fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index, fp);
+            }
+            dpo_set(dpo_lb,
+                    DPO_LOAD_BALANCE,
+                    lb_proto,
+                    load_balance_create(0, lb_proto, fhc));
+        }
+    }
 
-    if ((fib_entry_is_sourced(fib_entry_get_index(fib_entry),
-                             FIB_SOURCE_URPF_EXEMPT) ||
-        (esrc->fes_entry_flags & FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT))&&
-       (0 == fib_urpf_check_size(ui)))
+    if (esrc->fes_entry_flags & FIB_ENTRY_FLAG_MULTICAST)
     {
-       /*
-        * The uRPF list we get from the path-list is shared by all
-        * other users of the list, but the uRPF exemption applies
-        * only to this prefix. So we need our own list.
-        */
-       ui = fib_urpf_list_alloc_and_lock();
-       fib_urpf_list_append(ui, 0);
-       fib_urpf_list_bake(ui);
-       load_balance_set_urpf(dpo_lb->dpoi_index, ui);
-       fib_urpf_list_unlock(ui);
+        /*
+         * MPLS multicast
+         */
+        replicate_multipath_update(dpo_lb, ctx.next_hops);
     }
     else
     {
-       load_balance_set_urpf(dpo_lb->dpoi_index, ui);
+        load_balance_multipath_update(dpo_lb,
+                                      ctx.next_hops,
+                                      fib_entry_calc_lb_flags(&ctx));
+        vec_free(ctx.next_hops);
+
+        /*
+         * if this entry is sourced by the uRPF-exempt source then we
+         * append the always present local0 interface (index 0) to the
+         * uRPF list so it is not empty. that way packets pass the loose check.
+         */
+        index_t ui = fib_path_list_get_urpf(esrc->fes_pl);
+
+        if ((fib_entry_is_sourced(fib_entry_get_index(fib_entry),
+                                  FIB_SOURCE_URPF_EXEMPT) ||
+             (esrc->fes_entry_flags & FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT))&&
+            (0 == fib_urpf_check_size(ui)))
+        {
+            /*
+             * The uRPF list we get from the path-list is shared by all
+             * other users of the list, but the uRPF exemption applies
+             * only to this prefix. So we need our own list.
+             */
+            ui = fib_urpf_list_alloc_and_lock();
+            fib_urpf_list_append(ui, 0);
+            fib_urpf_list_bake(ui);
+            load_balance_set_urpf(dpo_lb->dpoi_index, ui);
+            fib_urpf_list_unlock(ui);
+        }
+        else
+        {
+            load_balance_set_urpf(dpo_lb->dpoi_index, ui);
+        }
+        load_balance_set_fib_entry_flags(dpo_lb->dpoi_index,
+                                         fib_entry_get_flags_i(fib_entry));
     }
-    load_balance_set_fib_entry_flags(dpo_lb->dpoi_index,
-                                     fib_entry_get_flags_i(fib_entry));
 }
 
 void
@@ -887,21 +975,6 @@ fib_entry_src_action_remove (fib_entry_t *fib_entry,
     return (sflags);
 }
 
-static inline int
-fib_route_recurses_via_self (const fib_prefix_t *prefix,
-                            const fib_route_path_t *rpath)
-{
-    /*
-     * not all zeros next hop &&
-     * is recursive path &&
-     * nexthop is same as the route's address
-     */
-    return ((!ip46_address_is_zero(&rpath->frp_addr)) &&
-           (~0 == rpath->frp_sw_if_index) &&
-           (0 == ip46_address_cmp(&rpath->frp_addr, &prefix->fp_addr)));
-
-}
-
 /*
  * fib_route_attached_cross_table
  *
@@ -962,14 +1035,14 @@ fib_entry_src_flags_2_path_list_flags (fib_entry_flag_t eflags)
     {
        plf |= FIB_PATH_LIST_FLAG_DROP;
     }
-    if (eflags & FIB_ENTRY_FLAG_LOCAL)
-    {
-       plf |= FIB_PATH_LIST_FLAG_LOCAL;
-    }
     if (eflags & FIB_ENTRY_FLAG_EXCLUSIVE)
     {
        plf |= FIB_PATH_LIST_FLAG_EXCLUSIVE;
     }
+    if (eflags & FIB_ENTRY_FLAG_LOCAL)
+    {
+       plf |= FIB_PATH_LIST_FLAG_LOCAL;
+    }
 
     return (plf);
 }
@@ -980,25 +1053,6 @@ fib_entry_flags_update (const fib_entry_t *fib_entry,
                        fib_path_list_flags_t *pl_flags,
                        fib_entry_src_t *esrc)
 {
-    /*
-     * don't allow the addition of a recursive looped path for prefix
-     * via itself.
-     */
-    if (fib_route_recurses_via_self(&fib_entry->fe_prefix, rpath))     
-    {
-       /*
-        * force the install of a drop path-list.
-        * we want the entry to have some path-list, mainly so
-        * the dodgy path can be rmeoved when the source stops playing
-        * silly buggers.
-        */
-       *pl_flags |= FIB_PATH_LIST_FLAG_DROP;
-    }
-    else
-    {
-       *pl_flags &= ~FIB_PATH_LIST_FLAG_DROP;
-    }
-
     if ((esrc->fes_src == FIB_SOURCE_API) ||
        (esrc->fes_src == FIB_SOURCE_CLI))
     {
@@ -1021,58 +1075,6 @@ fib_entry_flags_update (const fib_entry_t *fib_entry,
     }
 }
 
-/*
- * fib_entry_src_path_ext_add
- *
- * append a path extension to the entry's list
- */
-static void
-fib_entry_src_path_ext_append (fib_entry_src_t *esrc,
-                              const fib_route_path_t *rpath)
-{
-    if (NULL != rpath->frp_label_stack)
-    {
-       fib_path_ext_t *path_ext;
-
-       vec_add2(esrc->fes_path_exts, path_ext, 1);
-
-       fib_path_ext_init(path_ext, esrc->fes_pl, rpath);
-    }
-}
-
-/*
- * fib_entry_src_path_ext_insert
- *
- * insert, sorted, a path extension to the entry's list.
- * It's not strictly necessary in sort the path extensions, since each
- * extension has the path index to which it resolves. However, by being
- * sorted the load-balance produced has a deterministic order, not an order
- * based on the sequence of extension additions. this is a considerable benefit.
- */
-static void
-fib_entry_src_path_ext_insert (fib_entry_src_t *esrc,
-                              const fib_route_path_t *rpath)
-{
-    if (0 == vec_len(esrc->fes_path_exts))
-       return (fib_entry_src_path_ext_append(esrc, rpath));
-
-    if (NULL != rpath->frp_label_stack)
-    {
-       fib_path_ext_t path_ext;
-       int i = 0;
-
-       fib_path_ext_init(&path_ext, esrc->fes_pl, rpath);
-
-       while (i < vec_len(esrc->fes_path_exts) &&
-              (fib_path_ext_cmp(&esrc->fes_path_exts[i], rpath) < 0))
-       {
-           i++;
-       }
-
-       vec_insert_elts(esrc->fes_path_exts, &path_ext, 1, i);
-    }
-}
-
 /*
  * fib_entry_src_action_add
  *
@@ -1089,7 +1091,6 @@ fib_entry_src_action_path_add (fib_entry_t *fib_entry,
 {
     fib_node_index_t old_path_list, fib_entry_index;
     fib_path_list_flags_t pl_flags;
-    fib_path_ext_t *path_ext;
     fib_entry_src_t *esrc;
 
     /*
@@ -1126,18 +1127,6 @@ fib_entry_src_action_path_add (fib_entry_t *fib_entry,
     fib_entry_src_vft[source].fesv_path_add(esrc, fib_entry, pl_flags, rpath);
     fib_entry = fib_entry_get(fib_entry_index);
 
-    /*
-     * re-resolve all the path-extensions with the new path-list
-     */
-    vec_foreach(path_ext, esrc->fes_path_exts)
-    {
-       fib_path_ext_resolve(path_ext, esrc->fes_pl);
-    }
-    /*
-     * if the path has a label we need to add a path extension
-     */
-    fib_entry_src_path_ext_insert(esrc, rpath);
-
     fib_path_list_lock(esrc->fes_pl);
     fib_path_list_unlock(old_path_list);
 
@@ -1162,7 +1151,6 @@ fib_entry_src_action_path_swap (fib_entry_t *fib_entry,
     fib_node_index_t old_path_list, fib_entry_index;
     fib_path_list_flags_t pl_flags;
     const fib_route_path_t *rpath;
-    fib_path_ext_t *path_ext;
     fib_entry_src_t *esrc;
 
     esrc = fib_entry_src_find(fib_entry, source, NULL);
@@ -1204,17 +1192,6 @@ fib_entry_src_action_path_swap (fib_entry_t *fib_entry,
                                             pl_flags,
                                             rpaths);
 
-    vec_foreach(path_ext, esrc->fes_path_exts)
-    {
-       vec_free(path_ext->fpe_label_stack);
-    }
-    vec_free(esrc->fes_path_exts);
-
-    vec_foreach(rpath, rpaths)
-    {
-       fib_entry_src_path_ext_append(esrc, rpath);
-    }
-
     fib_entry = fib_entry_get(fib_entry_index);
 
     fib_path_list_lock(esrc->fes_pl);
@@ -1230,7 +1207,6 @@ fib_entry_src_action_path_remove (fib_entry_t *fib_entry,
 {
     fib_path_list_flags_t pl_flags;
     fib_node_index_t old_path_list;
-    fib_path_ext_t *path_ext;
     fib_entry_src_t *esrc;
 
     esrc = fib_entry_src_find(fib_entry, source, NULL);
@@ -1252,29 +1228,6 @@ fib_entry_src_action_path_remove (fib_entry_t *fib_entry,
     fib_entry_flags_update(fib_entry, rpath, &pl_flags, esrc);
 
     fib_entry_src_vft[source].fesv_path_remove(esrc, pl_flags, rpath);
-    /*
-     * find the matching path extension and remove it
-     */
-    vec_foreach(path_ext, esrc->fes_path_exts)
-    {
-       if (!fib_path_ext_cmp(path_ext, rpath))
-       {
-           /*
-            * delete the element moving the remaining elements down 1 position.
-            * this preserves the sorted order.
-            */
-           vec_free(path_ext->fpe_label_stack);
-           vec_delete(esrc->fes_path_exts, 1, (path_ext - esrc->fes_path_exts));
-           break;
-       }
-    }
-    /*
-     * re-resolve all the path-extensions with the new path-list
-     */
-    vec_foreach(path_ext, esrc->fes_path_exts)
-    {
-       fib_path_ext_resolve(path_ext, esrc->fes_pl);
-    }
 
     /*
      * lock the new path-list, unlock the old if it had one