fib: format deleted LB
[vpp.git] / src / vnet / dpo / load_balance.c
index 8074555..8f2a0de 100644 (file)
@@ -13,7 +13,6 @@
  * limitations under the License.
  */
 
-#include <vnet/ip/lookup.h>
 #include <vnet/dpo/load_balance.h>
 #include <vnet/dpo/load_balance_map.h>
 #include <vnet/dpo/drop_dpo.h>
 #include <vnet/adj/adj.h>
 #include <vnet/adj/adj_internal.h>
 #include <vnet/fib/fib_urpf_list.h>
-#include <vnet/bier/bier_hdr_inlines.h>
+#include <vnet/bier/bier_fwd.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/ip/ip4_inlines.h>
+#include <vnet/ip/ip6_inlines.h>
+
+// clang-format off
 
 /*
  * distribution error tolerance for load-balancing
  */
 const f64 multipath_next_hop_error_tolerance = 0.1;
 
-#undef LB_DEBUG
+static const char *load_balance_attr_names[] = LOAD_BALANCE_ATTR_NAMES;
+
+/**
+ * the logger
+ */
+vlib_log_class_t load_balance_logger;
 
-#ifdef LB_DEBUG
 #define LB_DBG(_lb, _fmt, _args...)                                     \
 {                                                                       \
-    u8* _tmp =NULL;                                                     \
-    clib_warning("lb:[%s]:" _fmt,                                       \
-                 load_balance_format(load_balance_get_index((_lb)),     \
-                                     0, _tmp),                          \
-                 ##_args);                                              \
-    vec_free(_tmp);                                                     \
+    vlib_log_debug(load_balance_logger,                                 \
+                   "lb:[%U]:" _fmt,                                     \
+                   format_load_balance, load_balance_get_index(_lb),    \
+                   LOAD_BALANCE_FORMAT_NONE,                            \
+                   ##_args);                                            \
 }
-#else
-#define LB_DBG(_p, _fmt, _args...)
-#endif
-
 
 /**
  * Pool of all DPOs. It's not static so the DP can have fast access
@@ -53,7 +56,16 @@ load_balance_t *load_balance_pool;
 /**
  * The one instance of load-balance main
  */
-load_balance_main_t load_balance_main;
+load_balance_main_t load_balance_main = { /* names the two combined-counter sets at definition time */
+    .lbm_to_counters = { /* validated and zeroed per load-balance index on allocation; read as 'to' when formatting */
+        .name = "route-to",
+        .stat_segment_name = "/net/route/to",
+    },
+    .lbm_via_counters = { /* validated and zeroed per load-balance index on allocation; read as 'via' when formatting */
+        .name = "route-via",
+        .stat_segment_name = "/net/route/via",
+    }
+};
 
 f64
 load_balance_get_multipath_tolerance (void)
@@ -84,12 +96,33 @@ static load_balance_t *
 load_balance_alloc_i (void)
 {
     load_balance_t *lb;
+    u8 need_barrier_sync = 0;
+    vlib_main_t *vm = vlib_get_main();
+    ASSERT (vm->thread_index == 0);
+
+    need_barrier_sync = pool_get_will_expand (load_balance_pool);
+
+    if (need_barrier_sync)
+        vlib_worker_thread_barrier_sync (vm);
 
     pool_get_aligned(load_balance_pool, lb, CLIB_CACHE_LINE_BYTES);
-    memset(lb, 0, sizeof(*lb));
+    clib_memset(lb, 0, sizeof(*lb));
 
     lb->lb_map = INDEX_INVALID;
     lb->lb_urpf = INDEX_INVALID;
+
+    if (need_barrier_sync == 0)
+    {
+        need_barrier_sync += vlib_validate_combined_counter_will_expand
+            (&(load_balance_main.lbm_to_counters),
+             load_balance_get_index(lb));
+        need_barrier_sync += vlib_validate_combined_counter_will_expand
+            (&(load_balance_main.lbm_via_counters),
+             load_balance_get_index(lb));
+        if (need_barrier_sync)
+            vlib_worker_thread_barrier_sync (vm);
+    }
+
     vlib_validate_combined_counter(&(load_balance_main.lbm_to_counters),
                                    load_balance_get_index(lb));
     vlib_validate_combined_counter(&(load_balance_main.lbm_via_counters),
@@ -99,6 +132,9 @@ load_balance_alloc_i (void)
     vlib_zero_combined_counter(&(load_balance_main.lbm_via_counters),
                                load_balance_get_index(lb));
 
+    if (need_barrier_sync)
+        vlib_worker_thread_barrier_release (vm);
+
     return (lb);
 }
 
@@ -113,7 +149,13 @@ load_balance_format (index_t lbi,
     dpo_id_t *buckets;
     u32 i;
 
-    lb = load_balance_get(lbi);
+    lb = load_balance_get_or_null(lbi);
+    if (lb == NULL)
+      {
+       s = format(s, "DELETED lb:%u", lbi);
+       return (s);
+      }
+
     vlib_get_combined_counter(&(load_balance_main.lbm_to_counters), lbi, &to);
     vlib_get_combined_counter(&(load_balance_main.lbm_via_counters), lbi, &via);
     buckets = load_balance_get_buckets(lb);
@@ -122,6 +164,21 @@ load_balance_format (index_t lbi,
     s = format(s, "[proto:%U ", format_dpo_proto, lb->lb_proto);
     s = format(s, "index:%d buckets:%d ", lbi, lb->lb_n_buckets);
     s = format(s, "uRPF:%d ", lb->lb_urpf);
+    if (lb->lb_flags)
+    {
+        load_balance_attr_t attr;
+
+        s = format(s, "flags:[");
+
+        FOR_EACH_LOAD_BALANCE_ATTR(attr)
+        {
+            if (lb->lb_flags & (1 << attr))
+            {
+                s = format (s, "%s", load_balance_attr_names[attr]);
+            }
+        }
+        s = format(s, "] ");
+    }
     s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes);
     if (0 != via.packets)
     {
@@ -155,6 +212,7 @@ format_load_balance (u8 * s, va_list * args)
 
     return (load_balance_format(lbi, flags, 0, s));
 }
+
 static u8*
 format_load_balance_dpo (u8 * s, va_list * args)
 {
@@ -164,6 +222,26 @@ format_load_balance_dpo (u8 * s, va_list * args)
     return (load_balance_format(lbi, LOAD_BALANCE_FORMAT_DETAIL, indent, s));
 }
 
+/**
+ * Default flow-hash configuration for a load-balance of the given protocol.
+ *
+ * IP4 and IP6 share IP_FLOW_HASH_DEFAULT, MPLS uses MPLS_FLOW_HASH_DEFAULT.
+ * Ethernet, BIER and NSH have no default of their own and yield 0.
+ *
+ * @param lb_proto the DPO protocol of the load-balance
+ * @return the default flow-hash config, or 0 when the protocol has none
+ */
+flow_hash_config_t
+load_balance_get_default_flow_hash (dpo_proto_t lb_proto)
+{
+    flow_hash_config_t fhc = 0;
+
+    /* no 'default' label, so the compiler can flag unhandled protocols */
+    switch (lb_proto)
+    {
+    case DPO_PROTO_ETHERNET:
+    case DPO_PROTO_BIER:
+    case DPO_PROTO_NSH:
+        break;
+
+    case DPO_PROTO_MPLS:
+        fhc = MPLS_FLOW_HASH_DEFAULT;
+        break;
+
+    case DPO_PROTO_IP4:
+    case DPO_PROTO_IP6:
+        fhc = IP_FLOW_HASH_DEFAULT;
+        break;
+    }
+
+    return (fhc);
+}
 
 static load_balance_t *
 load_balance_create_i (u32 num_buckets,
@@ -172,6 +250,8 @@ load_balance_create_i (u32 num_buckets,
 {
     load_balance_t *lb;
 
+    ASSERT (num_buckets <= LB_MAX_BUCKETS);
+
     lb = load_balance_alloc_i();
     lb->lb_hash_config = fhc;
     lb->lb_n_buckets = num_buckets;
@@ -240,6 +320,16 @@ load_balance_is_drop (const dpo_id_t *dpo)
     return (0);
 }
 
+/**
+ * Number of buckets in the load-balance at pool index lbi.
+ *
+ * The index is handed straight to load_balance_get(); no validity check is
+ * made here.
+ */
+u16
+load_balance_n_buckets (index_t lbi)
+{
+    return (load_balance_get(lbi)->lb_n_buckets);
+}
+
 void
 load_balance_set_fib_entry_flags (index_t lbi,
                                   fib_entry_flag_t flags)
@@ -326,7 +416,7 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
     {
         nhs[0] = raw_next_hops[0];
         nhs[0].path_weight = 1;
-        _vec_len (nhs) = 1;
+        vec_set_len (nhs, 1);
         sum_weight = 1;
         goto done;
     }
@@ -343,14 +433,14 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
         if (nhs[0].path_weight == nhs[1].path_weight)
         {
             nhs[0].path_weight = nhs[1].path_weight = 1;
-            _vec_len (nhs) = 2;
+            vec_set_len (nhs, 2);
             sum_weight = 2;
             goto done;
         }
     }
     else
     {
-        clib_memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
+        clib_memcpy_fast (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
         qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
     }
 
@@ -373,8 +463,9 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
 
     /* Try larger and larger power of 2 sized adjacency blocks until we
        find one where traffic flows to within 1% of specified weights. */
-    for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
+    for (n_adj = clib_min(max_pow2 (n_nhs), LB_MAX_BUCKETS); ; n_adj *= 2)
     {
+        ASSERT (n_adj <= LB_MAX_BUCKETS);
         error = 0;
 
         norm = n_adj / ((f64) sum_weight);
@@ -394,7 +485,7 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
                 /*
                  * when the weight skew is high (norm is small) and n == nf.
                  * without this correction the path with a low weight would have
-                 * no represenation in the load-balanace - don't want that.
+                 * no representation in the load-balance - don't want that.
                  * If the weight skew is high so the load-balance has many buckets
                  * to allow it. pays ya money takes ya choice.
                  */
@@ -405,12 +496,22 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
 
         nhs[0].path_weight += n_adj_left;
 
-        /* Less than 5% average error per adjacency with this size adjacency block? */
-        if (error <= multipath_next_hop_error_tolerance*n_adj)
+        /* Is the average error per adjacency within the tolerance with this
+         * size adjacency block, or have we reached the maximum number of
+         * buckets we support? */
+        if (error <= multipath_next_hop_error_tolerance*n_adj ||
+            n_adj >= LB_MAX_BUCKETS)
         {
-            /* Truncate any next hops with zero weight. */
-            _vec_len (nhs) = i;
-            break;
+          if (i < n_nhs)
+          {
+            /* Truncate any next hops in excess */
+            vlib_log_err(load_balance_logger,
+                         "Too many paths for load-balance, truncating %d -> %d",
+                         n_nhs, i);
+            for (int j = i; j < n_nhs; j++)
+              dpo_reset (&vec_elt(nhs, j).path_dpo);
+          }
+          vec_set_len (nhs, i);
+          break;
         }
     }
 
@@ -448,12 +549,12 @@ load_balance_multipath_next_hop_fixup (const load_balance_path_t *nhs,
  * next hop adjacencies.
  */
 static void
-load_balance_fill_buckets (load_balance_t *lb,
-                           load_balance_path_t *nhs,
-                           dpo_id_t *buckets,
-                           u32 n_buckets)
+load_balance_fill_buckets_norm (load_balance_t *lb,
+                                load_balance_path_t *nhs,
+                                dpo_id_t *buckets,
+                                u32 n_buckets)
 {
-    load_balance_path_t * nh;
+    load_balance_path_t *nh;
     u16 ii, bucket;
 
     bucket = 0;
@@ -471,11 +572,76 @@ load_balance_fill_buckets (load_balance_t *lb,
         }
     }
 }
+/**
+ * Fill the buckets for a 'sticky' load-balance.
+ *
+ * Every path owns as many consecutive buckets as its normalised weight, in
+ * path order. Buckets owned by a path whose DPO is a drop are filled
+ * instead, round-robin, from the paths that are not drops. If every path is
+ * a drop, the complete path set is used as the substitute list, so those
+ * buckets still end up as drops.
+ *
+ * @param lb        the load-balance being populated
+ * @param nhs       vector of paths with normalised weights
+ * @param buckets   the bucket array to write
+ * @param n_buckets capacity of the bucket array; only checked via ASSERT
+ */
+static void
+load_balance_fill_buckets_sticky (load_balance_t *lb,
+                                  load_balance_path_t *nhs,
+                                  dpo_id_t *buckets,
+                                  u32 n_buckets)
+{
+    load_balance_path_t *nh, *fwding_paths;
+    u16 ii, bucket, fpath;
+
+    fpath = bucket = 0;
+    fwding_paths = NULL;
+
+    /* collect the paths that are not drops */
+    vec_foreach (nh, nhs)
+    {
+        if (!dpo_is_drop(&nh->path_dpo))
+        {
+            vec_add1(fwding_paths, *nh);
+        }
+    }
+    /* nothing forwards: substitute from the complete set of paths */
+    if (vec_len(fwding_paths) == 0)
+        fwding_paths = vec_dup(nhs);
+
+    /*
+     * the next-hops have normalised weights. that means their sum is the number
+     * of buckets we need to fill.
+     */
+    vec_foreach (nh, nhs)
+    {
+        for (ii = 0; ii < nh->path_weight; ii++)
+        {
+            ASSERT(bucket < n_buckets);
+            if (!dpo_is_drop(&nh->path_dpo))
+            {
+                load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo);
+            }
+            else
+            {
+                /*
+                 * fill the bucket from the next forwarding path. the
+                 * substitute list is checked before it is indexed, not after.
+                 */
+                ASSERT(vec_len(fwding_paths) > 0);
+                load_balance_set_bucket_i(lb, bucket++, buckets, &fwding_paths[fpath].path_dpo);
+                fpath = (fpath + 1) % vec_len(fwding_paths);
+            }
+        }
+    }
+
+    vec_free(fwding_paths);
+}
+
+/**
+ * Populate the buckets, choosing the fill strategy from the flags: the
+ * sticky variant when LOAD_BALANCE_FLAG_STICKY is set, the normal variant
+ * otherwise. All other arguments are passed through unchanged.
+ */
+static void
+load_balance_fill_buckets (load_balance_t *lb,
+                           load_balance_path_t *nhs,
+                           dpo_id_t *buckets,
+                           u32 n_buckets,
+                           load_balance_flags_t flags)
+{
+    if (!(flags & LOAD_BALANCE_FLAG_STICKY))
+    {
+        load_balance_fill_buckets_norm(lb, nhs, buckets, n_buckets);
+        return;
+    }
+
+    load_balance_fill_buckets_sticky(lb, nhs, buckets, n_buckets);
+}
 
 static inline void
 load_balance_set_n_buckets (load_balance_t *lb,
                             u32 n_buckets)
 { /* record the bucket count together with the value derived from it */
+    ASSERT (n_buckets <= LB_MAX_BUCKETS); /* callers must stay within the supported bucket count */
     lb->lb_n_buckets = n_buckets;
     lb->lb_n_buckets_minus_1 = n_buckets-1; /* ANDed with the flow hash to select a bucket in load_balance_inline */
 }
@@ -495,6 +661,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
 
     ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type);
     lb = load_balance_get(dpo->dpoi_index);
+    lb->lb_flags = flags;
     fixed_nhs = load_balance_multipath_next_hop_fixup(raw_nhs, lb->lb_proto);
     n_buckets =
         ip_multipath_normalize_next_hops((NULL == fixed_nhs ?
@@ -504,8 +671,6 @@ load_balance_multipath_update (const dpo_id_t *dpo,
                                          &sum_of_weights,
                                          multipath_next_hop_error_tolerance);
 
-    ASSERT (n_buckets >= vec_len (raw_nhs));
-
     /*
      * Save the old load-balance map used, and get a new one if required.
      */
@@ -534,7 +699,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
 
         load_balance_fill_buckets(lb, nhs,
                                   load_balance_get_buckets(lb),
-                                  n_buckets);
+                                  n_buckets, flags);
         lb->lb_map = lbmi;
     }
     else
@@ -555,7 +720,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
              */
             load_balance_fill_buckets(lb, nhs,
                                       load_balance_get_buckets(lb),
-                                      n_buckets);
+                                      n_buckets, flags);
             lb->lb_map = lbmi;
         }
         else if (n_buckets > lb->lb_n_buckets)
@@ -580,7 +745,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
 
                 load_balance_fill_buckets(lb, nhs,
                                           lb->lb_buckets,
-                                          n_buckets);
+                                          n_buckets, flags);
                 CLIB_MEMORY_BARRIER();
                 load_balance_set_n_buckets(lb, n_buckets);
 
@@ -601,7 +766,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
                      */
                     load_balance_fill_buckets(lb, nhs,
                                               load_balance_get_buckets(lb),
-                                              n_buckets);
+                                              n_buckets, flags);
                     CLIB_MEMORY_BARRIER();
                     load_balance_set_n_buckets(lb, n_buckets);
                 }
@@ -620,7 +785,8 @@ load_balance_multipath_update (const dpo_id_t *dpo,
                                          n_buckets - 1,
                                          CLIB_CACHE_LINE_BYTES);
 
-                    load_balance_fill_buckets(lb, nhs, new_buckets, n_buckets);
+                    load_balance_fill_buckets(lb, nhs, new_buckets,
+                                              n_buckets, flags);
                     CLIB_MEMORY_BARRIER();
                     lb->lb_buckets = new_buckets;
                     CLIB_MEMORY_BARRIER();
@@ -664,7 +830,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
                  */
                 load_balance_fill_buckets(lb, nhs,
                                           lb->lb_buckets_inline,
-                                          n_buckets);
+                                          n_buckets, flags);
                 CLIB_MEMORY_BARRIER();
                 load_balance_set_n_buckets(lb, n_buckets);
                 CLIB_MEMORY_BARRIER();
@@ -692,9 +858,8 @@ load_balance_multipath_update (const dpo_id_t *dpo,
                 load_balance_set_n_buckets(lb, n_buckets);
                 CLIB_MEMORY_BARRIER();
 
-                load_balance_fill_buckets(lb, nhs,
-                                          buckets,
-                                          n_buckets);
+                load_balance_fill_buckets(lb, nhs, buckets,
+                                          n_buckets, flags);
 
                 for (ii = n_buckets; ii < old_n_buckets; ii++)
                 {
@@ -774,11 +939,30 @@ load_balance_mem_show (void)
     load_balance_map_show_mem();
 }
 
+/**
+ * MTU of a load-balance DPO: the smallest value dpo_get_mtu() reports over
+ * all of its buckets. With no buckets the initial 0xffff is returned.
+ *
+ * @param dpo the load-balance DPO; its dpoi_index selects the load-balance
+ * @return the minimum bucket MTU
+ */
+static u16
+load_balance_dpo_get_mtu (const dpo_id_t *dpo)
+{
+    load_balance_t *lb = load_balance_get(dpo->dpoi_index);
+    const dpo_id_t *bucket = load_balance_get_buckets(lb);
+    u16 min_mtu = 0xffff;
+    u16 n;
+
+    /* walk the bucket array, keeping the smallest MTU seen so far */
+    for (n = 0; n < lb->lb_n_buckets; n++, bucket++)
+    {
+        min_mtu = clib_min (min_mtu, dpo_get_mtu (bucket));
+    }
+
+    return (min_mtu);
+}
+
 const static dpo_vft_t lb_vft = { /* load-balance implementations of the DPO virtual functions */
     .dv_lock = load_balance_lock,
     .dv_unlock = load_balance_unlock,
     .dv_format = format_load_balance_dpo, /* formats with LOAD_BALANCE_FORMAT_DETAIL */
     .dv_mem_show = load_balance_mem_show,
+    .dv_get_mtu = load_balance_dpo_get_mtu, /* minimum MTU across the load-balance's buckets */
 };
 
 /**
@@ -848,6 +1032,9 @@ load_balance_module_init (void)
     lbi = load_balance_create(1, DPO_PROTO_IP4, 0);
     load_balance_set_bucket(lbi, 0, drop_dpo_get(DPO_PROTO_IP4));
 
+    load_balance_logger =
+        vlib_log_register_class("dpo", "load-balance");
+
     load_balance_map_module_init();
 }
 
@@ -868,19 +1055,26 @@ load_balance_show (vlib_main_t * vm,
 
     if (INDEX_INVALID != lbi)
     {
-        vlib_cli_output (vm, "%U", format_load_balance, lbi,
+        if (pool_is_free_index(load_balance_pool, lbi))
+        {
+            vlib_cli_output (vm, "no such load-balance:%d", lbi);
+        }
+        else
+        {
+            vlib_cli_output (vm, "%U", format_load_balance, lbi,
                          LOAD_BALANCE_FORMAT_DETAIL);
+        }
     }
     else
     {
         load_balance_t *lb;
 
-        pool_foreach(lb, load_balance_pool,
-        ({
+        pool_foreach (lb, load_balance_pool)
+         {
             vlib_cli_output (vm, "%U", format_load_balance,
                              load_balance_get_index(lb),
                              LOAD_BALANCE_FORMAT_NONE);
-        }));
+        }
     }
 
     return 0;
@@ -989,10 +1183,10 @@ load_balance_inline (vlib_main_t * vm,
          {
              /* it's BIER */
              const bier_hdr_t *bh0 = vlib_buffer_get_current(b0);
-             vnet_buffer(b0)->ip.flow_hash = bier_hdr_get_entropy(bh0);
+             vnet_buffer(b0)->ip.flow_hash = bier_compute_flow_hash(bh0);
          }
 
-         dpo0 = load_balance_get_bucket_i(lb0, 
+         dpo0 = load_balance_get_bucket_i(lb0,
                                           vnet_buffer(b0)->ip.flow_hash &
                                           (lb0->lb_n_buckets_minus_1));
 
@@ -1169,3 +1363,5 @@ VLIB_REGISTER_NODE (bier_load_balance_node) = {
   .format_trace = format_bier_load_balance_trace,
   .sibling_of = "mpls-load-balance",
 };
+
+// clang-format on