misc: move to new pool_foreach macros
[vpp.git] / src / vnet / dpo / load_balance.c
index bb38233..fb876a0 100644 (file)
@@ -13,7 +13,6 @@
  * limitations under the License.
  */
 
-#include <vnet/ip/lookup.h>
 #include <vnet/dpo/load_balance.h>
 #include <vnet/dpo/load_balance_map.h>
 #include <vnet/dpo/drop_dpo.h>
 #include <vnet/adj/adj.h>
 #include <vnet/adj/adj_internal.h>
 #include <vnet/fib/fib_urpf_list.h>
-#include <vnet/bier/bier_hdr_inlines.h>
+#include <vnet/bier/bier_fwd.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/ip/ip4_inlines.h>
+#include <vnet/ip/ip6_inlines.h>
 
 /*
  * distribution error tolerance for load-balancing
  */
 const f64 multipath_next_hop_error_tolerance = 0.1;
 
-#undef LB_DEBUG
+static const char *load_balance_attr_names[] = LOAD_BALANCE_ATTR_NAMES;
+
+/**
+ * the logger
+ */
+vlib_log_class_t load_balance_logger;
 
-#ifdef LB_DEBUG
 #define LB_DBG(_lb, _fmt, _args...)                                     \
 {                                                                       \
-    u8* _tmp =NULL;                                                     \
-    clib_warning("lb:[%s]:" _fmt,                                       \
-                 load_balance_format(load_balance_get_index((_lb)),     \
-                                     0, _tmp),                          \
-                 ##_args);                                              \
-    vec_free(_tmp);                                                     \
+    vlib_log_debug(load_balance_logger,                                 \
+                   "lb:[%U]:" _fmt,                                     \
+                   format_load_balance, load_balance_get_index(_lb),    \
+                   LOAD_BALANCE_FORMAT_NONE,                            \
+                   ##_args);                                            \
 }
-#else
-#define LB_DBG(_p, _fmt, _args...)
-#endif
-
 
 /**
  * Pool of all DPOs. It's not static so the DP can have fast access
@@ -53,7 +54,16 @@ load_balance_t *load_balance_pool;
 /**
  * The one instance of load-balance main
  */
-load_balance_main_t load_balance_main;
+load_balance_main_t load_balance_main = {
+    .lbm_to_counters = {
+        .name = "route-to",
+        .stat_segment_name = "/net/route/to",
+    },
+    .lbm_via_counters = {
+        .name = "route-via",
+        .stat_segment_name = "/net/route/via",
+    }
+};
 
 f64
 load_balance_get_multipath_tolerance (void)
@@ -84,12 +94,33 @@ static load_balance_t *
 load_balance_alloc_i (void)
 {
     load_balance_t *lb;
+    u8 need_barrier_sync = 0;
+    vlib_main_t *vm = vlib_get_main();
+    ASSERT (vm->thread_index == 0);
+
+    pool_get_aligned_will_expand (load_balance_pool, need_barrier_sync,
+                                  CLIB_CACHE_LINE_BYTES);
+    if (need_barrier_sync)
+        vlib_worker_thread_barrier_sync (vm);
 
     pool_get_aligned(load_balance_pool, lb, CLIB_CACHE_LINE_BYTES);
-    memset(lb, 0, sizeof(*lb));
+    clib_memset(lb, 0, sizeof(*lb));
 
     lb->lb_map = INDEX_INVALID;
     lb->lb_urpf = INDEX_INVALID;
+
+    if (need_barrier_sync == 0)
+    {
+        need_barrier_sync += vlib_validate_combined_counter_will_expand
+            (&(load_balance_main.lbm_to_counters),
+             load_balance_get_index(lb));
+        need_barrier_sync += vlib_validate_combined_counter_will_expand
+            (&(load_balance_main.lbm_via_counters),
+             load_balance_get_index(lb));
+        if (need_barrier_sync)
+            vlib_worker_thread_barrier_sync (vm);
+    }
+
     vlib_validate_combined_counter(&(load_balance_main.lbm_to_counters),
                                    load_balance_get_index(lb));
     vlib_validate_combined_counter(&(load_balance_main.lbm_via_counters),
@@ -99,6 +130,9 @@ load_balance_alloc_i (void)
     vlib_zero_combined_counter(&(load_balance_main.lbm_via_counters),
                                load_balance_get_index(lb));
 
+    if (need_barrier_sync)
+        vlib_worker_thread_barrier_release (vm);
+
     return (lb);
 }
 
@@ -122,6 +156,21 @@ load_balance_format (index_t lbi,
     s = format(s, "[proto:%U ", format_dpo_proto, lb->lb_proto);
     s = format(s, "index:%d buckets:%d ", lbi, lb->lb_n_buckets);
     s = format(s, "uRPF:%d ", lb->lb_urpf);
+    if (lb->lb_flags)
+    {
+        load_balance_attr_t attr;
+
+        s = format(s, "flags:[");
+
+        FOR_EACH_LOAD_BALANCE_ATTR(attr)
+        {
+            if (lb->lb_flags & (1 << attr))
+            {
+                s = format (s, "%s", load_balance_attr_names[attr]);
+            }
+        }
+        s = format(s, "] ");
+    }
     s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes);
     if (0 != via.packets)
     {
@@ -155,6 +204,7 @@ format_load_balance (u8 * s, va_list * args)
 
     return (load_balance_format(lbi, flags, 0, s));
 }
+
 static u8*
 format_load_balance_dpo (u8 * s, va_list * args)
 {
@@ -164,6 +214,26 @@ format_load_balance_dpo (u8 * s, va_list * args)
     return (load_balance_format(lbi, LOAD_BALANCE_FORMAT_DETAIL, indent, s));
 }
 
+flow_hash_config_t
+load_balance_get_default_flow_hash (dpo_proto_t lb_proto)
+{
+    switch (lb_proto)
+    {
+    case DPO_PROTO_IP4:
+    case DPO_PROTO_IP6:
+        return (IP_FLOW_HASH_DEFAULT);
+
+    case DPO_PROTO_MPLS:
+        return (MPLS_FLOW_HASH_DEFAULT);
+
+    case DPO_PROTO_ETHERNET:
+    case DPO_PROTO_BIER:
+    case DPO_PROTO_NSH:
+        break;
+    }
+
+    return (0);
+}
 
 static load_balance_t *
 load_balance_create_i (u32 num_buckets,
@@ -360,7 +430,7 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
     }
     else
     {
-        clib_memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
+        clib_memcpy_fast (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
         qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
     }
 
@@ -404,7 +474,7 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
                 /*
                  * when the weight skew is high (norm is small) and n == nf.
                  * without this correction the path with a low weight would have
-                 * no represenation in the load-balanace - don't want that.
+                 * no representation in the load-balanace - don't want that.
                  * If the weight skew is high so the load-balance has many buckets
                  * to allow it. pays ya money takes ya choice.
                  */
@@ -458,12 +528,12 @@ load_balance_multipath_next_hop_fixup (const load_balance_path_t *nhs,
  * next hop adjacencies.
  */
 static void
-load_balance_fill_buckets (load_balance_t *lb,
-                           load_balance_path_t *nhs,
-                           dpo_id_t *buckets,
-                           u32 n_buckets)
+load_balance_fill_buckets_norm (load_balance_t *lb,
+                                load_balance_path_t *nhs,
+                                dpo_id_t *buckets,
+                                u32 n_buckets)
 {
-    load_balance_path_t * nh;
+    load_balance_path_t *nh;
     u16 ii, bucket;
 
     bucket = 0;
@@ -481,6 +551,69 @@ load_balance_fill_buckets (load_balance_t *lb,
         }
     }
 }
+static void
+load_balance_fill_buckets_sticky (load_balance_t *lb,
+                                  load_balance_path_t *nhs,
+                                  dpo_id_t *buckets,
+                                  u32 n_buckets)
+{
+    load_balance_path_t *nh, *fwding_paths;
+    u16 ii, bucket, fpath;
+
+    fpath = bucket = 0;
+    fwding_paths = NULL;
+
+    vec_foreach (nh, nhs)
+    {
+        if (!dpo_is_drop(&nh->path_dpo))
+        {
+            vec_add1(fwding_paths, *nh);
+        }
+    }
+    if (vec_len(fwding_paths) == 0)
+        fwding_paths = vec_dup(nhs);
+
+    /*
+     * the next-hops have normalised weights. that means their sum is the number
+     * of buckets we need to fill.
+     */
+    vec_foreach (nh, nhs)
+    {
+        for (ii = 0; ii < nh->path_weight; ii++)
+        {
+            ASSERT(bucket < n_buckets);
+            if (!dpo_is_drop(&nh->path_dpo))
+            {
+                load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo);
+            }
+            else
+            {
+                /* fill the bucks from the next up path */
+                load_balance_set_bucket_i(lb, bucket++, buckets, &fwding_paths[fpath].path_dpo);
+                fpath = (fpath + 1) % vec_len(fwding_paths);
+            }
+        }
+    }
+
+    vec_free(fwding_paths);
+}
+
+static void
+load_balance_fill_buckets (load_balance_t *lb,
+                           load_balance_path_t *nhs,
+                           dpo_id_t *buckets,
+                           u32 n_buckets,
+                           load_balance_flags_t flags)
+{
+    if (flags & LOAD_BALANCE_FLAG_STICKY)
+    {
+        load_balance_fill_buckets_sticky(lb, nhs, buckets, n_buckets);
+    }
+    else
+    {
+        load_balance_fill_buckets_norm(lb, nhs, buckets, n_buckets);
+    }
+}
 
 static inline void
 load_balance_set_n_buckets (load_balance_t *lb,
@@ -505,6 +638,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
 
     ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type);
     lb = load_balance_get(dpo->dpoi_index);
+    lb->lb_flags = flags;
     fixed_nhs = load_balance_multipath_next_hop_fixup(raw_nhs, lb->lb_proto);
     n_buckets =
         ip_multipath_normalize_next_hops((NULL == fixed_nhs ?
@@ -544,7 +678,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
 
         load_balance_fill_buckets(lb, nhs,
                                   load_balance_get_buckets(lb),
-                                  n_buckets);
+                                  n_buckets, flags);
         lb->lb_map = lbmi;
     }
     else
@@ -565,7 +699,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
              */
             load_balance_fill_buckets(lb, nhs,
                                       load_balance_get_buckets(lb),
-                                      n_buckets);
+                                      n_buckets, flags);
             lb->lb_map = lbmi;
         }
         else if (n_buckets > lb->lb_n_buckets)
@@ -590,7 +724,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
 
                 load_balance_fill_buckets(lb, nhs,
                                           lb->lb_buckets,
-                                          n_buckets);
+                                          n_buckets, flags);
                 CLIB_MEMORY_BARRIER();
                 load_balance_set_n_buckets(lb, n_buckets);
 
@@ -611,7 +745,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
                      */
                     load_balance_fill_buckets(lb, nhs,
                                               load_balance_get_buckets(lb),
-                                              n_buckets);
+                                              n_buckets, flags);
                     CLIB_MEMORY_BARRIER();
                     load_balance_set_n_buckets(lb, n_buckets);
                 }
@@ -630,7 +764,8 @@ load_balance_multipath_update (const dpo_id_t *dpo,
                                          n_buckets - 1,
                                          CLIB_CACHE_LINE_BYTES);
 
-                    load_balance_fill_buckets(lb, nhs, new_buckets, n_buckets);
+                    load_balance_fill_buckets(lb, nhs, new_buckets,
+                                              n_buckets, flags);
                     CLIB_MEMORY_BARRIER();
                     lb->lb_buckets = new_buckets;
                     CLIB_MEMORY_BARRIER();
@@ -674,7 +809,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
                  */
                 load_balance_fill_buckets(lb, nhs,
                                           lb->lb_buckets_inline,
-                                          n_buckets);
+                                          n_buckets, flags);
                 CLIB_MEMORY_BARRIER();
                 load_balance_set_n_buckets(lb, n_buckets);
                 CLIB_MEMORY_BARRIER();
@@ -702,9 +837,8 @@ load_balance_multipath_update (const dpo_id_t *dpo,
                 load_balance_set_n_buckets(lb, n_buckets);
                 CLIB_MEMORY_BARRIER();
 
-                load_balance_fill_buckets(lb, nhs,
-                                          buckets,
-                                          n_buckets);
+                load_balance_fill_buckets(lb, nhs, buckets,
+                                          n_buckets, flags);
 
                 for (ii = n_buckets; ii < old_n_buckets; ii++)
                 {
@@ -858,6 +992,9 @@ load_balance_module_init (void)
     lbi = load_balance_create(1, DPO_PROTO_IP4, 0);
     load_balance_set_bucket(lbi, 0, drop_dpo_get(DPO_PROTO_IP4));
 
+    load_balance_logger =
+        vlib_log_register_class("dpo", "load-balance");
+
     load_balance_map_module_init();
 }
 
@@ -878,19 +1015,26 @@ load_balance_show (vlib_main_t * vm,
 
     if (INDEX_INVALID != lbi)
     {
-        vlib_cli_output (vm, "%U", format_load_balance, lbi,
+        if (pool_is_free_index(load_balance_pool, lbi))
+        {
+            vlib_cli_output (vm, "no such load-balance:%d", lbi);
+        }
+        else
+        {
+            vlib_cli_output (vm, "%U", format_load_balance, lbi,
                          LOAD_BALANCE_FORMAT_DETAIL);
+        }
     }
     else
     {
         load_balance_t *lb;
 
-        pool_foreach(lb, load_balance_pool,
-        ({
+        pool_foreach (lb, load_balance_pool)
+         {
             vlib_cli_output (vm, "%U", format_load_balance,
                              load_balance_get_index(lb),
                              LOAD_BALANCE_FORMAT_NONE);
-        }));
+        }
     }
 
     return 0;
@@ -999,10 +1143,10 @@ load_balance_inline (vlib_main_t * vm,
          {
              /* it's BIER */
              const bier_hdr_t *bh0 = vlib_buffer_get_current(b0);
-             vnet_buffer(b0)->ip.flow_hash = bier_hdr_get_entropy(bh0);
+             vnet_buffer(b0)->ip.flow_hash = bier_compute_flow_hash(bh0);
          }
 
-         dpo0 = load_balance_get_bucket_i(lb0, 
+         dpo0 = load_balance_get_bucket_i(lb0,
                                           vnet_buffer(b0)->ip.flow_hash &
                                           (lb0->lb_n_buckets_minus_1));