misc: move to new pool_foreach macros
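
The old pool_foreach macro took the loop body as a macro argument,
wrapped in ({ ... }) so that commas in the body would survive macro
expansion; the new macro is used like an ordinary C iteration
statement. A minimal sketch of the conversion, modelled on the
load_balance_show() hunk below:

    /* old form: body passed as a macro argument, double-wrapped */
    pool_foreach (lb, load_balance_pool,
    ({
        vlib_cli_output (vm, "%U", format_load_balance,
                         load_balance_get_index (lb),
                         LOAD_BALANCE_FORMAT_NONE);
    }));

    /* new form: reads as a plain loop over the pool's elements */
    pool_foreach (lb, load_balance_pool)
    {
        vlib_cli_output (vm, "%U", format_load_balance,
                         load_balance_get_index (lb),
                         LOAD_BALANCE_FORMAT_NONE);
    }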
diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c
index e70a7a3..fb876a0 100644
--- a/src/vnet/dpo/load_balance.c
+++ b/src/vnet/dpo/load_balance.c
@@ -13,7 +13,6 @@
  * limitations under the License.
  */
 
-#include <vnet/ip/lookup.h>
 #include <vnet/dpo/load_balance.h>
 #include <vnet/dpo/load_balance_map.h>
 #include <vnet/dpo/drop_dpo.h>
 #include <vnet/adj/adj.h>
 #include <vnet/adj/adj_internal.h>
 #include <vnet/fib/fib_urpf_list.h>
+#include <vnet/bier/bier_fwd.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/ip/ip4_inlines.h>
+#include <vnet/ip/ip6_inlines.h>
 
 /*
  * distribution error tolerance for load-balancing
  */
 const f64 multipath_next_hop_error_tolerance = 0.1;
 
-#undef LB_DEBUG
+static const char *load_balance_attr_names[] = LOAD_BALANCE_ATTR_NAMES;
+
+/**
+ * the logger
+ */
+vlib_log_class_t load_balance_logger;
 
-#ifdef LB_DEBUG
 #define LB_DBG(_lb, _fmt, _args...)                                     \
 {                                                                       \
-    u8* _tmp =NULL;                                                     \
-    clib_warning("lb:[%s]:" _fmt,                                       \
-                 load_balance_format(load_balance_get_index((_lb)),     \
-                                     0, _tmp),                          \
-                 ##_args);                                              \
-    vec_free(_tmp);                                                     \
+    vlib_log_debug(load_balance_logger,                                 \
+                   "lb:[%U]:" _fmt,                                     \
+                   format_load_balance, load_balance_get_index(_lb),    \
+                   LOAD_BALANCE_FORMAT_NONE,                            \
+                   ##_args);                                            \
 }
-#else
-#define LB_DBG(_p, _fmt, _args...)
-#endif
-
 
 /**
  * Pool of all DPOs. It's not static so the DP can have fast access
@@ -52,7 +54,16 @@ load_balance_t *load_balance_pool;
 /**
  * The one instance of load-balance main
  */
-load_balance_main_t load_balance_main;
+load_balance_main_t load_balance_main = {
+    .lbm_to_counters = {
+        .name = "route-to",
+        .stat_segment_name = "/net/route/to",
+    },
+    .lbm_via_counters = {
+        .name = "route-via",
+        .stat_segment_name = "/net/route/via",
+    }
+};
 
 f64
 load_balance_get_multipath_tolerance (void)
@@ -83,12 +94,33 @@ static load_balance_t *
 load_balance_alloc_i (void)
 {
     load_balance_t *lb;
+    u8 need_barrier_sync = 0;
+    vlib_main_t *vm = vlib_get_main();
+    ASSERT (vm->thread_index == 0);
+
+    pool_get_aligned_will_expand (load_balance_pool, need_barrier_sync,
+                                  CLIB_CACHE_LINE_BYTES);
+    if (need_barrier_sync)
+        vlib_worker_thread_barrier_sync (vm);
 
     pool_get_aligned(load_balance_pool, lb, CLIB_CACHE_LINE_BYTES);
-    memset(lb, 0, sizeof(*lb));
+    clib_memset(lb, 0, sizeof(*lb));
 
     lb->lb_map = INDEX_INVALID;
     lb->lb_urpf = INDEX_INVALID;
+
+    if (need_barrier_sync == 0)
+    {
+        need_barrier_sync += vlib_validate_combined_counter_will_expand
+            (&(load_balance_main.lbm_to_counters),
+             load_balance_get_index(lb));
+        need_barrier_sync += vlib_validate_combined_counter_will_expand
+            (&(load_balance_main.lbm_via_counters),
+             load_balance_get_index(lb));
+        if (need_barrier_sync)
+            vlib_worker_thread_barrier_sync (vm);
+    }
+
     vlib_validate_combined_counter(&(load_balance_main.lbm_to_counters),
                                    load_balance_get_index(lb));
     vlib_validate_combined_counter(&(load_balance_main.lbm_via_counters),
@@ -98,6 +130,9 @@ load_balance_alloc_i (void)
     vlib_zero_combined_counter(&(load_balance_main.lbm_via_counters),
                                load_balance_get_index(lb));
 
+    if (need_barrier_sync)
+        vlib_worker_thread_barrier_release (vm);
+
     return (lb);
 }
 
@@ -118,8 +153,24 @@ load_balance_format (index_t lbi,
     buckets = load_balance_get_buckets(lb);
 
     s = format(s, "%U: ", format_dpo_type, DPO_LOAD_BALANCE);
-    s = format(s, "[index:%d buckets:%d ", lbi, lb->lb_n_buckets);
+    s = format(s, "[proto:%U ", format_dpo_proto, lb->lb_proto);
+    s = format(s, "index:%d buckets:%d ", lbi, lb->lb_n_buckets);
     s = format(s, "uRPF:%d ", lb->lb_urpf);
+    if (lb->lb_flags)
+    {
+        load_balance_attr_t attr;
+
+        s = format(s, "flags:[");
+
+        FOR_EACH_LOAD_BALANCE_ATTR(attr)
+        {
+            if (lb->lb_flags & (1 << attr))
+            {
+                s = format (s, "%s", load_balance_attr_names[attr]);
+            }
+        }
+        s = format(s, "] ");
+    }
     s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes);
     if (0 != via.packets)
     {
@@ -153,6 +204,7 @@ format_load_balance (u8 * s, va_list * args)
 
     return (load_balance_format(lbi, flags, 0, s));
 }
+
 static u8*
 format_load_balance_dpo (u8 * s, va_list * args)
 {
@@ -162,6 +214,26 @@ format_load_balance_dpo (u8 * s, va_list * args)
     return (load_balance_format(lbi, LOAD_BALANCE_FORMAT_DETAIL, indent, s));
 }
 
+flow_hash_config_t
+load_balance_get_default_flow_hash (dpo_proto_t lb_proto)
+{
+    switch (lb_proto)
+    {
+    case DPO_PROTO_IP4:
+    case DPO_PROTO_IP6:
+        return (IP_FLOW_HASH_DEFAULT);
+
+    case DPO_PROTO_MPLS:
+        return (MPLS_FLOW_HASH_DEFAULT);
+
+    case DPO_PROTO_ETHERNET:
+    case DPO_PROTO_BIER:
+    case DPO_PROTO_NSH:
+        break;
+    }
+
+    return (0);
+}
 
 static load_balance_t *
 load_balance_create_i (u32 num_buckets,
@@ -238,6 +310,27 @@ load_balance_is_drop (const dpo_id_t *dpo)
     return (0);
 }
 
+u16
+load_balance_n_buckets (index_t lbi)
+{
+    load_balance_t *lb;
+
+    lb = load_balance_get(lbi);
+
+    return (lb->lb_n_buckets);
+}
+
+void
+load_balance_set_fib_entry_flags (index_t lbi,
+                                  fib_entry_flag_t flags)
+{
+    load_balance_t *lb;
+
+    lb = load_balance_get(lbi);
+    lb->lb_fib_entry_flags = flags;
+}
+
+
 void
 load_balance_set_urpf (index_t lbi,
                       index_t urpf)
@@ -337,7 +430,7 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
     }
     else
     {
-        clib_memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
+        clib_memcpy_fast (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
         qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
     }
 
@@ -381,7 +474,7 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
                 /*
                  * when the weight skew is high (norm is small) and n == nf.
                  * without this correction the path with a low weight would have
-                 * no represenation in the load-balanace - don't want that.
+                 * no representation in the load-balance - don't want that.
                  * If the weight skew is high so the load-balance has many buckets
                  * to allow it. pays ya money takes ya choice.
                  */
@@ -435,12 +528,12 @@ load_balance_multipath_next_hop_fixup (const load_balance_path_t *nhs,
  * next hop adjacencies.
  */
 static void
-load_balance_fill_buckets (load_balance_t *lb,
-                           load_balance_path_t *nhs,
-                           dpo_id_t *buckets,
-                           u32 n_buckets)
+load_balance_fill_buckets_norm (load_balance_t *lb,
+                                load_balance_path_t *nhs,
+                                dpo_id_t *buckets,
+                                u32 n_buckets)
 {
-    load_balance_path_t * nh;
+    load_balance_path_t *nh;
     u16 ii, bucket;
 
     bucket = 0;
@@ -458,6 +551,69 @@ load_balance_fill_buckets (load_balance_t *lb,
         }
     }
 }
+static void
+load_balance_fill_buckets_sticky (load_balance_t *lb,
+                                  load_balance_path_t *nhs,
+                                  dpo_id_t *buckets,
+                                  u32 n_buckets)
+{
+    load_balance_path_t *nh, *fwding_paths;
+    u16 ii, bucket, fpath;
+
+    fpath = bucket = 0;
+    fwding_paths = NULL;
+
+    vec_foreach (nh, nhs)
+    {
+        if (!dpo_is_drop(&nh->path_dpo))
+        {
+            vec_add1(fwding_paths, *nh);
+        }
+    }
+    if (vec_len(fwding_paths) == 0)
+        fwding_paths = vec_dup(nhs);
+
+    /*
+     * the next-hops have normalised weights. that means their sum is the number
+     * of buckets we need to fill.
+     */
+    vec_foreach (nh, nhs)
+    {
+        for (ii = 0; ii < nh->path_weight; ii++)
+        {
+            ASSERT(bucket < n_buckets);
+            if (!dpo_is_drop(&nh->path_dpo))
+            {
+                load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo);
+            }
+            else
+            {
+                /* fill the buckets from the next up path */
+                load_balance_set_bucket_i(lb, bucket++, buckets, &fwding_paths[fpath].path_dpo);
+                fpath = (fpath + 1) % vec_len(fwding_paths);
+            }
+        }
+    }
+
+    vec_free(fwding_paths);
+}
+
+static void
+load_balance_fill_buckets (load_balance_t *lb,
+                           load_balance_path_t *nhs,
+                           dpo_id_t *buckets,
+                           u32 n_buckets,
+                           load_balance_flags_t flags)
+{
+    if (flags & LOAD_BALANCE_FLAG_STICKY)
+    {
+        load_balance_fill_buckets_sticky(lb, nhs, buckets, n_buckets);
+    }
+    else
+    {
+        load_balance_fill_buckets_norm(lb, nhs, buckets, n_buckets);
+    }
+}
 
 static inline void
 load_balance_set_n_buckets (load_balance_t *lb,
@@ -482,6 +638,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
 
     ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type);
     lb = load_balance_get(dpo->dpoi_index);
+    lb->lb_flags = flags;
     fixed_nhs = load_balance_multipath_next_hop_fixup(raw_nhs, lb->lb_proto);
     n_buckets =
         ip_multipath_normalize_next_hops((NULL == fixed_nhs ?
@@ -521,7 +678,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
 
         load_balance_fill_buckets(lb, nhs,
                                   load_balance_get_buckets(lb),
-                                  n_buckets);
+                                  n_buckets, flags);
         lb->lb_map = lbmi;
     }
     else
@@ -542,7 +699,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
              */
             load_balance_fill_buckets(lb, nhs,
                                       load_balance_get_buckets(lb),
-                                      n_buckets);
+                                      n_buckets, flags);
             lb->lb_map = lbmi;
         }
         else if (n_buckets > lb->lb_n_buckets)
@@ -567,7 +724,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
 
                 load_balance_fill_buckets(lb, nhs,
                                           lb->lb_buckets,
-                                          n_buckets);
+                                          n_buckets, flags);
                 CLIB_MEMORY_BARRIER();
                 load_balance_set_n_buckets(lb, n_buckets);
 
@@ -588,7 +745,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
                      */
                     load_balance_fill_buckets(lb, nhs,
                                               load_balance_get_buckets(lb),
-                                              n_buckets);
+                                              n_buckets, flags);
                     CLIB_MEMORY_BARRIER();
                     load_balance_set_n_buckets(lb, n_buckets);
                 }
@@ -607,7 +764,8 @@ load_balance_multipath_update (const dpo_id_t *dpo,
                                          n_buckets - 1,
                                          CLIB_CACHE_LINE_BYTES);
 
-                    load_balance_fill_buckets(lb, nhs, new_buckets, n_buckets);
+                    load_balance_fill_buckets(lb, nhs, new_buckets,
+                                              n_buckets, flags);
                     CLIB_MEMORY_BARRIER();
                     lb->lb_buckets = new_buckets;
                     CLIB_MEMORY_BARRIER();
@@ -651,7 +809,7 @@ load_balance_multipath_update (const dpo_id_t *dpo,
                  */
                 load_balance_fill_buckets(lb, nhs,
                                           lb->lb_buckets_inline,
-                                          n_buckets);
+                                          n_buckets, flags);
                 CLIB_MEMORY_BARRIER();
                 load_balance_set_n_buckets(lb, n_buckets);
                 CLIB_MEMORY_BARRIER();
@@ -679,11 +837,10 @@ load_balance_multipath_update (const dpo_id_t *dpo,
                 load_balance_set_n_buckets(lb, n_buckets);
                 CLIB_MEMORY_BARRIER();
 
-                load_balance_fill_buckets(lb, nhs,
-                                          buckets,
-                                          n_buckets);
+                load_balance_fill_buckets(lb, nhs, buckets,
+                                          n_buckets, flags);
 
-                for (ii = old_n_buckets-n_buckets; ii < old_n_buckets; ii++)
+                for (ii = n_buckets; ii < old_n_buckets; ii++)
                 {
                     dpo_reset(&buckets[ii]);
                 }
@@ -799,19 +956,45 @@ const static char* const load_balance_l2_nodes[] =
     "l2-load-balance",
     NULL,
 };
+const static char* const load_balance_nsh_nodes[] =
+{
+    "nsh-load-balance",
+    NULL
+};
+const static char* const load_balance_bier_nodes[] =
+{
+    "bier-load-balance",
+    NULL,
+};
 const static char* const * const load_balance_nodes[DPO_PROTO_NUM] =
 {
     [DPO_PROTO_IP4]  = load_balance_ip4_nodes,
     [DPO_PROTO_IP6]  = load_balance_ip6_nodes,
     [DPO_PROTO_MPLS] = load_balance_mpls_nodes,
     [DPO_PROTO_ETHERNET] = load_balance_l2_nodes,
+    [DPO_PROTO_NSH] = load_balance_nsh_nodes,
+    [DPO_PROTO_BIER] = load_balance_bier_nodes,
 };
 
 void
 load_balance_module_init (void)
 {
+    index_t lbi;
+
     dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes);
 
+    /*
+     * Special LB with index zero. we need to define this since the v4 mtrie
+     * assumes an index of 0 implies the ply is empty. therefore all 'real'
+     * adjs need a non-zero index.
+     * This should never be used, but just in case, stack it on a drop.
+     */
+    lbi = load_balance_create(1, DPO_PROTO_IP4, 0);
+    load_balance_set_bucket(lbi, 0, drop_dpo_get(DPO_PROTO_IP4));
+
+    load_balance_logger =
+        vlib_log_register_class("dpo", "load-balance");
+
     load_balance_map_module_init();
 }
 
@@ -832,19 +1015,26 @@ load_balance_show (vlib_main_t * vm,
 
     if (INDEX_INVALID != lbi)
     {
-        vlib_cli_output (vm, "%U", format_load_balance, lbi,
+        if (pool_is_free_index(load_balance_pool, lbi))
+        {
+            vlib_cli_output (vm, "no such load-balance:%d", lbi);
+        }
+        else
+        {
+            vlib_cli_output (vm, "%U", format_load_balance, lbi,
                          LOAD_BALANCE_FORMAT_DETAIL);
+        }
     }
     else
     {
         load_balance_t *lb;
 
-        pool_foreach(lb, load_balance_pool,
-        ({
+        pool_foreach (lb, load_balance_pool)
+         {
             vlib_cli_output (vm, "%U", format_load_balance,
                              load_balance_get_index(lb),
                              LOAD_BALANCE_FORMAT_NONE);
-        }));
+        }
     }
 
     return 0;
@@ -906,10 +1096,11 @@ typedef struct load_balance_trace_t_
     index_t lb_index;
 } load_balance_trace_t;
 
-static uword
-l2_load_balance (vlib_main_t * vm,
-                vlib_node_runtime_t * node,
-                vlib_frame_t * frame)
+always_inline uword
+load_balance_inline (vlib_main_t * vm,
+                    vlib_node_runtime_t * node,
+                    vlib_frame_t * frame,
+                    int is_l2)
 {
   u32 n_left_from, next_index, *from, *to_next;
 
@@ -944,9 +1135,18 @@ l2_load_balance (vlib_main_t * vm,
          lbi0 =  vnet_buffer (b0)->ip.adj_index[VLIB_TX];
          lb0 = load_balance_get(lbi0);
 
-         vnet_buffer(b0)->ip.flow_hash = l2_flow_hash(b0);
-
-         dpo0 = load_balance_get_bucket_i(lb0, 
+         if (is_l2)
+         {
+             vnet_buffer(b0)->ip.flow_hash = l2_flow_hash(b0);
+         }
+         else
+         {
+             /* it's BIER */
+             const bier_hdr_t *bh0 = vlib_buffer_get_current(b0);
+             vnet_buffer(b0)->ip.flow_hash = bier_compute_flow_hash(bh0);
+         }
+
+         dpo0 = load_balance_get_bucket_i(lb0,
                                           vnet_buffer(b0)->ip.flow_hash &
                                           (lb0->lb_n_buckets_minus_1));
 
@@ -969,8 +1169,16 @@ l2_load_balance (vlib_main_t * vm,
   return frame->n_vectors;
 }
 
+static uword
+l2_load_balance (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * frame)
+{
+    return (load_balance_inline(vm, node, frame, 1));
+}
+
 static u8 *
-format_load_balance_trace (u8 * s, va_list * args)
+format_l2_load_balance_trace (u8 * s, va_list * args)
 {
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
@@ -988,9 +1196,130 @@ VLIB_REGISTER_NODE (l2_load_balance_node) = {
   .name = "l2-load-balance",
   .vector_size = sizeof (u32),
 
-  .format_trace = format_load_balance_trace,
+  .format_trace = format_l2_load_balance_trace,
   .n_next_nodes = 1,
   .next_nodes = {
       [0] = "error-drop",
   },
 };
+
+static uword
+nsh_load_balance (vlib_main_t * vm,
+                 vlib_node_runtime_t * node,
+                 vlib_frame_t * frame)
+{
+  u32 n_left_from, next_index, *from, *to_next;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          vlib_buffer_t *b0;
+          u32 bi0, lbi0, next0, *nsh0;
+          const dpo_id_t *dpo0;
+          const load_balance_t *lb0;
+
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          lbi0 =  vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+          lb0 = load_balance_get(lbi0);
+
+          /* SPI + SI are the second word of the NSH header */
+          nsh0 = vlib_buffer_get_current (b0);
+          vnet_buffer(b0)->ip.flow_hash = nsh0[1] % lb0->lb_n_buckets;
+
+          dpo0 = load_balance_get_bucket_i(lb0,
+                                           vnet_buffer(b0)->ip.flow_hash &
+                                           (lb0->lb_n_buckets_minus_1));
+
+          next0 = dpo0->dpoi_next_node;
+          vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+          if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              load_balance_trace_t *tr = vlib_add_trace (vm, node, b0,
+                                                         sizeof (*tr));
+              tr->lb_index = lbi0;
+            }
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                           n_left_to_next, bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+static u8 *
+format_nsh_load_balance_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  load_balance_trace_t *t = va_arg (*args, load_balance_trace_t *);
+
+  s = format (s, "NSH-load-balance: index %d", t->lb_index);
+  return s;
+}
+
+/**
+ * @brief
+ */
+VLIB_REGISTER_NODE (nsh_load_balance_node) = {
+  .function = nsh_load_balance,
+  .name = "nsh-load-balance",
+  .vector_size = sizeof (u32),
+
+  .format_trace = format_nsh_load_balance_trace,
+  .n_next_nodes = 1,
+  .next_nodes = {
+      [0] = "error-drop",
+  },
+};
+
+static u8 *
+format_bier_load_balance_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  load_balance_trace_t *t = va_arg (*args, load_balance_trace_t *);
+
+  s = format (s, "BIER-load-balance: index %d", t->lb_index);
+  return s;
+}
+
+static uword
+bier_load_balance (vlib_main_t * vm,
+                  vlib_node_runtime_t * node,
+                  vlib_frame_t * frame)
+{
+    return (load_balance_inline(vm, node, frame, 0));
+}
+
+/**
+ * @brief
+ */
+VLIB_REGISTER_NODE (bier_load_balance_node) = {
+  .function = bier_load_balance,
+  .name = "bier-load-balance",
+  .vector_size = sizeof (u32),
+
+  .format_trace = format_bier_load_balance_trace,
+  .sibling_of = "mpls-load-balance",
+};