X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fdpo%2Fload_balance.c;h=a212532dffd65c201f817a5d237fafcc57d535b5;hb=8f5fef2c7;hp=af054f1c3f4bffaecd0f9f81ded74a92607cbc7f;hpb=f12a83f54ff2239d70494d577af3e1bb253692e1;p=vpp.git diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c index af054f1c3f4..a212532dffd 100644 --- a/src/vnet/dpo/load_balance.c +++ b/src/vnet/dpo/load_balance.c @@ -13,7 +13,6 @@ * limitations under the License. */ -#include #include #include #include @@ -21,28 +20,33 @@ #include #include #include +#include +#include +#include +#include + +// clang-format off /* * distribution error tolerance for load-balancing */ const f64 multipath_next_hop_error_tolerance = 0.1; -#undef LB_DEBUG +static const char *load_balance_attr_names[] = LOAD_BALANCE_ATTR_NAMES; + +/** + * the logger + */ +vlib_log_class_t load_balance_logger; -#ifdef LB_DEBUG #define LB_DBG(_lb, _fmt, _args...) \ { \ - u8* _tmp =NULL; \ - clib_warning("lb:[%s]:" _fmt, \ - load_balance_format(load_balance_get_index((_lb)), \ - 0, _tmp), \ - ##_args); \ - vec_free(_tmp); \ + vlib_log_debug(load_balance_logger, \ + "lb:[%U]:" _fmt, \ + format_load_balance, load_balance_get_index(_lb), \ + LOAD_BALANCE_FORMAT_NONE, \ + ##_args); \ } -#else -#define LB_DBG(_p, _fmt, _args...) -#endif - /** * Pool of all DPOs. It's not static so the DP can have fast access @@ -52,7 +56,16 @@ load_balance_t *load_balance_pool; /** * The one instance of load-balance main */ -load_balance_main_t load_balance_main; +load_balance_main_t load_balance_main = { + .lbm_to_counters = { + .name = "route-to", + .stat_segment_name = "/net/route/to", + }, + .lbm_via_counters = { + .name = "route-via", + .stat_segment_name = "/net/route/via", + } +}; f64 load_balance_get_multipath_tolerance (void) @@ -83,12 +96,33 @@ static load_balance_t * load_balance_alloc_i (void) { load_balance_t *lb; + u8 need_barrier_sync = 0; + vlib_main_t *vm = vlib_get_main(); + ASSERT (vm->thread_index == 0); + + pool_get_aligned_will_expand (load_balance_pool, need_barrier_sync, + CLIB_CACHE_LINE_BYTES); + if (need_barrier_sync) + vlib_worker_thread_barrier_sync (vm); pool_get_aligned(load_balance_pool, lb, CLIB_CACHE_LINE_BYTES); - memset(lb, 0, sizeof(*lb)); + clib_memset(lb, 0, sizeof(*lb)); lb->lb_map = INDEX_INVALID; lb->lb_urpf = INDEX_INVALID; + + if (need_barrier_sync == 0) + { + need_barrier_sync += vlib_validate_combined_counter_will_expand + (&(load_balance_main.lbm_to_counters), + load_balance_get_index(lb)); + need_barrier_sync += vlib_validate_combined_counter_will_expand + (&(load_balance_main.lbm_via_counters), + load_balance_get_index(lb)); + if (need_barrier_sync) + vlib_worker_thread_barrier_sync (vm); + } + vlib_validate_combined_counter(&(load_balance_main.lbm_to_counters), load_balance_get_index(lb)); vlib_validate_combined_counter(&(load_balance_main.lbm_via_counters), @@ -98,6 +132,9 @@ load_balance_alloc_i (void) vlib_zero_combined_counter(&(load_balance_main.lbm_via_counters), load_balance_get_index(lb)); + if (need_barrier_sync) + vlib_worker_thread_barrier_release (vm); + return (lb); } @@ -121,6 +158,21 @@ load_balance_format (index_t lbi, s = format(s, "[proto:%U ", format_dpo_proto, lb->lb_proto); s = format(s, "index:%d buckets:%d ", lbi, lb->lb_n_buckets); s = format(s, "uRPF:%d ", lb->lb_urpf); + if (lb->lb_flags) + { + load_balance_attr_t attr; + + s = format(s, "flags:["); + + FOR_EACH_LOAD_BALANCE_ATTR(attr) + { + if (lb->lb_flags & (1 << attr)) + { + s = format (s, "%s", load_balance_attr_names[attr]); + } + } + s = format(s, "] "); + } s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes); if (0 != via.packets) { @@ -154,6 +206,7 @@ format_load_balance (u8 * s, va_list * args) return (load_balance_format(lbi, flags, 0, s)); } + static u8* format_load_balance_dpo (u8 * s, va_list * args) { @@ -163,6 +216,26 @@ format_load_balance_dpo (u8 * s, va_list * args) return (load_balance_format(lbi, LOAD_BALANCE_FORMAT_DETAIL, indent, s)); } +flow_hash_config_t +load_balance_get_default_flow_hash (dpo_proto_t lb_proto) +{ + switch (lb_proto) + { + case DPO_PROTO_IP4: + case DPO_PROTO_IP6: + return (IP_FLOW_HASH_DEFAULT); + + case DPO_PROTO_MPLS: + return (MPLS_FLOW_HASH_DEFAULT); + + case DPO_PROTO_ETHERNET: + case DPO_PROTO_BIER: + case DPO_PROTO_NSH: + break; + } + + return (0); +} static load_balance_t * load_balance_create_i (u32 num_buckets, @@ -239,6 +312,16 @@ load_balance_is_drop (const dpo_id_t *dpo) return (0); } +u16 +load_balance_n_buckets (index_t lbi) +{ + load_balance_t *lb; + + lb = load_balance_get(lbi); + + return (lb->lb_n_buckets); +} + void load_balance_set_fib_entry_flags (index_t lbi, fib_entry_flag_t flags) @@ -349,7 +432,7 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops, } else { - clib_memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0])); + clib_memcpy_fast (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0])); qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight); } @@ -393,7 +476,7 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops, /* * when the weight skew is high (norm is small) and n == nf. * without this correction the path with a low weight would have - * no represenation in the load-balanace - don't want that. + * no representation in the load-balanace - don't want that. * If the weight skew is high so the load-balance has many buckets * to allow it. pays ya money takes ya choice. */ @@ -447,12 +530,12 @@ load_balance_multipath_next_hop_fixup (const load_balance_path_t *nhs, * next hop adjacencies. */ static void -load_balance_fill_buckets (load_balance_t *lb, - load_balance_path_t *nhs, - dpo_id_t *buckets, - u32 n_buckets) +load_balance_fill_buckets_norm (load_balance_t *lb, + load_balance_path_t *nhs, + dpo_id_t *buckets, + u32 n_buckets) { - load_balance_path_t * nh; + load_balance_path_t *nh; u16 ii, bucket; bucket = 0; @@ -470,6 +553,69 @@ load_balance_fill_buckets (load_balance_t *lb, } } } +static void +load_balance_fill_buckets_sticky (load_balance_t *lb, + load_balance_path_t *nhs, + dpo_id_t *buckets, + u32 n_buckets) +{ + load_balance_path_t *nh, *fwding_paths; + u16 ii, bucket, fpath; + + fpath = bucket = 0; + fwding_paths = NULL; + + vec_foreach (nh, nhs) + { + if (!dpo_is_drop(&nh->path_dpo)) + { + vec_add1(fwding_paths, *nh); + } + } + if (vec_len(fwding_paths) == 0) + fwding_paths = vec_dup(nhs); + + /* + * the next-hops have normalised weights. that means their sum is the number + * of buckets we need to fill. + */ + vec_foreach (nh, nhs) + { + for (ii = 0; ii < nh->path_weight; ii++) + { + ASSERT(bucket < n_buckets); + if (!dpo_is_drop(&nh->path_dpo)) + { + load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo); + } + else + { + /* fill the bucks from the next up path */ + load_balance_set_bucket_i(lb, bucket++, buckets, &fwding_paths[fpath].path_dpo); + fpath = (fpath + 1) % vec_len(fwding_paths); + } + } + } + + vec_free(fwding_paths); +} + +static void +load_balance_fill_buckets (load_balance_t *lb, + load_balance_path_t *nhs, + dpo_id_t *buckets, + u32 n_buckets, + load_balance_flags_t flags) +{ + if (flags & LOAD_BALANCE_FLAG_STICKY) + { + load_balance_fill_buckets_sticky(lb, nhs, buckets, n_buckets); + } + else + { + load_balance_fill_buckets_norm(lb, nhs, buckets, n_buckets); + } +} static inline void load_balance_set_n_buckets (load_balance_t *lb, @@ -494,6 +640,7 @@ load_balance_multipath_update (const dpo_id_t *dpo, ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type); lb = load_balance_get(dpo->dpoi_index); + lb->lb_flags = flags; fixed_nhs = load_balance_multipath_next_hop_fixup(raw_nhs, lb->lb_proto); n_buckets = ip_multipath_normalize_next_hops((NULL == fixed_nhs ? @@ -533,7 +680,7 @@ load_balance_multipath_update (const dpo_id_t *dpo, load_balance_fill_buckets(lb, nhs, load_balance_get_buckets(lb), - n_buckets); + n_buckets, flags); lb->lb_map = lbmi; } else @@ -554,7 +701,7 @@ load_balance_multipath_update (const dpo_id_t *dpo, */ load_balance_fill_buckets(lb, nhs, load_balance_get_buckets(lb), - n_buckets); + n_buckets, flags); lb->lb_map = lbmi; } else if (n_buckets > lb->lb_n_buckets) @@ -579,7 +726,7 @@ load_balance_multipath_update (const dpo_id_t *dpo, load_balance_fill_buckets(lb, nhs, lb->lb_buckets, - n_buckets); + n_buckets, flags); CLIB_MEMORY_BARRIER(); load_balance_set_n_buckets(lb, n_buckets); @@ -600,7 +747,7 @@ load_balance_multipath_update (const dpo_id_t *dpo, */ load_balance_fill_buckets(lb, nhs, load_balance_get_buckets(lb), - n_buckets); + n_buckets, flags); CLIB_MEMORY_BARRIER(); load_balance_set_n_buckets(lb, n_buckets); } @@ -619,7 +766,8 @@ load_balance_multipath_update (const dpo_id_t *dpo, n_buckets - 1, CLIB_CACHE_LINE_BYTES); - load_balance_fill_buckets(lb, nhs, new_buckets, n_buckets); + load_balance_fill_buckets(lb, nhs, new_buckets, + n_buckets, flags); CLIB_MEMORY_BARRIER(); lb->lb_buckets = new_buckets; CLIB_MEMORY_BARRIER(); @@ -663,7 +811,7 @@ load_balance_multipath_update (const dpo_id_t *dpo, */ load_balance_fill_buckets(lb, nhs, lb->lb_buckets_inline, - n_buckets); + n_buckets, flags); CLIB_MEMORY_BARRIER(); load_balance_set_n_buckets(lb, n_buckets); CLIB_MEMORY_BARRIER(); @@ -691,9 +839,8 @@ load_balance_multipath_update (const dpo_id_t *dpo, load_balance_set_n_buckets(lb, n_buckets); CLIB_MEMORY_BARRIER(); - load_balance_fill_buckets(lb, nhs, - buckets, - n_buckets); + load_balance_fill_buckets(lb, nhs, buckets, + n_buckets, flags); for (ii = n_buckets; ii < old_n_buckets; ii++) { @@ -773,11 +920,30 @@ load_balance_mem_show (void) load_balance_map_show_mem(); } +static u16 +load_balance_dpo_get_mtu (const dpo_id_t *dpo) +{ + const dpo_id_t *buckets; + load_balance_t *lb; + u16 i, mtu = 0xffff; + + lb = load_balance_get(dpo->dpoi_index); + buckets = load_balance_get_buckets(lb); + + for (i = 0; i < lb->lb_n_buckets; i++) + { + mtu = clib_min (mtu, dpo_get_mtu (&buckets[i])); + } + + return (mtu); +} + const static dpo_vft_t lb_vft = { .dv_lock = load_balance_lock, .dv_unlock = load_balance_unlock, .dv_format = format_load_balance_dpo, .dv_mem_show = load_balance_mem_show, + .dv_get_mtu = load_balance_dpo_get_mtu, }; /** @@ -814,6 +980,11 @@ const static char* const load_balance_l2_nodes[] = const static char* const load_balance_nsh_nodes[] = { "nsh-load-balance", + NULL +}; +const static char* const load_balance_bier_nodes[] = +{ + "bier-load-balance", NULL, }; const static char* const * const load_balance_nodes[DPO_PROTO_NUM] = @@ -823,6 +994,7 @@ const static char* const * const load_balance_nodes[DPO_PROTO_NUM] = [DPO_PROTO_MPLS] = load_balance_mpls_nodes, [DPO_PROTO_ETHERNET] = load_balance_l2_nodes, [DPO_PROTO_NSH] = load_balance_nsh_nodes, + [DPO_PROTO_BIER] = load_balance_bier_nodes, }; void @@ -841,6 +1013,9 @@ load_balance_module_init (void) lbi = load_balance_create(1, DPO_PROTO_IP4, 0); load_balance_set_bucket(lbi, 0, drop_dpo_get(DPO_PROTO_IP4)); + load_balance_logger = + vlib_log_register_class("dpo", "load-balance"); + load_balance_map_module_init(); } @@ -861,19 +1036,26 @@ load_balance_show (vlib_main_t * vm, if (INDEX_INVALID != lbi) { - vlib_cli_output (vm, "%U", format_load_balance, lbi, + if (pool_is_free_index(load_balance_pool, lbi)) + { + vlib_cli_output (vm, "no such load-balance:%d", lbi); + } + else + { + vlib_cli_output (vm, "%U", format_load_balance, lbi, LOAD_BALANCE_FORMAT_DETAIL); + } } else { load_balance_t *lb; - pool_foreach(lb, load_balance_pool, - ({ + pool_foreach (lb, load_balance_pool) + { vlib_cli_output (vm, "%U", format_load_balance, load_balance_get_index(lb), LOAD_BALANCE_FORMAT_NONE); - })); + } } return 0; @@ -935,10 +1117,11 @@ typedef struct load_balance_trace_t_ index_t lb_index; } load_balance_trace_t; -static uword -l2_load_balance (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +always_inline uword +load_balance_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int is_l2) { u32 n_left_from, next_index, *from, *to_next; @@ -973,9 +1156,18 @@ l2_load_balance (vlib_main_t * vm, lbi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; lb0 = load_balance_get(lbi0); - vnet_buffer(b0)->ip.flow_hash = l2_flow_hash(b0); - - dpo0 = load_balance_get_bucket_i(lb0, + if (is_l2) + { + vnet_buffer(b0)->ip.flow_hash = l2_flow_hash(b0); + } + else + { + /* it's BIER */ + const bier_hdr_t *bh0 = vlib_buffer_get_current(b0); + vnet_buffer(b0)->ip.flow_hash = bier_compute_flow_hash(bh0); + } + + dpo0 = load_balance_get_bucket_i(lb0, vnet_buffer(b0)->ip.flow_hash & (lb0->lb_n_buckets_minus_1)); @@ -998,6 +1190,14 @@ l2_load_balance (vlib_main_t * vm, return frame->n_vectors; } +static uword +l2_load_balance (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (load_balance_inline(vm, node, frame, 1)); +} + static u8 * format_l2_load_balance_trace (u8 * s, va_list * args) { @@ -1113,3 +1313,36 @@ VLIB_REGISTER_NODE (nsh_load_balance_node) = { [0] = "error-drop", }, }; + +static u8 * +format_bier_load_balance_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + load_balance_trace_t *t = va_arg (*args, load_balance_trace_t *); + + s = format (s, "BIER-load-balance: index %d", t->lb_index); + return s; +} + +static uword +bier_load_balance (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (load_balance_inline(vm, node, frame, 0)); +} + +/** + * @brief + */ +VLIB_REGISTER_NODE (bier_load_balance_node) = { + .function = bier_load_balance, + .name = "bier-load-balance", + .vector_size = sizeof (u32), + + .format_trace = format_bier_load_balance_trace, + .sibling_of = "mpls-load-balance", +}; + +// clang-format on