2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <vnet/ip/lookup.h>
17 #include <vnet/dpo/load_balance.h>
18 #include <vnet/dpo/load_balance_map.h>
19 #include <vnet/dpo/drop_dpo.h>
20 #include <vppinfra/math.h> /* for fabs */
21 #include <vnet/adj/adj.h>
22 #include <vnet/adj/adj_internal.h>
23 #include <vnet/fib/fib_urpf_list.h>
24 #include <vnet/bier/bier_fwd.h>
27 * distribution error tolerance for load-balancing
29 const f64 multipath_next_hop_error_tolerance = 0.1;
31 static const char *load_balance_attr_names[] = LOAD_BALANCE_ATTR_NAMES;
36 vlib_log_class_t load_balance_logger;
38 #define LB_DBG(_lb, _fmt, _args...) \
40 vlib_log_debug(load_balance_logger, \
42 format_load_balance, load_balance_get_index(_lb), \
43 LOAD_BALANCE_FORMAT_NONE, \
48 * Pool of all DPOs. It's not static so the DP can have fast access
50 load_balance_t *load_balance_pool;
53 * The one instance of load-balance main
55 load_balance_main_t load_balance_main = {
58 .stat_segment_name = "/net/route/to",
62 .stat_segment_name = "/net/route/via",
67 load_balance_get_multipath_tolerance (void)
69 return (multipath_next_hop_error_tolerance);
73 load_balance_get_index (const load_balance_t *lb)
75 return (lb - load_balance_pool);
78 static inline dpo_id_t*
79 load_balance_get_buckets (load_balance_t *lb)
81 if (LB_HAS_INLINE_BUCKETS(lb))
83 return (lb->lb_buckets_inline);
87 return (lb->lb_buckets);
91 static load_balance_t *
92 load_balance_alloc_i (void)
96 pool_get_aligned(load_balance_pool, lb, CLIB_CACHE_LINE_BYTES);
97 clib_memset(lb, 0, sizeof(*lb));
99 lb->lb_map = INDEX_INVALID;
100 lb->lb_urpf = INDEX_INVALID;
101 vlib_validate_combined_counter(&(load_balance_main.lbm_to_counters),
102 load_balance_get_index(lb));
103 vlib_validate_combined_counter(&(load_balance_main.lbm_via_counters),
104 load_balance_get_index(lb));
105 vlib_zero_combined_counter(&(load_balance_main.lbm_to_counters),
106 load_balance_get_index(lb));
107 vlib_zero_combined_counter(&(load_balance_main.lbm_via_counters),
108 load_balance_get_index(lb));
114 load_balance_format (index_t lbi,
115 load_balance_format_flags_t flags,
119 vlib_counter_t to, via;
124 lb = load_balance_get(lbi);
125 vlib_get_combined_counter(&(load_balance_main.lbm_to_counters), lbi, &to);
126 vlib_get_combined_counter(&(load_balance_main.lbm_via_counters), lbi, &via);
127 buckets = load_balance_get_buckets(lb);
129 s = format(s, "%U: ", format_dpo_type, DPO_LOAD_BALANCE);
130 s = format(s, "[proto:%U ", format_dpo_proto, lb->lb_proto);
131 s = format(s, "index:%d buckets:%d ", lbi, lb->lb_n_buckets);
132 s = format(s, "uRPF:%d ", lb->lb_urpf);
135 load_balance_attr_t attr;
137 s = format(s, "flags:[");
139 FOR_EACH_LOAD_BALANCE_ATTR(attr)
141 if (lb->lb_flags & (1 << attr))
143 s = format (s, "%s", load_balance_attr_names[attr]);
148 s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes);
149 if (0 != via.packets)
151 s = format(s, " via:[%Ld:%Ld]",
152 via.packets, via.bytes);
156 if (INDEX_INVALID != lb->lb_map)
158 s = format(s, "\n%U%U",
159 format_white_space, indent+4,
160 format_load_balance_map, lb->lb_map, indent+4);
162 for (i = 0; i < lb->lb_n_buckets; i++)
164 s = format(s, "\n%U[%d] %U",
165 format_white_space, indent+2,
168 &buckets[i], indent+6);
174 format_load_balance (u8 * s, va_list * args)
176 index_t lbi = va_arg(*args, index_t);
177 load_balance_format_flags_t flags = va_arg(*args, load_balance_format_flags_t);
179 return (load_balance_format(lbi, flags, 0, s));
183 format_load_balance_dpo (u8 * s, va_list * args)
185 index_t lbi = va_arg(*args, index_t);
186 u32 indent = va_arg(*args, u32);
188 return (load_balance_format(lbi, LOAD_BALANCE_FORMAT_DETAIL, indent, s));
191 static load_balance_t *
192 load_balance_create_i (u32 num_buckets,
193 dpo_proto_t lb_proto,
194 flow_hash_config_t fhc)
198 lb = load_balance_alloc_i();
199 lb->lb_hash_config = fhc;
200 lb->lb_n_buckets = num_buckets;
201 lb->lb_n_buckets_minus_1 = num_buckets-1;
202 lb->lb_proto = lb_proto;
204 if (!LB_HAS_INLINE_BUCKETS(lb))
206 vec_validate_aligned(lb->lb_buckets,
207 lb->lb_n_buckets - 1,
208 CLIB_CACHE_LINE_BYTES);
211 LB_DBG(lb, "create");
217 load_balance_create (u32 n_buckets,
218 dpo_proto_t lb_proto,
219 flow_hash_config_t fhc)
221 return (load_balance_get_index(load_balance_create_i(n_buckets, lb_proto, fhc)));
225 load_balance_set_bucket_i (load_balance_t *lb,
228 const dpo_id_t *next)
230 dpo_stack(DPO_LOAD_BALANCE, lb->lb_proto, &buckets[bucket], next);
234 load_balance_set_bucket (index_t lbi,
236 const dpo_id_t *next)
241 lb = load_balance_get(lbi);
242 buckets = load_balance_get_buckets(lb);
244 ASSERT(bucket < lb->lb_n_buckets);
246 load_balance_set_bucket_i(lb, bucket, buckets, next);
250 load_balance_is_drop (const dpo_id_t *dpo)
254 if (DPO_LOAD_BALANCE != dpo->dpoi_type)
257 lb = load_balance_get(dpo->dpoi_index);
259 if (1 == lb->lb_n_buckets)
261 return (dpo_is_drop(load_balance_get_bucket_i(lb, 0)));
267 load_balance_n_buckets (index_t lbi)
271 lb = load_balance_get(lbi);
273 return (lb->lb_n_buckets);
277 load_balance_set_fib_entry_flags (index_t lbi,
278 fib_entry_flag_t flags)
282 lb = load_balance_get(lbi);
283 lb->lb_fib_entry_flags = flags;
288 load_balance_set_urpf (index_t lbi,
294 lb = load_balance_get(lbi);
297 * packets in flight we see this change. but it's atomic, so :P
302 fib_urpf_list_unlock(old);
303 fib_urpf_list_lock(urpf);
307 load_balance_get_urpf (index_t lbi)
311 lb = load_balance_get(lbi);
313 return (lb->lb_urpf);
317 load_balance_get_bucket (index_t lbi,
322 lb = load_balance_get(lbi);
324 return (load_balance_get_bucket_i(lb, bucket));
328 next_hop_sort_by_weight (const load_balance_path_t * n1,
329 const load_balance_path_t * n2)
331 return ((int) n1->path_weight - (int) n2->path_weight);
334 /* Given next hop vector is over-written with normalized one with sorted weights and
335 with weights corresponding to the number of adjacencies for each next hop.
336 Returns number of adjacencies in block. */
338 ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
339 load_balance_path_t ** normalized_next_hops,
341 f64 multipath_next_hop_error_tolerance)
343 load_balance_path_t * nhs;
344 uword n_nhs, n_adj, n_adj_left, i, sum_weight;
347 n_nhs = vec_len (raw_next_hops);
352 /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */
353 nhs = *normalized_next_hops;
354 vec_validate (nhs, 2*n_nhs - 1);
356 /* Fast path: 1 next hop in block. */
360 nhs[0] = raw_next_hops[0];
361 nhs[0].path_weight = 1;
369 int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0;
372 nhs[0] = raw_next_hops[cmp];
373 nhs[1] = raw_next_hops[cmp ^ 1];
375 /* Fast path: equal cost multipath with 2 next hops. */
376 if (nhs[0].path_weight == nhs[1].path_weight)
378 nhs[0].path_weight = nhs[1].path_weight = 1;
386 clib_memcpy_fast (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
387 qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
390 /* Find total weight to normalize weights. */
392 for (i = 0; i < n_nhs; i++)
393 sum_weight += nhs[i].path_weight;
395 /* In the unlikely case that all weights are given as 0, set them all to 1. */
398 for (i = 0; i < n_nhs; i++)
399 nhs[i].path_weight = 1;
403 /* Save copies of all next hop weights to avoid being overwritten in loop below. */
404 for (i = 0; i < n_nhs; i++)
405 nhs[n_nhs + i].path_weight = nhs[i].path_weight;
407 /* Try larger and larger power of 2 sized adjacency blocks until we
408 find one where traffic flows to within 1% of specified weights. */
409 for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
413 norm = n_adj / ((f64) sum_weight);
415 for (i = 0; i < n_nhs; i++)
417 f64 nf = nhs[n_nhs + i].path_weight * norm; /* use saved weights */
418 word n = flt_round_nearest (nf);
420 n = n > n_adj_left ? n_adj_left : n;
422 error += fabs (nf - n);
423 nhs[i].path_weight = n;
425 if (0 == nhs[i].path_weight)
428 * when the weight skew is high (norm is small) and n == nf.
429 * without this correction the path with a low weight would have
430 * no represenation in the load-balanace - don't want that.
431 * If the weight skew is high so the load-balance has many buckets
432 * to allow it. pays ya money takes ya choice.
439 nhs[0].path_weight += n_adj_left;
441 /* Less than 5% average error per adjacency with this size adjacency block? */
442 if (error <= multipath_next_hop_error_tolerance*n_adj)
444 /* Truncate any next hops with zero weight. */
451 /* Save vector for next call. */
452 *normalized_next_hops = nhs;
453 *sum_weight_in = sum_weight;
457 static load_balance_path_t *
458 load_balance_multipath_next_hop_fixup (const load_balance_path_t *nhs,
459 dpo_proto_t drop_proto)
461 if (0 == vec_len(nhs))
463 load_balance_path_t *new_nhs = NULL, *nh;
466 * we need something for the load-balance. so use the drop
468 vec_add2(new_nhs, nh, 1);
471 dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto));
480 * Fill in adjacencies in block based on corresponding
481 * next hop adjacencies.
484 load_balance_fill_buckets_norm (load_balance_t *lb,
485 load_balance_path_t *nhs,
489 load_balance_path_t *nh;
495 * the next-hops have normalised weights. that means their sum is the number
496 * of buckets we need to fill.
498 vec_foreach (nh, nhs)
500 for (ii = 0; ii < nh->path_weight; ii++)
502 ASSERT(bucket < n_buckets);
503 load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo);
508 load_balance_fill_buckets_sticky (load_balance_t *lb,
509 load_balance_path_t *nhs,
513 load_balance_path_t *nh, *fwding_paths;
514 u16 ii, bucket, fpath;
519 vec_foreach (nh, nhs)
521 if (!dpo_is_drop(&nh->path_dpo))
523 vec_add1(fwding_paths, *nh);
526 if (vec_len(fwding_paths) == 0)
527 fwding_paths = vec_dup(nhs);
530 * the next-hops have normalised weights. that means their sum is the number
531 * of buckets we need to fill.
533 vec_foreach (nh, nhs)
535 for (ii = 0; ii < nh->path_weight; ii++)
537 ASSERT(bucket < n_buckets);
538 if (!dpo_is_drop(&nh->path_dpo))
540 load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo);
544 /* fill the bucks from the next up path */
545 load_balance_set_bucket_i(lb, bucket++, buckets, &fwding_paths[fpath].path_dpo);
546 fpath = (fpath + 1) % vec_len(fwding_paths);
551 vec_free(fwding_paths);
555 load_balance_fill_buckets (load_balance_t *lb,
556 load_balance_path_t *nhs,
559 load_balance_flags_t flags)
561 if (flags & LOAD_BALANCE_FLAG_STICKY)
563 load_balance_fill_buckets_sticky(lb, nhs, buckets, n_buckets);
567 load_balance_fill_buckets_norm(lb, nhs, buckets, n_buckets);
572 load_balance_set_n_buckets (load_balance_t *lb,
575 lb->lb_n_buckets = n_buckets;
576 lb->lb_n_buckets_minus_1 = n_buckets-1;
580 load_balance_multipath_update (const dpo_id_t *dpo,
581 const load_balance_path_t * raw_nhs,
582 load_balance_flags_t flags)
584 load_balance_path_t *nh, *nhs, *fixed_nhs;
585 u32 sum_of_weights, n_buckets, ii;
586 index_t lbmi, old_lbmi;
592 ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type);
593 lb = load_balance_get(dpo->dpoi_index);
594 lb->lb_flags = flags;
595 fixed_nhs = load_balance_multipath_next_hop_fixup(raw_nhs, lb->lb_proto);
597 ip_multipath_normalize_next_hops((NULL == fixed_nhs ?
602 multipath_next_hop_error_tolerance);
604 ASSERT (n_buckets >= vec_len (raw_nhs));
607 * Save the old load-balance map used, and get a new one if required.
609 old_lbmi = lb->lb_map;
610 if (flags & LOAD_BALANCE_FLAG_USES_MAP)
612 lbmi = load_balance_map_add_or_lock(n_buckets, sum_of_weights, nhs);
616 lbmi = INDEX_INVALID;
619 if (0 == lb->lb_n_buckets)
622 * first time initialisation. no packets inflight, so we can write
625 load_balance_set_n_buckets(lb, n_buckets);
627 if (!LB_HAS_INLINE_BUCKETS(lb))
628 vec_validate_aligned(lb->lb_buckets,
629 lb->lb_n_buckets - 1,
630 CLIB_CACHE_LINE_BYTES);
632 load_balance_fill_buckets(lb, nhs,
633 load_balance_get_buckets(lb),
640 * This is a modification of an existing load-balance.
641 * We need to ensure that packets inflight see a consistent state, that
642 * is the number of reported buckets the LB has (read from
643 * lb_n_buckets_minus_1) is not more than it actually has. So if the
644 * number of buckets is increasing, we must update the bucket array first,
645 * then the reported number. vice-versa if the number of buckets goes down.
647 if (n_buckets == lb->lb_n_buckets)
650 * no change in the number of buckets. we can simply fill what
651 * is new over what is old.
653 load_balance_fill_buckets(lb, nhs,
654 load_balance_get_buckets(lb),
658 else if (n_buckets > lb->lb_n_buckets)
661 * we have more buckets. the old load-balance map (if there is one)
662 * will remain valid, i.e. mapping to indices within range, so we
665 if (n_buckets > LB_NUM_INLINE_BUCKETS &&
666 lb->lb_n_buckets <= LB_NUM_INLINE_BUCKETS)
669 * the new increased number of buckets is crossing the threshold
670 * from the inline storage to out-line. Alloc the outline buckets
671 * first, then fixup the number. then reset the inlines.
673 ASSERT(NULL == lb->lb_buckets);
674 vec_validate_aligned(lb->lb_buckets,
676 CLIB_CACHE_LINE_BYTES);
678 load_balance_fill_buckets(lb, nhs,
681 CLIB_MEMORY_BARRIER();
682 load_balance_set_n_buckets(lb, n_buckets);
684 CLIB_MEMORY_BARRIER();
686 for (ii = 0; ii < LB_NUM_INLINE_BUCKETS; ii++)
688 dpo_reset(&lb->lb_buckets_inline[ii]);
693 if (n_buckets <= LB_NUM_INLINE_BUCKETS)
696 * we are not crossing the threshold and it's still inline buckets.
697 * we can write the new on the old..
699 load_balance_fill_buckets(lb, nhs,
700 load_balance_get_buckets(lb),
702 CLIB_MEMORY_BARRIER();
703 load_balance_set_n_buckets(lb, n_buckets);
708 * we are not crossing the threshold. We need a new bucket array to
709 * hold the increased number of choices.
711 dpo_id_t *new_buckets, *old_buckets, *tmp_dpo;
714 old_buckets = load_balance_get_buckets(lb);
716 vec_validate_aligned(new_buckets,
718 CLIB_CACHE_LINE_BYTES);
720 load_balance_fill_buckets(lb, nhs, new_buckets,
722 CLIB_MEMORY_BARRIER();
723 lb->lb_buckets = new_buckets;
724 CLIB_MEMORY_BARRIER();
725 load_balance_set_n_buckets(lb, n_buckets);
727 vec_foreach(tmp_dpo, old_buckets)
731 vec_free(old_buckets);
736 * buckets fixed. ready for the MAP update.
743 * bucket size shrinkage.
744 * Any map we have will be based on the old
745 * larger number of buckets, so will be translating to indices
746 * out of range. So the new MAP must be installed first.
749 CLIB_MEMORY_BARRIER();
752 if (n_buckets <= LB_NUM_INLINE_BUCKETS &&
753 lb->lb_n_buckets > LB_NUM_INLINE_BUCKETS)
756 * the new decreased number of buckets is crossing the threshold
757 * from out-line storage to inline:
758 * 1 - Fill the inline buckets,
759 * 2 - fixup the number (and this point the inline buckets are
761 * 3 - free the outline buckets
763 load_balance_fill_buckets(lb, nhs,
764 lb->lb_buckets_inline,
766 CLIB_MEMORY_BARRIER();
767 load_balance_set_n_buckets(lb, n_buckets);
768 CLIB_MEMORY_BARRIER();
770 vec_foreach(tmp_dpo, lb->lb_buckets)
774 vec_free(lb->lb_buckets);
779 * not crossing the threshold.
780 * 1 - update the number to the smaller size
781 * 2 - write the new buckets
782 * 3 - reset those no longer used.
787 old_n_buckets = lb->lb_n_buckets;
788 buckets = load_balance_get_buckets(lb);
790 load_balance_set_n_buckets(lb, n_buckets);
791 CLIB_MEMORY_BARRIER();
793 load_balance_fill_buckets(lb, nhs, buckets,
796 for (ii = n_buckets; ii < old_n_buckets; ii++)
798 dpo_reset(&buckets[ii]);
804 vec_foreach (nh, nhs)
806 dpo_reset(&nh->path_dpo);
811 load_balance_map_unlock(old_lbmi);
815 load_balance_lock (dpo_id_t *dpo)
819 lb = load_balance_get(dpo->dpoi_index);
825 load_balance_destroy (load_balance_t *lb)
830 buckets = load_balance_get_buckets(lb);
832 for (i = 0; i < lb->lb_n_buckets; i++)
834 dpo_reset(&buckets[i]);
837 LB_DBG(lb, "destroy");
838 if (!LB_HAS_INLINE_BUCKETS(lb))
840 vec_free(lb->lb_buckets);
843 fib_urpf_list_unlock(lb->lb_urpf);
844 load_balance_map_unlock(lb->lb_map);
846 pool_put(load_balance_pool, lb);
850 load_balance_unlock (dpo_id_t *dpo)
854 lb = load_balance_get(dpo->dpoi_index);
858 if (0 == lb->lb_locks)
860 load_balance_destroy(lb);
865 load_balance_mem_show (void)
867 fib_show_memory_usage("load-balance",
868 pool_elts(load_balance_pool),
869 pool_len(load_balance_pool),
870 sizeof(load_balance_t));
871 load_balance_map_show_mem();
874 const static dpo_vft_t lb_vft = {
875 .dv_lock = load_balance_lock,
876 .dv_unlock = load_balance_unlock,
877 .dv_format = format_load_balance_dpo,
878 .dv_mem_show = load_balance_mem_show,
882 * @brief The per-protocol VLIB graph nodes that are assigned to a load-balance
885 * this means that these graph nodes are ones from which a load-balance is the
886 * parent object in the DPO-graph.
888 * We do not list all the load-balance nodes, such as the *-lookup. instead
889 * we are relying on the correct use of the .sibling_of field when setting
890 * up these sibling nodes.
892 const static char* const load_balance_ip4_nodes[] =
897 const static char* const load_balance_ip6_nodes[] =
902 const static char* const load_balance_mpls_nodes[] =
907 const static char* const load_balance_l2_nodes[] =
912 const static char* const load_balance_nsh_nodes[] =
917 const static char* const load_balance_bier_nodes[] =
922 const static char* const * const load_balance_nodes[DPO_PROTO_NUM] =
924 [DPO_PROTO_IP4] = load_balance_ip4_nodes,
925 [DPO_PROTO_IP6] = load_balance_ip6_nodes,
926 [DPO_PROTO_MPLS] = load_balance_mpls_nodes,
927 [DPO_PROTO_ETHERNET] = load_balance_l2_nodes,
928 [DPO_PROTO_NSH] = load_balance_nsh_nodes,
929 [DPO_PROTO_BIER] = load_balance_bier_nodes,
933 load_balance_module_init (void)
937 dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes);
940 * Special LB with index zero. we need to define this since the v4 mtrie
941 * assumes an index of 0 implies the ply is empty. therefore all 'real'
942 * adjs need a non-zero index.
943 * This should never be used, but just in case, stack it on a drop.
945 lbi = load_balance_create(1, DPO_PROTO_IP4, 0);
946 load_balance_set_bucket(lbi, 0, drop_dpo_get(DPO_PROTO_IP4));
948 load_balance_logger =
949 vlib_log_register_class("dpo", "load-balance");
951 load_balance_map_module_init();
954 static clib_error_t *
955 load_balance_show (vlib_main_t * vm,
956 unformat_input_t * input,
957 vlib_cli_command_t * cmd)
959 index_t lbi = INDEX_INVALID;
961 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
963 if (unformat (input, "%d", &lbi))
969 if (INDEX_INVALID != lbi)
971 if (pool_is_free_index(load_balance_pool, lbi))
973 vlib_cli_output (vm, "no such load-balance:%d", lbi);
977 vlib_cli_output (vm, "%U", format_load_balance, lbi,
978 LOAD_BALANCE_FORMAT_DETAIL);
985 pool_foreach(lb, load_balance_pool,
987 vlib_cli_output (vm, "%U", format_load_balance,
988 load_balance_get_index(lb),
989 LOAD_BALANCE_FORMAT_NONE);
996 VLIB_CLI_COMMAND (load_balance_show_command, static) = {
997 .path = "show load-balance",
998 .short_help = "show load-balance [<index>]",
999 .function = load_balance_show,
1004 ip_flow_hash (void *data)
1006 ip4_header_t *iph = (ip4_header_t *) data;
1008 if ((iph->ip_version_and_header_length & 0xF0) == 0x40)
1009 return ip4_compute_flow_hash (iph, IP_FLOW_HASH_DEFAULT);
1011 return ip6_compute_flow_hash ((ip6_header_t *) iph, IP_FLOW_HASH_DEFAULT);
1017 return (*((u64 *) m) & 0xffffffffffff);
1021 l2_flow_hash (vlib_buffer_t * b0)
1023 ethernet_header_t *eh;
1025 uword is_ip, eh_size;
1028 eh = vlib_buffer_get_current (b0);
1029 eh_type = clib_net_to_host_u16 (eh->type);
1030 eh_size = ethernet_buffer_header_size (b0);
1032 is_ip = (eh_type == ETHERNET_TYPE_IP4 || eh_type == ETHERNET_TYPE_IP6);
1034 /* since we have 2 cache lines, use them */
1036 a = ip_flow_hash ((u8 *) vlib_buffer_get_current (b0) + eh_size);
1040 b = mac_to_u64 ((u8 *) eh->dst_address);
1041 c = mac_to_u64 ((u8 *) eh->src_address);
1042 hash_mix64 (a, b, c);
1047 typedef struct load_balance_trace_t_
1050 } load_balance_trace_t;
1053 load_balance_inline (vlib_main_t * vm,
1054 vlib_node_runtime_t * node,
1055 vlib_frame_t * frame,
1058 u32 n_left_from, next_index, *from, *to_next;
1060 from = vlib_frame_vector_args (frame);
1061 n_left_from = frame->n_vectors;
1063 next_index = node->cached_next_index;
1065 while (n_left_from > 0)
1069 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1071 while (n_left_from > 0 && n_left_to_next > 0)
1074 u32 bi0, lbi0, next0;
1075 const dpo_id_t *dpo0;
1076 const load_balance_t *lb0;
1083 n_left_to_next -= 1;
1085 b0 = vlib_get_buffer (vm, bi0);
1087 /* lookup dst + src mac */
1088 lbi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
1089 lb0 = load_balance_get(lbi0);
1093 vnet_buffer(b0)->ip.flow_hash = l2_flow_hash(b0);
1098 const bier_hdr_t *bh0 = vlib_buffer_get_current(b0);
1099 vnet_buffer(b0)->ip.flow_hash = bier_compute_flow_hash(bh0);
1102 dpo0 = load_balance_get_bucket_i(lb0,
1103 vnet_buffer(b0)->ip.flow_hash &
1104 (lb0->lb_n_buckets_minus_1));
1106 next0 = dpo0->dpoi_next_node;
1107 vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
1109 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
1111 load_balance_trace_t *tr = vlib_add_trace (vm, node, b0,
1113 tr->lb_index = lbi0;
1115 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1116 n_left_to_next, bi0, next0);
1119 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1122 return frame->n_vectors;
1126 l2_load_balance (vlib_main_t * vm,
1127 vlib_node_runtime_t * node,
1128 vlib_frame_t * frame)
1130 return (load_balance_inline(vm, node, frame, 1));
1134 format_l2_load_balance_trace (u8 * s, va_list * args)
1136 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1137 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1138 load_balance_trace_t *t = va_arg (*args, load_balance_trace_t *);
1140 s = format (s, "L2-load-balance: index %d", t->lb_index);
1147 VLIB_REGISTER_NODE (l2_load_balance_node) = {
1148 .function = l2_load_balance,
1149 .name = "l2-load-balance",
1150 .vector_size = sizeof (u32),
1152 .format_trace = format_l2_load_balance_trace,
1160 nsh_load_balance (vlib_main_t * vm,
1161 vlib_node_runtime_t * node,
1162 vlib_frame_t * frame)
1164 u32 n_left_from, next_index, *from, *to_next;
1166 from = vlib_frame_vector_args (frame);
1167 n_left_from = frame->n_vectors;
1169 next_index = node->cached_next_index;
1171 while (n_left_from > 0)
1175 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1177 while (n_left_from > 0 && n_left_to_next > 0)
1180 u32 bi0, lbi0, next0, *nsh0;
1181 const dpo_id_t *dpo0;
1182 const load_balance_t *lb0;
1189 n_left_to_next -= 1;
1191 b0 = vlib_get_buffer (vm, bi0);
1193 lbi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
1194 lb0 = load_balance_get(lbi0);
1196 /* SPI + SI are the second word of the NSH header */
1197 nsh0 = vlib_buffer_get_current (b0);
1198 vnet_buffer(b0)->ip.flow_hash = nsh0[1] % lb0->lb_n_buckets;
1200 dpo0 = load_balance_get_bucket_i(lb0,
1201 vnet_buffer(b0)->ip.flow_hash &
1202 (lb0->lb_n_buckets_minus_1));
1204 next0 = dpo0->dpoi_next_node;
1205 vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
1207 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
1209 load_balance_trace_t *tr = vlib_add_trace (vm, node, b0,
1211 tr->lb_index = lbi0;
1213 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1214 n_left_to_next, bi0, next0);
1217 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1220 return frame->n_vectors;
1224 format_nsh_load_balance_trace (u8 * s, va_list * args)
1226 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1227 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1228 load_balance_trace_t *t = va_arg (*args, load_balance_trace_t *);
1230 s = format (s, "NSH-load-balance: index %d", t->lb_index);
1237 VLIB_REGISTER_NODE (nsh_load_balance_node) = {
1238 .function = nsh_load_balance,
1239 .name = "nsh-load-balance",
1240 .vector_size = sizeof (u32),
1242 .format_trace = format_nsh_load_balance_trace,
1250 format_bier_load_balance_trace (u8 * s, va_list * args)
1252 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1253 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1254 load_balance_trace_t *t = va_arg (*args, load_balance_trace_t *);
1256 s = format (s, "BIER-load-balance: index %d", t->lb_index);
1261 bier_load_balance (vlib_main_t * vm,
1262 vlib_node_runtime_t * node,
1263 vlib_frame_t * frame)
1265 return (load_balance_inline(vm, node, frame, 0));
1271 VLIB_REGISTER_NODE (bier_load_balance_node) = {
1272 .function = bier_load_balance,
1273 .name = "bier-load-balance",
1274 .vector_size = sizeof (u32),
1276 .format_trace = format_bier_load_balance_trace,
1277 .sibling_of = "mpls-load-balance",