2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <vnet/ip/lookup.h>
17 #include <vnet/dpo/replicate_dpo.h>
18 #include <vnet/dpo/drop_dpo.h>
19 #include <vnet/dpo/receive_dpo.h>
20 #include <vnet/adj/adj.h>
21 #include <vnet/mpls/mpls_types.h>
/* Log class used for this file's log messages. It is assigned in
 * replicate_module_init() from vlib_log_register_class("dpo", "replicate"). */
26 vlib_log_class_t replicate_logger;
/*
 * REP_DBG - emit a debug-level message on replicate_logger for the
 * replicate _rep. The pool index of _rep and REPLICATE_FORMAT_NONE are
 * passed to vlib_log_debug() as arguments.
 * NOTE(review): the format-string and trailing-argument lines of this macro
 * are not visible in this excerpt; _fmt and _args are presumably appended to
 * the message - confirm against the complete definition.
 */
28 #define REP_DBG(_rep, _fmt, _args...) \
30 vlib_log_debug(replicate_logger, \
33 replicate_get_index(_rep), \
34 REPLICATE_FORMAT_NONE, \
/*
 * X-macro listing the error counters of the replicate graph nodes as
 * (symbol, description) pairs. It is expanded once to generate the
 * replicate_dpo_error_t enumerators and once to generate the matching
 * description strings.
 */
38 #define foreach_replicate_dpo_error \
39 _(BUFFER_ALLOCATION_FAILURE, "Buffer Allocation Failure")
/* One REPLICATE_DPO_ERROR_<symbol> enumerator per list entry, followed by
 * REPLICATE_DPO_N_ERROR as the last enumerator. */
42 #define _(sym,str) REPLICATE_DPO_ERROR_##sym,
43 foreach_replicate_dpo_error
45 REPLICATE_DPO_N_ERROR,
46 } replicate_dpo_error_t;
/* Description string of each error, generated from the same list (and hence
 * in the same order) as the replicate_dpo_error_t enumerators. The three
 * replicate graph nodes register this table as their .error_strings. */
48 static char * replicate_dpo_error_strings[] = {
49 #define _(sym,string) string,
50 foreach_replicate_dpo_error
55 * Pool of all replicate DPOs. It is not static so that the data-plane (DP) can have fast access
/* Backing pool of every replicate_t. replicate_get_index() derives an
 * object's index from its offset within this pool. */
57 replicate_t *replicate_pool;
60 * The one instance of replicate main
/* Module-wide state. The initialiser line visible here sets a stat segment
 * name of "/net/mroute". Other members used in this file are repm_counters
 * (combined counters indexed by replicate index) and clones (one vector of
 * clone buffer indices per thread index). */
62 replicate_main_t replicate_main = {
65 .stat_segment_name = "/net/mroute",
/*
 * Return the index of a replicate within replicate_pool.
 *
 * @param rep  a replicate that lives in replicate_pool
 * @return     the element offset of rep from the start of the pool
 */
70 replicate_get_index (const replicate_t *rep)
72 return (rep - replicate_pool);
/*
 * Return the bucket array currently used by a replicate.
 *
 * @param rep  the replicate to inspect
 * @return     rep->rep_buckets_inline when REP_HAS_INLINE_BUCKETS(rep) is
 *             true, otherwise the separately allocated rep->rep_buckets
 */
75 static inline dpo_id_t*
76 replicate_get_buckets (replicate_t *rep)
78 if (REP_HAS_INLINE_BUCKETS(rep))
80 return (rep->rep_buckets_inline);
/* not inline: the buckets live in the rep_buckets vector */
84 return (rep->rep_buckets);
/*
 * Allocate a new replicate from replicate_pool.
 * The element is requested with cache-line alignment and zero-filled, and
 * the combined counter at its pool index is validated and then zeroed.
 * The caller in this file (replicate_create_i) uses the return value as the
 * pointer to the new replicate.
 */
89 replicate_alloc_i (void)
/* take a cache-line aligned element from the pool and clear it */
93 pool_get_aligned(replicate_pool, rep, CLIB_CACHE_LINE_BYTES);
94 clib_memset(rep, 0, sizeof(*rep));
/* the counter for this index must exist and start from zero */
96 vlib_validate_combined_counter(&(replicate_main.repm_counters),
97 replicate_get_index(rep));
98 vlib_zero_combined_counter(&(replicate_main.repm_counters),
99 replicate_get_index(rep));
/*
 * format() helper that prints replicate flags.
 * Consumes one int (the flags) from args and appends "none" when the flags
 * equal REPLICATE_FLAGS_NONE, or "has-local " when the
 * REPLICATE_FLAGS_HAS_LOCAL bit is set.
 *
 * @param s     the string being built
 * @param args  va_list holding the flags value
 */
105 format_replicate_flags (u8 *s, va_list *args)
107 int flags = va_arg (*args, int);
109 if (flags == REPLICATE_FLAGS_NONE)
111 s = format (s, "none");
113 else if (flags & REPLICATE_FLAGS_HAS_LOCAL)
115 s = format (s, "has-local ");
/*
 * Append a textual description of a replicate to the string s.
 *
 * The output is a header of the form
 *   <dpo-type>: [index:N buckets:N flags:[...] to:[packets:bytes]]
 * followed by one line per bucket, indented by indent+2, that shows the
 * bucket number and the bucket's DPO (formatted with indent+6).
 *
 * @param repi   replicate index; the MPLS_IS_REPLICATE bits are cleared
 *               before it is used
 * @param flags  formatting flags (not referenced by the lines visible here)
 *
 * NOTE(review): the remaining parameters are declared on lines that are not
 * visible in this excerpt; from the callers they are the indent and the
 * string s, in that order.
 */
122 replicate_format (index_t repi,
123 replicate_format_flags_t flags,
/* clear the MPLS_IS_REPLICATE bits before using the value as a pool index */
132 repi &= ~MPLS_IS_REPLICATE;
133 rep = replicate_get(repi);
/* read this replicate's packet/byte counters into 'to' */
134 vlib_get_combined_counter(&(replicate_main.repm_counters), repi, &to);
135 buckets = replicate_get_buckets(rep);
/* header line */
137 s = format(s, "%U: ", format_dpo_type, DPO_REPLICATE);
138 s = format(s, "[index:%d buckets:%d ", repi, rep->rep_n_buckets);
139 s = format(s, "flags:[%U] ", format_replicate_flags, rep->rep_flags);
140 s = format(s, "to:[%Ld:%Ld]]", to.packets, to.bytes);
/* one indented line per bucket */
142 for (i = 0; i < rep->rep_n_buckets; i++)
144 s = format(s, "\n%U", format_white_space, indent+2);
145 s = format(s, "[%d]", i);
146 s = format(s, " %U", format_dpo_id, &buckets[i], indent+6);
/*
 * format() function for a replicate.
 * Consumes an index_t (the replicate index) and a replicate_format_flags_t
 * from args and formats the replicate with an indent of 0.
 *
 * @param s     the string being built
 * @param args  va_list holding the index and the format flags, in that order
 * @return      the result of replicate_format()
 */
152 format_replicate (u8 * s, va_list * args)
154 index_t repi = va_arg(*args, index_t);
155 replicate_format_flags_t flags = va_arg(*args, replicate_format_flags_t);
157 return (replicate_format(repi, flags, 0, s));
/*
 * The dv_format callback registered in rep_vft.
 * Consumes an index_t (the replicate index) and a u32 indent from args and
 * formats the replicate with REPLICATE_FORMAT_DETAIL.
 *
 * @param s     the string being built
 * @param args  va_list holding the index and the indent, in that order
 * @return      the result of replicate_format()
 */
160 format_replicate_dpo (u8 * s, va_list * args)
162 index_t repi = va_arg(*args, index_t);
163 u32 indent = va_arg(*args, u32);
165 return (replicate_format(repi, REPLICATE_FORMAT_DETAIL, indent, s));
/*
 * Create a replicate with a given number of buckets.
 *
 * @param num_buckets  number of buckets; asserted to be no more than
 *                     REP_MAX_BUCKETS
 * @param rep_proto    DPO protocol stored in the new replicate
 *
 * The caller in this file (replicate_create) passes the return value to
 * replicate_get_index(), i.e. it is the new replicate.
 */
170 replicate_create_i (u32 num_buckets,
171 dpo_proto_t rep_proto)
175 ASSERT (num_buckets <= REP_MAX_BUCKETS);
177 rep = replicate_alloc_i();
178 rep->rep_n_buckets = num_buckets;
179 rep->rep_proto = rep_proto;
/* when the buckets are not inline, grow the rep_buckets vector so that
 * index rep_n_buckets - 1 is valid */
181 if (!REP_HAS_INLINE_BUCKETS(rep))
183 vec_validate_aligned(rep->rep_buckets,
184 rep->rep_n_buckets - 1,
185 CLIB_CACHE_LINE_BYTES);
188 REP_DBG(rep, "create");
/*
 * Create a replicate and return its pool index.
 *
 * @param n_buckets  number of buckets of the new replicate
 * @param rep_proto  DPO protocol of the new replicate
 * @return           index of the new replicate in replicate_pool
 */
194 replicate_create (u32 n_buckets,
195 dpo_proto_t rep_proto)
197 return (replicate_get_index(replicate_create_i(n_buckets, rep_proto)));
/*
 * Point one bucket of a replicate at a new DPO and keep the
 * REPLICATE_FLAGS_HAS_LOCAL flag in step with the change.
 *
 * @param rep   the replicate that owns the bucket
 * @param next  the DPO to stack the bucket on
 *
 * NOTE(review): two further parameters are declared on lines that are not
 * visible in this excerpt; from the body and the callers they are the
 * bucket number and the bucket array it indexes.
 */
201 replicate_set_bucket_i (replicate_t *rep,
204 const dpo_id_t *next)
/* the DPO being replaced is a receive DPO: clear the has-local flag.
 * NOTE(review): only this bucket is examined; whether another bucket still
 * holds a receive DPO is not checked here - confirm this is intended. */
206 if (dpo_is_receive(&buckets[bucket]))
208 rep->rep_flags &= ~REPLICATE_FLAGS_HAS_LOCAL;
/* the new DPO is a receive DPO: set the has-local flag */
210 if (dpo_is_receive(next))
212 rep->rep_flags |= REPLICATE_FLAGS_HAS_LOCAL;
/* stack the bucket on the new DPO, with the replicate as the parent type */
214 dpo_stack(DPO_REPLICATE, rep->rep_proto, &buckets[bucket], next);
/*
 * Set one bucket of the replicate identified by an index.
 *
 * @param repi  replicate index; the MPLS_IS_REPLICATE bits are cleared
 *              before the lookup
 * @param next  the DPO the bucket will be stacked on
 *
 * The bucket number (declared on a line not visible in this excerpt) is
 * asserted to be below the replicate's bucket count.
 */
218 replicate_set_bucket (index_t repi,
220 const dpo_id_t *next)
225 repi &= ~MPLS_IS_REPLICATE;
226 rep = replicate_get(repi);
227 buckets = replicate_get_buckets(rep);
229 ASSERT(bucket < rep->rep_n_buckets);
231 replicate_set_bucket_i(rep, bucket, buckets, next);
/*
 * Test whether a replicate DPO amounts to a drop.
 *
 * @param dpo  the DPO to test
 *
 * For a DPO of type DPO_REPLICATE with exactly one bucket the result is
 * whether that single bucket is a drop DPO.
 * NOTE(review): the values returned for a non-replicate DPO and for a
 * replicate with a different bucket count are on lines not visible in this
 * excerpt.
 */
235 replicate_is_drop (const dpo_id_t *dpo)
240 if (DPO_REPLICATE != dpo->dpoi_type)
243 repi = dpo->dpoi_index & ~MPLS_IS_REPLICATE;
244 rep = replicate_get(repi);
246 if (1 == rep->rep_n_buckets)
248 return (dpo_is_drop(replicate_get_bucket_i(rep, 0)));
/*
 * Return one bucket of the replicate identified by an index.
 *
 * @param repi  replicate index; the MPLS_IS_REPLICATE bits are cleared
 *              before the lookup
 * @return      the result of replicate_get_bucket_i() for the requested
 *              bucket (the bucket parameter is declared on a line not
 *              visible in this excerpt)
 */
254 replicate_get_bucket (index_t repi,
259 repi &= ~MPLS_IS_REPLICATE;
260 rep = replicate_get(repi);
262 return (replicate_get_bucket_i(rep, bucket));
/*
 * Make sure a path vector holds at least one path.
 *
 * @param nhs         vector of paths, possibly empty
 * @param drop_proto  protocol of the drop DPO used when nhs is empty
 *
 * When nhs is empty one path is appended and its DPO is set to a copy of
 * the drop DPO for drop_proto. The caller assigns the return value back to
 * its path vector.
 */
266 static load_balance_path_t *
267 replicate_multipath_next_hop_fixup (load_balance_path_t *nhs,
268 dpo_proto_t drop_proto)
270 if (0 == vec_len(nhs))
272 load_balance_path_t *nh;
275 * we need something for the replicate. so use the drop
/* append one element; nh points at the new element */
277 vec_add2(nhs, nh, 1);
280 dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto));
287 * Fill the replicate's buckets, in order, with the DPO of each
288 * corresponding next-hop path.
/*
 * Write the DPO of every path in nhs into consecutive buckets.
 *
 * @param rep  the replicate whose flags/protocol are used when each bucket
 *             is set
 * @param nhs  vector of paths; one bucket is written per path
 *
 * NOTE(review): the bucket array and n_buckets parameters, and the initial
 * value of the bucket counter, are on lines not visible in this excerpt.
 * Each bucket number is asserted to be below n_buckets before it is written.
 */
291 replicate_fill_buckets (replicate_t *rep,
292 load_balance_path_t *nhs,
296 load_balance_path_t * nh;
302 * the next-hops have normalised weights. that means their sum is the number
303 * of buckets we need to fill.
305 vec_foreach (nh, nhs)
307 ASSERT(bucket < n_buckets);
308 replicate_set_bucket_i(rep, bucket++, buckets, &nh->path_dpo);
/*
 * Store a new bucket count in a replicate.
 *
 * @param rep  the replicate to update
 *
 * The new count, n_buckets (declared on a line not visible in this
 * excerpt), is asserted to be no more than REP_MAX_BUCKETS.
 */
313 replicate_set_n_buckets (replicate_t *rep,
316 ASSERT (n_buckets <= REP_MAX_BUCKETS);
317 rep->rep_n_buckets = n_buckets;
/*
 * Replace the set of paths that a replicate forwards to.
 *
 * @param dpo        a DPO asserted to be of type DPO_REPLICATE; the
 *                   MPLS_IS_REPLICATE bits of its index are cleared before
 *                   the lookup
 * @param next_hops  vector of paths; one bucket is written per path. An
 *                   empty vector is replaced by a single drop path. If it
 *                   holds more than REP_MAX_BUCKETS paths, an error is
 *                   logged, the DPOs of the excess paths are reset and the
 *                   vector is truncated to REP_MAX_BUCKETS.
 *
 * A replicate with no buckets yet is simply sized and filled. For a
 * replicate that is already populated, the bucket array and the bucket count
 * are updated in an order, separated by CLIB_MEMORY_BARRIER(), such that the
 * reported count is never more than the number of usable buckets:
 *   - same count:  the new buckets are written over the old ones
 *   - growing:     the buckets are filled (and out-of-line storage is
 *                  allocated or swapped where needed) before the count is
 *                  raised; superseded storage is reset afterwards
 *   - shrinking from out-of-line to inline storage: the inline buckets are
 *                  filled, the count is lowered, then the out-of-line
 *                  buckets are reset and freed
 *   - shrinking otherwise: the count is lowered first, the remaining
 *                  buckets are rewritten, and the surplus buckets are reset
 * At the end the DPO of every path in the vector is reset.
 *
 * NOTE(review): several lines of this function (braces, else keywords,
 * declarations and some call arguments) are not visible in this excerpt.
 */
321 replicate_multipath_update (const dpo_id_t *dpo,
322 load_balance_path_t * next_hops)
324 load_balance_path_t * nh, * nhs;
/* find the replicate this DPO refers to */
330 ASSERT(DPO_REPLICATE == dpo->dpoi_type);
331 repi = dpo->dpoi_index & ~MPLS_IS_REPLICATE;
332 rep = replicate_get(repi);
333 nhs = replicate_multipath_next_hop_fixup(next_hops,
335 n_buckets = vec_len(nhs);
/* more paths than a replicate can hold: release the excess and truncate */
337 if (n_buckets > REP_MAX_BUCKETS)
339 vlib_log_err (replicate_logger,
340 "Too many paths for replicate, truncating %d -> %d",
341 n_buckets, REP_MAX_BUCKETS);
342 for (int i = REP_MAX_BUCKETS; i < n_buckets; i++)
343 dpo_reset (&vec_elt (nhs, i).path_dpo);
344 vec_set_len (nhs, REP_MAX_BUCKETS);
345 n_buckets = REP_MAX_BUCKETS;
348 if (0 == rep->rep_n_buckets)
351 * first time initialisation. no packets inflight, so we can write
354 replicate_set_n_buckets(rep, n_buckets);
356 if (!REP_HAS_INLINE_BUCKETS(rep))
357 vec_validate_aligned(rep->rep_buckets,
358 rep->rep_n_buckets - 1,
359 CLIB_CACHE_LINE_BYTES);
361 replicate_fill_buckets(rep, nhs,
362 replicate_get_buckets(rep),
368 * This is a modification of an existing replicate.
369 * We need to ensure that packets in flight see a consistent state, that
370 * is the number of reported buckets the REP has
371 * is not more than it actually has. So if the
372 * number of buckets is increasing, we must update the bucket array first,
373 * then the reported number. vice-versa if the number of buckets goes down.
375 if (n_buckets == rep->rep_n_buckets)
378 * no change in the number of buckets. we can simply fill what
379 * is new over what is old.
381 replicate_fill_buckets(rep, nhs,
382 replicate_get_buckets(rep),
385 else if (n_buckets > rep->rep_n_buckets)
388 * we have more buckets. the old replicate map (if there is one)
389 * will remain valid, i.e. mapping to indices within range, so we
392 if (n_buckets > REP_NUM_INLINE_BUCKETS &&
393 rep->rep_n_buckets <= REP_NUM_INLINE_BUCKETS)
396 * the new increased number of buckets is crossing the threshold
397 * from the inline storage to out-line. Alloc the outline buckets
398 * first, then fixup the number. then reset the inlines.
400 ASSERT(NULL == rep->rep_buckets);
401 vec_validate_aligned(rep->rep_buckets,
403 CLIB_CACHE_LINE_BYTES);
405 replicate_fill_buckets(rep, nhs,
408 CLIB_MEMORY_BARRIER();
409 replicate_set_n_buckets(rep, n_buckets);
411 CLIB_MEMORY_BARRIER();
413 for (ii = 0; ii < REP_NUM_INLINE_BUCKETS; ii++)
415 dpo_reset(&rep->rep_buckets_inline[ii]);
420 if (n_buckets <= REP_NUM_INLINE_BUCKETS)
423 * we are not crossing the threshold and it's still inline buckets.
424 * we can write the new on the old..
426 replicate_fill_buckets(rep, nhs,
427 replicate_get_buckets(rep),
429 CLIB_MEMORY_BARRIER();
430 replicate_set_n_buckets(rep, n_buckets);
435 * we are not crossing the threshold. We need a new bucket array to
436 * hold the increased number of choices.
438 dpo_id_t *new_buckets, *old_buckets, *tmp_dpo;
441 old_buckets = replicate_get_buckets(rep);
443 vec_validate_aligned(new_buckets,
445 CLIB_CACHE_LINE_BYTES);
/* fill the new array, publish it, and only then raise the count */
447 replicate_fill_buckets(rep, nhs, new_buckets, n_buckets);
448 CLIB_MEMORY_BARRIER();
449 rep->rep_buckets = new_buckets;
450 CLIB_MEMORY_BARRIER();
451 replicate_set_n_buckets(rep, n_buckets);
453 vec_foreach(tmp_dpo, old_buckets)
457 vec_free(old_buckets);
464 * bucket size shrinkage.
466 if (n_buckets <= REP_NUM_INLINE_BUCKETS &&
467 rep->rep_n_buckets > REP_NUM_INLINE_BUCKETS)
470 * the new decreased number of buckets is crossing the threshold
471 * from out-line storage to inline:
472 * 1 - Fill the inline buckets,
473 * 2 - fixup the number (and this point the inline buckets are
475 * 3 - free the outline buckets
477 replicate_fill_buckets(rep, nhs,
478 rep->rep_buckets_inline,
480 CLIB_MEMORY_BARRIER();
481 replicate_set_n_buckets(rep, n_buckets);
482 CLIB_MEMORY_BARRIER();
484 vec_foreach(tmp_dpo, rep->rep_buckets)
488 vec_free(rep->rep_buckets);
493 * not crossing the threshold.
494 * 1 - update the number to the smaller size
495 * 2 - write the new buckets
496 * 3 - reset those no longer used.
501 old_n_buckets = rep->rep_n_buckets;
502 buckets = replicate_get_buckets(rep);
504 replicate_set_n_buckets(rep, n_buckets);
505 CLIB_MEMORY_BARRIER();
507 replicate_fill_buckets(rep, nhs,
511 for (ii = n_buckets; ii < old_n_buckets; ii++)
513 dpo_reset(&buckets[ii]);
/* reset the DPO of every path in the vector now that the buckets are set */
519 vec_foreach (nh, nhs)
521 dpo_reset(&nh->path_dpo);
/*
 * The dv_lock callback registered in rep_vft.
 *
 * @param dpo  the DPO whose dpoi_index selects the replicate
 *
 * NOTE(review): only the lookup is visible in this excerpt; the statement
 * that updates the lock count (presumably rep_locks, which
 * replicate_unlock() tests) is on a line not shown - confirm.
 */
527 replicate_lock (dpo_id_t *dpo)
531 rep = replicate_get(dpo->dpoi_index);
/*
 * Obtain a replicate whose buckets suit the requested flags.
 *
 * @param flags  the flags wanted by the caller
 * @return       the pool index of the replicate referred to by 'copy'
 *
 * The source replicate is looked up by repi (declared on a line not visible
 * in this excerpt). Two cases are distinguished:
 *  - flags equal the source's flags, or REPLICATE_FLAGS_HAS_LOCAL is
 *    requested: all of the source's buckets can be used.
 *    NOTE(review): the statement(s) of this branch are not visible here.
 *  - otherwise the local (receive) buckets are left out: a source with a
 *    single bucket yields a new one-bucket replicate holding the drop DPO
 *    of the source's protocol; a larger source yields a new replicate with
 *    one bucket fewer, into which every non-receive bucket is copied.
 */
537 replicate_dup (replicate_flags_t flags,
540 replicate_t *rep, *copy;
542 rep = replicate_get(repi);
544 if (rep->rep_flags == flags ||
545 flags & REPLICATE_FLAGS_HAS_LOCAL)
548 * we can include all the buckets from the original in the copy
555 * caller doesn't want the local paths that the original has
557 if (rep->rep_n_buckets == 1)
560 * original has only one bucket that is the local, so create
561 * a new one with only the drop
563 copy = replicate_create_i (1, rep->rep_proto);
565 replicate_set_bucket_i(copy, 0,
566 replicate_get_buckets(copy),
567 drop_dpo_get(rep->rep_proto));
571 dpo_id_t *old_buckets, *copy_buckets;
574 copy = replicate_create_i(rep->rep_n_buckets - 1,
/* rep is looked up again after the create - presumably because allocating
 * from the pool may move its elements; TODO confirm */
577 rep = replicate_get(repi);
578 old_buckets = replicate_get_buckets(rep);
579 copy_buckets = replicate_get_buckets(copy);
/* copy every bucket that is not a receive DPO to position 'pos' of the copy
 * (the update of 'pos' is on a line not visible in this excerpt) */
582 for (bucket = 0; bucket < rep->rep_n_buckets; bucket++)
584 if (!dpo_is_receive(&old_buckets[bucket]))
586 replicate_set_bucket_i(copy, pos, copy_buckets,
587 (&old_buckets[bucket]));
594 return (replicate_get_index(copy));
/*
 * Release everything held by a replicate and return it to replicate_pool.
 *
 * @param rep  the replicate to destroy
 */
598 replicate_destroy (replicate_t *rep)
603 buckets = replicate_get_buckets(rep);
/* reset the DPO held in every bucket */
605 for (i = 0; i < rep->rep_n_buckets; i++)
607 dpo_reset(&buckets[i]);
610 REP_DBG(rep, "destroy");
/* out-of-line bucket storage is a vector that must be freed */
611 if (!REP_HAS_INLINE_BUCKETS(rep))
613 vec_free(rep->rep_buckets);
616 pool_put(replicate_pool, rep);
/*
 * The dv_unlock callback registered in rep_vft.
 *
 * @param dpo  the DPO whose dpoi_index selects the replicate
 *
 * The replicate is destroyed once its rep_locks count is zero.
 * NOTE(review): the statement that lowers rep_locks is on a line not
 * visible in this excerpt.
 */
620 replicate_unlock (dpo_id_t *dpo)
624 rep = replicate_get(dpo->dpoi_index);
628 if (0 == rep->rep_locks)
630 replicate_destroy(rep);
/*
 * The dv_mem_show callback registered in rep_vft.
 * Reports the replicate pool through fib_show_memory_usage() under the
 * name "replicate", passing the number of elements in use, the pool length
 * and the size of one replicate_t.
 */
635 replicate_mem_show (void)
637 fib_show_memory_usage("replicate",
638 pool_elts(replicate_pool),
639 pool_len(replicate_pool),
640 sizeof(replicate_t));
/* Table of callbacks (lock, unlock, format, memory usage) for the replicate
 * DPO type; it is passed to dpo_register() in replicate_module_init(). */
643 const static dpo_vft_t rep_vft = {
644 .dv_lock = replicate_lock,
645 .dv_unlock = replicate_unlock,
646 .dv_format = format_replicate_dpo,
647 .dv_mem_show = replicate_mem_show,
651 * @brief The per-protocol VLIB graph nodes that are assigned to a replicate
654 * object. This means that these graph nodes are ones from which a replicate
655 * is the parent object in the DPO-graph.
/* Per-protocol lists of graph node names for IPv4, IPv6 and MPLS.
 * NOTE(review): the entries of the three lists are on lines not visible in
 * this excerpt. */
657 const static char* const replicate_ip4_nodes[] =
662 const static char* const replicate_ip6_nodes[] =
667 const static char* const replicate_mpls_nodes[] =
/* The lists above indexed by DPO protocol; passed to dpo_register() in
 * replicate_module_init(). */
673 const static char* const * const replicate_nodes[DPO_PROTO_NUM] =
675 [DPO_PROTO_IP4] = replicate_ip4_nodes,
676 [DPO_PROTO_IP6] = replicate_ip6_nodes,
677 [DPO_PROTO_MPLS] = replicate_mpls_nodes,
/*
 * Module initialisation: register the DPO_REPLICATE type with its callback
 * table and per-protocol node lists, and register the "dpo"/"replicate" log
 * class whose handle is stored in replicate_logger.
 */
681 replicate_module_init (void)
683 dpo_register(DPO_REPLICATE, &rep_vft, replicate_nodes);
684 replicate_logger = vlib_log_register_class("dpo", "replicate");
/*
 * Handler of the "show replicate [<index>]" CLI command.
 *
 * @param vm     vlib main, used for CLI output
 * @param input  command arguments; an optional decimal replicate index
 * @param cmd    the CLI command (not referenced by the lines visible here)
 *
 * With an index: prints "no such index" when that pool slot is free,
 * otherwise prints the replicate with REPLICATE_FORMAT_DETAIL.
 * Without an index: prints every replicate in the pool with
 * REPLICATE_FORMAT_NONE.
 */
687 static clib_error_t *
688 replicate_show (vlib_main_t * vm,
689 unformat_input_t * input,
690 vlib_cli_command_t * cmd)
/* INDEX_INVALID means that no index was given on the command line */
692 index_t repi = INDEX_INVALID;
694 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
696 if (unformat (input, "%d", &repi))
702 if (INDEX_INVALID != repi)
704 if (pool_is_free_index (replicate_pool, repi))
705 vlib_cli_output (vm, "no such index %d", repi);
707 vlib_cli_output (vm, "%U", format_replicate, repi,
708 REPLICATE_FORMAT_DETAIL);
714 pool_foreach (rep, replicate_pool)
716 vlib_cli_output (vm, "%U", format_replicate,
717 replicate_get_index(rep),
718 REPLICATE_FORMAT_NONE);
/* Registers the CLI command "show replicate [<index>]", which is handled by
 * replicate_show(). */
725 VLIB_CLI_COMMAND (replicate_show_command, static) = {
726 .path = "show replicate",
727 .short_help = "show replicate [<index>]",
728 .function = replicate_show,
/* Per-packet trace record of the replicate nodes. The code in this file
 * writes a rep_index member and formats a dpo member.
 * NOTE(review): the member declarations are on lines not visible in this
 * excerpt. */
731 typedef struct replicate_trace_t_
/*
 * Common body of the ip4/ip6/mpls replicate graph nodes.
 *
 * For every buffer of the frame: the replicate is found from the buffer's
 * TX adjacency index, its combined counter is incremented by one packet and
 * the buffer chain length, and the buffer is cloned. Each clone is given the
 * DPO index of the corresponding bucket as its TX adjacency index and is
 * enqueued to that bucket's next node.
 *
 * @param vm     vlib main
 * @param node   the runtime of the calling node
 * @param frame  the frame of buffer indices to process
 * @return       the number of vectors in the frame
 *
 * NOTE(review): some lines (e.g. loading bi0 from the frame, the count
 * argument of the clone call, the trailing enqueue arguments) are not
 * visible in this excerpt.
 */
738 replicate_inline (vlib_main_t * vm,
739 vlib_node_runtime_t * node,
740 vlib_frame_t * frame)
742 vlib_combined_counter_main_t * cm = &replicate_main.repm_counters;
743 replicate_main_t * rm = &replicate_main;
744 u32 n_left_from, * from, * to_next, next_index;
745 u32 thread_index = vlib_get_thread_index();
747 from = vlib_frame_vector_args (frame);
748 n_left_from = frame->n_vectors;
749 next_index = node->cached_next_index;
751 while (n_left_from > 0)
755 vlib_get_next_frame (vm, node, next_index,
756 to_next, n_left_to_next);
758 while (n_left_from > 0 && n_left_to_next > 0)
760 u32 next0, ci0, bi0, bucket, repi0;
761 const replicate_t *rep0;
762 vlib_buffer_t * b0, *c0;
763 const dpo_id_t *dpo0;
/* the replicate to use is carried in the buffer's TX adjacency index */
770 b0 = vlib_get_buffer (vm, bi0);
771 repi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
772 rep0 = replicate_get(repi0);
/* count one packet and its chain length against this replicate */
774 vlib_increment_combined_counter(
775 cm, thread_index, repi0, 1,
776 vlib_buffer_length_in_chain(vm, b0));
/* this thread's clone vector needs one slot per bucket */
778 vec_validate (rm->clones[thread_index], rep0->rep_n_buckets - 1);
780 num_cloned = vlib_buffer_clone (vm, bi0, rm->clones[thread_index],
782 VLIB_BUFFER_CLONE_HEAD_SIZE);
/* fewer clones than buckets is counted as a buffer allocation failure */
784 if (num_cloned != rep0->rep_n_buckets)
786 vlib_node_increment_counter
787 (vm, node->node_index,
788 REPLICATE_DPO_ERROR_BUFFER_ALLOCATION_FAILURE, 1);
/* send clone number 'bucket' to the DPO held in bucket number 'bucket' */
791 for (bucket = 0; bucket < num_cloned; bucket++)
793 ci0 = rm->clones[thread_index][bucket];
794 c0 = vlib_get_buffer(vm, ci0);
800 dpo0 = replicate_get_bucket_i(rep0, bucket);
801 next0 = dpo0->dpoi_next_node;
802 vnet_buffer (c0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
/* the traced flag is read from the original buffer b0, while the trace
 * record is attached to the clone c0 */
804 if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
806 replicate_trace_t *t;
808 t = vlib_add_trace (vm, node, c0, sizeof (*t));
809 t->rep_index = repi0;
813 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
814 to_next, n_left_to_next,
/* the next frame is full: hand it over and fetch another one */
816 if (PREDICT_FALSE (n_left_to_next == 0))
818 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
819 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* empty the clone vector so that it can be reused for the next buffer */
822 vec_reset_length (rm->clones[thread_index]);
825 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
828 return frame->n_vectors;
/*
 * format() function for a replicate_trace_t record; registered as
 * .format_trace by the three replicate graph nodes.
 * Consumes a vlib_main_t pointer and a vlib_node_t pointer (both unused)
 * and the trace record from args, and appends "replicate: <n> via <dpo>",
 * where <dpo> is the record's dpo member formatted with an indent of 0.
 *
 * @param s     the string being built
 * @param args  va_list holding vm, node and the trace record, in that order
 */
832 format_replicate_trace (u8 * s, va_list * args)
834 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
835 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
836 replicate_trace_t *t = va_arg (*args, replicate_trace_t *);
838 s = format (s, "replicate: %d via %U",
840 format_dpo_id, &t->dpo, 0);
/*
 * Node function of "ip4-replicate"; forwards to replicate_inline().
 *
 * @param vm     vlib main
 * @param node   the node's runtime
 * @param frame  the frame to process
 * @return       the value returned by replicate_inline()
 */
845 ip4_replicate (vlib_main_t * vm,
846 vlib_node_runtime_t * node,
847 vlib_frame_t * frame)
849 return (replicate_inline (vm, node, frame));
853 * @brief IP4 replication node
/* Registration of the graph node "ip4-replicate": it runs ip4_replicate(),
 * its frame elements are sizeof(u32) bytes, and it uses the replicate error
 * strings and trace formatter of this file. */
855 VLIB_REGISTER_NODE (ip4_replicate_node) = {
856 .function = ip4_replicate,
857 .name = "ip4-replicate",
858 .vector_size = sizeof (u32),
860 .n_errors = ARRAY_LEN(replicate_dpo_error_strings),
861 .error_strings = replicate_dpo_error_strings,
863 .format_trace = format_replicate_trace,
/*
 * Node function of "ip6-replicate"; forwards to replicate_inline().
 *
 * @param vm     vlib main
 * @param node   the node's runtime
 * @param frame  the frame to process
 * @return       the value returned by replicate_inline()
 */
871 ip6_replicate (vlib_main_t * vm,
872 vlib_node_runtime_t * node,
873 vlib_frame_t * frame)
875 return (replicate_inline (vm, node, frame));
879 * @brief IPv6 replication node
/* Registration of the graph node "ip6-replicate": it runs ip6_replicate(),
 * its frame elements are sizeof(u32) bytes, and it uses the replicate error
 * strings and trace formatter of this file. */
881 VLIB_REGISTER_NODE (ip6_replicate_node) = {
882 .function = ip6_replicate,
883 .name = "ip6-replicate",
884 .vector_size = sizeof (u32),
886 .n_errors = ARRAY_LEN(replicate_dpo_error_strings),
887 .error_strings = replicate_dpo_error_strings,
889 .format_trace = format_replicate_trace,
/*
 * Node function of "mpls-replicate"; forwards to replicate_inline().
 *
 * @param vm     vlib main
 * @param node   the node's runtime
 * @param frame  the frame to process
 * @return       the value returned by replicate_inline()
 */
897 mpls_replicate (vlib_main_t * vm,
898 vlib_node_runtime_t * node,
899 vlib_frame_t * frame)
901 return (replicate_inline (vm, node, frame));
905 * @brief MPLS replication node
/* Registration of the graph node "mpls-replicate": it runs mpls_replicate(),
 * its frame elements are sizeof(u32) bytes, and it uses the replicate error
 * strings and trace formatter of this file. */
907 VLIB_REGISTER_NODE (mpls_replicate_node) = {
908 .function = mpls_replicate,
909 .name = "mpls-replicate",
910 .vector_size = sizeof (u32),
912 .n_errors = ARRAY_LEN(replicate_dpo_error_strings),
913 .error_strings = replicate_dpo_error_strings,
915 .format_trace = format_replicate_trace,
/*
 * Init function: size the clones vector of replicate_main so that index
 * vlib_num_workers() is valid, i.e. one entry more than the number of
 * workers (presumably the extra entry serves the main thread - confirm).
 * replicate_inline() indexes this vector by thread index.
 *
 * @param vm  vlib main (not referenced by the lines visible here)
 */
923 replicate_dpo_init (vlib_main_t * vm)
925 replicate_main_t * rm = &replicate_main;
927 vec_validate (rm->clones, vlib_num_workers());
/* register the function above as a VLIB init function */
932 VLIB_INIT_FUNCTION (replicate_dpo_init);