2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <vnet/ip/lookup.h>
17 #include <vnet/dpo/replicate_dpo.h>
18 #include <vnet/dpo/drop_dpo.h>
19 #include <vnet/dpo/receive_dpo.h>
20 #include <vnet/adj/adj.h>
21 #include <vnet/mpls/mpls_types.h>
/* Log class for this module. It is assigned from vlib_log_register_class()
 * in replicate_module_init() and handed to vlib_log_debug() by REP_DBG. */
26 vlib_log_class_t replicate_logger;
/* Debug-level logging helper for a replicate object. The visible part of the
 * expansion passes replicate_logger, the pool index of _rep and
 * REPLICATE_FORMAT_NONE to vlib_log_debug(); the format string and the
 * remaining arguments are on lines not shown in this view. */
28 #define REP_DBG(_rep, _fmt, _args...) \
30 vlib_log_debug(replicate_logger, \
33 replicate_get_index(_rep), \
34 REPLICATE_FORMAT_NONE, \
/* List of node error conditions as (symbol, description) pairs. The list is
 * expanded with different definitions of _() to build both the error enum and
 * the matching string table. There is currently a single entry. */
38 #define foreach_replicate_dpo_error \
39 _(BUFFER_ALLOCATION_FAILURE, "Buffer Allocation Failure")
/* Error codes for the replicate nodes, generated from
 * foreach_replicate_dpo_error: each list entry becomes
 * REPLICATE_DPO_ERROR_<sym>, and REPLICATE_DPO_N_ERROR follows the last one. */
42 #define _(sym,str) REPLICATE_DPO_ERROR_##sym,
43 foreach_replicate_dpo_error
45 REPLICATE_DPO_N_ERROR,
46 } replicate_dpo_error_t;
/* Description strings for the error codes, generated from the same list as
 * the enum so that entry order matches. The node registrations in this file
 * reference this table as .error_strings. */
48 static char * replicate_dpo_error_strings[] = {
49 #define _(sym,string) string,
50 foreach_replicate_dpo_error
/* replicate_pool backs every replicate_t in this file: objects are taken with
 * pool_get_aligned() in replicate_alloc_i(), returned with pool_put() in
 * replicate_destroy(), and an object index is its offset from the pool base
 * (see replicate_get_index()). */
55 * Pool of all DPOs. It's not static so the DP can have fast access
57 replicate_t *replicate_pool;
/* Module-wide state. Only the stat_segment_name initialiser is visible in
 * this view; other members used in this file are repm_counters (per-object
 * combined counters) and clones (per-thread clone index vectors). */
60 * The one instance of replicate main
62 replicate_main_t replicate_main = {
65 .stat_segment_name = "/net/mroute",
/* Convert a replicate object pointer into its pool index, computed as the
 * element offset of rep from the base of replicate_pool. */
70 replicate_get_index (const replicate_t *rep)
72 return (rep - replicate_pool);
/* Return the bucket array currently in use by rep: the array embedded in the
 * object (rep_buckets_inline) when REP_HAS_INLINE_BUCKETS(rep) holds,
 * otherwise the rep_buckets vector. */
75 static inline dpo_id_t*
76 replicate_get_buckets (replicate_t *rep)
78 if (REP_HAS_INLINE_BUCKETS(rep))
80 return (rep->rep_buckets_inline);
84 return (rep->rep_buckets);
/* Allocate one replicate object from replicate_pool (cache-line aligned) and
 * zero it. The combined counter for the new index is then made valid and
 * zeroed, so a reused pool slot does not carry over old counts.
 * The return statement is not visible in this view. */
89 replicate_alloc_i (void)
93 pool_get_aligned(replicate_pool, rep, CLIB_CACHE_LINE_BYTES);
94 clib_memset(rep, 0, sizeof(*rep));
/* size the counter vector to cover this index, then clear the entry */
96 vlib_validate_combined_counter(&(replicate_main.repm_counters),
97 replicate_get_index(rep));
98 vlib_zero_combined_counter(&(replicate_main.repm_counters),
99 replicate_get_index(rep));
/* format() callback for replicate flags. Takes one int from the va_list.
 * Appends "none" when the value equals REPLICATE_FLAGS_NONE, otherwise
 * appends "has-local " when the REPLICATE_FLAGS_HAS_LOCAL bit is set.
 * The return statement is not visible in this view. */
105 format_replicate_flags (u8 *s, va_list *args)
107 int flags = va_arg (*args, int);
109 if (flags == REPLICATE_FLAGS_NONE)
111 s = format (s, "none");
113 else if (flags & REPLICATE_FLAGS_HAS_LOCAL)
115 s = format (s, "has-local ");
/* Append a textual description of replicate repi to s.
 * The MPLS_IS_REPLICATE bit is masked off the index before lookup. The output
 * is a header (DPO type, index, bucket count, flags, packet and byte
 * counters) followed by one line per bucket, each indented relative to
 * indent and showing the bucket number and the DPO stored in it.
 * Parameters after flags and the return statement are on lines not visible
 * in this view; callers in this file pass (repi, flags, indent, s). */
122 replicate_format (index_t repi,
123 replicate_format_flags_t flags,
132 repi &= ~MPLS_IS_REPLICATE;
133 rep = replicate_get(repi);
134 vlib_get_combined_counter(&(replicate_main.repm_counters), repi, &to);
135 buckets = replicate_get_buckets(rep);
/* header line */
137 s = format(s, "%U: ", format_dpo_type, DPO_REPLICATE);
138 s = format(s, "[index:%d buckets:%d ", repi, rep->rep_n_buckets);
139 s = format(s, "flags:[%U] ", format_replicate_flags, rep->rep_flags);
140 s = format(s, "to:[%Ld:%Ld]]", to.packets, to.bytes);
/* one line per bucket */
142 for (i = 0; i < rep->rep_n_buckets; i++)
144 s = format(s, "\n%U", format_white_space, indent+2);
145 s = format(s, "[%d]", i);
146 s = format(s, " %U", format_dpo_id, &buckets[i], indent+6);
/* format() callback: takes a replicate index and a replicate_format_flags_t
 * from the va_list and formats that replicate with an indent of 0. */
152 format_replicate (u8 * s, va_list * args)
154 index_t repi = va_arg(*args, index_t);
155 replicate_format_flags_t flags = va_arg(*args, replicate_format_flags_t);
157 return (replicate_format(repi, flags, 0, s));
/* format() callback registered as dv_format in rep_vft: takes a replicate
 * index and an indent from the va_list and formats the replicate with
 * REPLICATE_FORMAT_DETAIL at that indent. */
160 format_replicate_dpo (u8 * s, va_list * args)
162 index_t repi = va_arg(*args, index_t);
163 u32 indent = va_arg(*args, u32);
165 return (replicate_format(repi, REPLICATE_FORMAT_DETAIL, indent, s));
/* Allocate a replicate object and record its bucket count and protocol.
 * When the count does not fit the inline storage (REP_HAS_INLINE_BUCKETS is
 * false) the rep_buckets vector is sized, cache-line aligned, so that index
 * rep_n_buckets - 1 is valid. Emits a "create" debug log.
 * The return statement is not visible in this view; replicate_create()
 * passes the result to replicate_get_index(). */
170 replicate_create_i (u32 num_buckets,
171 dpo_proto_t rep_proto)
175 rep = replicate_alloc_i();
176 rep->rep_n_buckets = num_buckets;
177 rep->rep_proto = rep_proto;
179 if (!REP_HAS_INLINE_BUCKETS(rep))
181 vec_validate_aligned(rep->rep_buckets,
182 rep->rep_n_buckets - 1,
183 CLIB_CACHE_LINE_BYTES);
186 REP_DBG(rep, "create");
/* Create a replicate with n_buckets buckets for protocol rep_proto and
 * return its pool index. */
192 replicate_create (u32 n_buckets,
193 dpo_proto_t rep_proto)
195 return (replicate_get_index(replicate_create_i(n_buckets, rep_proto)));
/* Store next in buckets[bucket] of rep by stacking it with dpo_stack(),
 * using DPO_REPLICATE as the parent type and the protocol of rep.
 * REPLICATE_FLAGS_HAS_LOCAL is maintained along the way: it is cleared when
 * the DPO being replaced is a receive DPO and set when the new one is.
 * NOTE(review): the clear looks only at the bucket being replaced, not at
 * the other buckets - confirm that at most one receive bucket is expected.
 * Parameters between rep and next are on lines not visible in this view;
 * callers pass (rep, bucket, buckets, next). */
199 replicate_set_bucket_i (replicate_t *rep,
202 const dpo_id_t *next)
204 if (dpo_is_receive(&buckets[bucket]))
206 rep->rep_flags &= ~REPLICATE_FLAGS_HAS_LOCAL;
208 if (dpo_is_receive(next))
210 rep->rep_flags |= REPLICATE_FLAGS_HAS_LOCAL;
212 dpo_stack(DPO_REPLICATE, rep->rep_proto, &buckets[bucket], next);
/* Set one bucket of the replicate identified by repi to next.
 * The MPLS_IS_REPLICATE bit is masked off the index before lookup, and the
 * bucket number is asserted to be below the current bucket count.
 * The bucket parameter is declared on a line not visible in this view. */
216 replicate_set_bucket (index_t repi,
218 const dpo_id_t *next)
223 repi &= ~MPLS_IS_REPLICATE;
224 rep = replicate_get(repi);
225 buckets = replicate_get_buckets(rep);
227 ASSERT(bucket < rep->rep_n_buckets);
229 replicate_set_bucket_i(rep, bucket, buckets, next);
/* Test whether dpo is a replicate that only drops. The visible logic checks
 * that dpo is of type DPO_REPLICATE and, for a replicate with exactly one
 * bucket, returns whether that bucket holds a drop DPO.
 * The values returned on the other paths are on lines not visible in this
 * view. */
233 replicate_is_drop (const dpo_id_t *dpo)
238 if (DPO_REPLICATE != dpo->dpoi_type)
241 repi = dpo->dpoi_index & ~MPLS_IS_REPLICATE;
242 rep = replicate_get(repi);
244 if (1 == rep->rep_n_buckets)
246 return (dpo_is_drop(replicate_get_bucket_i(rep, 0)));
/* Return the DPO stored in the given bucket of replicate repi. The
 * MPLS_IS_REPLICATE bit is masked off the index before lookup. No range
 * check on the bucket number is visible here.
 * The bucket parameter is declared on a line not visible in this view. */
252 replicate_get_bucket (index_t repi,
257 repi &= ~MPLS_IS_REPLICATE;
258 rep = replicate_get(repi);
260 return (replicate_get_bucket_i(rep, bucket));
/* Make sure the path vector has at least one entry. When nhs is empty, one
 * path is appended and its DPO is set to a copy of the drop DPO for
 * drop_proto, so the replicate always has something to forward to.
 * vec_add2() may reallocate the vector, so the caller must use the returned
 * pointer; the return statement itself is not visible in this view. */
264 static load_balance_path_t *
265 replicate_multipath_next_hop_fixup (load_balance_path_t *nhs,
266 dpo_proto_t drop_proto)
268 if (0 == vec_len(nhs))
270 load_balance_path_t *nh;
273 * we need something for the replicate. so use the drop
275 vec_add2(nhs, nh, 1);
278 dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto));
/* replicate_fill_buckets: walk the path vector nhs and write the DPO of each
 * path into consecutive entries of buckets via replicate_set_bucket_i(),
 * asserting that the running bucket number stays below n_buckets.
 * The declaration and initial value of the bucket counter, and the
 * parameters after nhs, are on lines not visible in this view; callers pass
 * (rep, nhs, buckets, n_buckets). */
285 * Fill in adjacencies in block based on corresponding
286 * next hop adjacencies.
289 replicate_fill_buckets (replicate_t *rep,
290 load_balance_path_t *nhs,
294 load_balance_path_t * nh;
300 * the next-hops have normalised weights. that means their sum is the number
301 * of buckets we need to fill.
303 vec_foreach (nh, nhs)
305 ASSERT(bucket < n_buckets);
306 replicate_set_bucket_i(rep, bucket++, buckets, &nh->path_dpo);
/* Record a new bucket count on rep. Kept as a separate step because
 * replicate_multipath_update() orders this store relative to bucket writes
 * with memory barriers.
 * The n_buckets parameter is declared on a line not visible in this view. */
311 replicate_set_n_buckets (replicate_t *rep,
314 rep->rep_n_buckets = n_buckets;
/* Replace the set of paths of the replicate referenced by dpo.
 *
 * dpo must be of type DPO_REPLICATE (asserted); the MPLS_IS_REPLICATE bit is
 * masked off its index. next_hops is first passed through
 * replicate_multipath_next_hop_fixup() so there is at least one path, and
 * the new bucket count is the length of the resulting vector.
 *
 * Cases handled, as far as visible in this view:
 *  - replicate has 0 buckets: set the count, allocate the rep_buckets
 *    vector if the count does not fit inline, then fill.
 *  - same count: overwrite the existing buckets in place.
 *  - growing, inline to vector: allocate rep_buckets, fill, barrier, publish
 *    the count, barrier, then reset the inline buckets.
 *  - growing, still inline: fill, barrier, publish the count.
 *  - growing, already a vector: fill a new vector, barrier, swap the
 *    pointer, barrier, publish the count, walk the old buckets (loop body
 *    not visible) and free the old vector.
 *  - shrinking, vector to inline: fill the inline buckets, barrier, publish
 *    the count, barrier, walk the old vector (loop body not visible) and
 *    free it.
 *  - shrinking otherwise: publish the smaller count first, barrier, fill,
 *    then reset the buckets between the new and the old count.
 *
 * The ordering follows the rule in the existing comment below: the reported
 * bucket count must never exceed what the bucket array actually holds.
 * Finally the DPO of every entry in the path vector is reset. Whether the
 * vector itself is freed is not visible here.
 * NOTE(review): several argument lines, the else keywords and some loop
 * bodies are missing from this view - check against the full file. */
318 replicate_multipath_update (const dpo_id_t *dpo,
319 load_balance_path_t * next_hops)
321 load_balance_path_t * nh, * nhs;
327 ASSERT(DPO_REPLICATE == dpo->dpoi_type);
328 repi = dpo->dpoi_index & ~MPLS_IS_REPLICATE;
329 rep = replicate_get(repi);
330 nhs = replicate_multipath_next_hop_fixup(next_hops,
332 n_buckets = vec_len(nhs);
334 if (0 == rep->rep_n_buckets)
337 * first time initialisation. no packets inflight, so we can write
340 replicate_set_n_buckets(rep, n_buckets);
342 if (!REP_HAS_INLINE_BUCKETS(rep))
343 vec_validate_aligned(rep->rep_buckets,
344 rep->rep_n_buckets - 1,
345 CLIB_CACHE_LINE_BYTES);
347 replicate_fill_buckets(rep, nhs,
348 replicate_get_buckets(rep),
354 * This is a modification of an existing replicate.
355 * We need to ensure that packets in flight see a consistent state, that
356 * is the number of reported buckets the REP has
357 * is not more than it actually has. So if the
358 * number of buckets is increasing, we must update the bucket array first,
359 * then the reported number. vice-versa if the number of buckets goes down.
361 if (n_buckets == rep->rep_n_buckets)
364 * no change in the number of buckets. we can simply fill what
365 * is new over what is old.
367 replicate_fill_buckets(rep, nhs,
368 replicate_get_buckets(rep),
371 else if (n_buckets > rep->rep_n_buckets)
374 * we have more buckets. the old replicate map (if there is one)
375 * will remain valid, i.e. mapping to indices within range, so we
378 if (n_buckets > REP_NUM_INLINE_BUCKETS &&
379 rep->rep_n_buckets <= REP_NUM_INLINE_BUCKETS)
382 * the new increased number of buckets is crossing the threshold
383 * from the inline storage to out-line. Alloc the outline buckets
384 * first, then fixup the number. then reset the inlines.
386 ASSERT(NULL == rep->rep_buckets);
387 vec_validate_aligned(rep->rep_buckets,
389 CLIB_CACHE_LINE_BYTES);
391 replicate_fill_buckets(rep, nhs,
394 CLIB_MEMORY_BARRIER();
395 replicate_set_n_buckets(rep, n_buckets);
397 CLIB_MEMORY_BARRIER();
399 for (ii = 0; ii < REP_NUM_INLINE_BUCKETS; ii++)
401 dpo_reset(&rep->rep_buckets_inline[ii]);
406 if (n_buckets <= REP_NUM_INLINE_BUCKETS)
409 * we are not crossing the threshold and it's still inline buckets.
410 * we can write the new on the old..
412 replicate_fill_buckets(rep, nhs,
413 replicate_get_buckets(rep),
415 CLIB_MEMORY_BARRIER();
416 replicate_set_n_buckets(rep, n_buckets);
421 * we are not crossing the threshold. We need a new bucket array to
422 * hold the increased number of choices.
424 dpo_id_t *new_buckets, *old_buckets, *tmp_dpo;
427 old_buckets = replicate_get_buckets(rep);
429 vec_validate_aligned(new_buckets,
431 CLIB_CACHE_LINE_BYTES);
433 replicate_fill_buckets(rep, nhs, new_buckets, n_buckets);
434 CLIB_MEMORY_BARRIER();
435 rep->rep_buckets = new_buckets;
436 CLIB_MEMORY_BARRIER();
437 replicate_set_n_buckets(rep, n_buckets);
439 vec_foreach(tmp_dpo, old_buckets)
443 vec_free(old_buckets);
450 * bucket size shrinkage.
452 if (n_buckets <= REP_NUM_INLINE_BUCKETS &&
453 rep->rep_n_buckets > REP_NUM_INLINE_BUCKETS)
456 * the new decreased number of buckets is crossing the threshold
457 * from out-line storage to inline:
458 * 1 - Fill the inline buckets,
459 * 2 - fixup the number (and this point the inline buckets are
461 * 3 - free the outline buckets
463 replicate_fill_buckets(rep, nhs,
464 rep->rep_buckets_inline,
466 CLIB_MEMORY_BARRIER();
467 replicate_set_n_buckets(rep, n_buckets);
468 CLIB_MEMORY_BARRIER();
470 vec_foreach(tmp_dpo, rep->rep_buckets)
474 vec_free(rep->rep_buckets);
479 * not crossing the threshold.
480 * 1 - update the number to the smaller size
481 * 2 - write the new buckets
482 * 3 - reset those no longer used.
487 old_n_buckets = rep->rep_n_buckets;
488 buckets = replicate_get_buckets(rep);
490 replicate_set_n_buckets(rep, n_buckets);
491 CLIB_MEMORY_BARRIER();
493 replicate_fill_buckets(rep, nhs,
497 for (ii = n_buckets; ii < old_n_buckets; ii++)
499 dpo_reset(&buckets[ii]);
505 vec_foreach (nh, nhs)
507 dpo_reset(&nh->path_dpo);
/* dv_lock callback in rep_vft. Looks up the replicate referenced by dpo.
 * The statement that takes the lock is not visible in this view; presumably
 * it increments rep_locks, the counter tested in replicate_unlock() -
 * confirm against the full file. */
513 replicate_lock (dpo_id_t *dpo)
517 rep = replicate_get(dpo->dpoi_index);
/* Derive a replicate suited to the requested flags from replicate repi.
 *
 * If the flags of the original equal the requested flags, or the caller
 * asks for REPLICATE_FLAGS_HAS_LOCAL, the existing comment says all original
 * buckets can be kept; what is returned on that path is not visible in this
 * view. Otherwise the caller wants no local (receive) path:
 *  - an original with a single bucket yields a new one-bucket replicate
 *    holding the drop DPO for the protocol;
 *  - otherwise a new replicate with one bucket fewer is created and every
 *    non-receive bucket of the original is copied into it.
 * rep is looked up again after replicate_create_i(), presumably because the
 * pool allocation can move the pool memory - confirm.
 * The advance of the copy position and the repi parameter declaration are
 * on lines not visible here. The final visible statement returns the index
 * of the copy. */
523 replicate_dup (replicate_flags_t flags,
526 replicate_t *rep, *copy;
528 rep = replicate_get(repi);
530 if (rep->rep_flags == flags ||
531 flags & REPLICATE_FLAGS_HAS_LOCAL)
534 * we can include all the buckets from the original in the copy
541 * caller doesn't want the local paths that the original has
543 if (rep->rep_n_buckets == 1)
546 * original has only one bucket that is the local, so create
547 * a new one with only the drop
549 copy = replicate_create_i (1, rep->rep_proto);
551 replicate_set_bucket_i(copy, 0,
552 replicate_get_buckets(copy),
553 drop_dpo_get(rep->rep_proto));
557 dpo_id_t *old_buckets, *copy_buckets;
560 copy = replicate_create_i(rep->rep_n_buckets - 1,
563 rep = replicate_get(repi);
564 old_buckets = replicate_get_buckets(rep);
565 copy_buckets = replicate_get_buckets(copy);
568 for (bucket = 0; bucket < rep->rep_n_buckets; bucket++)
570 if (!dpo_is_receive(&old_buckets[bucket]))
572 replicate_set_bucket_i(copy, pos, copy_buckets,
573 (&old_buckets[bucket]));
580 return (replicate_get_index(copy));
/* Tear down a replicate object: reset the DPO in every bucket, emit a
 * "destroy" debug log, free the rep_buckets vector when the object is not
 * using inline buckets, and return the object to replicate_pool. */
584 replicate_destroy (replicate_t *rep)
589 buckets = replicate_get_buckets(rep);
591 for (i = 0; i < rep->rep_n_buckets; i++)
593 dpo_reset(&buckets[i]);
596 REP_DBG(rep, "destroy");
597 if (!REP_HAS_INLINE_BUCKETS(rep))
599 vec_free(rep->rep_buckets);
602 pool_put(replicate_pool, rep);
/* dv_unlock callback in rep_vft. Looks up the replicate referenced by dpo
 * and destroys it once rep_locks is 0. The statement that drops the lock is
 * not visible in this view; presumably rep_locks is decremented before the
 * test - confirm against the full file. */
606 replicate_unlock (dpo_id_t *dpo)
610 rep = replicate_get(dpo->dpoi_index);
614 if (0 == rep->rep_locks)
616 replicate_destroy(rep);
/* dv_mem_show callback in rep_vft: report pool usage for replicate objects
 * (elements in use, pool length and per-object size) through
 * fib_show_memory_usage() under the name "replicate". */
621 replicate_mem_show (void)
623 fib_show_memory_usage("replicate",
624 pool_elts(replicate_pool),
625 pool_len(replicate_pool),
626 sizeof(replicate_t));
/* Callback table for the replicate DPO type (lock, unlock, format and memory
 * reporting). Registered for DPO_REPLICATE in replicate_module_init(). */
629 const static dpo_vft_t rep_vft = {
630 .dv_lock = replicate_lock,
631 .dv_unlock = replicate_unlock,
632 .dv_format = format_replicate_dpo,
633 .dv_mem_show = replicate_mem_show,
/* Per-protocol graph node name lists and the table that indexes them by
 * dpo_proto_t. replicate_nodes is passed to dpo_register() in
 * replicate_module_init(). The entries of the three per-protocol arrays are
 * on lines not visible in this view. */
637 * @brief The per-protocol VLIB graph nodes that are assigned to a replicate
640 * this means that these graph nodes are ones from which a replicate is the
641 * parent object in the DPO-graph.
643 const static char* const replicate_ip4_nodes[] =
648 const static char* const replicate_ip6_nodes[] =
653 const static char* const replicate_mpls_nodes[] =
659 const static char* const * const replicate_nodes[DPO_PROTO_NUM] =
661 [DPO_PROTO_IP4] = replicate_ip4_nodes,
662 [DPO_PROTO_IP6] = replicate_ip6_nodes,
663 [DPO_PROTO_MPLS] = replicate_mpls_nodes,
/* One-time module setup: register the DPO_REPLICATE type with its callback
 * table and per-protocol node lists, and register the log class (class "dpo",
 * subclass "replicate") used by REP_DBG. */
667 replicate_module_init (void)
669 dpo_register(DPO_REPLICATE, &rep_vft, replicate_nodes);
670 replicate_logger = vlib_log_register_class("dpo", "replicate");
/* CLI handler for "show replicate [<index>]".
 * Parses an optional decimal index from the input. When an index was given
 * that replicate is printed with REPLICATE_FORMAT_DETAIL. The pool_foreach
 * walk, presumably the no-index case, prints every replicate in the pool
 * with REPLICATE_FORMAT_NONE.
 * NOTE(review): no check that a user-supplied index refers to a live pool
 * entry is visible here - confirm against the full file.
 * The handling of unparsed input and the return statement are on lines not
 * visible in this view. */
673 static clib_error_t *
674 replicate_show (vlib_main_t * vm,
675 unformat_input_t * input,
676 vlib_cli_command_t * cmd)
678 index_t repi = INDEX_INVALID;
680 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
682 if (unformat (input, "%d", &repi))
688 if (INDEX_INVALID != repi)
690 vlib_cli_output (vm, "%U", format_replicate, repi,
691 REPLICATE_FORMAT_DETAIL);
697 pool_foreach(rep, replicate_pool,
699 vlib_cli_output (vm, "%U", format_replicate,
700 replicate_get_index(rep),
701 REPLICATE_FORMAT_NONE);
/* Registers the "show replicate" CLI command, handled by replicate_show(). */
708 VLIB_CLI_COMMAND (replicate_show_command, static) = {
709 .path = "show replicate",
710 .short_help = "show replicate [<index>]",
711 .function = replicate_show,
/* Per-packet trace record for the replicate nodes. The member declarations
 * are on lines not visible in this view; elsewhere in this file
 * replicate_inline() writes rep_index and format_replicate_trace() reads
 * dpo. */
714 typedef struct replicate_trace_t_
/* Shared node function for the ip4, ip6 and mpls replicate nodes.
 *
 * For each buffer in the frame:
 *  - the replicate index is read from the buffer ip.adj_index[VLIB_TX] and
 *    the replicate is looked up;
 *  - the combined counter of that replicate is incremented by one packet
 *    and the chain length of the buffer in bytes;
 *  - the per-thread clone index vector is sized to the bucket count and
 *    vlib_buffer_clone() fills it; if fewer clones than buckets come back
 *    the BUFFER_ALLOCATION_FAILURE node counter is incremented;
 *  - each clone is paired with one bucket: the next node comes from the
 *    bucket DPO dpoi_next_node and the clone ip.adj_index[VLIB_TX] is set to
 *    the DPO index; a trace record is added when the original is traced;
 *  - the clone is enqueued, and a fresh next frame is fetched whenever the
 *    current one fills up;
 *  - the clone vector length is reset before the next packet.
 * Returns the number of buffers in the input frame.
 * NOTE(review): the index from the buffer is used without masking
 * MPLS_IS_REPLICATE, unlike the control-plane paths - confirm intended.
 * Several lines (buffer index fetch, enqueue arguments, some declarations)
 * are not visible in this view. */
721 replicate_inline (vlib_main_t * vm,
722 vlib_node_runtime_t * node,
723 vlib_frame_t * frame)
725 vlib_combined_counter_main_t * cm = &replicate_main.repm_counters;
726 replicate_main_t * rm = &replicate_main;
727 u32 n_left_from, * from, * to_next, next_index;
728 u32 thread_index = vlib_get_thread_index();
730 from = vlib_frame_vector_args (frame);
731 n_left_from = frame->n_vectors;
732 next_index = node->cached_next_index;
734 while (n_left_from > 0)
738 vlib_get_next_frame (vm, node, next_index,
739 to_next, n_left_to_next);
741 while (n_left_from > 0 && n_left_to_next > 0)
743 u32 next0, ci0, bi0, bucket, repi0;
744 const replicate_t *rep0;
745 vlib_buffer_t * b0, *c0;
746 const dpo_id_t *dpo0;
753 b0 = vlib_get_buffer (vm, bi0);
754 repi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
755 rep0 = replicate_get(repi0);
/* account the original packet against this replicate */
757 vlib_increment_combined_counter(
758 cm, thread_index, repi0, 1,
759 vlib_buffer_length_in_chain(vm, b0));
/* room for one clone index per bucket */
761 vec_validate (rm->clones[thread_index], rep0->rep_n_buckets - 1);
763 num_cloned = vlib_buffer_clone (vm, bi0, rm->clones[thread_index],
765 VLIB_BUFFER_CLONE_HEAD_SIZE);
/* a short count means not every bucket gets a copy */
767 if (num_cloned != rep0->rep_n_buckets)
769 vlib_node_increment_counter
770 (vm, node->node_index,
771 REPLICATE_DPO_ERROR_BUFFER_ALLOCATION_FAILURE, 1);
/* dispatch clone number i to bucket number i */
774 for (bucket = 0; bucket < num_cloned; bucket++)
776 ci0 = rm->clones[thread_index][bucket];
777 c0 = vlib_get_buffer(vm, ci0);
783 dpo0 = replicate_get_bucket_i(rep0, bucket);
784 next0 = dpo0->dpoi_next_node;
785 vnet_buffer (c0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
787 if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
789 replicate_trace_t *t;
793 vlib_buffer_copy_trace_flag (vm, b0, ci0);
794 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (c0);
796 t = vlib_add_trace (vm, node, c0, sizeof (*t));
797 t->rep_index = repi0;
801 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
802 to_next, n_left_to_next,
/* one packet can produce more clones than fit in the current frame */
804 if (PREDICT_FALSE (n_left_to_next == 0))
806 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
807 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
810 vec_reset_length (rm->clones[thread_index]);
813 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
816 return frame->n_vectors;
/* Trace formatter for the replicate nodes. Consumes the vm and node
 * arguments (unused) and the trace record from the va_list, then appends
 * "replicate: <n> via <dpo>" using the dpo member of the record. The integer
 * argument for %d and the return statement are on lines not visible in this
 * view. */
820 format_replicate_trace (u8 * s, va_list * args)
822 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
823 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
824 replicate_trace_t *t = va_arg (*args, replicate_trace_t *);
826 s = format (s, "replicate: %d via %U",
828 format_dpo_id, &t->dpo, 0);
/* Node function for "ip4-replicate": forwards to the shared
 * replicate_inline() and returns its result. */
833 ip4_replicate (vlib_main_t * vm,
834 vlib_node_runtime_t * node,
835 vlib_frame_t * frame)
837 return (replicate_inline (vm, node, frame));
/* Graph node registration for "ip4-replicate": u32 buffer-index vectors,
 * the shared error string table and the shared trace formatter. Fields on
 * lines not visible in this view (for example next nodes) are not described
 * here. */
841 * @brief IP4 replication node
843 VLIB_REGISTER_NODE (ip4_replicate_node) = {
844 .function = ip4_replicate,
845 .name = "ip4-replicate",
846 .vector_size = sizeof (u32),
848 .n_errors = ARRAY_LEN(replicate_dpo_error_strings),
849 .error_strings = replicate_dpo_error_strings,
851 .format_trace = format_replicate_trace,
/* Node function for "ip6-replicate": forwards to the shared
 * replicate_inline() and returns its result. */
859 ip6_replicate (vlib_main_t * vm,
860 vlib_node_runtime_t * node,
861 vlib_frame_t * frame)
863 return (replicate_inline (vm, node, frame));
/* Graph node registration for "ip6-replicate": u32 buffer-index vectors,
 * the shared error string table and the shared trace formatter. Fields on
 * lines not visible in this view (for example next nodes) are not described
 * here. */
867 * @brief IPv6 replication node
869 VLIB_REGISTER_NODE (ip6_replicate_node) = {
870 .function = ip6_replicate,
871 .name = "ip6-replicate",
872 .vector_size = sizeof (u32),
874 .n_errors = ARRAY_LEN(replicate_dpo_error_strings),
875 .error_strings = replicate_dpo_error_strings,
877 .format_trace = format_replicate_trace,
/* Node function for "mpls-replicate": forwards to the shared
 * replicate_inline() and returns its result. */
885 mpls_replicate (vlib_main_t * vm,
886 vlib_node_runtime_t * node,
887 vlib_frame_t * frame)
889 return (replicate_inline (vm, node, frame));
/* Graph node registration for "mpls-replicate": u32 buffer-index vectors,
 * the shared error string table and the shared trace formatter. Fields on
 * lines not visible in this view (for example next nodes) are not described
 * here. */
893 * @brief MPLS replication node
895 VLIB_REGISTER_NODE (mpls_replicate_node) = {
896 .function = mpls_replicate,
897 .name = "mpls-replicate",
898 .vector_size = sizeof (u32),
900 .n_errors = ARRAY_LEN(replicate_dpo_error_strings),
901 .error_strings = replicate_dpo_error_strings,
903 .format_trace = format_replicate_trace,
/* Init function: size the per-thread clones vector so that indices
 * 0 .. vlib_num_workers() are valid, i.e. one slot more than the number of
 * workers. replicate_inline() indexes it by thread index.
 * The return statement is not visible in this view. */
911 replicate_dpo_init (vlib_main_t * vm)
913 replicate_main_t * rm = &replicate_main;
915 vec_validate (rm->clones, vlib_num_workers());
/* Register replicate_dpo_init() as a VLIB init function. */
920 VLIB_INIT_FUNCTION (replicate_dpo_init);