2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <vnet/ip/lookup.h>
17 #include <vnet/dpo/replicate_dpo.h>
18 #include <vnet/dpo/drop_dpo.h>
19 #include <vnet/adj/adj.h>
20 #include <vnet/mpls/mpls_types.h>
/*
 * Log class handle used by this file. replicate_module_init() assigns it
 * from vlib_log_register_class("dpo", "replicate").
 */
25 vlib_log_class_t replicate_logger;
/*
 * REP_DBG: debug-level log helper for a replicate object. It forwards to
 * vlib_log_debug() on replicate_logger and passes replicate_get_index(_rep)
 * and REPLICATE_FORMAT_NONE among the arguments.
 * NOTE(review): the format-string part of the expansion is not visible in
 * this extract -- confirm how _fmt and _args are spliced in.
 */
27 #define REP_DBG(_rep, _fmt, _args...) \
29 vlib_log_debug(replicate_logger, \
32 replicate_get_index(_rep), \
33 REPLICATE_FORMAT_NONE, \
/*
 * Error counters for the replicate nodes, declared once as an X-macro list
 * (foreach_replicate_dpo_error) and expanded twice with different
 * definitions of _():
 *  - to produce the REPLICATE_DPO_ERROR_<sym> enumerators, terminated by
 *    REPLICATE_DPO_N_ERROR;
 *  - to produce the matching description strings in
 *    replicate_dpo_error_strings[], which the node registrations in this
 *    file reference through .error_strings / .n_errors.
 */
37 #define foreach_replicate_dpo_error \
38 _(BUFFER_ALLOCATION_FAILURE, "Buffer Allocation Failure")
41 #define _(sym,str) REPLICATE_DPO_ERROR_##sym,
42 foreach_replicate_dpo_error
44 REPLICATE_DPO_N_ERROR,
45 } replicate_dpo_error_t;
47 static char * replicate_dpo_error_strings[] = {
48 #define _(sym,string) string,
49 foreach_replicate_dpo_error
54 * Pool of all DPOs. It's not static so the DP can have fast access
/*
 * Backing pool for every replicate_t. replicate_get_index() derives an
 * object's index from its offset within this pool.
 */
56 replicate_t *replicate_pool;
59 * The one instance of replicate main
/*
 * Module-wide state. Only the stat_segment_name initialiser is visible in
 * this extract; other code in this file also uses the repm_counters and
 * clones members of this object.
 */
61 replicate_main_t replicate_main = {
64 .stat_segment_name = "/net/mroute",
/*
 * Return the index of rep, computed as its element offset from the base of
 * replicate_pool.
 */
69 replicate_get_index (const replicate_t *rep)
71 return (rep - replicate_pool);
/*
 * Return the bucket array currently in use by rep: the embedded
 * rep_buckets_inline storage when REP_HAS_INLINE_BUCKETS(rep) holds, and
 * the rep_buckets pointer otherwise.
 */
74 static inline dpo_id_t*
75 replicate_get_buckets (replicate_t *rep)
77 if (REP_HAS_INLINE_BUCKETS(rep))
79 return (rep->rep_buckets_inline);
83 return (rep->rep_buckets);
/*
 * Take a cache-line aligned element from replicate_pool, zero it, and
 * prepare the combined counter slot at the new element's index.
 * NOTE(review): the return statement is not visible in this extract;
 * replicate_create_i() uses the result as the new replicate_t pointer.
 */
88 replicate_alloc_i (void)
92 pool_get_aligned(replicate_pool, rep, CLIB_CACHE_LINE_BYTES);
93 clib_memset(rep, 0, sizeof(*rep));
/* validate the counter for this index first, then zero it */
95 vlib_validate_combined_counter(&(replicate_main.repm_counters),
96 replicate_get_index(rep));
97 vlib_zero_combined_counter(&(replicate_main.repm_counters),
98 replicate_get_index(rep));
/*
 * Append a textual description of replicate repi to s.
 *
 * The MPLS_IS_REPLICATE bit is cleared from repi before the object and its
 * combined counter are looked up. The output consists of the DPO type name,
 * the index, the bucket count and the counter's packet/byte totals,
 * followed by one line per bucket (indented by indent+2) showing the bucket
 * number and the bucket's DPO (formatted with indent+6).
 *
 * NOTE(review): flags is accepted but is not referenced in the lines
 * visible here -- confirm whether it is meant to alter the output.
 */
104 replicate_format (index_t repi,
105 replicate_format_flags_t flags,
114 repi &= ~MPLS_IS_REPLICATE;
115 rep = replicate_get(repi);
116 vlib_get_combined_counter(&(replicate_main.repm_counters), repi, &to);
117 buckets = replicate_get_buckets(rep);
119 s = format(s, "%U: ", format_dpo_type, DPO_REPLICATE);
120 s = format(s, "[index:%d buckets:%d ", repi, rep->rep_n_buckets);
121 s = format(s, "to:[%Ld:%Ld]]", to.packets, to.bytes);
123 for (i = 0; i < rep->rep_n_buckets; i++)
125 s = format(s, "\n%U", format_white_space, indent+2);
126 s = format(s, "[%d]", i);
127 s = format(s, " %U", format_dpo_id, &buckets[i], indent+6);
/*
 * format() callback for a replicate.
 * Variadic arguments, in order: index_t replicate index,
 * replicate_format_flags_t flags. Delegates to replicate_format() with an
 * indent of 0 and returns its result.
 */
133 format_replicate (u8 * s, va_list * args)
135 index_t repi = va_arg(*args, index_t);
136 replicate_format_flags_t flags = va_arg(*args, replicate_format_flags_t);
138 return (replicate_format(repi, flags, 0, s));
/*
 * format() callback installed as rep_vft.dv_format.
 * Variadic arguments, in order: index_t replicate index, u32 indent.
 * Always formats with REPLICATE_FORMAT_DETAIL.
 */
141 format_replicate_dpo (u8 * s, va_list * args)
143 index_t repi = va_arg(*args, index_t);
144 u32 indent = va_arg(*args, u32);
146 return (replicate_format(repi, REPLICATE_FORMAT_DETAIL, indent, s));
/*
 * Allocate a replicate and record its bucket count and protocol.
 * When the count does not qualify for inline storage
 * (!REP_HAS_INLINE_BUCKETS), the rep_buckets vector is validated up to
 * index rep_n_buckets - 1 with cache-line alignment. Emits a "create"
 * debug log entry.
 * NOTE(review): the return statement is not visible in this extract;
 * replicate_create() passes the result to replicate_get_index().
 */
151 replicate_create_i (u32 num_buckets,
152 dpo_proto_t rep_proto)
156 rep = replicate_alloc_i();
157 rep->rep_n_buckets = num_buckets;
158 rep->rep_proto = rep_proto;
160 if (!REP_HAS_INLINE_BUCKETS(rep))
162 vec_validate_aligned(rep->rep_buckets,
163 rep->rep_n_buckets - 1,
164 CLIB_CACHE_LINE_BYTES);
167 REP_DBG(rep, "create");
/*
 * Create a replicate with n_buckets buckets for protocol rep_proto and
 * return its index as computed by replicate_get_index().
 */
173 replicate_create (u32 n_buckets,
174 dpo_proto_t rep_proto)
176 return (replicate_get_index(replicate_create_i(n_buckets, rep_proto)));
/*
 * Internal bucket setter: calls dpo_stack() with DPO_REPLICATE and the
 * replicate's protocol to stack next into buckets[bucket].
 * No range check is performed in the visible code; callers assert the
 * bucket is in range.
 */
180 replicate_set_bucket_i (replicate_t *rep,
183 const dpo_id_t *next)
185 dpo_stack(DPO_REPLICATE, rep->rep_proto, &buckets[bucket], next);
/*
 * Set one bucket of the replicate identified by repi to next.
 * The MPLS_IS_REPLICATE bit is cleared from repi before the lookup, and
 * the bucket number is ASSERTed to be below rep_n_buckets.
 */
189 replicate_set_bucket (index_t repi,
191 const dpo_id_t *next)
196 repi &= ~MPLS_IS_REPLICATE;
197 rep = replicate_get(repi);
198 buckets = replicate_get_buckets(rep);
200 ASSERT(bucket < rep->rep_n_buckets);
202 replicate_set_bucket_i(rep, bucket, buckets, next);
/*
 * Test whether dpo is a replicate that amounts to a drop.
 * Visible logic: a DPO whose type is not DPO_REPLICATE takes an early exit;
 * otherwise the MPLS_IS_REPLICATE bit is cleared from the index and, when
 * the replicate has exactly one bucket, the result is dpo_is_drop() of that
 * bucket.
 * NOTE(review): the values returned on the early exit and for replicates
 * with a different bucket count are not visible in this extract.
 */
206 replicate_is_drop (const dpo_id_t *dpo)
211 if (DPO_REPLICATE != dpo->dpoi_type)
214 repi = dpo->dpoi_index & ~MPLS_IS_REPLICATE;
215 rep = replicate_get(repi);
217 if (1 == rep->rep_n_buckets)
219 return (dpo_is_drop(replicate_get_bucket_i(rep, 0)));
/*
 * Return the result of replicate_get_bucket_i() for the given bucket of the
 * replicate identified by repi, after clearing the MPLS_IS_REPLICATE bit
 * from repi.
 */
225 replicate_get_bucket (index_t repi,
230 repi &= ~MPLS_IS_REPLICATE;
231 rep = replicate_get(repi);
233 return (replicate_get_bucket_i(rep, bucket));
/*
 * Ensure the path vector is not empty: when nhs has length 0, one path is
 * appended and its path_dpo is set to a copy of the drop DPO for
 * drop_proto.
 * NOTE(review): the return statement is not visible in this extract; the
 * caller assigns the result to its path vector, so presumably the
 * (possibly re-allocated) nhs is returned -- confirm.
 */
237 static load_balance_path_t *
238 replicate_multipath_next_hop_fixup (load_balance_path_t *nhs,
239 dpo_proto_t drop_proto)
241 if (0 == vec_len(nhs))
243 load_balance_path_t *nh;
246 * we need something for the replicate. so use the drop
248 vec_add2(nhs, nh, 1);
251 dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto));
258 * Fill in adjacencies in block based on corresponding
259 * next hop adjacencies.
/*
 * Write the DPO of each path in nhs into consecutive entries of buckets,
 * using replicate_set_bucket_i(). The running bucket number is ASSERTed to
 * stay below n_buckets.
 * NOTE(review): the initial value of the running bucket number is not
 * visible in this extract.
 */
262 replicate_fill_buckets (replicate_t *rep,
263 load_balance_path_t *nhs,
267 load_balance_path_t * nh;
273 * the next-hops have normalised weights. that means their sum is the number
274 * of buckets we need to fill.
276 vec_foreach (nh, nhs)
278 ASSERT(bucket < n_buckets);
279 replicate_set_bucket_i(rep, bucket++, buckets, &nh->path_dpo);
/* Store n_buckets as the replicate's reported bucket count. */
284 replicate_set_n_buckets (replicate_t *rep,
287 rep->rep_n_buckets = n_buckets;
/*
 * Re-program the replicate identified by dpo with the paths in next_hops.
 *
 * dpo must be of type DPO_REPLICATE (ASSERTed); the MPLS_IS_REPLICATE bit
 * is cleared from its index before the lookup. The path list is passed
 * through replicate_multipath_next_hop_fixup() and the new bucket count is
 * the length of the resulting vector. The update then takes one of these
 * routes:
 *  - rep_n_buckets == 0: first-time set-up; the count is set, out-of-line
 *    storage is validated if needed, and the buckets are filled;
 *  - same count: the existing buckets are overwritten in place;
 *  - growing: the new buckets are filled before the count is raised. This
 *    covers moving from inline to out-of-line storage (inline entries are
 *    reset afterwards), staying inline, and replacing the out-of-line
 *    vector with a larger one (the old vector is freed afterwards);
 *  - shrinking: either the inline buckets are filled, the count lowered and
 *    the out-of-line vector freed, or the count is lowered first, the
 *    buckets rewritten and the no-longer-used entries reset.
 * CLIB_MEMORY_BARRIER() separates bucket writes from count and pointer
 * updates on the growing and shrinking routes.
 *
 * Finally the path_dpo of every entry in the fixed-up path vector is reset.
 * NOTE(review): several lines of this function (braces, else branches,
 * some call arguments) are not visible in this extract.
 */
291 replicate_multipath_update (const dpo_id_t *dpo,
292 load_balance_path_t * next_hops)
294 load_balance_path_t * nh, * nhs;
300 ASSERT(DPO_REPLICATE == dpo->dpoi_type);
301 repi = dpo->dpoi_index & ~MPLS_IS_REPLICATE;
302 rep = replicate_get(repi);
303 nhs = replicate_multipath_next_hop_fixup(next_hops,
305 n_buckets = vec_len(nhs);
307 if (0 == rep->rep_n_buckets)
310 * first time initialisation. no packets inflight, so we can write
313 replicate_set_n_buckets(rep, n_buckets);
315 if (!REP_HAS_INLINE_BUCKETS(rep))
316 vec_validate_aligned(rep->rep_buckets,
317 rep->rep_n_buckets - 1,
318 CLIB_CACHE_LINE_BYTES);
320 replicate_fill_buckets(rep, nhs,
321 replicate_get_buckets(rep),
327 * This is a modification of an existing replicate.
328 * We need to ensure that packets in flight see a consistent state, that
329 * is the number of reported buckets the REP has
330 * is not more than it actually has. So if the
331 * number of buckets is increasing, we must update the bucket array first,
332 * then the reported number. vice-versa if the number of buckets goes down.
334 if (n_buckets == rep->rep_n_buckets)
337 * no change in the number of buckets. we can simply fill what
338 * is new over what is old.
340 replicate_fill_buckets(rep, nhs,
341 replicate_get_buckets(rep),
344 else if (n_buckets > rep->rep_n_buckets)
347 * we have more buckets. the old replicate map (if there is one)
348 * will remain valid, i.e. mapping to indices within range, so we
351 if (n_buckets > REP_NUM_INLINE_BUCKETS &&
352 rep->rep_n_buckets <= REP_NUM_INLINE_BUCKETS)
355 * the new increased number of buckets is crossing the threshold
356 * from the inline storage to out-line. Alloc the outline buckets
357 * first, then fixup the number. then reset the inlines.
359 ASSERT(NULL == rep->rep_buckets);
360 vec_validate_aligned(rep->rep_buckets,
362 CLIB_CACHE_LINE_BYTES);
364 replicate_fill_buckets(rep, nhs,
367 CLIB_MEMORY_BARRIER();
368 replicate_set_n_buckets(rep, n_buckets);
370 CLIB_MEMORY_BARRIER();
372 for (ii = 0; ii < REP_NUM_INLINE_BUCKETS; ii++)
374 dpo_reset(&rep->rep_buckets_inline[ii]);
379 if (n_buckets <= REP_NUM_INLINE_BUCKETS)
382 * we are not crossing the threshold and it's still inline buckets.
383 * we can write the new on the old..
385 replicate_fill_buckets(rep, nhs,
386 replicate_get_buckets(rep),
388 CLIB_MEMORY_BARRIER();
389 replicate_set_n_buckets(rep, n_buckets);
394 * we are not crossing the threshold. We need a new bucket array to
395 * hold the increased number of choices.
397 dpo_id_t *new_buckets, *old_buckets, *tmp_dpo;
400 old_buckets = replicate_get_buckets(rep);
402 vec_validate_aligned(new_buckets,
404 CLIB_CACHE_LINE_BYTES);
406 replicate_fill_buckets(rep, nhs, new_buckets, n_buckets);
407 CLIB_MEMORY_BARRIER();
408 rep->rep_buckets = new_buckets;
409 CLIB_MEMORY_BARRIER();
410 replicate_set_n_buckets(rep, n_buckets);
412 vec_foreach(tmp_dpo, old_buckets)
416 vec_free(old_buckets);
423 * bucket size shrinkage.
425 if (n_buckets <= REP_NUM_INLINE_BUCKETS &&
426 rep->rep_n_buckets > REP_NUM_INLINE_BUCKETS)
429 * the new decreased number of buckets is crossing the threshold
430 * from out-line storage to inline:
431 * 1 - Fill the inline buckets,
432 * 2 - fixup the number (and this point the inline buckets are
434 * 3 - free the outline buckets
436 replicate_fill_buckets(rep, nhs,
437 rep->rep_buckets_inline,
439 CLIB_MEMORY_BARRIER();
440 replicate_set_n_buckets(rep, n_buckets);
441 CLIB_MEMORY_BARRIER();
443 vec_foreach(tmp_dpo, rep->rep_buckets)
447 vec_free(rep->rep_buckets);
452 * not crossing the threshold.
453 * 1 - update the number to the smaller size
454 * 2 - write the new buckets
455 * 3 - reset those no longer used.
460 old_n_buckets = rep->rep_n_buckets;
461 buckets = replicate_get_buckets(rep);
463 replicate_set_n_buckets(rep, n_buckets);
464 CLIB_MEMORY_BARRIER();
466 replicate_fill_buckets(rep, nhs,
470 for (ii = n_buckets; ii < old_n_buckets; ii++)
472 dpo_reset(&buckets[ii]);
478 vec_foreach (nh, nhs)
480 dpo_reset(&nh->path_dpo);
/*
 * Lock callback installed as rep_vft.dv_lock. Looks up the replicate from
 * dpo->dpoi_index; the rest of the body is not visible in this extract.
 * NOTE(review): unlike replicate_is_drop() and replicate_multipath_update(),
 * the index is used here without clearing MPLS_IS_REPLICATE -- confirm the
 * bit can never be set on this path.
 */
486 replicate_lock (dpo_id_t *dpo)
490 rep = replicate_get(dpo->dpoi_index);
/*
 * Tear down a replicate: reset the DPO in each of its rep_n_buckets
 * buckets, emit a "destroy" debug log entry, free the rep_buckets vector
 * when the buckets are not inline, and return the object to
 * replicate_pool.
 */
496 replicate_destroy (replicate_t *rep)
501 buckets = replicate_get_buckets(rep);
503 for (i = 0; i < rep->rep_n_buckets; i++)
505 dpo_reset(&buckets[i]);
508 REP_DBG(rep, "destroy");
509 if (!REP_HAS_INLINE_BUCKETS(rep))
511 vec_free(rep->rep_buckets);
514 pool_put(replicate_pool, rep);
/*
 * Unlock callback installed as rep_vft.dv_unlock. Looks up the replicate
 * from dpo->dpoi_index and destroys it once rep_locks is 0.
 * NOTE(review): the statement that modifies rep_locks is not visible in
 * this extract. As in replicate_lock(), the index is used without clearing
 * MPLS_IS_REPLICATE -- confirm that is intended.
 */
518 replicate_unlock (dpo_id_t *dpo)
522 rep = replicate_get(dpo->dpoi_index);
526 if (0 == rep->rep_locks)
528 replicate_destroy(rep);
/*
 * Memory-usage callback installed as rep_vft.dv_mem_show. Passes
 * fib_show_memory_usage() the label "replicate", the pool's pool_elts() and
 * pool_len() values, and sizeof(replicate_t).
 */
533 replicate_mem_show (void)
535 fib_show_memory_usage("replicate",
536 pool_elts(replicate_pool),
537 pool_len(replicate_pool),
538 sizeof(replicate_t));
/*
 * DPO virtual function table for the replicate type: lock, unlock, format
 * and memory-show callbacks. Registered for DPO_REPLICATE in
 * replicate_module_init().
 */
541 const static dpo_vft_t rep_vft = {
542 .dv_lock = replicate_lock,
543 .dv_unlock = replicate_unlock,
544 .dv_format = format_replicate_dpo,
545 .dv_mem_show = replicate_mem_show,
549 * @brief The per-protocol VLIB graph nodes that are assigned to a replicate
552 * this means that these graph nodes are ones from which a replicate is the
553 * parent object in the DPO-graph.
/*
 * Per-protocol arrays of graph node names, gathered into replicate_nodes[]
 * indexed by DPO_PROTO_IP4 / DPO_PROTO_IP6 / DPO_PROTO_MPLS and passed to
 * dpo_register() in replicate_module_init().
 * NOTE(review): the array contents are not visible in this extract;
 * presumably they name the ip4-/ip6-/mpls-replicate nodes -- confirm.
 */
555 const static char* const replicate_ip4_nodes[] =
560 const static char* const replicate_ip6_nodes[] =
565 const static char* const replicate_mpls_nodes[] =
571 const static char* const * const replicate_nodes[DPO_PROTO_NUM] =
573 [DPO_PROTO_IP4] = replicate_ip4_nodes,
574 [DPO_PROTO_IP6] = replicate_ip6_nodes,
575 [DPO_PROTO_MPLS] = replicate_mpls_nodes,
/*
 * Module set-up: register the DPO_REPLICATE type with its virtual function
 * table and node lists, and register the "dpo"/"replicate" log class whose
 * handle is stored in replicate_logger.
 */
579 replicate_module_init (void)
581 dpo_register(DPO_REPLICATE, &rep_vft, replicate_nodes);
582 replicate_logger = vlib_log_register_class("dpo", "replicate");
/*
 * CLI handler for "show replicate [<index>]".
 * Parses an optional decimal index from input. When an index was supplied,
 * that replicate is printed with REPLICATE_FORMAT_DETAIL; otherwise every
 * element of replicate_pool is printed with REPLICATE_FORMAT_NONE.
 * NOTE(review): the handling of unparsable input and the return value are
 * not visible in this extract. No check that a supplied index refers to a
 * live pool element is visible either.
 */
585 static clib_error_t *
586 replicate_show (vlib_main_t * vm,
587 unformat_input_t * input,
588 vlib_cli_command_t * cmd)
590 index_t repi = INDEX_INVALID;
592 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
594 if (unformat (input, "%d", &repi))
600 if (INDEX_INVALID != repi)
602 vlib_cli_output (vm, "%U", format_replicate, repi,
603 REPLICATE_FORMAT_DETAIL);
609 pool_foreach(rep, replicate_pool,
611 vlib_cli_output (vm, "%U", format_replicate,
612 replicate_get_index(rep),
613 REPLICATE_FORMAT_NONE);
/* Registers the "show replicate" CLI command, handled by replicate_show(). */
620 VLIB_CLI_COMMAND (replicate_show_command, static) = {
621 .path = "show replicate",
622 .short_help = "show replicate [<index>]",
623 .function = replicate_show,
/*
 * Packet trace record for the replicate nodes. The member list is not
 * visible in this extract; code in this file writes rep_index and reads
 * rep_index/dpo through replicate_trace_t pointers.
 */
626 typedef struct replicate_trace_t_
/*
 * Shared node function behind ip4_replicate(), ip6_replicate() and
 * mpls_replicate().
 *
 * For each buffer in the frame:
 *  - the replicate is looked up from the buffer's ip.adj_index[VLIB_TX];
 *  - the replicate's combined counter is incremented by one packet and the
 *    buffer's chain length;
 *  - the calling thread's clones vector is validated for rep_n_buckets
 *    entries and the buffer is cloned into it with vlib_buffer_clone();
 *    getting a different number of clones than rep_n_buckets bumps the
 *    REPLICATE_DPO_ERROR_BUFFER_ALLOCATION_FAILURE node counter;
 *  - for each clone, the matching bucket's DPO supplies the next node
 *    (dpoi_next_node) and the clone's ip.adj_index[VLIB_TX] (dpoi_index);
 *    when the original is traced, the trace flag is copied to the clone and
 *    a replicate_trace_t is recorded; the clone is then enqueued, and a new
 *    next frame is fetched when the current one fills up;
 *  - the clones vector length is reset.
 *
 * Returns frame->n_vectors.
 * NOTE(review): repi0 is used without clearing MPLS_IS_REPLICATE, and the
 * validate uses rep_n_buckets - 1 -- confirm neither a flagged index nor a
 * zero-bucket replicate can reach this node.
 */
633 replicate_inline (vlib_main_t * vm,
634 vlib_node_runtime_t * node,
635 vlib_frame_t * frame)
637 vlib_combined_counter_main_t * cm = &replicate_main.repm_counters;
638 replicate_main_t * rm = &replicate_main;
639 u32 n_left_from, * from, * to_next, next_index;
640 u32 thread_index = vlib_get_thread_index();
642 from = vlib_frame_vector_args (frame);
643 n_left_from = frame->n_vectors;
644 next_index = node->cached_next_index;
646 while (n_left_from > 0)
650 vlib_get_next_frame (vm, node, next_index,
651 to_next, n_left_to_next);
653 while (n_left_from > 0 && n_left_to_next > 0)
655 u32 next0, ci0, bi0, bucket, repi0;
656 const replicate_t *rep0;
657 vlib_buffer_t * b0, *c0;
658 const dpo_id_t *dpo0;
665 b0 = vlib_get_buffer (vm, bi0);
666 repi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
667 rep0 = replicate_get(repi0);
669 vlib_increment_combined_counter(
670 cm, thread_index, repi0, 1,
671 vlib_buffer_length_in_chain(vm, b0));
673 vec_validate (rm->clones[thread_index], rep0->rep_n_buckets - 1);
675 num_cloned = vlib_buffer_clone (vm, bi0, rm->clones[thread_index],
677 VLIB_BUFFER_CLONE_HEAD_SIZE);
679 if (num_cloned != rep0->rep_n_buckets)
681 vlib_node_increment_counter
682 (vm, node->node_index,
683 REPLICATE_DPO_ERROR_BUFFER_ALLOCATION_FAILURE, 1);
686 for (bucket = 0; bucket < num_cloned; bucket++)
688 ci0 = rm->clones[thread_index][bucket];
689 c0 = vlib_get_buffer(vm, ci0);
695 dpo0 = replicate_get_bucket_i(rep0, bucket);
696 next0 = dpo0->dpoi_next_node;
697 vnet_buffer (c0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
699 if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
701 replicate_trace_t *t;
705 vlib_buffer_copy_trace_flag (vm, b0, ci0);
706 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (c0);
708 t = vlib_add_trace (vm, node, c0, sizeof (*t));
709 t->rep_index = repi0;
713 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
714 to_next, n_left_to_next,
716 if (PREDICT_FALSE (n_left_to_next == 0))
718 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
719 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
722 vec_reset_length (rm->clones[thread_index]);
725 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
728 return frame->n_vectors;
/*
 * Trace formatter for the replicate nodes.
 * Variadic arguments, in order: vlib_main_t * (unused), vlib_node_t *
 * (unused), replicate_trace_t *. Appends "replicate: <n> via <dpo>" to s,
 * with the DPO taken from t->dpo.
 * NOTE(review): the argument supplying the "%d" value is not visible in
 * this extract.
 */
732 format_replicate_trace (u8 * s, va_list * args)
734 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
735 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
736 replicate_trace_t *t = va_arg (*args, replicate_trace_t *);
738 s = format (s, "replicate: %d via %U",
740 format_dpo_id, &t->dpo, 0);
/* Node function for "ip4-replicate"; a thin wrapper over replicate_inline(). */
745 ip4_replicate (vlib_main_t * vm,
746 vlib_node_runtime_t * node,
747 vlib_frame_t * frame)
749 return (replicate_inline (vm, node, frame));
753 * @brief IP4 replication node
/*
 * Graph node registration for "ip4-replicate": u32 vector elements, the
 * shared replicate error strings, and the shared trace formatter.
 */
755 VLIB_REGISTER_NODE (ip4_replicate_node) = {
756 .function = ip4_replicate,
757 .name = "ip4-replicate",
758 .vector_size = sizeof (u32),
760 .n_errors = ARRAY_LEN(replicate_dpo_error_strings),
761 .error_strings = replicate_dpo_error_strings,
763 .format_trace = format_replicate_trace,
/* Node function for "ip6-replicate"; a thin wrapper over replicate_inline(). */
771 ip6_replicate (vlib_main_t * vm,
772 vlib_node_runtime_t * node,
773 vlib_frame_t * frame)
775 return (replicate_inline (vm, node, frame));
779 * @brief IPv6 replication node
/*
 * Graph node registration for "ip6-replicate": u32 vector elements, the
 * shared replicate error strings, and the shared trace formatter.
 */
781 VLIB_REGISTER_NODE (ip6_replicate_node) = {
782 .function = ip6_replicate,
783 .name = "ip6-replicate",
784 .vector_size = sizeof (u32),
786 .n_errors = ARRAY_LEN(replicate_dpo_error_strings),
787 .error_strings = replicate_dpo_error_strings,
789 .format_trace = format_replicate_trace,
/* Node function for "mpls-replicate"; a thin wrapper over replicate_inline(). */
797 mpls_replicate (vlib_main_t * vm,
798 vlib_node_runtime_t * node,
799 vlib_frame_t * frame)
801 return (replicate_inline (vm, node, frame));
805 * @brief MPLS replication node
/*
 * Graph node registration for "mpls-replicate": u32 vector elements, the
 * shared replicate error strings, and the shared trace formatter.
 */
807 VLIB_REGISTER_NODE (mpls_replicate_node) = {
808 .function = mpls_replicate,
809 .name = "mpls-replicate",
810 .vector_size = sizeof (u32),
812 .n_errors = ARRAY_LEN(replicate_dpo_error_strings),
813 .error_strings = replicate_dpo_error_strings,
815 .format_trace = format_replicate_trace,
/*
 * Init function (registered with VLIB_INIT_FUNCTION below): validates
 * replicate_main.clones up to index vlib_num_workers(). replicate_inline()
 * indexes this vector by thread index.
 * NOTE(review): the return statement is not visible in this extract.
 */
823 replicate_dpo_init (vlib_main_t * vm)
825 replicate_main_t * rm = &replicate_main;
827 vec_validate (rm->clones, vlib_num_workers());
832 VLIB_INIT_FUNCTION (replicate_dpo_init);