/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 #include <vnet/ip/lookup.h>
17 #include <vnet/dpo/replicate_dpo.h>
18 #include <vnet/dpo/drop_dpo.h>
19 #include <vnet/adj/adj.h>
/*
 * Debug logging for replicate DPO operations. Disabled by default; the
 * release build compiles REP_DBG() away to nothing.
 * NOTE(review): the #if/#else scaffolding was reconstructed — the visible
 * text only showed the two #define bodies; confirm against upstream.
 */
#define REP_DEBUG 0

#if REP_DEBUG
#define REP_DBG(_rep, _fmt, _args...)                                  \
{                                                                      \
    u8* _tmp = NULL;                                                   \
    clib_warning("rep:[%s]:" _fmt,                                     \
                 replicate_format(replicate_get_index((_rep)),         \
                                  0, 0, _tmp),                         \
                 ##_args);                                             \
    vec_free(_tmp);                                                    \
}
#else
#define REP_DBG(_p, _fmt, _args...)
#endif
/**
 * Pool of all DPOs. It's not static so the DP can have fast access
 */
replicate_t *replicate_pool;

/**
 * The one instance of replicate main
 */
replicate_main_t replicate_main;
49 replicate_get_index (const replicate_t *rep)
51 return (rep - replicate_pool);
54 static inline dpo_id_t*
55 replicate_get_buckets (replicate_t *rep)
57 if (REP_HAS_INLINE_BUCKETS(rep))
59 return (rep->rep_buckets_inline);
63 return (rep->rep_buckets);
68 replicate_alloc_i (void)
72 pool_get_aligned(replicate_pool, rep, CLIB_CACHE_LINE_BYTES);
73 memset(rep, 0, sizeof(*rep));
75 vlib_validate_combined_counter(&(replicate_main.repm_counters),
76 replicate_get_index(rep));
77 vlib_zero_combined_counter(&(replicate_main.repm_counters),
78 replicate_get_index(rep));
84 replicate_format (index_t repi,
85 replicate_format_flags_t flags,
94 rep = replicate_get(repi);
95 vlib_get_combined_counter(&(replicate_main.repm_counters), repi, &to);
96 buckets = replicate_get_buckets(rep);
98 s = format(s, "%U: ", format_dpo_type, DPO_REPLICATE);
99 s = format(s, "[index:%d buckets:%d ", repi, rep->rep_n_buckets);
100 s = format(s, "to:[%Ld:%Ld]]", to.packets, to.bytes);
102 for (i = 0; i < rep->rep_n_buckets; i++)
104 s = format(s, "\n%U", format_white_space, indent+2);
105 s = format(s, "[%d]", i);
106 s = format(s, " %U", format_dpo_id, &buckets[i], indent+6);
112 format_replicate (u8 * s, va_list * args)
114 index_t repi = va_arg(*args, index_t);
115 replicate_format_flags_t flags = va_arg(*args, replicate_format_flags_t);
117 return (replicate_format(repi, flags, 0, s));
120 format_replicate_dpo (u8 * s, va_list * args)
122 index_t repi = va_arg(*args, index_t);
123 u32 indent = va_arg(*args, u32);
125 return (replicate_format(repi, REPLICATE_FORMAT_DETAIL, indent, s));
130 replicate_create_i (u32 num_buckets,
131 dpo_proto_t rep_proto)
135 rep = replicate_alloc_i();
136 rep->rep_n_buckets = num_buckets;
137 rep->rep_proto = rep_proto;
139 if (!REP_HAS_INLINE_BUCKETS(rep))
141 vec_validate_aligned(rep->rep_buckets,
142 rep->rep_n_buckets - 1,
143 CLIB_CACHE_LINE_BYTES);
146 REP_DBG(rep, "create");
152 replicate_create (u32 n_buckets,
153 dpo_proto_t rep_proto)
155 return (replicate_get_index(replicate_create_i(n_buckets, rep_proto)));
159 replicate_set_bucket_i (replicate_t *rep,
162 const dpo_id_t *next)
164 dpo_stack(DPO_REPLICATE, rep->rep_proto, &buckets[bucket], next);
168 replicate_set_bucket (index_t repi,
170 const dpo_id_t *next)
175 rep = replicate_get(repi);
176 buckets = replicate_get_buckets(rep);
178 ASSERT(bucket < rep->rep_n_buckets);
180 replicate_set_bucket_i(rep, bucket, buckets, next);
184 replicate_is_drop (const dpo_id_t *dpo)
188 if (DPO_REPLICATE != dpo->dpoi_type)
191 rep = replicate_get(dpo->dpoi_index);
193 if (1 == rep->rep_n_buckets)
195 return (dpo_is_drop(replicate_get_bucket_i(rep, 0)));
201 replicate_get_bucket (index_t repi,
206 rep = replicate_get(repi);
208 return (replicate_get_bucket_i(rep, bucket));
212 static load_balance_path_t *
213 replicate_multipath_next_hop_fixup (load_balance_path_t *nhs,
214 dpo_proto_t drop_proto)
216 if (0 == vec_len(nhs))
218 load_balance_path_t *nh;
221 * we need something for the replicate. so use the drop
223 vec_add2(nhs, nh, 1);
226 dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto));
233 * Fill in adjacencies in block based on corresponding
234 * next hop adjacencies.
237 replicate_fill_buckets (replicate_t *rep,
238 load_balance_path_t *nhs,
242 load_balance_path_t * nh;
248 * the next-hops have normalised weights. that means their sum is the number
249 * of buckets we need to fill.
251 vec_foreach (nh, nhs)
253 for (ii = 0; ii < nh->path_weight; ii++)
255 ASSERT(bucket < n_buckets);
256 replicate_set_bucket_i(rep, bucket++, buckets, &nh->path_dpo);
262 replicate_set_n_buckets (replicate_t *rep,
265 rep->rep_n_buckets = n_buckets;
269 replicate_multipath_update (const dpo_id_t *dpo,
270 load_balance_path_t * next_hops)
272 load_balance_path_t * nh, * nhs;
277 ASSERT(DPO_REPLICATE == dpo->dpoi_type);
278 rep = replicate_get(dpo->dpoi_index);
279 nhs = replicate_multipath_next_hop_fixup(next_hops,
281 n_buckets = vec_len(nhs);
283 if (0 == rep->rep_n_buckets)
286 * first time initialisation. no packets inflight, so we can write
289 replicate_set_n_buckets(rep, n_buckets);
291 if (!REP_HAS_INLINE_BUCKETS(rep))
292 vec_validate_aligned(rep->rep_buckets,
293 rep->rep_n_buckets - 1,
294 CLIB_CACHE_LINE_BYTES);
296 replicate_fill_buckets(rep, nhs,
297 replicate_get_buckets(rep),
303 * This is a modification of an existing replicate.
304 * We need to ensure that packets in flight see a consistent state, that
305 * is the number of reported buckets the REP has
306 * is not more than it actually has. So if the
307 * number of buckets is increasing, we must update the bucket array first,
308 * then the reported number. vice-versa if the number of buckets goes down.
310 if (n_buckets == rep->rep_n_buckets)
313 * no change in the number of buckets. we can simply fill what
314 * is new over what is old.
316 replicate_fill_buckets(rep, nhs,
317 replicate_get_buckets(rep),
320 else if (n_buckets > rep->rep_n_buckets)
323 * we have more buckets. the old replicate map (if there is one)
324 * will remain valid, i.e. mapping to indices within range, so we
327 if (n_buckets > REP_NUM_INLINE_BUCKETS &&
328 rep->rep_n_buckets <= REP_NUM_INLINE_BUCKETS)
331 * the new increased number of buckets is crossing the threshold
332 * from the inline storage to out-line. Alloc the outline buckets
333 * first, then fixup the number. then reset the inlines.
335 ASSERT(NULL == rep->rep_buckets);
336 vec_validate_aligned(rep->rep_buckets,
338 CLIB_CACHE_LINE_BYTES);
340 replicate_fill_buckets(rep, nhs,
343 CLIB_MEMORY_BARRIER();
344 replicate_set_n_buckets(rep, n_buckets);
346 CLIB_MEMORY_BARRIER();
348 for (ii = 0; ii < REP_NUM_INLINE_BUCKETS; ii++)
350 dpo_reset(&rep->rep_buckets_inline[ii]);
355 if (n_buckets <= REP_NUM_INLINE_BUCKETS)
358 * we are not crossing the threshold and it's still inline buckets.
359 * we can write the new on the old..
361 replicate_fill_buckets(rep, nhs,
362 replicate_get_buckets(rep),
364 CLIB_MEMORY_BARRIER();
365 replicate_set_n_buckets(rep, n_buckets);
370 * we are not crossing the threshold. We need a new bucket array to
371 * hold the increased number of choices.
373 dpo_id_t *new_buckets, *old_buckets, *tmp_dpo;
376 old_buckets = replicate_get_buckets(rep);
378 vec_validate_aligned(new_buckets,
380 CLIB_CACHE_LINE_BYTES);
382 replicate_fill_buckets(rep, nhs, new_buckets, n_buckets);
383 CLIB_MEMORY_BARRIER();
384 rep->rep_buckets = new_buckets;
385 CLIB_MEMORY_BARRIER();
386 replicate_set_n_buckets(rep, n_buckets);
388 vec_foreach(tmp_dpo, old_buckets)
392 vec_free(old_buckets);
399 * bucket size shrinkage.
401 if (n_buckets <= REP_NUM_INLINE_BUCKETS &&
402 rep->rep_n_buckets > REP_NUM_INLINE_BUCKETS)
405 * the new decreased number of buckets is crossing the threshold
406 * from out-line storage to inline:
407 * 1 - Fill the inline buckets,
408 * 2 - fixup the number (and this point the inline buckets are
410 * 3 - free the outline buckets
412 replicate_fill_buckets(rep, nhs,
413 rep->rep_buckets_inline,
415 CLIB_MEMORY_BARRIER();
416 replicate_set_n_buckets(rep, n_buckets);
417 CLIB_MEMORY_BARRIER();
419 vec_foreach(tmp_dpo, rep->rep_buckets)
423 vec_free(rep->rep_buckets);
428 * not crossing the threshold.
429 * 1 - update the number to the smaller size
430 * 2 - write the new buckets
431 * 3 - reset those no longer used.
436 old_n_buckets = rep->rep_n_buckets;
437 buckets = replicate_get_buckets(rep);
439 replicate_set_n_buckets(rep, n_buckets);
440 CLIB_MEMORY_BARRIER();
442 replicate_fill_buckets(rep, nhs,
446 for (ii = n_buckets; ii < old_n_buckets; ii++)
448 dpo_reset(&buckets[ii]);
454 vec_foreach (nh, nhs)
456 dpo_reset(&nh->path_dpo);
462 replicate_lock (dpo_id_t *dpo)
466 rep = replicate_get(dpo->dpoi_index);
472 replicate_destroy (replicate_t *rep)
477 buckets = replicate_get_buckets(rep);
479 for (i = 0; i < rep->rep_n_buckets; i++)
481 dpo_reset(&buckets[i]);
484 REP_DBG(rep, "destroy");
485 if (!REP_HAS_INLINE_BUCKETS(rep))
487 vec_free(rep->rep_buckets);
490 pool_put(replicate_pool, rep);
494 replicate_unlock (dpo_id_t *dpo)
498 rep = replicate_get(dpo->dpoi_index);
502 if (0 == rep->rep_locks)
504 replicate_destroy(rep);
509 replicate_mem_show (void)
511 fib_show_memory_usage("replicate",
512 pool_elts(replicate_pool),
513 pool_len(replicate_pool),
514 sizeof(replicate_t));
517 const static dpo_vft_t rep_vft = {
518 .dv_lock = replicate_lock,
519 .dv_unlock = replicate_unlock,
520 .dv_format = format_replicate_dpo,
521 .dv_mem_show = replicate_mem_show,
525 * @brief The per-protocol VLIB graph nodes that are assigned to a replicate
528 * this means that these graph nodes are ones from which a replicate is the
529 * parent object in the DPO-graph.
531 const static char* const replicate_ip4_nodes[] =
536 const static char* const replicate_ip6_nodes[] =
541 const static char* const replicate_mpls_nodes[] =
547 const static char* const * const replicate_nodes[DPO_PROTO_NUM] =
549 [DPO_PROTO_IP4] = replicate_ip4_nodes,
550 [DPO_PROTO_IP6] = replicate_ip6_nodes,
551 [DPO_PROTO_MPLS] = replicate_mpls_nodes,
555 replicate_module_init (void)
557 dpo_register(DPO_REPLICATE, &rep_vft, replicate_nodes);
560 static clib_error_t *
561 replicate_show (vlib_main_t * vm,
562 unformat_input_t * input,
563 vlib_cli_command_t * cmd)
565 index_t repi = INDEX_INVALID;
567 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
569 if (unformat (input, "%d", &repi))
575 if (INDEX_INVALID != repi)
577 vlib_cli_output (vm, "%U", format_replicate, repi,
578 REPLICATE_FORMAT_DETAIL);
584 pool_foreach(rep, replicate_pool,
586 vlib_cli_output (vm, "%U", format_replicate,
587 replicate_get_index(rep),
588 REPLICATE_FORMAT_NONE);
595 VLIB_CLI_COMMAND (replicate_show_command, static) = {
596 .path = "show replicate",
597 .short_help = "show replicate [<index>]",
598 .function = replicate_show,
601 typedef struct replicate_trace_t_
608 replicate_inline (vlib_main_t * vm,
609 vlib_node_runtime_t * node,
610 vlib_frame_t * frame)
612 vlib_combined_counter_main_t * cm = &replicate_main.repm_counters;
613 u32 n_left_from, * from, * to_next, next_index;
614 u32 cpu_index = os_get_cpu_number();
616 from = vlib_frame_vector_args (frame);
617 n_left_from = frame->n_vectors;
618 next_index = node->cached_next_index;
620 while (n_left_from > 0)
624 vlib_get_next_frame (vm, node, next_index,
625 to_next, n_left_to_next);
627 while (n_left_from > 0 && n_left_to_next > 0)
629 u32 next0, ci0, bi0, bucket, repi0;
630 const replicate_t *rep0;
631 vlib_buffer_t * b0, *c0;
632 const dpo_id_t *dpo0;
641 b0 = vlib_get_buffer (vm, bi0);
642 repi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
643 rep0 = replicate_get(repi0);
645 vlib_increment_combined_counter(
646 cm, cpu_index, repi0, 1,
647 vlib_buffer_length_in_chain(vm, b0));
649 /* ship the original to the first bucket */
650 dpo0 = replicate_get_bucket_i(rep0, 0);
651 next0 = dpo0->dpoi_next_node;
652 vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
654 if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
656 replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
657 t->rep_index = repi0;
660 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
661 to_next, n_left_to_next,
664 /* ship copies to the rest of the buckets */
665 for (bucket = 1; bucket < rep0->rep_n_buckets; bucket++)
668 * After the enqueue of the first buffer, and of all subsequent
669 * buffers in this loop, it is possible that we over-flow the
670 * frame of the to-next node. When this happens we need to 'put'
671 * that full frame to the node and get a fresh empty one.
672 * Note that these are macros with side effects that change
673 * to_next & n_left_to_next
675 if (PREDICT_FALSE(0 == n_left_to_next))
677 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
678 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
682 c0 = vlib_buffer_copy(vm, b0);
683 ci0 = vlib_get_buffer_index(vm, c0);
689 dpo0 = replicate_get_bucket_i(rep0, bucket);
690 next0 = dpo0->dpoi_next_node;
691 vnet_buffer (c0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
693 if (PREDICT_FALSE(c0->flags & VLIB_BUFFER_IS_TRACED))
695 replicate_trace_t *t = vlib_add_trace (vm, node, c0, sizeof (*t));
696 t->rep_index = repi0;
700 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
701 to_next, n_left_to_next,
706 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
709 return frame->n_vectors;
713 format_replicate_trace (u8 * s, va_list * args)
715 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
716 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
717 replicate_trace_t *t = va_arg (*args, replicate_trace_t *);
719 s = format (s, "replicate: %d via %U",
721 format_dpo_id, &t->dpo);
726 ip4_replicate (vlib_main_t * vm,
727 vlib_node_runtime_t * node,
728 vlib_frame_t * frame)
730 return (replicate_inline (vm, node, frame));
736 VLIB_REGISTER_NODE (ip4_replicate_node) = {
737 .function = ip4_replicate,
738 .name = "ip4-replicate",
739 .vector_size = sizeof (u32),
741 .format_trace = format_replicate_trace,
749 ip6_replicate (vlib_main_t * vm,
750 vlib_node_runtime_t * node,
751 vlib_frame_t * frame)
753 return (replicate_inline (vm, node, frame));
759 VLIB_REGISTER_NODE (ip6_replicate_node) = {
760 .function = ip6_replicate,
761 .name = "ip6-replicate",
762 .vector_size = sizeof (u32),
764 .format_trace = format_replicate_trace,