Fix SR multicast post mfib commit
[vpp.git] / src / vnet / dpo / replicate_dpo.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vnet/ip/lookup.h>
17 #include <vnet/dpo/replicate_dpo.h>
18 #include <vnet/dpo/drop_dpo.h>
19 #include <vnet/adj/adj.h>
20
/*
 * Debug logging for replicate objects. REP_DEBUG is undefined by default
 * so REP_DBG compiles away to nothing; define it to get a clib_warning
 * per event with the formatted replicate prepended.
 */
#undef REP_DEBUG

#ifdef REP_DEBUG
#define REP_DBG(_rep, _fmt, _args...)                                   \
{                                                                       \
    u8* _tmp = NULL;                                                    \
    clib_warning("rep:[%s]:" _fmt,                                      \
                 /* replicate_format takes (index, flags, indent, s) -  \
                  * the flags argument was previously missing, which    \
                  * broke the build whenever REP_DEBUG was defined */   \
                 replicate_format(replicate_get_index((_rep)),          \
                                  REPLICATE_FORMAT_NONE,                \
                                  0, _tmp),                             \
                 ##_args);                                              \
    vec_free(_tmp);                                                     \
}
#else
#define REP_DBG(_p, _fmt, _args...)
#endif
36
37
/**
 * Pool of all replicate DPOs. It's not static so the data-plane can
 * index it directly for fast access
 */
replicate_t *replicate_pool;

/**
 * The one instance of replicate main; holds the per-replicate
 * combined packet/byte counters
 */
replicate_main_t replicate_main;
47
/**
 * Get a replicate object's pool index from its pointer;
 * the pool-relative position is the index
 */
static inline index_t
replicate_get_index (const replicate_t *rep)
{
    return (rep - replicate_pool);
}
53
54 static inline dpo_id_t*
55 replicate_get_buckets (replicate_t *rep)
56 {
57     if (REP_HAS_INLINE_BUCKETS(rep))
58     {
59         return (rep->rep_buckets_inline);
60     }
61     else
62     {
63         return (rep->rep_buckets);
64     }
65 }
66
/**
 * Allocate and zero-initialise a replicate object from the pool,
 * and prepare its combined packet/byte counter.
 *
 * @return pointer to the new object (index via replicate_get_index)
 */
static replicate_t *
replicate_alloc_i (void)
{
    replicate_t *rep;

    pool_get_aligned(replicate_pool, rep, CLIB_CACHE_LINE_BYTES);
    memset(rep, 0, sizeof(*rep));

    /* make sure a counter exists for this index and starts from zero -
     * the pool may be recycling a previously freed slot */
    vlib_validate_combined_counter(&(replicate_main.repm_counters),
                                   replicate_get_index(rep));
    vlib_zero_combined_counter(&(replicate_main.repm_counters),
                               replicate_get_index(rep));

    return (rep);
}
82
/**
 * Render a replicate object into the supplied string: index, bucket
 * count, packet/byte counters, then one line per bucket.
 *
 * NOTE(review): the 'flags' argument is currently unused - every caller
 * gets the same level of detail regardless of the flags passed.
 */
static u8*
replicate_format (index_t repi,
                  replicate_format_flags_t flags,
                  u32 indent,
                  u8 *s)
{
    vlib_counter_t to;
    replicate_t *rep;
    dpo_id_t *buckets;
    u32 i;

    rep = replicate_get(repi);
    vlib_get_combined_counter(&(replicate_main.repm_counters), repi, &to);
    buckets = replicate_get_buckets(rep);

    s = format(s, "%U: ", format_dpo_type, DPO_REPLICATE);
    s = format(s, "[index:%d buckets:%d ", repi, rep->rep_n_buckets);
    s = format(s, "to:[%Ld:%Ld]]", to.packets, to.bytes);

    /* one indented line per bucket member */
    for (i = 0; i < rep->rep_n_buckets; i++)
    {
        s = format(s, "\n%U", format_white_space, indent+2);
        s = format(s, "[%d]", i);
        s = format(s, " %U", format_dpo_id, &buckets[i], indent+6);
    }
    return (s);
}
110
111 u8*
112 format_replicate (u8 * s, va_list * args)
113 {
114     index_t repi = va_arg(*args, index_t);
115     replicate_format_flags_t flags = va_arg(*args, replicate_format_flags_t);
116
117     return (replicate_format(repi, flags, 0, s));
118 }
119 static u8*
120 format_replicate_dpo (u8 * s, va_list * args)
121 {
122     index_t repi = va_arg(*args, index_t);
123     u32 indent = va_arg(*args, u32);
124
125     return (replicate_format(repi, REPLICATE_FORMAT_DETAIL, indent, s));
126 }
127
128
/**
 * Allocate a replicate and size its bucket storage.
 *
 * @param num_buckets number of member buckets; when this exceeds
 *                    REP_NUM_INLINE_BUCKETS an out-of-line vector is
 *                    allocated, otherwise the inline storage is used
 * @param rep_proto   the DPO protocol of the members
 */
static replicate_t *
replicate_create_i (u32 num_buckets,
                    dpo_proto_t rep_proto)
{
    replicate_t *rep;

    rep = replicate_alloc_i();
    rep->rep_n_buckets = num_buckets;
    rep->rep_proto = rep_proto;

    if (!REP_HAS_INLINE_BUCKETS(rep))
    {
        vec_validate_aligned(rep->rep_buckets,
                             rep->rep_n_buckets - 1,
                             CLIB_CACHE_LINE_BYTES);
    }

    REP_DBG(rep, "create");

    return (rep);
}
150
151 index_t
152 replicate_create (u32 n_buckets,
153                   dpo_proto_t rep_proto)
154 {
155     return (replicate_get_index(replicate_create_i(n_buckets, rep_proto)));
156 }
157
/**
 * Install 'next' into the given bucket by stacking the bucket's
 * dpo_id on it (establishes the DPO-graph linkage)
 */
static inline void
replicate_set_bucket_i (replicate_t *rep,
                        u32 bucket,
                        dpo_id_t *buckets,
                        const dpo_id_t *next)
{
    dpo_stack(DPO_REPLICATE, rep->rep_proto, &buckets[bucket], next);
}
166
167 void
168 replicate_set_bucket (index_t repi,
169                       u32 bucket,
170                       const dpo_id_t *next)
171 {
172     replicate_t *rep;
173     dpo_id_t *buckets;
174
175     rep = replicate_get(repi);
176     buckets = replicate_get_buckets(rep);
177
178     ASSERT(bucket < rep->rep_n_buckets);
179
180     replicate_set_bucket_i(rep, bucket, buckets, next);
181 }
182
183 int
184 replicate_is_drop (const dpo_id_t *dpo)
185 {
186     replicate_t *rep;
187
188     if (DPO_REPLICATE != dpo->dpoi_type)
189         return (0);
190
191     rep = replicate_get(dpo->dpoi_index);
192
193     if (1 == rep->rep_n_buckets)
194     {
195         return (dpo_is_drop(replicate_get_bucket_i(rep, 0)));
196     }
197     return (0);
198 }
199
200 const dpo_id_t *
201 replicate_get_bucket (index_t repi,
202                       u32 bucket)
203 {
204     replicate_t *rep;
205
206     rep = replicate_get(repi);
207
208     return (replicate_get_bucket_i(rep, bucket));
209 }
210
211
212 static load_balance_path_t *
213 replicate_multipath_next_hop_fixup (load_balance_path_t *nhs,
214                                     dpo_proto_t drop_proto)
215 {
216     if (0 == vec_len(nhs))
217     {
218         load_balance_path_t *nh;
219
220         /*
221          * we need something for the replicate. so use the drop
222          */
223         vec_add2(nhs, nh, 1);
224
225         nh->path_weight = 1;
226         dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto));
227     }
228
229     return (nhs);
230 }
231
/*
 * Fill in adjacencies in block based on corresponding
 * next hop adjacencies.
 *
 * Each path occupies path_weight consecutive buckets; since the weights
 * are normalised their sum must equal n_buckets.
 */
static void
replicate_fill_buckets (replicate_t *rep,
                        load_balance_path_t *nhs,
                        dpo_id_t *buckets,
                        u32 n_buckets)
{
    load_balance_path_t * nh;
    u16 ii, bucket; /* u16 counters: assumes n_buckets < 64k - TODO confirm */

    bucket = 0;

    /*
     * the next-hops have normalised weights. that means their sum is the number
     * of buckets we need to fill.
     */
    vec_foreach (nh, nhs)
    {
        for (ii = 0; ii < nh->path_weight; ii++)
        {
            ASSERT(bucket < n_buckets);
            replicate_set_bucket_i(rep, bucket++, buckets, &nh->path_dpo);
        }
    }
}
260
/**
 * Update the advertised bucket count. Callers are responsible for
 * ordering this write against the bucket-array writes (with
 * CLIB_MEMORY_BARRIER) so in-flight packets never see a count larger
 * than the populated buckets.
 */
static inline void
replicate_set_n_buckets (replicate_t *rep,
                         u32 n_buckets)
{
    rep->rep_n_buckets = n_buckets;
}
267
/**
 * Populate or update a replicate's bucket set from a vector of paths.
 *
 * @param dpo       identifies an existing DPO_REPLICATE object
 * @param next_hops vector of paths with normalised weights; this
 *                  function takes ownership - each path DPO is reset
 *                  and the vector freed before returning
 *
 * Updates are made safe for in-flight packets by ordering the writes
 * to the bucket array and the advertised bucket count with memory
 * barriers, such that the count never exceeds the populated buckets.
 */
void
replicate_multipath_update (const dpo_id_t *dpo,
                            load_balance_path_t * next_hops)
{
    load_balance_path_t * nh, * nhs;
    dpo_id_t *tmp_dpo;
    u32 ii, n_buckets;
    replicate_t *rep;

    ASSERT(DPO_REPLICATE == dpo->dpoi_type);
    rep = replicate_get(dpo->dpoi_index);
    /* guarantee at least one (drop) bucket */
    nhs = replicate_multipath_next_hop_fixup(next_hops,
                                             rep->rep_proto);
    n_buckets = vec_len(nhs);

    if (0 == rep->rep_n_buckets)
    {
        /*
         * first time initialisation. no packets inflight, so we can write
         * at leisure.
         */
        replicate_set_n_buckets(rep, n_buckets);

        if (!REP_HAS_INLINE_BUCKETS(rep))
            vec_validate_aligned(rep->rep_buckets,
                                 rep->rep_n_buckets - 1,
                                 CLIB_CACHE_LINE_BYTES);

        replicate_fill_buckets(rep, nhs,
                               replicate_get_buckets(rep),
                               n_buckets);
    }
    else
    {
        /*
         * This is a modification of an existing replicate.
         * We need to ensure that packets in flight see a consistent state, that
         * is the number of reported buckets the REP has
         * is not more than it actually has. So if the
         * number of buckets is increasing, we must update the bucket array first,
         * then the reported number. vice-versa if the number of buckets goes down.
         */
        if (n_buckets == rep->rep_n_buckets)
        {
            /*
             * no change in the number of buckets. we can simply fill what
             * is new over what is old.
             */
            replicate_fill_buckets(rep, nhs,
                                   replicate_get_buckets(rep),
                                   n_buckets);
        }
        else if (n_buckets > rep->rep_n_buckets)
        {
            /*
             * we have more buckets. the old replicate map (if there is one)
             * will remain valid, i.e. mapping to indices within range, so we
             * update it last.
             */
            if (n_buckets > REP_NUM_INLINE_BUCKETS &&
                rep->rep_n_buckets <= REP_NUM_INLINE_BUCKETS)
            {
                /*
                 * the new increased number of buckets is crossing the threshold
                 * from the inline storage to out-line. Alloc the outline buckets
                 * first, then fixup the number. then reset the inlines.
                 */
                ASSERT(NULL == rep->rep_buckets);
                vec_validate_aligned(rep->rep_buckets,
                                     n_buckets - 1,
                                     CLIB_CACHE_LINE_BYTES);

                replicate_fill_buckets(rep, nhs,
                                       rep->rep_buckets,
                                       n_buckets);
                CLIB_MEMORY_BARRIER();
                replicate_set_n_buckets(rep, n_buckets);

                CLIB_MEMORY_BARRIER();

                /* the inline buckets are no longer reachable - drop
                 * their contributions to the old members */
                for (ii = 0; ii < REP_NUM_INLINE_BUCKETS; ii++)
                {
                    dpo_reset(&rep->rep_buckets_inline[ii]);
                }
            }
            else
            {
                if (n_buckets <= REP_NUM_INLINE_BUCKETS)
                {
                    /*
                     * we are not crossing the threshold and it's still inline buckets.
                     * we can write the new on the old..
                     */
                    replicate_fill_buckets(rep, nhs,
                                           replicate_get_buckets(rep),
                                           n_buckets);
                    CLIB_MEMORY_BARRIER();
                    replicate_set_n_buckets(rep, n_buckets);
                }
                else
                {
                    /*
                     * we are not crossing the threshold. We need a new bucket array to
                     * hold the increased number of choices.
                     */
                    dpo_id_t *new_buckets, *old_buckets, *tmp_dpo;

                    new_buckets = NULL;
                    old_buckets = replicate_get_buckets(rep);

                    vec_validate_aligned(new_buckets,
                                         n_buckets - 1,
                                         CLIB_CACHE_LINE_BYTES);

                    replicate_fill_buckets(rep, nhs, new_buckets, n_buckets);
                    CLIB_MEMORY_BARRIER();
                    /* swap in the fully-populated array before raising
                     * the advertised count */
                    rep->rep_buckets = new_buckets;
                    CLIB_MEMORY_BARRIER();
                    replicate_set_n_buckets(rep, n_buckets);

                    vec_foreach(tmp_dpo, old_buckets)
                    {
                        dpo_reset(tmp_dpo);
                    }
                    vec_free(old_buckets);
                }
            }
        }
        else
        {
            /*
             * bucket size shrinkage.
             */
            if (n_buckets <= REP_NUM_INLINE_BUCKETS &&
                rep->rep_n_buckets > REP_NUM_INLINE_BUCKETS)
            {
                /*
                 * the new decreased number of buckets is crossing the threshold
                 * from out-line storage to inline:
                 *   1 - Fill the inline buckets,
                 *   2 - fixup the number (and this point the inline buckets are
                 *       used).
                 *   3 - free the outline buckets
                 */
                replicate_fill_buckets(rep, nhs,
                                       rep->rep_buckets_inline,
                                       n_buckets);
                CLIB_MEMORY_BARRIER();
                replicate_set_n_buckets(rep, n_buckets);
                CLIB_MEMORY_BARRIER();

                vec_foreach(tmp_dpo, rep->rep_buckets)
                {
                    dpo_reset(tmp_dpo);
                }
                vec_free(rep->rep_buckets);
            }
            else
            {
                /*
                 * not crossing the threshold.
                 *  1 - update the number to the smaller size
                 *  2 - write the new buckets
                 *  3 - reset those no longer used.
                 */
                dpo_id_t *buckets;
                u32 old_n_buckets;

                old_n_buckets = rep->rep_n_buckets;
                buckets = replicate_get_buckets(rep);

                replicate_set_n_buckets(rep, n_buckets);
                CLIB_MEMORY_BARRIER();

                replicate_fill_buckets(rep, nhs,
                                       buckets,
                                       n_buckets);

                for (ii = n_buckets; ii < old_n_buckets; ii++)
                {
                    dpo_reset(&buckets[ii]);
                }
            }
        }
    }

    /* we own the path vector: drop each path's contribution and free it */
    vec_foreach (nh, nhs)
    {
        dpo_reset(&nh->path_dpo);
    }
    vec_free(nhs);
}
460
461 static void
462 replicate_lock (dpo_id_t *dpo)
463 {
464     replicate_t *rep;
465
466     rep = replicate_get(dpo->dpoi_index);
467
468     rep->rep_locks++;
469 }
470
/**
 * Free a replicate: reset each bucket's DPO (releasing its reference
 * on the member), free the out-of-line bucket vector if one was
 * allocated, and return the object to the pool.
 */
static void
replicate_destroy (replicate_t *rep)
{
    dpo_id_t *buckets;
    int i;

    buckets = replicate_get_buckets(rep);

    for (i = 0; i < rep->rep_n_buckets; i++)
    {
        dpo_reset(&buckets[i]);
    }

    REP_DBG(rep, "destroy");
    if (!REP_HAS_INLINE_BUCKETS(rep))
    {
        vec_free(rep->rep_buckets);
    }

    pool_put(replicate_pool, rep);
}
492
493 static void
494 replicate_unlock (dpo_id_t *dpo)
495 {
496     replicate_t *rep;
497
498     rep = replicate_get(dpo->dpoi_index);
499
500     rep->rep_locks--;
501
502     if (0 == rep->rep_locks)
503     {
504         replicate_destroy(rep);
505     }
506 }
507
/**
 * Report the replicate pool's element count and memory footprint
 * via the FIB memory-usage infrastructure
 */
static void
replicate_mem_show (void)
{
    fib_show_memory_usage("replicate",
                          pool_elts(replicate_pool),
                          pool_len(replicate_pool),
                          sizeof(replicate_t));
}
516
/**
 * The virtual function table registered with the DPO infra for
 * DPO_REPLICATE objects
 */
const static dpo_vft_t rep_vft = {
    .dv_lock = replicate_lock,
    .dv_unlock = replicate_unlock,
    .dv_format = format_replicate_dpo,
    .dv_mem_show = replicate_mem_show,
};
523
/**
 * @brief The per-protocol VLIB graph nodes that are assigned to a replicate
 *        object.
 *
 * this means that these graph nodes are ones from which a replicate is the
 * parent object in the DPO-graph.
 */
const static char* const replicate_ip4_nodes[] =
{
    "ip4-replicate",
    NULL,
};
const static char* const replicate_ip6_nodes[] =
{
    "ip6-replicate",
    NULL,
};
const static char* const replicate_mpls_nodes[] =
{
    "mpls-replicate",
    NULL,
};

/* per-protocol node lists, indexed by dpo_proto_t; protocols without
 * an entry (e.g. ethernet) have no replicate node */
const static char* const * const replicate_nodes[DPO_PROTO_NUM] =
{
    [DPO_PROTO_IP4]  = replicate_ip4_nodes,
    [DPO_PROTO_IP6]  = replicate_ip6_nodes,
    [DPO_PROTO_MPLS] = replicate_mpls_nodes,
};
553
/**
 * Module initialisation: register the replicate DPO type with its
 * virtual function table and per-protocol graph nodes
 */
void
replicate_module_init (void)
{
    dpo_register(DPO_REPLICATE, &rep_vft, replicate_nodes);
}
559
560 static clib_error_t *
561 replicate_show (vlib_main_t * vm,
562                 unformat_input_t * input,
563                 vlib_cli_command_t * cmd)
564 {
565     index_t repi = INDEX_INVALID;
566
567     while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
568     {
569         if (unformat (input, "%d", &repi))
570             ;
571         else
572             break;
573     }
574
575     if (INDEX_INVALID != repi)
576     {
577         vlib_cli_output (vm, "%U", format_replicate, repi,
578                          REPLICATE_FORMAT_DETAIL);
579     }
580     else
581     {
582         replicate_t *rep;
583
584         pool_foreach(rep, replicate_pool,
585         ({
586             vlib_cli_output (vm, "%U", format_replicate,
587                              replicate_get_index(rep),
588                              REPLICATE_FORMAT_NONE);
589         }));
590     }
591
592     return 0;
593 }
594
/**
 * Registration of the "show replicate" CLI command
 */
VLIB_CLI_COMMAND (replicate_show_command, static) = {
    .path = "show replicate",
    .short_help = "show replicate [<index>]",
    .function = replicate_show,
};
600
/**
 * Per-buffer trace data recorded by the replicate nodes
 */
typedef struct replicate_trace_t_
{
    /** index of the replicate object the buffer traversed */
    index_t rep_index;
    /** the member DPO this buffer (or copy) was sent to */
    dpo_id_t dpo;
} replicate_trace_t;
606
607 static uword
608 replicate_inline (vlib_main_t * vm,
609                   vlib_node_runtime_t * node,
610                   vlib_frame_t * frame)
611 {
612     vlib_combined_counter_main_t * cm = &replicate_main.repm_counters;
613     u32 n_left_from, * from, * to_next, next_index;
614     u32 cpu_index = os_get_cpu_number();
615
616     from = vlib_frame_vector_args (frame);
617     n_left_from = frame->n_vectors;
618     next_index = node->cached_next_index;
619   
620     while (n_left_from > 0)
621     {
622         u32 n_left_to_next;
623
624         vlib_get_next_frame (vm, node, next_index,
625                              to_next, n_left_to_next);
626
627         while (n_left_from > 0 && n_left_to_next > 0)
628         {
629             u32 next0, ci0, bi0, bucket, repi0;
630             const replicate_t *rep0;
631             vlib_buffer_t * b0, *c0;
632             const dpo_id_t *dpo0;
633
634             bi0 = from[0];
635             to_next[0] = bi0;
636             from += 1;
637             to_next += 1;
638             n_left_from -= 1;
639             n_left_to_next -= 1;
640
641             b0 = vlib_get_buffer (vm, bi0);
642             repi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
643             rep0 = replicate_get(repi0);
644
645             vlib_increment_combined_counter(
646                 cm, cpu_index, repi0, 1,
647                 vlib_buffer_length_in_chain(vm, b0));
648
649             /* ship the original to the first bucket */
650             dpo0 = replicate_get_bucket_i(rep0, 0);
651             next0 = dpo0->dpoi_next_node;
652             vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
653
654             if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
655             {
656                 replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
657                 t->rep_index = repi0;
658                 t->dpo = *dpo0;
659             }
660             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
661                                              to_next, n_left_to_next,
662                                              bi0, next0);
663
664             /* ship copies to the rest of the buckets */
665             for (bucket = 1; bucket < rep0->rep_n_buckets; bucket++)
666             {
667                 /* Make a copy */
668                 c0 = vlib_buffer_copy(vm, b0);
669                 ci0 = vlib_get_buffer_index(vm, c0);
670
671                 to_next[0] = ci0;
672                 to_next += 1;
673                 n_left_to_next -= 1;
674
675                 dpo0 = replicate_get_bucket_i(rep0, bucket);
676                 next0 = dpo0->dpoi_next_node;
677                 vnet_buffer (c0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
678
679                 if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
680                 {
681                     replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
682                     t->rep_index = repi0;
683                     t->dpo = *dpo0;
684                 }
685
686                 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
687                                                  to_next, n_left_to_next,
688                                                  ci0, next0);
689             }
690         }
691
692         vlib_put_next_frame (vm, node, next_index, n_left_to_next);
693     }
694
695     return frame->n_vectors;
696 }
697
698 static u8 *
699 format_replicate_trace (u8 * s, va_list * args)
700 {
701   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
702   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
703   replicate_trace_t *t = va_arg (*args, replicate_trace_t *);
704
705   s = format (s, "replicate: %d via %U",
706               t->rep_index,
707               format_dpo_id, &t->dpo);
708   return s;
709 }
710
711 static uword
712 ip4_replicate (vlib_main_t * vm,
713                vlib_node_runtime_t * node,
714                vlib_frame_t * frame)
715 {
716     return (replicate_inline (vm, node, frame));
717 }
718
/**
 * @brief Registration of the IPv4 replicate graph node. The only
 * static next is error-drop; per-bucket nexts are reached via each
 * member DPO's cached next-node index.
 */
VLIB_REGISTER_NODE (ip4_replicate_node) = {
  .function = ip4_replicate,
  .name = "ip4-replicate",
  .vector_size = sizeof (u32),

  .format_trace = format_replicate_trace,
  .n_next_nodes = 1,
  .next_nodes = {
      [0] = "error-drop",
  },
};
733
734 static uword
735 ip6_replicate (vlib_main_t * vm,
736                vlib_node_runtime_t * node,
737                vlib_frame_t * frame)
738 {
739     return (replicate_inline (vm, node, frame));
740 }
741
/**
 * @brief Registration of the IPv6 replicate graph node. The only
 * static next is error-drop; per-bucket nexts are reached via each
 * member DPO's cached next-node index.
 */
VLIB_REGISTER_NODE (ip6_replicate_node) = {
  .function = ip6_replicate,
  .name = "ip6-replicate",
  .vector_size = sizeof (u32),

  .format_trace = format_replicate_trace,
  .n_next_nodes = 1,
  .next_nodes = {
      [0] = "error-drop",
  },
};