vpp.git - src/vnet/fib/fib_path.c (at commit "MTRIE Optimisations 2")
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/ip/format.h>
19 #include <vnet/ip/ip.h>
20 #include <vnet/dpo/drop_dpo.h>
21 #include <vnet/dpo/receive_dpo.h>
22 #include <vnet/dpo/load_balance_map.h>
23 #include <vnet/dpo/lookup_dpo.h>
24
25 #include <vnet/adj/adj.h>
26 #include <vnet/adj/adj_mcast.h>
27
28 #include <vnet/fib/fib_path.h>
29 #include <vnet/fib/fib_node.h>
30 #include <vnet/fib/fib_table.h>
31 #include <vnet/fib/fib_entry.h>
32 #include <vnet/fib/fib_path_list.h>
33 #include <vnet/fib/fib_internal.h>
34 #include <vnet/fib/fib_urpf_list.h>
35 #include <vnet/fib/mpls_fib.h>
36
37 /**
38  * Enumeration of path types
39  */
40 typedef enum fib_path_type_t_ {
41     /**
42      * Marker. Add new types after this one.
43      */
44     FIB_PATH_TYPE_FIRST = 0,
45     /**
46      * Attached-nexthop. An interface and a nexthop are known.
47      */
48     FIB_PATH_TYPE_ATTACHED_NEXT_HOP = FIB_PATH_TYPE_FIRST,
49     /**
50      * attached. Only the interface is known.
51      */
52     FIB_PATH_TYPE_ATTACHED,
53     /**
54      * recursive. Only the next-hop is known.
55      */
56     FIB_PATH_TYPE_RECURSIVE,
57     /**
58      * special. nothing is known. so we drop.
59      */
60     FIB_PATH_TYPE_SPECIAL,
61     /**
62      * exclusive. user provided adj.
63      */
64     FIB_PATH_TYPE_EXCLUSIVE,
65     /**
66      * deag. Link to a lookup adj in the next table
67      */
68     FIB_PATH_TYPE_DEAG,
69     /**
70      * receive. it's for-us.
71      */
72     FIB_PATH_TYPE_RECEIVE,
73     /**
74      * Marker. Add new types before this one, then update it.
75      */
76     FIB_PATH_TYPE_LAST = FIB_PATH_TYPE_RECEIVE,
77 } __attribute__ ((packed)) fib_path_type_t;
78
79 /**
80  * The maximum number of path_types
81  */
82 #define FIB_PATH_TYPE_MAX (FIB_PATH_TYPE_LAST + 1)
83
84 #define FIB_PATH_TYPES {                                        \
85     [FIB_PATH_TYPE_ATTACHED_NEXT_HOP] = "attached-nexthop",     \
86     [FIB_PATH_TYPE_ATTACHED]          = "attached",             \
87     [FIB_PATH_TYPE_RECURSIVE]         = "recursive",            \
88     [FIB_PATH_TYPE_SPECIAL]           = "special",              \
89     [FIB_PATH_TYPE_EXCLUSIVE]         = "exclusive",            \
90     [FIB_PATH_TYPE_DEAG]              = "deag",                 \
91     [FIB_PATH_TYPE_RECEIVE]           = "receive",              \
92 }
93
94 #define FOR_EACH_FIB_PATH_TYPE(_item) \
95     for (_item = FIB_PATH_TYPE_FIRST; _item <= FIB_PATH_TYPE_LAST; _item++)
96
97 /**
98  * Enumeration of path operational (i.e. derived) attributes
99  */
100 typedef enum fib_path_oper_attribute_t_ {
101     /**
102      * Marker. Add new types after this one.
103      */
104     FIB_PATH_OPER_ATTRIBUTE_FIRST = 0,
105     /**
106      * The path forms part of a recursive loop.
107      */
108     FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP = FIB_PATH_OPER_ATTRIBUTE_FIRST,
109     /**
110      * The path is resolved
111      */
112     FIB_PATH_OPER_ATTRIBUTE_RESOLVED,
113     /**
114      * The path is attached, despite what the next-hop may say.
115      */
116     FIB_PATH_OPER_ATTRIBUTE_ATTACHED,
117     /**
118      * The path has become a permanent drop.
119      */
120     FIB_PATH_OPER_ATTRIBUTE_DROP,
121     /**
122      * Marker. Add new types before this one, then update it.
123      */
124     FIB_PATH_OPER_ATTRIBUTE_LAST = FIB_PATH_OPER_ATTRIBUTE_DROP,
125 } __attribute__ ((packed)) fib_path_oper_attribute_t;
126
127 /**
128  * The maximum number of path operational attributes
129  */
130 #define FIB_PATH_OPER_ATTRIBUTE_MAX (FIB_PATH_OPER_ATTRIBUTE_LAST + 1)
131
132 #define FIB_PATH_OPER_ATTRIBUTES {                                      \
133     [FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP] = "recursive-loop",        \
134     [FIB_PATH_OPER_ATTRIBUTE_RESOLVED]       = "resolved",              \
135     [FIB_PATH_OPER_ATTRIBUTE_DROP]           = "drop",                  \
136 }
137
138 #define FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(_item) \
139     for (_item = FIB_PATH_OPER_ATTRIBUTE_FIRST; \
140          _item <= FIB_PATH_OPER_ATTRIBUTE_LAST; \
141          _item++)
142
143 /**
144  * Path flags from the attributes
145  */
146 typedef enum fib_path_oper_flags_t_ {
147     FIB_PATH_OPER_FLAG_NONE = 0,
148     FIB_PATH_OPER_FLAG_RECURSIVE_LOOP = (1 << FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP),
149     FIB_PATH_OPER_FLAG_DROP = (1 << FIB_PATH_OPER_ATTRIBUTE_DROP),
150     FIB_PATH_OPER_FLAG_RESOLVED = (1 << FIB_PATH_OPER_ATTRIBUTE_RESOLVED),
151     FIB_PATH_OPER_FLAG_ATTACHED = (1 << FIB_PATH_OPER_ATTRIBUTE_ATTACHED),
152 } __attribute__ ((packed)) fib_path_oper_flags_t;
153
154 /**
155  * A FIB path
156  */
157 typedef struct fib_path_t_ {
158     /**
159      * A path is a node in the FIB graph.
160      */
161     fib_node_t fp_node;
162
163     /**
164      * The index of the path-list to which this path belongs
165      */
166     u32 fp_pl_index;
167
168     /**
169      * This marks the start of the memory area used to hash
170      * the path
171      */
172     STRUCT_MARK(path_hash_start);
173
174     /**
175      * Configuration Flags
176      */
177     fib_path_cfg_flags_t fp_cfg_flags;
178
179     /**
180      * The type of the path. This is the selector for the union
181      */
182     fib_path_type_t fp_type;
183
184     /**
185      * The protocol of the next-hop, i.e. the address family of the
186      * next-hop's address. We can't derive this from the address itself
187      * since the address can be all zeros
188      */
189     fib_protocol_t fp_nh_proto;
190
191     /**
192      * UCMP [unnormalised] weight
193      */
194     u32 fp_weight;
195
196     /**
197      * per-type union of the data required to resolve the path
198      */
199     union {
200         struct {
201             /**
202              * The next-hop
203              */
204             ip46_address_t fp_nh;
205             /**
206              * The interface
207              */
208             u32 fp_interface;
209         } attached_next_hop;
210         struct {
211             /**
212              * The interface
213              */
214             u32 fp_interface;
215         } attached;
216         struct {
217             union
218             {
219                 /**
220                  * The next-hop
221                  */
222                 ip46_address_t fp_ip;
223                 /**
224                  * The local label to resolve through.
225                  */
226                 mpls_label_t fp_local_label;
227             } fp_nh;
228             /**
229              * The FIB table index in which to find the next-hop.
230              * This needs to be fixed. We should lookup the adjacencies in
231              * a separate table of adjacencies, rather than from the FIB.
232              * Two reasons I can think of:
233              *   - consider:
234              *       int ip addr Gig0 10.0.0.1/24
235              *       ip route 10.0.0.2/32 via Gig1 192.168.1.2
236              *       ip route 1.1.1.1/32 via Gig0 10.0.0.2
237              *     this is perfectly valid.
238              *     Packets addressed to 10.0.0.2 should be sent via Gig1.
239      *     Packets addressed to 1.1.1.1 should be sent via Gig0.
240              *    when we perform the adj resolution from the FIB for the path
241              *    "via Gig0 10.0.0.2" the lookup will result in the route via Gig1
242              *    and so we will pick up the adj via Gig1 - which was not what the
243              *    operator wanted.
244              *  - we can only return link-type IPv4 and so not the link-type MPLS.
245              *    more on this in a later commit.
246              *
247              * The table ID should only belong to a recursive path and indicate
248              * which FIB should be used to resolve the next-hop.
249              */
250             fib_node_index_t fp_tbl_id;
251         } recursive;
252         struct {
253             /**
254              * The FIB index in which to perform the next lookup
255              */
256             fib_node_index_t fp_tbl_id;
257         } deag;
258         struct {
259         } special;
260         struct {
261             /**
262              * The user provided 'exclusive' DPO
263              */
264             dpo_id_t fp_ex_dpo;
265         } exclusive;
266         struct {
267             /**
268              * The interface on which the local address is configured
269              */
270             u32 fp_interface;
271             /**
272              * The next-hop
273              */
274             ip46_address_t fp_addr;
275         } receive;
276     };
277     STRUCT_MARK(path_hash_end);
278
279     /**
280      * Members in this last section represent information that is
281      * derived during resolution. They should not be copied to new
282      * paths nor compared.
283      */
284
285     /**
286      * Operational Flags
287      */
288     fib_path_oper_flags_t fp_oper_flags;
289
290     /**
291      * the resolving via fib. not part of the union, since it is not part
292      * of the path's hash.
293      */
294     fib_node_index_t fp_via_fib;
295
296     /**
297      * The Data-path objects through which this path resolves for IP.
298      */
299     dpo_id_t fp_dpo;
300
301     /**
302      * the index of this path in the parent's child list.
303      */
304     u32 fp_sibling;
305 } fib_path_t;
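
/*
 * Note: the per-type union above is discriminated by fp_type, so a reader
 * must switch on the type before touching the per-type data, e.g. (sketch):
 *
 *   switch (path->fp_type) {
 *   case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
 *       // path->attached_next_hop.fp_nh / .fp_interface are valid
 *       break;
 *   case FIB_PATH_TYPE_RECURSIVE:
 *       // path->recursive.fp_nh / .fp_tbl_id are valid
 *       break;
 *   ...
 *   }
 *
 * Only the members between the path_hash_start and path_hash_end markers
 * form the path's identity (see fib_path_hash() and fib_path_cmp_i());
 * the members that follow are state derived during resolution.
 */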
306
307 /*
308  * Array of strings/names for the path types and attributes
309  */
310 static const char *fib_path_type_names[] = FIB_PATH_TYPES;
311 static const char *fib_path_oper_attribute_names[] = FIB_PATH_OPER_ATTRIBUTES;
312 static const char *fib_path_cfg_attribute_names[]  = FIB_PATH_CFG_ATTRIBUTES;
313
314 /*
315  * The memory pool from which we allocate all the paths
316  */
317 static fib_path_t *fib_path_pool;
318
319 /*
320  * Debug macro
321  */
322 #ifdef FIB_DEBUG
323 #define FIB_PATH_DBG(_p, _fmt, _args...)                        \
324 {                                                               \
325     u8 *_tmp = NULL;                                            \
326     _tmp = fib_path_format(fib_path_get_index(_p), _tmp);       \
327     clib_warning("path:[%d:%s]:" _fmt,                          \
328                  fib_path_get_index(_p), _tmp,                  \
329                  ##_args);                                      \
330     vec_free(_tmp);                                             \
331 }
332 #else
333 #define FIB_PATH_DBG(_p, _fmt, _args...)
334 #endif
335
336 static fib_path_t *
337 fib_path_get (fib_node_index_t index)
338 {
339     return (pool_elt_at_index(fib_path_pool, index));
340 }
341
342 static fib_node_index_t 
343 fib_path_get_index (fib_path_t *path)
344 {
345     return (path - fib_path_pool);
346 }
347
348 static fib_node_t *
349 fib_path_get_node (fib_node_index_t index)
350 {
351     return ((fib_node_t*)fib_path_get(index));
352 }
353
354 static fib_path_t*
355 fib_path_from_fib_node (fib_node_t *node)
356 {
357 #if CLIB_DEBUG > 0
358     ASSERT(FIB_NODE_TYPE_PATH == node->fn_type);
359 #endif
360     return ((fib_path_t*)node);
361 }
362
363 u8 *
364 format_fib_path (u8 * s, va_list * args)
365 {
366     fib_path_t *path = va_arg (*args, fib_path_t *);
367     vnet_main_t * vnm = vnet_get_main();
368     fib_path_oper_attribute_t oattr;
369     fib_path_cfg_attribute_t cattr;
370
371     s = format (s, "      index:%d ", fib_path_get_index(path));
372     s = format (s, "pl-index:%d ", path->fp_pl_index);
373     s = format (s, "%U ", format_fib_protocol, path->fp_nh_proto);
374     s = format (s, "weight=%d ", path->fp_weight);
375     s = format (s, "%s: ", fib_path_type_names[path->fp_type]);
376     if (FIB_PATH_OPER_FLAG_NONE != path->fp_oper_flags) {
377         s = format(s, " oper-flags:");
378         FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(oattr) {
379             if ((1<<oattr) & path->fp_oper_flags) {
380                 s = format (s, "%s,", fib_path_oper_attribute_names[oattr]);
381             }
382         }
383     }
384     if (FIB_PATH_CFG_FLAG_NONE != path->fp_cfg_flags) {
385         s = format(s, " cfg-flags:");
386         FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(cattr) {
387             if ((1<<cattr) & path->fp_cfg_flags) {
388                 s = format (s, "%s,", fib_path_cfg_attribute_names[cattr]);
389             }
390         }
391     }
392     s = format(s, "\n       ");
393
394     switch (path->fp_type)
395     {
396     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
397         s = format (s, "%U", format_ip46_address,
398                     &path->attached_next_hop.fp_nh,
399                     IP46_TYPE_ANY);
400         if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)
401         {
402             s = format (s, " if_index:%d", path->attached_next_hop.fp_interface);
403         }
404         else
405         {
406             s = format (s, " %U",
407                         format_vnet_sw_interface_name,
408                         vnm,
409                         vnet_get_sw_interface(
410                             vnm,
411                             path->attached_next_hop.fp_interface));
412             if (vnet_sw_interface_is_p2p(vnet_get_main(),
413                                          path->attached_next_hop.fp_interface))
414             {
415                 s = format (s, " (p2p)");
416             }
417         }
418         if (!dpo_id_is_valid(&path->fp_dpo))
419         {
420             s = format(s, "\n          unresolved");
421         }
422         else
423         {
424             s = format(s, "\n          %U",
425                        format_dpo_id,
426                        &path->fp_dpo, 13);
427         }
428         break;
429     case FIB_PATH_TYPE_ATTACHED:
430         if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)
431         {
432             s = format (s, " if_index:%d", path->attached_next_hop.fp_interface);
433         }
434         else
435         {
436             s = format (s, " %U",
437                         format_vnet_sw_interface_name,
438                         vnm,
439                         vnet_get_sw_interface(
440                             vnm,
441                             path->attached.fp_interface));
442         }
443         break;
444     case FIB_PATH_TYPE_RECURSIVE:
445         if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
446         {
447             s = format (s, "via %U",
448                         format_mpls_unicast_label,
449                         path->recursive.fp_nh.fp_local_label);
450         }
451         else
452         {
453             s = format (s, "via %U",
454                         format_ip46_address,
455                         &path->recursive.fp_nh.fp_ip,
456                         IP46_TYPE_ANY);
457         }
458         s = format (s, " in fib:%d",
459                     path->recursive.fp_tbl_id);
460
461         s = format (s, " via-fib:%d", path->fp_via_fib); 
462         s = format (s, " via-dpo:[%U:%d]",
463                     format_dpo_type, path->fp_dpo.dpoi_type, 
464                     path->fp_dpo.dpoi_index);
465
466         break;
467     case FIB_PATH_TYPE_RECEIVE:
468     case FIB_PATH_TYPE_SPECIAL:
469     case FIB_PATH_TYPE_DEAG:
470     case FIB_PATH_TYPE_EXCLUSIVE:
471         if (dpo_id_is_valid(&path->fp_dpo))
472         {
473             s = format(s, "%U", format_dpo_id,
474                        &path->fp_dpo, 2);
475         }
476         break;
477     }
478     return (s);
479 }
480
481 u8 *
482 fib_path_format (fib_node_index_t pi, u8 *s)
483 {
484     fib_path_t *path;
485
486     path = fib_path_get(pi);
487     ASSERT(NULL != path);
488
489     return (format (s, "%U", format_fib_path, path));
490 }
491
492 u8 *
493 fib_path_adj_format (fib_node_index_t pi,
494                      u32 indent,
495                      u8 *s)
496 {
497     fib_path_t *path;
498
499     path = fib_path_get(pi);
500     ASSERT(NULL != path);
501
502     if (!dpo_id_is_valid(&path->fp_dpo))
503     {
504         s = format(s, " unresolved");
505     }
506     else
507     {
508         s = format(s, "%U", format_dpo_id,
509                    &path->fp_dpo, 2);
510     }
511
512     return (s);
513 }
514
515 /*
516  * fib_path_last_lock_gone
517  *
518  * We don't share paths, we share path lists, so the [un]lock functions
519  * are no-ops
520  */
521 static void
522 fib_path_last_lock_gone (fib_node_t *node)
523 {
524     ASSERT(0);
525 }
526
527 static adj_index_t
528 fib_path_attached_next_hop_get_adj (fib_path_t *path,
529                                     vnet_link_t link)
530 {
531     if (vnet_sw_interface_is_p2p(vnet_get_main(),
532                                  path->attached_next_hop.fp_interface))
533     {
534         /*
535          * if the interface is p2p then the adj for the specific
536          * neighbour on that link will never exist. on p2p links
537          * the subnet address (the attached route) links to the
538          * auto-adj (see below), we want that adj here too.
539          */
540         return (adj_nbr_add_or_lock(path->fp_nh_proto,
541                                     link,
542                                     &zero_addr,
543                                     path->attached_next_hop.fp_interface));
544     }
545     else
546     {
547         return (adj_nbr_add_or_lock(path->fp_nh_proto,
548                                     link,
549                                     &path->attached_next_hop.fp_nh,
550                                     path->attached_next_hop.fp_interface));
551     }
552 }
553
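/*
 * fib_path_attached_next_hop_set
 *
 * Resolve the path via a neighbour adjacency on its configured interface.
 * The caller (fib_path_resolve) has already marked the path as resolved;
 * this function only clears that flag if the interface is admin down.
 * The path is also made a child of the adjacency so it is back-walked
 * when the adjacency's rewrite changes.
 */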
554 static void
555 fib_path_attached_next_hop_set (fib_path_t *path)
556 {
557     /*
558      * resolve directly via the adjacency described by the
559      * interface and next-hop
560      */
561     if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
562                                       path->attached_next_hop.fp_interface))
563     {
564         path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
565     }
566
567     dpo_set(&path->fp_dpo,
568             DPO_ADJACENCY,
569             fib_proto_to_dpo(path->fp_nh_proto),
570             fib_path_attached_next_hop_get_adj(
571                  path,
572                  fib_proto_to_link(path->fp_nh_proto)));
573
574     /*
575      * become a child of the adjacency so we receive updates
576      * when its rewrite changes
577      */
578     path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
579                                      FIB_NODE_TYPE_PATH,
580                                      fib_path_get_index(path));
581 }
582
583 /*
584  * create or update the path's recursive adj
585  */
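/*
 * (invoked both when the path is first resolved and from the back-walk
 *  when the via-entry's forwarding is updated)
 */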
586 static void
587 fib_path_recursive_adj_update (fib_path_t *path,
588                                fib_forward_chain_type_t fct,
589                                dpo_id_t *dpo)
590 {
591     dpo_id_t via_dpo = DPO_INVALID;
592
593     /*
594      * get the DPO to resolve through from the via-entry
595      */
596     fib_entry_contribute_forwarding(path->fp_via_fib,
597                                     fct,
598                                     &via_dpo);
599
600
601     /*
602      * hope for the best - clear if restrictions apply.
603      */
604     path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
605
606     /*
607      * Validate any recursion constraints and over-ride the via
608      * adj if not met
609      */
610     if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP)
611     {
612         path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
613         dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
614     }
615     else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_HOST)
616     {
617         /*
618          * the via FIB must be a host route.
619          * note the via FIB just added will always be a host route
620          * since it is an RR source added host route. So what we need to
621          * check is whether the route has other sources. If it does then
622          * some other source has added it as a host route. If it doesn't
623          * then it was added only here and inherits forwarding from a cover.
624          * the cover is not a host route.
625          * The RR source is the lowest priority source, so we check if it
626          * is the best. if it is, there are no other sources.
627          */
628         if (fib_entry_get_best_source(path->fp_via_fib) >= FIB_SOURCE_RR)
629         {
630             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
631             dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
632
633             /*
634              * PIC edge trigger. let the load-balance maps know
635              */
636             load_balance_map_path_state_change(fib_path_get_index(path));
637         }
638     }
639     else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED)
640     {
641         /*
642          * RR source entries inherit the flags from the cover, so
643          * we can check the via directly
644          */
645         if (!(FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags(path->fp_via_fib)))
646         {
647             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
648             dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
649
650             /*
651              * PIC edge trigger. let the load-balance maps know
652              */
653             load_balance_map_path_state_change(fib_path_get_index(path));
654         }
655     }
656
657     /*
658      * update the path's contributed DPO
659      */
660     dpo_copy(dpo, &via_dpo);
661
662     FIB_PATH_DBG(path, "recursive update: %U",
663                  fib_get_lookup_main(path->fp_nh_proto),
664                  &path->fp_dpo, 2);
665
666     dpo_reset(&via_dpo);
667 }
668
669 /*
670  * fib_path_is_permanent_drop
671  *
672  * Return !0 if the path is configured to permanently drop,
673  * despite other attributes.
674  */
675 static int
676 fib_path_is_permanent_drop (fib_path_t *path)
677 {
678     return ((path->fp_cfg_flags & FIB_PATH_CFG_FLAG_DROP) ||
679             (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP));
680 }
681
682 /*
683  * fib_path_unresolve
684  *
685  * Remove our dependency on the resolution target
686  */
687 static void
688 fib_path_unresolve (fib_path_t *path)
689 {
690     /*
691      * the forced drop path does not need unresolving
692      */
693     if (fib_path_is_permanent_drop(path))
694     {
695         return;
696     }
697
698     switch (path->fp_type)
699     {
700     case FIB_PATH_TYPE_RECURSIVE:
701         if (FIB_NODE_INDEX_INVALID != path->fp_via_fib)
702         {
703             fib_prefix_t pfx;
704
705             fib_entry_get_prefix(path->fp_via_fib, &pfx);
706             fib_entry_child_remove(path->fp_via_fib,
707                                    path->fp_sibling);
708             fib_table_entry_special_remove(path->recursive.fp_tbl_id,
709                                            &pfx,
710                                            FIB_SOURCE_RR);
711             path->fp_via_fib = FIB_NODE_INDEX_INVALID;
712         }
713         break;
714     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
715     case FIB_PATH_TYPE_ATTACHED:
716         adj_child_remove(path->fp_dpo.dpoi_index,
717                          path->fp_sibling);
718         adj_unlock(path->fp_dpo.dpoi_index);
719         break;
720     case FIB_PATH_TYPE_EXCLUSIVE:
721         dpo_reset(&path->exclusive.fp_ex_dpo);
722         break;
723     case FIB_PATH_TYPE_SPECIAL:
724     case FIB_PATH_TYPE_RECEIVE:
725     case FIB_PATH_TYPE_DEAG:
726         /*
727          * these hold only the path's DPO, which is reset below.
728          */
729         break;
730     }
731
732     /*
733      * release the adj we were holding and pick up the
734      * drop just in case.
735      */
736     dpo_reset(&path->fp_dpo);
737     path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
738
739     return;
740 }
741
742 static fib_forward_chain_type_t
743 fib_path_proto_to_chain_type (fib_protocol_t proto)
744 {
745     switch (proto)
746     {
747     case FIB_PROTOCOL_IP4:
748         return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
749     case FIB_PROTOCOL_IP6:
750         return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
751     case FIB_PROTOCOL_MPLS:
752         return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
753     }
754     return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
755 }
756
757 /*
758  * fib_path_back_walk_notify
759  *
760  * A back walk has reached this path.
761  */
762 static fib_node_back_walk_rc_t
763 fib_path_back_walk_notify (fib_node_t *node,
764                            fib_node_back_walk_ctx_t *ctx)
765 {
766     fib_path_t *path;
767
768     path = fib_path_from_fib_node(node);
769
770     switch (path->fp_type)
771     {
772     case FIB_PATH_TYPE_RECURSIVE:
773         if (FIB_NODE_BW_REASON_FLAG_EVALUATE & ctx->fnbw_reason)
774         {
775             /*
776              * modify the recursive adjacency to use the new forwarding
777              * of the via-fib.
778              * this update is visible to packets in flight in the DP.
779              */
780             fib_path_recursive_adj_update(
781                 path,
782                 fib_path_proto_to_chain_type(path->fp_nh_proto),
783                 &path->fp_dpo);
784         }
785         if ((FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason) ||
786             (FIB_NODE_BW_REASON_FLAG_ADJ_DOWN   & ctx->fnbw_reason))
787         {
788             /*
789              * ADJ updates (complete<->incomplete) do not need to propagate to
790              * recursive entries.
791              * The only reason it's needed as far back as here is that the adj
792              * and the incomplete adj are a different DPO type, so the LBs need
793              * to re-stack.
794              * If this walk was quashed in the fib_entry, then any non-fib_path
795              * children (like tunnels that collapse out the LB when they stack)
796              * would not see the update.
797              */
798             return (FIB_NODE_BACK_WALK_CONTINUE);
799         }
800         break;
801     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
802         /*
803          * ADJ_UPDATE backwalks pass silently through here and up to
804          * the path-list when the multipath adj collapse occurs.
805          * The reason we do this is the assumption that VPP runs in an
806          * environment where the Control-Plane is remote and hence
807          * reacts slowly to link up/down. In order to remove a downed
808          * link from the ECMP set quickly, we back-walk.
809          * VPP also has dedicated CPUs, so we are not stealing resources
810          * from the CP to do so.
811          */
812
813         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason)
814         {
815             if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED)
816             {
817                 /*
818                  * already resolved. no need to walk back again
819                  */
820                 return (FIB_NODE_BACK_WALK_CONTINUE);
821             }
822             path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
823         }
824         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason)
825         {
826             if (!(path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED))
827             {
828                 /*
829                  * already unresolved. no need to walk back again
830                  */
831                 return (FIB_NODE_BACK_WALK_CONTINUE);
832             }
833             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
834         }
835         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
836         {
837             /*
838              * The interface this path resolves through has been deleted.
839              * This will leave the path in a permanent drop state. The route
840              * needs to be removed and readded (and hence the path-list deleted)
841              * before it can forward again.
842              */
843             fib_path_unresolve(path);
844             path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
845         }
846         if (FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason)
847         {
848             /*
849              * restack the DPO to pick up the correct DPO sub-type
850              */
851             uword if_is_up;
852             adj_index_t ai;
853
854             if_is_up = vnet_sw_interface_is_admin_up(
855                            vnet_get_main(),
856                            path->attached_next_hop.fp_interface);
857
858             if (if_is_up)
859             {
860                 path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
861             }
862
863             ai = fib_path_attached_next_hop_get_adj(
864                      path,
865                      fib_proto_to_link(path->fp_nh_proto));
866
867             dpo_set(&path->fp_dpo, DPO_ADJACENCY,
868                     fib_proto_to_dpo(path->fp_nh_proto),
869                     ai);
870             adj_unlock(ai);
871
872             if (!if_is_up)
873             {
874                 /*
875                  * If the interface is not up there is no reason to walk
876                  * back to children. if we did they would only evaluate
877                  * that this path is unresolved and hence it would
878                  * not contribute the adjacency - so it would be wasted
879                  * CPU time.
880                  */
881                 return (FIB_NODE_BACK_WALK_CONTINUE);
882             }
883         }
884         if (FIB_NODE_BW_REASON_FLAG_ADJ_DOWN & ctx->fnbw_reason)
885         {
886             if (!(path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED))
887             {
888                 /*
889                  * already unresolved. no need to walk back again
890                  */
891                 return (FIB_NODE_BACK_WALK_CONTINUE);
892             }
893             /*
894              * the adj has gone down. the path is no longer resolved.
895              */
896             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
897         }
898         break;
899     case FIB_PATH_TYPE_ATTACHED:
900         /*
901          * FIXME; this could schedule a lower priority walk, since attached
902          * routes are not usually in ECMP configurations so the backwalk to
903          * the FIB entry does not need to be high priority
904          */
905         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason)
906         {
907             path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
908         }
909         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason)
910         {
911             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
912         }
913         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
914         {
915             fib_path_unresolve(path);
916             path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
917         }
918         break;
919     case FIB_PATH_TYPE_DEAG:
920         /*
921          * FIXME When VRF delete is allowed this will need a poke.
922          */
923     case FIB_PATH_TYPE_SPECIAL:
924     case FIB_PATH_TYPE_RECEIVE:
925     case FIB_PATH_TYPE_EXCLUSIVE:
926         /*
927          * these path types have no parents, so being
928          * walked from one is unexpected.
929          */
930         ASSERT(0);
931         break;
932     }
933
934     /*
935      * propagate the backwalk further to the path-list
936      */
937     fib_path_list_back_walk(path->fp_pl_index, ctx);
938
939     return (FIB_NODE_BACK_WALK_CONTINUE);
940 }
941
942 static void
943 fib_path_memory_show (void)
944 {
945     fib_show_memory_usage("Path",
946                           pool_elts(fib_path_pool),
947                           pool_len(fib_path_pool),
948                           sizeof(fib_path_t));
949 }
950
951 /*
952  * The FIB path's graph node virtual function table
953  */
954 static const fib_node_vft_t fib_path_vft = {
955     .fnv_get = fib_path_get_node,
956     .fnv_last_lock = fib_path_last_lock_gone,
957     .fnv_back_walk = fib_path_back_walk_notify,
958     .fnv_mem_show = fib_path_memory_show,
959 };
960
961 static fib_path_cfg_flags_t
962 fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath)
963 {
964     fib_path_cfg_flags_t cfg_flags = FIB_PATH_CFG_FLAG_NONE;
965
966     if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_HOST)
967         cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_HOST;
968     if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED)
969         cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED;
970     if (rpath->frp_flags & FIB_ROUTE_PATH_LOCAL)
971         cfg_flags |= FIB_PATH_CFG_FLAG_LOCAL;
972     if (rpath->frp_flags & FIB_ROUTE_PATH_ATTACHED)
973         cfg_flags |= FIB_PATH_CFG_FLAG_ATTACHED;
974
975     return (cfg_flags);
976 }
977
978 /*
979  * fib_path_create
980  *
981  * Create and initialise a new path object.
982  * return the index of the path.
983  */
984 fib_node_index_t
985 fib_path_create (fib_node_index_t pl_index,
986                  fib_protocol_t nh_proto,
987                  fib_path_cfg_flags_t flags,
988                  const fib_route_path_t *rpath)
989 {
990     fib_path_t *path;
991
992     pool_get(fib_path_pool, path);
993     memset(path, 0, sizeof(*path));
994
995     fib_node_init(&path->fp_node,
996                   FIB_NODE_TYPE_PATH);
997
998     dpo_reset(&path->fp_dpo);
999     path->fp_pl_index = pl_index;
1000     path->fp_nh_proto = nh_proto;
1001     path->fp_via_fib = FIB_NODE_INDEX_INVALID;
1002     path->fp_weight = rpath->frp_weight;
1003     if (0 == path->fp_weight)
1004     {
1005         /*
1006          * a weight of 0 is a meaningless value. We could either reject it, and thus force
1007          * clients to always use 1, or we can accept it and fix it up appropriately.
1008          */
1009         path->fp_weight = 1;
1010     }
1011     path->fp_cfg_flags = flags;
1012     path->fp_cfg_flags |= fib_path_route_flags_to_cfg_flags(rpath);
1013
1014     /*
1015      * deduce the path's type from the parameters and save what is needed.
1016      */
1017     if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_LOCAL)
1018     {
1019         path->fp_type = FIB_PATH_TYPE_RECEIVE;
1020         path->receive.fp_interface = rpath->frp_sw_if_index;
1021         path->receive.fp_addr = rpath->frp_addr;
1022     }
1023     else if (~0 != rpath->frp_sw_if_index)
1024     {
1025         if (ip46_address_is_zero(&rpath->frp_addr))
1026         {
1027             path->fp_type = FIB_PATH_TYPE_ATTACHED;
1028             path->attached.fp_interface = rpath->frp_sw_if_index;
1029         }
1030         else
1031         {
1032             path->fp_type = FIB_PATH_TYPE_ATTACHED_NEXT_HOP;
1033             path->attached_next_hop.fp_interface = rpath->frp_sw_if_index;
1034             path->attached_next_hop.fp_nh = rpath->frp_addr;
1035         }
1036     }
1037     else
1038     {
1039         if (ip46_address_is_zero(&rpath->frp_addr))
1040         {
1041             if (~0 == rpath->frp_fib_index)
1042             {
1043                 path->fp_type = FIB_PATH_TYPE_SPECIAL;
1044             }
1045             else
1046             {
1047                 path->fp_type = FIB_PATH_TYPE_DEAG;
1048                 path->deag.fp_tbl_id = rpath->frp_fib_index;
1049             }           
1050         }
1051         else
1052         {
1053             path->fp_type = FIB_PATH_TYPE_RECURSIVE;
1054             if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
1055             {
1056                 path->recursive.fp_nh.fp_local_label = rpath->frp_local_label;
1057             }
1058             else
1059             {
1060                 path->recursive.fp_nh.fp_ip = rpath->frp_addr;
1061             }
1062             path->recursive.fp_tbl_id = rpath->frp_fib_index;
1063         }
1064     }
1065
1066     FIB_PATH_DBG(path, "create");
1067
1068     return (fib_path_get_index(path));
1069 }
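
/*
 * Rough illustration of how the deduction above maps route configuration
 * to path types (interface and address values are examples only):
 *
 *   FIB_ROUTE_PATH_LOCAL flag (checked first) -> receive (for-us)
 *   next-hop and interface given              -> attached-nexthop
 *     e.g. "via 10.0.0.2 GigabitEthernet0/0/0"
 *   interface only (zero address)             -> attached
 *   next-hop only (no interface)              -> recursive, resolved in the
 *                                                table given by frp_fib_index
 *   neither, with a valid frp_fib_index       -> deag (lookup in that table)
 *   neither, and no fib index                 -> special (drop)
 */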
1070
1071 /*
1072  * fib_path_create_special
1073  *
1074  * Create and initialise a new path object.
1075  * return the index of the path.
1076  */
1077 fib_node_index_t
1078 fib_path_create_special (fib_node_index_t pl_index,
1079                          fib_protocol_t nh_proto,
1080                          fib_path_cfg_flags_t flags,
1081                          const dpo_id_t *dpo)
1082 {
1083     fib_path_t *path;
1084
1085     pool_get(fib_path_pool, path);
1086     memset(path, 0, sizeof(*path));
1087
1088     fib_node_init(&path->fp_node,
1089                   FIB_NODE_TYPE_PATH);
1090     dpo_reset(&path->fp_dpo);
1091
1092     path->fp_pl_index = pl_index;
1093     path->fp_weight = 1;
1094     path->fp_nh_proto = nh_proto;
1095     path->fp_via_fib = FIB_NODE_INDEX_INVALID;
1096     path->fp_cfg_flags = flags;
1097
1098     if (FIB_PATH_CFG_FLAG_DROP & flags)
1099     {
1100         path->fp_type = FIB_PATH_TYPE_SPECIAL;
1101     }
1102     else if (FIB_PATH_CFG_FLAG_LOCAL & flags)
1103     {
1104         path->fp_type = FIB_PATH_TYPE_RECEIVE;
1105         path->attached.fp_interface = FIB_NODE_INDEX_INVALID;
1106     }
1107     else
1108     {
1109         path->fp_type = FIB_PATH_TYPE_EXCLUSIVE;
1110         ASSERT(NULL != dpo);
1111         dpo_copy(&path->exclusive.fp_ex_dpo, dpo);
1112     }
1113
1114     return (fib_path_get_index(path));
1115 }
1116
1117 /*
1118  * fib_path_copy
1119  *
1120  * Copy a path. return index of new path.
1121  */
1122 fib_node_index_t
1123 fib_path_copy (fib_node_index_t path_index,
1124                fib_node_index_t path_list_index)
1125 {
1126     fib_path_t *path, *orig_path;
1127
1128     pool_get(fib_path_pool, path);
1129
1130     orig_path = fib_path_get(path_index);
1131     ASSERT(NULL != orig_path);
1132
1133     memcpy(path, orig_path, sizeof(*path));
1134
1135     FIB_PATH_DBG(path, "create-copy:%d", path_index);
1136
1137     /*
1138      * reset the dynamic section
1139      */
1140     fib_node_init(&path->fp_node, FIB_NODE_TYPE_PATH);
1141     path->fp_oper_flags     = FIB_PATH_OPER_FLAG_NONE;
1142     path->fp_pl_index  = path_list_index;
1143     path->fp_via_fib   = FIB_NODE_INDEX_INVALID;
1144     memset(&path->fp_dpo, 0, sizeof(path->fp_dpo));
1145     dpo_reset(&path->fp_dpo);
1146
1147     return (fib_path_get_index(path));
1148 }
1149
1150 /*
1151  * fib_path_destroy
1152  *
1153  * destroy a path that is no longer required
1154  */
1155 void
1156 fib_path_destroy (fib_node_index_t path_index)
1157 {
1158     fib_path_t *path;
1159
1160     path = fib_path_get(path_index);
1161
1162     ASSERT(NULL != path);
1163     FIB_PATH_DBG(path, "destroy");
1164
1165     fib_path_unresolve(path);
1166
1167     fib_node_deinit(&path->fp_node);
1168     pool_put(fib_path_pool, path);
1169 }
1170
1171 /*
1172  * fib_path_hash
1173  *
1174  * compute the hash of a path (over its identity members only)
1175  */
1176 uword
1177 fib_path_hash (fib_node_index_t path_index)
1178 {
1179     fib_path_t *path;
1180
1181     path = fib_path_get(path_index);
1182
1183     return (hash_memory(STRUCT_MARK_PTR(path, path_hash_start),
1184                         (STRUCT_OFFSET_OF(fib_path_t, path_hash_end) -
1185                          STRUCT_OFFSET_OF(fib_path_t, path_hash_start)),
1186                         0));
1187 }
1188
1189 /*
1190  * fib_path_cmp_i
1191  *
1192  * Compare two paths for equivalence.
1193  */
1194 static int
1195 fib_path_cmp_i (const fib_path_t *path1,
1196                 const fib_path_t *path2)
1197 {
1198     int res;
1199
1200     res = 1;
1201
1202     /*
1203      * paths of different types or protocols are not equal.
1204      * paths that differ only in weight are considered the same.
1205      */
1206     if (path1->fp_type != path2->fp_type)
1207     {
1208         res = (path1->fp_type - path2->fp_type);
1209     }
1210     else if (path1->fp_nh_proto != path2->fp_nh_proto)
1211     {
1212         res = (path1->fp_nh_proto - path2->fp_nh_proto);
1213     }
1214     else
1215     {
1216         /*
1217          * both paths are of the same type.
1218          * consider each type and its attributes in turn.
1219          */
1220         switch (path1->fp_type)
1221         {
1222         case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1223             res = ip46_address_cmp(&path1->attached_next_hop.fp_nh,
1224                                    &path2->attached_next_hop.fp_nh);
1225             if (0 == res) {
1226                 res = vnet_sw_interface_compare(
1227                           vnet_get_main(),
1228                           path1->attached_next_hop.fp_interface,
1229                           path2->attached_next_hop.fp_interface);
1230             }
1231             break;
1232         case FIB_PATH_TYPE_ATTACHED:
1233             res = vnet_sw_interface_compare(
1234                       vnet_get_main(),
1235                       path1->attached.fp_interface,
1236                       path2->attached.fp_interface);
1237             break;
1238         case FIB_PATH_TYPE_RECURSIVE:
1239             res = ip46_address_cmp(&path1->recursive.fp_nh,
1240                                    &path2->recursive.fp_nh);
1241  
1242             if (0 == res)
1243             {
1244                 res = (path1->recursive.fp_tbl_id - path2->recursive.fp_tbl_id);
1245             }
1246             break;
1247         case FIB_PATH_TYPE_DEAG:
1248             res = (path1->deag.fp_tbl_id - path2->deag.fp_tbl_id);
1249             break;
1250         case FIB_PATH_TYPE_SPECIAL:
1251         case FIB_PATH_TYPE_RECEIVE:
1252         case FIB_PATH_TYPE_EXCLUSIVE:
1253             res = 0;
1254             break;
1255         }
1256     }
1257     return (res);
1258 }
1259
1260 /*
1261  * fib_path_cmp_for_sort
1262  *
1263  * Compare two paths for equivalence. Used during path sorting.
1264  * As usual 0 means equal.
1265  */
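/*
 * Note: the void* arguments are pointers to vector elements (path
 * indices) - the signature expected by the vppinfra vector sort
 * helpers (e.g. vec_sort_with_function).
 */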
1266 int
1267 fib_path_cmp_for_sort (void * v1,
1268                        void * v2)
1269 {
1270     fib_node_index_t *pi1 = v1, *pi2 = v2;
1271     fib_path_t *path1, *path2;
1272
1273     path1 = fib_path_get(*pi1);
1274     path2 = fib_path_get(*pi2);
1275
1276     return (fib_path_cmp_i(path1, path2));
1277 }
1278
1279 /*
1280  * fib_path_cmp
1281  *
1282  * Compare two paths for equivalence.
1283  */
1284 int
1285 fib_path_cmp (fib_node_index_t pi1,
1286               fib_node_index_t pi2)
1287 {
1288     fib_path_t *path1, *path2;
1289
1290     path1 = fib_path_get(pi1);
1291     path2 = fib_path_get(pi2);
1292
1293     return (fib_path_cmp_i(path1, path2));
1294 }
1295
1296 int
1297 fib_path_cmp_w_route_path (fib_node_index_t path_index,
1298                            const fib_route_path_t *rpath)
1299 {
1300     fib_path_t *path;
1301     int res;
1302
1303     path = fib_path_get(path_index);
1304
1305     res = 1;
1306
1307     if (path->fp_weight != rpath->frp_weight)
1308     {
1309         res = (path->fp_weight - rpath->frp_weight);
1310     }
1311     else
1312     {
1313         /*
1314          * select on the path's type and compare the corresponding
1315          * attributes of the route-path.
1316          */
1317         switch (path->fp_type)
1318         {
1319         case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1320             res = ip46_address_cmp(&path->attached_next_hop.fp_nh,
1321                                    &rpath->frp_addr);
1322             if (0 == res)
1323             {
1324                 res = vnet_sw_interface_compare(
1325                           vnet_get_main(),
1326                           path->attached_next_hop.fp_interface,
1327                           rpath->frp_sw_if_index);
1328             }
1329             break;
1330         case FIB_PATH_TYPE_ATTACHED:
1331             res = vnet_sw_interface_compare(
1332                       vnet_get_main(),
1333                       path->attached.fp_interface,
1334                       rpath->frp_sw_if_index);
1335             break;
1336         case FIB_PATH_TYPE_RECURSIVE:
1337             if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
1338             {
1339                 res = path->recursive.fp_nh.fp_local_label - rpath->frp_local_label;
1340             }
1341             else
1342             {
1343                 res = ip46_address_cmp(&path->recursive.fp_nh.fp_ip,
1344                                        &rpath->frp_addr);
1345             }
1346
1347             if (0 == res)
1348             {
1349                 res = (path->recursive.fp_tbl_id - rpath->frp_fib_index);
1350             }
1351             break;
1352         case FIB_PATH_TYPE_DEAG:
1353             res = (path->deag.fp_tbl_id - rpath->frp_fib_index);
1354             break;
1355         case FIB_PATH_TYPE_SPECIAL:
1356         case FIB_PATH_TYPE_RECEIVE:
1357         case FIB_PATH_TYPE_EXCLUSIVE:
1358             res = 0;
1359             break;
1360         }
1361     }
1362     return (res);
1363 }
1364
1365 /*
1366  * fib_path_recursive_loop_detect
1367  *
1368  * A forward walk of the FIB object graph to detect a cycle/loop. This
1369  * walk is initiated when an entry is linking to a new path list or from an old.
1370  * The entry vector passed contains all the FIB entries that are children of this
1371  * path (it is all the entries encountered on the walk so far). If this vector
1372  * contains the entry this path resolves via, then a loop is about to form.
1373  * The loop must be allowed to form, since we need the dependencies in place
1374  * so that we can track when the loop breaks.
1375  * However, we MUST not produce a loop in the forwarding graph (else packets
1376  * would loop around the switch path until the loop breaks), so we mark recursive
1377  * paths as looped so that they do not contribute forwarding information.
1378  * By marking the path as looped, an entry such as:
1379  *    X/Y
1380  *     via a.a.a.a (looped)
1381  *     via b.b.b.b (not looped)
1382  * can still forward using the info provided by b.b.b.b only
1383  */
1384 int
1385 fib_path_recursive_loop_detect (fib_node_index_t path_index,
1386                                 fib_node_index_t **entry_indicies)
1387 {
1388     fib_path_t *path;
1389
1390     path = fib_path_get(path_index);
1391
1392     /*
1393      * the forced drop path is never looped, because it is never resolved.
1394      */
1395     if (fib_path_is_permanent_drop(path))
1396     {
1397         return (0);
1398     }
1399
1400     switch (path->fp_type)
1401     {
1402     case FIB_PATH_TYPE_RECURSIVE:
1403     {
1404         fib_node_index_t *entry_index, *entries;
1405         int looped = 0;
1406         entries = *entry_indicies;
1407
1408         vec_foreach(entry_index, entries) {
1409             if (*entry_index == path->fp_via_fib)
1410             {
1411                 /*
1412                  * the entry that is about to link to this path-list (or
1413                  * one of this path-list's children) is the same entry that
1414                  * this recursive path resolves through. this is a cycle.
1415                  * abort the walk.
1416                  */
1417                 looped = 1;
1418                 break;
1419             }
1420         }
1421
1422         if (looped)
1423         {
1424             FIB_PATH_DBG(path, "recursive loop formed");
1425             path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
1426
1427             dpo_copy(&path->fp_dpo,
1428                     drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
1429         }
1430         else
1431         {
1432             /*
1433              * no loop here yet. keep forward walking the graph.
1434              */     
1435             if (fib_entry_recursive_loop_detect(path->fp_via_fib, entry_indicies))
1436             {
1437                 FIB_PATH_DBG(path, "recursive loop formed");
1438                 path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
1439             }
1440             else
1441             {
1442                 FIB_PATH_DBG(path, "recursive loop cleared");
1443                 path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
1444             }
1445         }
1446         break;
1447     }
1448     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1449     case FIB_PATH_TYPE_ATTACHED:
1450     case FIB_PATH_TYPE_SPECIAL:
1451     case FIB_PATH_TYPE_DEAG:
1452     case FIB_PATH_TYPE_RECEIVE:
1453     case FIB_PATH_TYPE_EXCLUSIVE:
1454         /*
1455          * these path types cannot be part of a loop, since they are the leaves
1456          * of the graph.
1457          */
1458         break;
1459     }
1460
1461     return (fib_path_is_looped(path_index));
1462 }
1463
1464 int
1465 fib_path_resolve (fib_node_index_t path_index)
1466 {
1467     fib_path_t *path;
1468
1469     path = fib_path_get(path_index);
1470
1471     /*
1472      * hope for the best.
1473      */
1474     path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
1475
1476     /*
1477      * the forced drop path resolves via the drop adj
1478      */
1479     if (fib_path_is_permanent_drop(path))
1480     {
1481         dpo_copy(&path->fp_dpo,
1482                  drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
1483         path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
1484         return (fib_path_is_resolved(path_index));
1485     }
1486
1487     switch (path->fp_type)
1488     {
1489     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1490         fib_path_attached_next_hop_set(path);
1491         break;
1492     case FIB_PATH_TYPE_ATTACHED:
1493         /*
1494          * path->attached.fp_interface
1495          */
1496         if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
1497                                            path->attached.fp_interface))
1498         {
1499             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
1500         }
1501         if (vnet_sw_interface_is_p2p(vnet_get_main(),
1502                                      path->attached.fp_interface))
1503         {
1504             /*
1505              * point-2-point interfaces do not require a glean, since
1506              * there is nothing to ARP. Install a rewrite/nbr adj instead
1507              */
1508             dpo_set(&path->fp_dpo,
1509                     DPO_ADJACENCY,
1510                     fib_proto_to_dpo(path->fp_nh_proto),
1511                     adj_nbr_add_or_lock(
1512                         path->fp_nh_proto,
1513                         fib_proto_to_link(path->fp_nh_proto),
1514                         &zero_addr,
1515                         path->attached.fp_interface));
1516         }
1517         else
1518         {
1519             dpo_set(&path->fp_dpo,
1520                     DPO_ADJACENCY_GLEAN,
1521                     fib_proto_to_dpo(path->fp_nh_proto),
1522                     adj_glean_add_or_lock(path->fp_nh_proto,
1523                                           path->attached.fp_interface,
1524                                           NULL));
1525         }
1526         /*
1527          * become a child of the adjacency so we receive updates
1528          * when the interface state changes
1529          */
1530         path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
1531                                          FIB_NODE_TYPE_PATH,
1532                                          fib_path_get_index(path));
1533
1534         break;
1535     case FIB_PATH_TYPE_RECURSIVE:
1536     {
1537         /*
1538          * Create a RR source entry in the table for the address
1539          * that this path recurses through.
1540          * This resolve action is recursive, hence we may create
1541          * more paths in the process. more creates may mean a realloc
1542          * of the pool, and hence of this path, so it is re-fetched below.
1543          */
1544         fib_node_index_t fei;
1545         fib_prefix_t pfx;
1546
1547         ASSERT(FIB_NODE_INDEX_INVALID == path->fp_via_fib);
1548
1549         if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
1550         {
1551             fib_prefix_from_mpls_label(path->recursive.fp_nh.fp_local_label, &pfx);
1552         }
1553         else
1554         {
1555             fib_prefix_from_ip46_addr(&path->recursive.fp_nh.fp_ip, &pfx);
1556         }
1557
1558         fei = fib_table_entry_special_add(path->recursive.fp_tbl_id,
1559                                           &pfx,
1560                                           FIB_SOURCE_RR,
1561                                           FIB_ENTRY_FLAG_NONE,
1562                                           ADJ_INDEX_INVALID);
1563
1564         path = fib_path_get(path_index);
1565         path->fp_via_fib = fei;
1566
1567         /*
1568          * become a dependent child of the entry so the path is 
1569          * informed when the forwarding for the entry changes.
1570          */
1571         path->fp_sibling = fib_entry_child_add(path->fp_via_fib,
1572                                                FIB_NODE_TYPE_PATH,
1573                                                fib_path_get_index(path));
1574
1575         /*
1576          * create and configure the IP DPO
1577          */
1578         fib_path_recursive_adj_update(
1579             path,
1580             fib_path_proto_to_chain_type(path->fp_nh_proto),
1581             &path->fp_dpo);
1582
1583         break;
1584     }
1585     case FIB_PATH_TYPE_SPECIAL:
1586         /*
1587          * Resolve via the drop
1588          */
1589         dpo_copy(&path->fp_dpo,
1590                  drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
1591         break;
1592     case FIB_PATH_TYPE_DEAG:
1593         /*
1594          * Resolve via a lookup DPO.
1595          * FIXME. control plane should add routes with a table ID
1596          */
1597         lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id,
1598                                           fib_proto_to_dpo(path->fp_nh_proto),
1599                                           LOOKUP_INPUT_DST_ADDR,
1600                                           LOOKUP_TABLE_FROM_CONFIG,
1601                                           &path->fp_dpo);
1602         break;
1603     case FIB_PATH_TYPE_RECEIVE:
1604         /*
1605          * Resolve via a receive DPO.
1606          */
1607         receive_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto),
1608                                 path->receive.fp_interface,
1609                                 &path->receive.fp_addr,
1610                                 &path->fp_dpo);
1611         break;
1612     case FIB_PATH_TYPE_EXCLUSIVE:
1613         /*
1614          * Resolve via the user provided DPO
1615          */
1616         dpo_copy(&path->fp_dpo, &path->exclusive.fp_ex_dpo);
1617         break;
1618     }
1619
1620     return (fib_path_is_resolved(path_index));
1621 }
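
/*
 * A rough sketch of a path's lifecycle as driven by its owning path-list
 * (see fib_path_list.c):
 *
 *   pi = fib_path_create(pl_index, nh_proto, cfg_flags, rpath);
 *   fib_path_resolve(pi);
 *   ...
 *   fib_path_contribute_forwarding(pi, fct, &dpo);   // per chain type
 *   ...
 *   fib_path_destroy(pi);
 */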
1622
1623 u32
1624 fib_path_get_resolving_interface (fib_node_index_t path_index)
1625 {
1626     fib_path_t *path;
1627
1628     path = fib_path_get(path_index);
1629
1630     switch (path->fp_type)
1631     {
1632     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1633         return (path->attached_next_hop.fp_interface);
1634     case FIB_PATH_TYPE_ATTACHED:
1635         return (path->attached.fp_interface);
1636     case FIB_PATH_TYPE_RECEIVE:
1637         return (path->receive.fp_interface);
1638     case FIB_PATH_TYPE_RECURSIVE:
1639         return (fib_entry_get_resolving_interface(path->fp_via_fib));    
1640     case FIB_PATH_TYPE_SPECIAL:
1641     case FIB_PATH_TYPE_DEAG:
1642     case FIB_PATH_TYPE_EXCLUSIVE:
1643         break;
1644     }
1645     return (~0);
1646 }
1647
1648 adj_index_t
1649 fib_path_get_adj (fib_node_index_t path_index)
1650 {
1651     fib_path_t *path;
1652
1653     path = fib_path_get(path_index);
1654
1655     ASSERT(dpo_is_adj(&path->fp_dpo));
1656     if (dpo_is_adj(&path->fp_dpo))
1657     {
1658         return (path->fp_dpo.dpoi_index);
1659     }
1660     return (ADJ_INDEX_INVALID);
1661 }
1662
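/**
 * @brief Return the path's configured weight, as used when load-balancing
 * across the paths of a path-list.
 */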
1663 int
1664 fib_path_get_weight (fib_node_index_t path_index)
1665 {
1666     fib_path_t *path;
1667
1668     path = fib_path_get(path_index);
1669
1670     ASSERT(path);
1671
1672     return (path->fp_weight);
1673 }
1674
1675 /**
1676  * @brief Contribute the path's adjacency to the list passed.
1677  * By calling this function over all paths, recursively, a child
1678  * can construct its full set of forwarding adjacencies, and hence its
1679  * uRPF list.
1680  */
1681 void
1682 fib_path_contribute_urpf (fib_node_index_t path_index,
1683                           index_t urpf)
1684 {
1685     fib_path_t *path;
1686
1687     if (!fib_path_is_resolved(path_index))
1688         return;
1689
1690     path = fib_path_get(path_index);
1691
1692     switch (path->fp_type)
1693     {
1694     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1695         fib_urpf_list_append(urpf, path->attached_next_hop.fp_interface);
1696         break;
1697
1698     case FIB_PATH_TYPE_ATTACHED:
1699         fib_urpf_list_append(urpf, path->attached.fp_interface);
1700         break;
1701
1702     case FIB_PATH_TYPE_RECURSIVE:
1703         fib_entry_contribute_urpf(path->fp_via_fib, urpf);
1704         break;
1705
1706     case FIB_PATH_TYPE_EXCLUSIVE:
1707     case FIB_PATH_TYPE_SPECIAL:
1708         /*
1709          * these path types may link to an adj, if that's what
1710          * the client gave
1711          */
1712         if (dpo_is_adj(&path->fp_dpo))
1713         {
1714             ip_adjacency_t *adj;
1715
1716             adj = adj_get(path->fp_dpo.dpoi_index);
1717
1718             fib_urpf_list_append(urpf, adj->rewrite_header.sw_if_index);
1719         }
1720         break;
1721
1722     case FIB_PATH_TYPE_DEAG:
1723     case FIB_PATH_TYPE_RECEIVE:
1724         /*
1725          * these path types don't link to an adj
1726          */
1727         break;
1728     }
1729 }
1730
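/**
 * @brief Contribute the path's forwarding DPO for the requested forwarding
 * chain type. When the chain type matches the path's native protocol the
 * DPO built at resolve time is copied out; otherwise a DPO appropriate to
 * the requested chain type (e.g. a link-type specific adjacency or an MPLS
 * lookup) is constructed. A rough, hypothetical usage sketch:
 *
 *   dpo_id_t dpo = DPO_INVALID;
 *
 *   fib_path_contribute_forwarding(path_index,
 *                                  FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
 *                                  &dpo);
 *   ... stack a child object on 'dpo', then dpo_reset(&dpo) ...
 */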
1731 void
1732 fib_path_contribute_forwarding (fib_node_index_t path_index,
1733                                 fib_forward_chain_type_t fct,
1734                                 dpo_id_t *dpo)
1735 {
1736     fib_path_t *path;
1737
1738     path = fib_path_get(path_index);
1739
1740     ASSERT(path);
1741     ASSERT(FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct);
1742
1743     FIB_PATH_DBG(path, "contribute");
1744
1745     /*
1746      * The DPO stored in the path was created when the path was resolved.
1747      * This then represents the path's 'native' protocol, i.e. IP.
1748      * For all other chain types we will need to construct something else.
1749      */
1750     if (fib_path_proto_to_chain_type(path->fp_nh_proto) == fct)
1751     {
1752         dpo_copy(dpo, &path->fp_dpo);
1753     }
1754     else
1755     {
1756         switch (path->fp_type)
1757         {
1758         case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1759             switch (fct)
1760             {
1761             case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
1762             case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
1763             case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
1764             case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
1765             case FIB_FORW_CHAIN_TYPE_ETHERNET:
1766             case FIB_FORW_CHAIN_TYPE_NSH:
1767             {
1768                 adj_index_t ai;
1769
1770                 /*
1771                  * get an appropriate link-type adj.
1772                  */
1773                 ai = fib_path_attached_next_hop_get_adj(
1774                          path,
1775                          fib_forw_chain_type_to_link_type(fct));
1776                 dpo_set(dpo, DPO_ADJACENCY,
1777                         fib_forw_chain_type_to_dpo_proto(fct), ai);
1778                 adj_unlock(ai);
1779
1780                 break;
1781             }
1782             case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
1783             case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
1784                 break;
1785             }
1786             break;
1787         case FIB_PATH_TYPE_RECURSIVE:
1788             switch (fct)
1789             {
1790             case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
1791             case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
1792             case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
1793             case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
1794                 fib_path_recursive_adj_update(path, fct, dpo);
1795                 break;
1796             case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
1797             case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
1798             case FIB_FORW_CHAIN_TYPE_ETHERNET:
1799             case FIB_FORW_CHAIN_TYPE_NSH:
1800                 ASSERT(0);
1801                 break;
1802             }
1803             break;
1804         case FIB_PATH_TYPE_DEAG:
1805             switch (fct)
1806             {
1807             case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
1808                 lookup_dpo_add_or_lock_w_table_id(MPLS_FIB_DEFAULT_TABLE_ID,
1809                                                   DPO_PROTO_MPLS,
1810                                                   LOOKUP_INPUT_DST_ADDR,
1811                                                   LOOKUP_TABLE_FROM_CONFIG,
1812                                                   dpo);
1813                 break;
1814             case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
1815             case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
1816             case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
1817                 dpo_copy(dpo, &path->fp_dpo);
1818                 break;
1819             case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
1820             case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
1821             case FIB_FORW_CHAIN_TYPE_ETHERNET:
1822             case FIB_FORW_CHAIN_TYPE_NSH:
1823                 ASSERT(0);
1824                 break;
1825             }
1826             break;
1827         case FIB_PATH_TYPE_EXCLUSIVE:
1828             dpo_copy(dpo, &path->exclusive.fp_ex_dpo);
1829             break;
1830         case FIB_PATH_TYPE_ATTACHED:
1831             switch (fct)
1832             {
1833             case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
1834             case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
1835             case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
1836             case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
1837             case FIB_FORW_CHAIN_TYPE_ETHERNET:
1838             case FIB_FORW_CHAIN_TYPE_NSH:
1839                 break;
1840             case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
1841             case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
1842                 {
1843                     adj_index_t ai;
1844
1845                     /*
1846                      * Create the adj needed for sending IP multicast traffic
1847                      */
1848                     ai = adj_mcast_add_or_lock(path->fp_nh_proto,
1849                                                fib_forw_chain_type_to_link_type(fct),
1850                                                path->attached.fp_interface);
1851                     dpo_set(dpo, DPO_ADJACENCY_MCAST,
1852                             fib_forw_chain_type_to_dpo_proto(fct),
1853                             ai);
1854                     adj_unlock(ai);
1855                 }
1856                 break;
1857             }
1858             break;
1859         case FIB_PATH_TYPE_RECEIVE:
1860         case FIB_PATH_TYPE_SPECIAL:
1861             dpo_copy(dpo, &path->fp_dpo);
1862             break;
1863         }
1864     }
1865 }
1866
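/**
 * @brief If the path is resolved, append one bucket (weight, path index and
 * contributed DPO) to the load-balance path vector passed in and return the
 * (possibly reallocated) vector. Typically invoked once per path in a
 * path-list when a load-balance is being populated.
 */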
1867 load_balance_path_t *
1868 fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index,
1869                                        fib_forward_chain_type_t fct,
1870                                        load_balance_path_t *hash_key)
1871 {
1872     load_balance_path_t *mnh;
1873     fib_path_t *path;
1874
1875     path = fib_path_get(path_index);
1876
1877     ASSERT(path);
1878
1879     if (fib_path_is_resolved(path_index))
1880     {
1881         vec_add2(hash_key, mnh, 1);
1882
1883         mnh->path_weight = path->fp_weight;
1884         mnh->path_index = path_index;
1885         fib_path_contribute_forwarding(path_index, fct, &mnh->path_dpo);
1886     }
1887
1888     return (hash_key);
1889 }
1890
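/**
 * @brief Predicate: is this a recursive path, i.e. one that resolves via
 * another FIB entry rather than directly via an interface.
 */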
1891 int
1892 fib_path_is_recursive (fib_node_index_t path_index)
1893 {
1894     fib_path_t *path;
1895
1896     path = fib_path_get(path_index);
1897
1898     return (FIB_PATH_TYPE_RECURSIVE == path->fp_type);
1899 }
1900
1901 int
1902 fib_path_is_exclusive (fib_node_index_t path_index)
1903 {
1904     fib_path_t *path;
1905
1906     path = fib_path_get(path_index);
1907
1908     return (FIB_PATH_TYPE_EXCLUSIVE == path->fp_type);
1909 }
1910
1911 int
1912 fib_path_is_deag (fib_node_index_t path_index)
1913 {
1914     fib_path_t *path;
1915
1916     path = fib_path_get(path_index);
1917
1918     return (FIB_PATH_TYPE_DEAG == path->fp_type);
1919 }
1920
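/**
 * @brief Predicate: a path is resolved only when it has a valid DPO, its
 * resolved operational flag is set, it is not part of a recursive loop and
 * it is not a permanent drop.
 */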
1921 int
1922 fib_path_is_resolved (fib_node_index_t path_index)
1923 {
1924     fib_path_t *path;
1925
1926     path = fib_path_get(path_index);
1927
1928     return (dpo_id_is_valid(&path->fp_dpo) &&
1929             (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED) &&
1930             !fib_path_is_looped(path_index) &&
1931             !fib_path_is_permanent_drop(path));
1932 }
1933
1934 int
1935 fib_path_is_looped (fib_node_index_t path_index)
1936 {
1937     fib_path_t *path;
1938
1939     path = fib_path_get(path_index);
1940
1941     return (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP);
1942 }
1943
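/**
 * @brief Encode one path into the vector of fib_route_path_encode_t passed
 * via the context pointer; intended for use as a path-list walk callback
 * when routes are exported (e.g. towards the API). Returns 0 if the path
 * index is invalid, 1 otherwise.
 */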
1944 int
1945 fib_path_encode (fib_node_index_t path_list_index,
1946                  fib_node_index_t path_index,
1947                  void *ctx)
1948 {
1949     fib_route_path_encode_t **api_rpaths = ctx;
1950     fib_route_path_encode_t *api_rpath;
1951     fib_path_t *path;
1952
1953     path = fib_path_get(path_index);
1954     if (!path)
1955       return (0);
1956     vec_add2(*api_rpaths, api_rpath, 1);
1957     api_rpath->rpath.frp_weight = path->fp_weight;
1958     api_rpath->rpath.frp_proto = path->fp_nh_proto;
1959     api_rpath->rpath.frp_sw_if_index = ~0;
1960     api_rpath->dpo = path->exclusive.fp_ex_dpo;
1961     switch (path->fp_type)
1962       {
1963       case FIB_PATH_TYPE_RECEIVE:
1964         api_rpath->rpath.frp_addr = path->receive.fp_addr;
1965         api_rpath->rpath.frp_sw_if_index = path->receive.fp_interface;
1966         break;
1967       case FIB_PATH_TYPE_ATTACHED:
1968         api_rpath->rpath.frp_sw_if_index = path->attached.fp_interface;
1969         break;
1970       case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1971         api_rpath->rpath.frp_sw_if_index = path->attached_next_hop.fp_interface;
1972         api_rpath->rpath.frp_addr = path->attached_next_hop.fp_nh;
1973         break;
1974       case FIB_PATH_TYPE_SPECIAL:
1975         break;
1976       case FIB_PATH_TYPE_DEAG:
1977         break;
1978       case FIB_PATH_TYPE_RECURSIVE:
1979         api_rpath->rpath.frp_addr = path->recursive.fp_nh.fp_ip;
1980         break;
1981       default:
1982         break;
1983       }
1984     return (1);
1985 }
1986
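/**
 * @brief Return the protocol of the path's next-hop.
 */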
1987 fib_protocol_t
1988 fib_path_get_proto (fib_node_index_t path_index)
1989 {
1990     fib_path_t *path;
1991
1992     path = fib_path_get(path_index);
1993
1994     return (path->fp_nh_proto);
1995 }
1996
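/**
 * @brief One-time module initialisation: register the path node type with
 * the FIB node infrastructure so paths can participate in the dependency
 * graph.
 */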
1997 void
1998 fib_path_module_init (void)
1999 {
2000     fib_node_register_type (FIB_NODE_TYPE_PATH, &fib_path_vft);
2001 }
2002
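/**
 * @brief CLI handler for 'show fib paths'. With no argument every path in
 * the pool is printed in brief; with a numeric path index the single path
 * is shown in detail, including its children, e.g.:
 *
 *   vpp# show fib paths
 *   vpp# show fib paths 12
 *
 * (the index '12' is purely illustrative)
 */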
2003 static clib_error_t *
2004 show_fib_path_command (vlib_main_t * vm,
2005                         unformat_input_t * input,
2006                         vlib_cli_command_t * cmd)
2007 {
2008     fib_node_index_t pi;
2009     fib_path_t *path;
2010
2011     if (unformat (input, "%d", &pi))
2012     {
2013         /*
2014          * show one in detail
2015          */
2016         if (!pool_is_free_index(fib_path_pool, pi))
2017         {
2018             path = fib_path_get(pi);
2019             u8 *s = fib_path_format(pi, NULL);
2020             s = format(s, "children:");
2021             s = fib_node_children_format(path->fp_node.fn_children, s);
2022             vlib_cli_output (vm, "%s", s);
2023             vec_free(s);
2024         }
2025         else
2026         {
2027             vlib_cli_output (vm, "path %d invalid", pi);
2028         }
2029     }
2030     else
2031     {
2032         vlib_cli_output (vm, "FIB Paths");
2033         pool_foreach(path, fib_path_pool,
2034         ({
2035             vlib_cli_output (vm, "%U", format_fib_path, path);
2036         }));
2037     }
2038
2039     return (NULL);
2040 }
2041
2042 VLIB_CLI_COMMAND (show_fib_path, static) = {
2043   .path = "show fib paths",
2044   .function = show_fib_path_command,
2045   .short_help = "show fib paths",
2046 };