src/vnet/fib/fib_path.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/ip/format.h>
19 #include <vnet/ip/ip.h>
20 #include <vnet/dpo/drop_dpo.h>
21 #include <vnet/dpo/receive_dpo.h>
22 #include <vnet/dpo/load_balance_map.h>
23 #include <vnet/dpo/lookup_dpo.h>
24
25 #include <vnet/adj/adj.h>
26 #include <vnet/adj/adj_mcast.h>
27
28 #include <vnet/fib/fib_path.h>
29 #include <vnet/fib/fib_node.h>
30 #include <vnet/fib/fib_table.h>
31 #include <vnet/fib/fib_entry.h>
32 #include <vnet/fib/fib_path_list.h>
33 #include <vnet/fib/fib_internal.h>
34 #include <vnet/fib/fib_urpf_list.h>
35
36 /**
37  * Enumeration of path types
38  */
39 typedef enum fib_path_type_t_ {
40     /**
41      * Marker. Add new types after this one.
42      */
43     FIB_PATH_TYPE_FIRST = 0,
44     /**
45      * Attached-nexthop. An interface and a nexthop are known.
46      */
47     FIB_PATH_TYPE_ATTACHED_NEXT_HOP = FIB_PATH_TYPE_FIRST,
48     /**
49      * attached. Only the interface is known.
50      */
51     FIB_PATH_TYPE_ATTACHED,
52     /**
53      * recursive. Only the next-hop is known.
54      */
55     FIB_PATH_TYPE_RECURSIVE,
56     /**
57      * special. nothing is known. so we drop.
58      */
59     FIB_PATH_TYPE_SPECIAL,
60     /**
61      * exclusive. user provided adj.
62      */
63     FIB_PATH_TYPE_EXCLUSIVE,
64     /**
65      * deag. Link to a lookup adj in the next table
66      */
67     FIB_PATH_TYPE_DEAG,
68     /**
69      * receive. it's for-us.
70      */
71     FIB_PATH_TYPE_RECEIVE,
72     /**
73      * Marker. Add new types before this one, then update it.
74      */
75     FIB_PATH_TYPE_LAST = FIB_PATH_TYPE_RECEIVE,
76 } __attribute__ ((packed)) fib_path_type_t;
77
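/*
 * Illustrative mapping (an editor's sketch, CLI syntax approximate) from
 * route configuration to the path type deduced in fib_path_create() below:
 *     ip route 1.1.1.2/32 via Gig0 10.0.0.2   -> attached-nexthop
 *     ip route 10.0.0.0/24 via Gig0           -> attached
 *     ip route 8.8.8.8/32 via 10.0.0.2        -> recursive
 *     a path carrying only a FIB table index  -> deag
 * receive, special and exclusive paths are added by the FIB itself
 * (local addresses, drops, user-provided DPOs) rather than via routes.
 */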
78 /**
79  * The maximum number of path_types
80  */
81 #define FIB_PATH_TYPE_MAX (FIB_PATH_TYPE_LAST + 1)
82
83 #define FIB_PATH_TYPES {                                        \
84     [FIB_PATH_TYPE_ATTACHED_NEXT_HOP] = "attached-nexthop",     \
85     [FIB_PATH_TYPE_ATTACHED]          = "attached",             \
86     [FIB_PATH_TYPE_RECURSIVE]         = "recursive",            \
87     [FIB_PATH_TYPE_SPECIAL]           = "special",              \
88     [FIB_PATH_TYPE_EXCLUSIVE]         = "exclusive",            \
89     [FIB_PATH_TYPE_DEAG]              = "deag",                 \
90     [FIB_PATH_TYPE_RECEIVE]           = "receive",              \
91 }
92
93 #define FOR_EACH_FIB_PATH_TYPE(_item) \
94     for (_item = FIB_PATH_TYPE_FIRST; _item <= FIB_PATH_TYPE_LAST; _item++)
95
96 /**
97  * Enumeration of path operational (i.e. derived) attributes
98  */
99 typedef enum fib_path_oper_attribute_t_ {
100     /**
101      * Marker. Add new types after this one.
102      */
103     FIB_PATH_OPER_ATTRIBUTE_FIRST = 0,
104     /**
105      * The path forms part of a recursive loop.
106      */
107     FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP = FIB_PATH_OPER_ATTRIBUTE_FIRST,
108     /**
109      * The path is resolved
110      */
111     FIB_PATH_OPER_ATTRIBUTE_RESOLVED,
112     /**
113      * The path is attached, despite what the next-hop may say.
114      */
115     FIB_PATH_OPER_ATTRIBUTE_ATTACHED,
116     /**
117      * The path has become a permanent drop.
118      */
119     FIB_PATH_OPER_ATTRIBUTE_DROP,
120     /**
121      * Marker. Add new types before this one, then update it.
122      */
123     FIB_PATH_OPER_ATTRIBUTE_LAST = FIB_PATH_OPER_ATTRIBUTE_DROP,
124 } __attribute__ ((packed)) fib_path_oper_attribute_t;
125
126 /**
127  * The maximum number of path operational attributes
128  */
129 #define FIB_PATH_OPER_ATTRIBUTE_MAX (FIB_PATH_OPER_ATTRIBUTE_LAST + 1)
130
131 #define FIB_PATH_OPER_ATTRIBUTES {                                      \
132     [FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP] = "recursive-loop",        \
133     [FIB_PATH_OPER_ATTRIBUTE_RESOLVED]       = "resolved",              \
134     [FIB_PATH_OPER_ATTRIBUTE_DROP]           = "drop",                  \
135 }
136
137 #define FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(_item) \
138     for (_item = FIB_PATH_OPER_ATTRIBUTE_FIRST; \
139          _item <= FIB_PATH_OPER_ATTRIBUTE_LAST; \
140          _item++)
141
142 /**
143  * Path flags from the attributes
144  */
145 typedef enum fib_path_oper_flags_t_ {
146     FIB_PATH_OPER_FLAG_NONE = 0,
147     FIB_PATH_OPER_FLAG_RECURSIVE_LOOP = (1 << FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP),
148     FIB_PATH_OPER_FLAG_DROP = (1 << FIB_PATH_OPER_ATTRIBUTE_DROP),
149     FIB_PATH_OPER_FLAG_RESOLVED = (1 << FIB_PATH_OPER_ATTRIBUTE_RESOLVED),
150     FIB_PATH_OPER_FLAG_ATTACHED = (1 << FIB_PATH_OPER_ATTRIBUTE_ATTACHED),
151 } __attribute__ ((packed)) fib_path_oper_flags_t;
152
153 /**
154  * A FIB path
155  */
156 typedef struct fib_path_t_ {
157     /**
158      * A path is a node in the FIB graph.
159      */
160     fib_node_t fp_node;
161
162     /**
163      * The index of the path-list to which this path belongs
164      */
165     u32 fp_pl_index;
166
167     /**
168      * This marks the start of the memory area used to hash
169      * the path
170      */
171     STRUCT_MARK(path_hash_start);
172
173     /**
174      * Configuration Flags
175      */
176     fib_path_cfg_flags_t fp_cfg_flags;
177
178     /**
179      * The type of the path. This is the selector for the union
180      */
181     fib_path_type_t fp_type;
182
183     /**
184      * The protocol of the next-hop, i.e. the address family of the
185      * next-hop's address. We can't derive this from the address itself
186      * since the address can be all zeros
187      */
188     fib_protocol_t fp_nh_proto;
189
190     /**
191      * UCMP [unnormalised] weigt
192      */
193     u32 fp_weight;
194
195     /**
196      * per-type union of the data required to resolve the path
197      */
198     union {
199         struct {
200             /**
201              * The next-hop
202              */
203             ip46_address_t fp_nh;
204             /**
205              * The interface
206              */
207             u32 fp_interface;
208         } attached_next_hop;
209         struct {
210             /**
211              * The interface
212              */
213             u32 fp_interface;
214         } attached;
215         struct {
216             union
217             {
218                 /**
219                  * The next-hop
220                  */
221                 ip46_address_t fp_ip;
222                 /**
223                  * The local label to resolve through.
224                  */
225                 mpls_label_t fp_local_label;
226             } fp_nh;
227             /**
228              * The FIB table index in which to find the next-hop.
229      * This needs to be fixed. We should look up the adjacencies in
230              * a separate table of adjacencies, rather than from the FIB.
231              * Two reasons I can think of:
232              *   - consider:
233              *       int ip addr Gig0 10.0.0.1/24
234              *       ip route 10.0.0.2/32 via Gig1 192.168.1.2
235              *       ip route 1.1.1.1/32 via Gig0 10.0.0.2
236              *     this is perfectly valid.
237              *     Packets addressed to 10.0.0.2 should be sent via Gig1.
238      *     Packets addressed to 1.1.1.1 should be sent via Gig0.
239              *    when we perform the adj resolution from the FIB for the path
240              *    "via Gig0 10.0.0.2" the lookup will result in the route via Gig1
241              *    and so we will pick up the adj via Gig1 - which was not what the
242              *    operator wanted.
243              *  - we can only return link-type IPv4 and so not the link-type MPLS.
244              *    more on this in a later commit.
245              *
246              * The table ID should only belong to a recursive path and indicate
247              * which FIB should be used to resolve the next-hop.
248              */
249             fib_node_index_t fp_tbl_id;
250         } recursive;
251         struct {
252             /**
253              * The FIB index in which to perform the next lookup
254              */
255             fib_node_index_t fp_tbl_id;
256         } deag;
257         struct {
258         } special;
259         struct {
260             /**
261              * The user provided 'exclusive' DPO
262              */
263             dpo_id_t fp_ex_dpo;
264         } exclusive;
265         struct {
266             /**
267              * The interface on which the local address is configured
268              */
269             u32 fp_interface;
270             /**
271              * The next-hop
272              */
273             ip46_address_t fp_addr;
274         } receive;
275     };
276     STRUCT_MARK(path_hash_end);
277
278     /**
279      * Members in this last section represent information that is
280      * derived during resolution. It should not be copied to new paths
281      * nor compared.
282      */
283
284     /**
285      * Operational Flags
286      */
287     fib_path_oper_flags_t fp_oper_flags;
288
289     /**
290      * the resolving via fib. not part of the union, since it is not part
291      * of the path's hash.
292      */
293     fib_node_index_t fp_via_fib;
294
295     /**
296      * The Data-path objects through which this path resolves for IP.
297      */
298     dpo_id_t fp_dpo;
299
300     /**
301      * the index of this path in the parent's child list.
302      */
303     u32 fp_sibling;
304 } fib_path_t;
305
306 /*
307  * Array of strings/names for the path types and attributes
308  */
309 static const char *fib_path_type_names[] = FIB_PATH_TYPES;
310 static const char *fib_path_oper_attribute_names[] = FIB_PATH_OPER_ATTRIBUTES;
311 static const char *fib_path_cfg_attribute_names[]  = FIB_PATH_CFG_ATTRIBUTES;
312
313 /*
314  * The memory pool from which we allocate all the paths
315  */
316 static fib_path_t *fib_path_pool;
317
318 /*
319  * Debug macro
320  */
321 #ifdef FIB_DEBUG
322 #define FIB_PATH_DBG(_p, _fmt, _args...)                        \
323 {                                                               \
324     u8 *_tmp = NULL;                                            \
325     _tmp = fib_path_format(fib_path_get_index(_p), _tmp);       \
326     clib_warning("path:[%d:%s]:" _fmt,                          \
327                  fib_path_get_index(_p), _tmp,                  \
328                  ##_args);                                      \
329     vec_free(_tmp);                                             \
330 }
331 #else
332 #define FIB_PATH_DBG(_p, _fmt, _args...)
333 #endif
334
335 static fib_path_t *
336 fib_path_get (fib_node_index_t index)
337 {
338     return (pool_elt_at_index(fib_path_pool, index));
339 }
340
341 static fib_node_index_t 
342 fib_path_get_index (fib_path_t *path)
343 {
344     return (path - fib_path_pool);
345 }
346
347 static fib_node_t *
348 fib_path_get_node (fib_node_index_t index)
349 {
350     return ((fib_node_t*)fib_path_get(index));
351 }
352
353 static fib_path_t*
354 fib_path_from_fib_node (fib_node_t *node)
355 {
356 #if CLIB_DEBUG > 0
357     ASSERT(FIB_NODE_TYPE_PATH == node->fn_type);
358 #endif
359     return ((fib_path_t*)node);
360 }
361
362 u8 *
363 format_fib_path (u8 * s, va_list * args)
364 {
365     fib_path_t *path = va_arg (*args, fib_path_t *);
366     vnet_main_t * vnm = vnet_get_main();
367     fib_path_oper_attribute_t oattr;
368     fib_path_cfg_attribute_t cattr;
369
370     s = format (s, "      index:%d ", fib_path_get_index(path));
371     s = format (s, "pl-index:%d ", path->fp_pl_index);
372     s = format (s, "%U ", format_fib_protocol, path->fp_nh_proto);
373     s = format (s, "weight=%d ", path->fp_weight);
374     s = format (s, "%s: ", fib_path_type_names[path->fp_type]);
375     if (FIB_PATH_OPER_FLAG_NONE != path->fp_oper_flags) {
376         s = format(s, " oper-flags:");
377         FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(oattr) {
378             if ((1<<oattr) & path->fp_oper_flags) {
379                 s = format (s, "%s,", fib_path_oper_attribute_names[oattr]);
380             }
381         }
382     }
383     if (FIB_PATH_CFG_FLAG_NONE != path->fp_cfg_flags) {
384         s = format(s, " cfg-flags:");
385         FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(cattr) {
386             if ((1<<cattr) & path->fp_cfg_flags) {
387                 s = format (s, "%s,", fib_path_cfg_attribute_names[cattr]);
388             }
389         }
390     }
391     s = format(s, "\n       ");
392
393     switch (path->fp_type)
394     {
395     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
396         s = format (s, "%U", format_ip46_address,
397                     &path->attached_next_hop.fp_nh,
398                     IP46_TYPE_ANY);
399         if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)
400         {
401             s = format (s, " if_index:%d", path->attached_next_hop.fp_interface);
402         }
403         else
404         {
405             s = format (s, " %U",
406                         format_vnet_sw_interface_name,
407                         vnm,
408                         vnet_get_sw_interface(
409                             vnm,
410                             path->attached_next_hop.fp_interface));
411             if (vnet_sw_interface_is_p2p(vnet_get_main(),
412                                          path->attached_next_hop.fp_interface))
413             {
414                 s = format (s, " (p2p)");
415             }
416         }
417         if (!dpo_id_is_valid(&path->fp_dpo))
418         {
419             s = format(s, "\n          unresolved");
420         }
421         else
422         {
423             s = format(s, "\n          %U",
424                        format_dpo_id,
425                        &path->fp_dpo, 13);
426         }
427         break;
428     case FIB_PATH_TYPE_ATTACHED:
429         if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)
430         {
431             s = format (s, " if_index:%d", path->attached_next_hop.fp_interface);
432         }
433         else
434         {
435             s = format (s, " %U",
436                         format_vnet_sw_interface_name,
437                         vnm,
438                         vnet_get_sw_interface(
439                             vnm,
440                             path->attached.fp_interface));
441         }
442         break;
443     case FIB_PATH_TYPE_RECURSIVE:
444         if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
445         {
446             s = format (s, "via %U",
447                         format_mpls_unicast_label,
448                         path->recursive.fp_nh.fp_local_label);
449         }
450         else
451         {
452             s = format (s, "via %U",
453                         format_ip46_address,
454                         &path->recursive.fp_nh.fp_ip,
455                         IP46_TYPE_ANY);
456         }
457         s = format (s, " in fib:%d",
458                     path->recursive.fp_tbl_id);
460         s = format (s, " via-fib:%d", path->fp_via_fib); 
461         s = format (s, " via-dpo:[%U:%d]",
462                     format_dpo_type, path->fp_dpo.dpoi_type, 
463                     path->fp_dpo.dpoi_index);
464
465         break;
466     case FIB_PATH_TYPE_RECEIVE:
467     case FIB_PATH_TYPE_SPECIAL:
468     case FIB_PATH_TYPE_DEAG:
469     case FIB_PATH_TYPE_EXCLUSIVE:
470         if (dpo_id_is_valid(&path->fp_dpo))
471         {
472             s = format(s, "%U", format_dpo_id,
473                        &path->fp_dpo, 2);
474         }
475         break;
476     }
477     return (s);
478 }
479
480 u8 *
481 fib_path_format (fib_node_index_t pi, u8 *s)
482 {
483     fib_path_t *path;
484
485     path = fib_path_get(pi);
486     ASSERT(NULL != path);
487
488     return (format (s, "%U", format_fib_path, path));
489 }
490
491 u8 *
492 fib_path_adj_format (fib_node_index_t pi,
493                      u32 indent,
494                      u8 *s)
495 {
496     fib_path_t *path;
497
498     path = fib_path_get(pi);
499     ASSERT(NULL != path);
500
501     if (!dpo_id_is_valid(&path->fp_dpo))
502     {
503         s = format(s, " unresolved");
504     }
505     else
506     {
507         s = format(s, "%U", format_dpo_id,
508                    &path->fp_dpo, 2);
509     }
510
511     return (s);
512 }
513
514 /*
515  * fib_path_last_lock_gone
516  *
517  * We don't share paths, we share path lists, so the [un]lock functions
518  * are no-ops
519  */
520 static void
521 fib_path_last_lock_gone (fib_node_t *node)
522 {
523     ASSERT(0);
524 }
525
526 static const adj_index_t
527 fib_path_attached_next_hop_get_adj (fib_path_t *path,
528                                     vnet_link_t link)
529 {
530     if (vnet_sw_interface_is_p2p(vnet_get_main(),
531                                  path->attached_next_hop.fp_interface))
532     {
533         /*
534          * if the interface is p2p then the adj for the specific
535          * neighbour on that link will never exist. on p2p links
536          * the subnet address (the attached route) links to the
537          * auto-adj (see below), we want that adj here too.
538          */
539         return (adj_nbr_add_or_lock(path->fp_nh_proto,
540                                     link,
541                                     &zero_addr,
542                                     path->attached_next_hop.fp_interface));
543     }
544     else
545     {
546         return (adj_nbr_add_or_lock(path->fp_nh_proto,
547                                     link,
548                                     &path->attached_next_hop.fp_nh,
549                                     path->attached_next_hop.fp_interface));
550     }
551 }
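/*
 * Illustrative note (editor's sketch): for an attached-nexthop path over a
 * p2p link, e.g. a GRE tunnel, there is only ever one peer on the link, so
 * the branch above keys the adj on the zero address. That is the same adj
 * the attached/connected prefix links to, rather than a per-neighbour adj
 * that no ARP/ND reply could ever complete.
 */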
552
553 static void
554 fib_path_attached_next_hop_set (fib_path_t *path)
555 {
556     /*
557      * resolve directly via the adjacency described by the
558      * interface and next-hop
559      */
560     if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
561                                       path->attached_next_hop.fp_interface))
562     {
563         path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
564     }
565
566     dpo_set(&path->fp_dpo,
567             DPO_ADJACENCY,
568             fib_proto_to_dpo(path->fp_nh_proto),
569             fib_path_attached_next_hop_get_adj(
570                  path,
571                  fib_proto_to_link(path->fp_nh_proto)));
572
573     /*
574      * become a child of the adjacency so we receive updates
575      * when its rewrite changes
576      */
577     path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
578                                      FIB_NODE_TYPE_PATH,
579                                      fib_path_get_index(path));
580 }
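/*
 * Editor's sketch of the net effect: for a route such as
 *     ip route 1.1.1.1/32 via Gig0 10.0.0.2
 * the path's fp_dpo now points at the neighbour adjacency for
 * (Gig0, 10.0.0.2), and the path is registered as a child of that adj, so
 * rewrite changes (e.g. ARP/ND completion) are delivered to the path via
 * fib_path_back_walk_notify() below.
 */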
581
582 /*
583  * create or update the path's recursive adj
584  */
585 static void
586 fib_path_recursive_adj_update (fib_path_t *path,
587                                fib_forward_chain_type_t fct,
588                                dpo_id_t *dpo)
589 {
590     dpo_id_t via_dpo = DPO_INVALID;
591
592     /*
593      * get the DPO to resolve through from the via-entry
594      */
595     fib_entry_contribute_forwarding(path->fp_via_fib,
596                                     fct,
597                                     &via_dpo);
598
599
600     /*
601      * hope for the best - clear if restrictions apply.
602      */
603     path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
604
605     /*
606      * Validate any recursion constraints and over-ride the via
607      * adj if not met
608      */
609     if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP)
610     {
611         path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
612         dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
613     }
614     else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_HOST)
615     {
616         /*
617          * the via FIB must be a host route.
618          * note the via FIB just added will always be a host route
619          * since it is an RR source added host route. So what we need to
620          * check is whether the route has other sources. If it does then
621          * some other source has added it as a host route. If it doesn't
622          * then it was added only here and inherits forwarding from a cover.
623          * the cover is not a host route.
624          * The RR source is the lowest priority source, so we check if it
625          * is the best. If it is, there are no other sources.
626          */
627         if (fib_entry_get_best_source(path->fp_via_fib) >= FIB_SOURCE_RR)
628         {
629             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
630             dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
631
632             /*
633              * PIC edge trigger. let the load-balance maps know
634              */
635             load_balance_map_path_state_change(fib_path_get_index(path));
636         }
637     }
638     else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED)
639     {
640         /*
641          * RR source entries inherit the flags from the cover, so
642          * we can check the via directly
643          */
644         if (!(FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags(path->fp_via_fib)))
645         {
646             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
647             dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
648
649             /*
650              * PIC edge trigger. let the load-balance maps know
651              */
652             load_balance_map_path_state_change(fib_path_get_index(path));
653         }
654     }
655
656     /*
657      * update the path's contributed DPO
658      */
659     dpo_copy(dpo, &via_dpo);
660
661     FIB_PATH_DBG(path, "recursive update: %U",
662                  fib_get_lookup_main(path->fp_nh_proto),
663                  &path->fp_dpo, 2);
664
665     dpo_reset(&via_dpo);
666 }
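/*
 * Worked example of the resolve-via-host constraint above (editor's sketch,
 * CLI syntax approximate):
 *     ip route 8.8.8.8/32 via 10.0.0.2 resolve-via-host
 * if 10.0.0.2 is known only through its covering connected prefix (i.e. the
 * via-entry is an RR-sourced host route inheriting from its cover), the path
 * is left unresolved and contributes the drop DPO; once some other source,
 * e.g. an ARP-derived adjacency route, adds the real host route the path
 * resolves through it.
 */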
667
668 /*
669  * fib_path_is_permanent_drop
670  *
671  * Return !0 if the path is configured to permanently drop,
672  * despite other attributes.
673  */
674 static int
675 fib_path_is_permanent_drop (fib_path_t *path)
676 {
677     return ((path->fp_cfg_flags & FIB_PATH_CFG_FLAG_DROP) ||
678             (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP));
679 }
680
681 /*
682  * fib_path_unresolve
683  *
684  * Remove our dependency on the resolution target
685  */
686 static void
687 fib_path_unresolve (fib_path_t *path)
688 {
689     /*
690      * the forced drop path does not need unresolving
691      */
692     if (fib_path_is_permanent_drop(path))
693     {
694         return;
695     }
696
697     switch (path->fp_type)
698     {
699     case FIB_PATH_TYPE_RECURSIVE:
700         if (FIB_NODE_INDEX_INVALID != path->fp_via_fib)
701         {
702             fib_prefix_t pfx;
703
704             fib_entry_get_prefix(path->fp_via_fib, &pfx);
705             fib_entry_child_remove(path->fp_via_fib,
706                                    path->fp_sibling);
707             fib_table_entry_special_remove(path->recursive.fp_tbl_id,
708                                            &pfx,
709                                            FIB_SOURCE_RR);
710             path->fp_via_fib = FIB_NODE_INDEX_INVALID;
711         }
712         break;
713     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
714     case FIB_PATH_TYPE_ATTACHED:
715         adj_child_remove(path->fp_dpo.dpoi_index,
716                          path->fp_sibling);
717         adj_unlock(path->fp_dpo.dpoi_index);
718         break;
719     case FIB_PATH_TYPE_EXCLUSIVE:
720         dpo_reset(&path->exclusive.fp_ex_dpo);
721         break;
722     case FIB_PATH_TYPE_SPECIAL:
723     case FIB_PATH_TYPE_RECEIVE:
724     case FIB_PATH_TYPE_DEAG:
725         /*
726          * these hold only the path's DPO, which is reset below.
727          */
728         break;
729     }
730
731     /*
732      * release the adj we were holding and pick up the
733      * drop just in case.
734      */
735     dpo_reset(&path->fp_dpo);
736     path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
737
738     return;
739 }
740
741 static fib_forward_chain_type_t
742 fib_path_proto_to_chain_type (fib_protocol_t proto)
743 {
744     switch (proto)
745     {
746     case FIB_PROTOCOL_IP4:
747         return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
748     case FIB_PROTOCOL_IP6:
749         return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
750     case FIB_PROTOCOL_MPLS:
751         return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
752     }
753     return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
754 }
755
756 /*
757  * fib_path_back_walk_notify
758  *
759  * A back walk has reached this path.
760  */
761 static fib_node_back_walk_rc_t
762 fib_path_back_walk_notify (fib_node_t *node,
763                            fib_node_back_walk_ctx_t *ctx)
764 {
765     fib_path_t *path;
766
767     path = fib_path_from_fib_node(node);
768
769     switch (path->fp_type)
770     {
771     case FIB_PATH_TYPE_RECURSIVE:
772         if (FIB_NODE_BW_REASON_FLAG_EVALUATE & ctx->fnbw_reason)
773         {
774             /*
775              * modify the recursive adjacency to use the new forwarding
776              * of the via-fib.
777              * this update is visible to packets in flight in the DP.
778              */
779             fib_path_recursive_adj_update(
780                 path,
781                 fib_path_proto_to_chain_type(path->fp_nh_proto),
782                 &path->fp_dpo);
783         }
784         if ((FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason) ||
785             (FIB_NODE_BW_REASON_FLAG_ADJ_DOWN   & ctx->fnbw_reason))
786         {
787             /*
788              * ADJ updates (complete<->incomplete) do not need to propagate to
789              * recursive entries.
790              * The only reason it's needed as far back as here is that the adj
791              * and the incomplete adj are a different DPO type, so the LBs need
792              * to re-stack.
793              * If this walk was quashed in the fib_entry, then any non-fib_path
794              * children (like tunnels that collapse out the LB when they stack)
795              * would not see the update.
796              */
797             return (FIB_NODE_BACK_WALK_CONTINUE);
798         }
799         break;
800     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
801         /*
802          * FIXME: expand this comment.
803          * ADJ_UPDATE backwalks pass silently through here and up to
804          * the path-list when the multipath adj collapse occurs.
805          * The reason we do this is that the assumption is that VPP
806          * runs in an environment where the Control-Plane is remote
807          * and hence reacts slowly to link up/down. In order to remove
808          * this down link from the ECMP set quickly, we back-walk.
809          * VPP also has dedicated CPUs, so we are not stealing resources
810          * from the CP to do so.
811          */
812         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason)
813         {
814             if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED)
815             {
816                 /*
817                  * already resolved. no need to walk back again
818                  */
819                 return (FIB_NODE_BACK_WALK_CONTINUE);
820             }
821             path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
822         }
823         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason)
824         {
825             if (!(path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED))
826             {
827                 /*
828                  * already unresolved. no need to walk back again
829                  */
830                 return (FIB_NODE_BACK_WALK_CONTINUE);
831             }
832             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
833         }
834         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
835         {
836             /*
837              * The interface this path resolves through has been deleted.
838              * This will leave the path in a permanent drop state. The route
839              * needs to be removed and readded (and hence the path-list deleted)
840              * before it can forward again.
841              */
842             fib_path_unresolve(path);
843             path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
844         }
845         if (FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason)
846         {
847             /*
848              * restack the DPO to pick up the correct DPO sub-type
849              */
850             uword if_is_up;
851             adj_index_t ai;
852
853             if_is_up = vnet_sw_interface_is_admin_up(
854                            vnet_get_main(),
855                            path->attached_next_hop.fp_interface);
856
857             if (if_is_up)
858             {
859                 path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
860             }
861
862             ai = fib_path_attached_next_hop_get_adj(
863                      path,
864                      fib_proto_to_link(path->fp_nh_proto));
865
866             dpo_set(&path->fp_dpo, DPO_ADJACENCY,
867                     fib_proto_to_dpo(path->fp_nh_proto),
868                     ai);
869             adj_unlock(ai);
870
871             if (!if_is_up)
872             {
873                 /*
874                  * If the interface is not up there is no reason to walk
875                  * back to children. if we did they would only evaluate
876                  * that this path is unresolved and hence it would
877                  * not contribute the adjacency - so it would be wasted
878                  * CPU time.
879                  */
880                 return (FIB_NODE_BACK_WALK_CONTINUE);
881             }
882         }
883         if (FIB_NODE_BW_REASON_FLAG_ADJ_DOWN & ctx->fnbw_reason)
884         {
885             if (!(path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED))
886             {
887                 /*
888                  * already unresolved. no need to walk back again
889                  */
890                 return (FIB_NODE_BACK_WALK_CONTINUE);
891             }
892             /*
893              * the adj has gone down. the path is no longer resolved.
894              */
895             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
896         }
897         break;
898     case FIB_PATH_TYPE_ATTACHED:
899         /*
900          * FIXME; this could schedule a lower priority walk, since attached
901          * routes are not usually in ECMP configurations so the backwalk to
902          * the FIB entry does not need to be high priority
903          */
904         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason)
905         {
906             path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
907         }
908         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason)
909         {
910             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
911         }
912         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
913         {
914             fib_path_unresolve(path);
915             path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
916         }
917         break;
918     case FIB_PATH_TYPE_DEAG:
919         /*
920          * FIXME When VRF delete is allowed this will need a poke.
921          */
922     case FIB_PATH_TYPE_SPECIAL:
923     case FIB_PATH_TYPE_RECEIVE:
924     case FIB_PATH_TYPE_EXCLUSIVE:
925         /*
926              * these path types have no parents, so being
927              * walked from one is unexpected.
928          */
929         ASSERT(0);
930         break;
931     }
932
933     /*
934      * propagate the backwalk further to the path-list
935      */
936     fib_path_list_back_walk(path->fp_pl_index, ctx);
937
938     return (FIB_NODE_BACK_WALK_CONTINUE);
939 }
940
941 static void
942 fib_path_memory_show (void)
943 {
944     fib_show_memory_usage("Path",
945                           pool_elts(fib_path_pool),
946                           pool_len(fib_path_pool),
947                           sizeof(fib_path_t));
948 }
949
950 /*
951  * The FIB path's graph node virtual function table
952  */
953 static const fib_node_vft_t fib_path_vft = {
954     .fnv_get = fib_path_get_node,
955     .fnv_last_lock = fib_path_last_lock_gone,
956     .fnv_back_walk = fib_path_back_walk_notify,
957     .fnv_mem_show = fib_path_memory_show,
958 };
959
960 static fib_path_cfg_flags_t
961 fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath)
962 {
963     fib_path_cfg_flags_t cfg_flags = FIB_PATH_CFG_FLAG_NONE;
964
965     if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_HOST)
966         cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_HOST;
967     if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED)
968         cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED;
969     if (rpath->frp_flags & FIB_ROUTE_PATH_LOCAL)
970         cfg_flags |= FIB_PATH_CFG_FLAG_LOCAL;
971     if (rpath->frp_flags & FIB_ROUTE_PATH_ATTACHED)
972         cfg_flags |= FIB_PATH_CFG_FLAG_ATTACHED;
973
974     return (cfg_flags);
975 }
976
977 /*
978  * fib_path_create
979  *
980  * Create and initialise a new path object.
981  * return the index of the path.
982  */
983 fib_node_index_t
984 fib_path_create (fib_node_index_t pl_index,
985                  fib_protocol_t nh_proto,
986                  fib_path_cfg_flags_t flags,
987                  const fib_route_path_t *rpath)
988 {
989     fib_path_t *path;
990
991     pool_get(fib_path_pool, path);
992     memset(path, 0, sizeof(*path));
993
994     fib_node_init(&path->fp_node,
995                   FIB_NODE_TYPE_PATH);
996
997     dpo_reset(&path->fp_dpo);
998     path->fp_pl_index = pl_index;
999     path->fp_nh_proto = nh_proto;
1000     path->fp_via_fib = FIB_NODE_INDEX_INVALID;
1001     path->fp_weight = rpath->frp_weight;
1002     if (0 == path->fp_weight)
1003     {
1004         /*
1005          * a weight of 0 is a meaningless value. We could either reject it, and thus force
1006          * clients to always use 1, or we can accept it and fix it up appropriately.
1007          */
1008         path->fp_weight = 1;
1009     }
1010     path->fp_cfg_flags = flags;
1011     path->fp_cfg_flags |= fib_path_route_flags_to_cfg_flags(rpath);
1012
1013     /*
1014      * deduce the path's type from the parameters and save what is needed.
1015      */
1016     if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_LOCAL)
1017     {
1018         path->fp_type = FIB_PATH_TYPE_RECEIVE;
1019         path->receive.fp_interface = rpath->frp_sw_if_index;
1020         path->receive.fp_addr = rpath->frp_addr;
1021     }
1022     else if (~0 != rpath->frp_sw_if_index)
1023     {
1024         if (ip46_address_is_zero(&rpath->frp_addr))
1025         {
1026             path->fp_type = FIB_PATH_TYPE_ATTACHED;
1027             path->attached.fp_interface = rpath->frp_sw_if_index;
1028         }
1029         else
1030         {
1031             path->fp_type = FIB_PATH_TYPE_ATTACHED_NEXT_HOP;
1032             path->attached_next_hop.fp_interface = rpath->frp_sw_if_index;
1033             path->attached_next_hop.fp_nh = rpath->frp_addr;
1034         }
1035     }
1036     else
1037     {
1038         if (ip46_address_is_zero(&rpath->frp_addr))
1039         {
1040             if (~0 == rpath->frp_fib_index)
1041             {
1042                 path->fp_type = FIB_PATH_TYPE_SPECIAL;
1043             }
1044             else
1045             {
1046                 path->fp_type = FIB_PATH_TYPE_DEAG;
1047                 path->deag.fp_tbl_id = rpath->frp_fib_index;
1048             }           
1049         }
1050         else
1051         {
1052             path->fp_type = FIB_PATH_TYPE_RECURSIVE;
1053             if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
1054             {
1055                 path->recursive.fp_nh.fp_local_label = rpath->frp_local_label;
1056             }
1057             else
1058             {
1059                 path->recursive.fp_nh.fp_ip = rpath->frp_addr;
1060             }
1061             path->recursive.fp_tbl_id = rpath->frp_fib_index;
1062         }
1063     }
1064
1065     FIB_PATH_DBG(path, "create");
1066
1067     return (fib_path_get_index(path));
1068 }
1069
1070 /*
1071  * fib_path_create_special
1072  *
1073  * Create and initialise a new path object.
1074  * return the index of the path.
1075  */
1076 fib_node_index_t
1077 fib_path_create_special (fib_node_index_t pl_index,
1078                          fib_protocol_t nh_proto,
1079                          fib_path_cfg_flags_t flags,
1080                          const dpo_id_t *dpo)
1081 {
1082     fib_path_t *path;
1083
1084     pool_get(fib_path_pool, path);
1085     memset(path, 0, sizeof(*path));
1086
1087     fib_node_init(&path->fp_node,
1088                   FIB_NODE_TYPE_PATH);
1089     dpo_reset(&path->fp_dpo);
1090
1091     path->fp_pl_index = pl_index;
1092     path->fp_weight = 1;
1093     path->fp_nh_proto = nh_proto;
1094     path->fp_via_fib = FIB_NODE_INDEX_INVALID;
1095     path->fp_cfg_flags = flags;
1096
1097     if (FIB_PATH_CFG_FLAG_DROP & flags)
1098     {
1099         path->fp_type = FIB_PATH_TYPE_SPECIAL;
1100     }
1101     else if (FIB_PATH_CFG_FLAG_LOCAL & flags)
1102     {
1103         path->fp_type = FIB_PATH_TYPE_RECEIVE;
1104         path->attached.fp_interface = FIB_NODE_INDEX_INVALID;
1105     }
1106     else
1107     {
1108         path->fp_type = FIB_PATH_TYPE_EXCLUSIVE;
1109         ASSERT(NULL != dpo);
1110         dpo_copy(&path->exclusive.fp_ex_dpo, dpo);
1111     }
1112
1113     return (fib_path_get_index(path));
1114 }
1115
1116 /*
1117  * fib_path_copy
1118  *
1119  * Copy a path. return index of new path.
1120  */
1121 fib_node_index_t
1122 fib_path_copy (fib_node_index_t path_index,
1123                fib_node_index_t path_list_index)
1124 {
1125     fib_path_t *path, *orig_path;
1126
1127     pool_get(fib_path_pool, path);
1128
1129     orig_path = fib_path_get(path_index);
1130     ASSERT(NULL != orig_path);
1131
1132     memcpy(path, orig_path, sizeof(*path));
1133
1134     FIB_PATH_DBG(path, "create-copy:%d", path_index);
1135
1136     /*
1137      * reset the dynamic section
1138      */
1139     fib_node_init(&path->fp_node, FIB_NODE_TYPE_PATH);
1140     path->fp_oper_flags     = FIB_PATH_OPER_FLAG_NONE;
1141     path->fp_pl_index  = path_list_index;
1142     path->fp_via_fib   = FIB_NODE_INDEX_INVALID;
1143     memset(&path->fp_dpo, 0, sizeof(path->fp_dpo));
1144     dpo_reset(&path->fp_dpo);
1145
1146     return (fib_path_get_index(path));
1147 }
1148
1149 /*
1150  * fib_path_destroy
1151  *
1152  * destroy a path that is no longer required
1153  */
1154 void
1155 fib_path_destroy (fib_node_index_t path_index)
1156 {
1157     fib_path_t *path;
1158
1159     path = fib_path_get(path_index);
1160
1161     ASSERT(NULL != path);
1162     FIB_PATH_DBG(path, "destroy");
1163
1164     fib_path_unresolve(path);
1165
1166     fib_node_deinit(&path->fp_node);
1167     pool_put(fib_path_pool, path);
1168 }
1169
1170 /*
1171  * fib_path_hash
1172  *
1173  * compute the hash of the path over its configuration data
1174  */
1175 uword
1176 fib_path_hash (fib_node_index_t path_index)
1177 {
1178     fib_path_t *path;
1179
1180     path = fib_path_get(path_index);
1181
1182     return (hash_memory(STRUCT_MARK_PTR(path, path_hash_start),
1183                         (STRUCT_OFFSET_OF(fib_path_t, path_hash_end) -
1184                          STRUCT_OFFSET_OF(fib_path_t, path_hash_start)),
1185                         0));
1186 }
1187
1188 /*
1189  * fib_path_cmp_i
1190  *
1191  * Compare two paths for equivalence.
1192  */
1193 static int
1194 fib_path_cmp_i (const fib_path_t *path1,
1195                 const fib_path_t *path2)
1196 {
1197     int res;
1198
1199     res = 1;
1200
1201     /*
1202      * paths of different types and protocols are not equal.
1203      * paths that differ only in weight are considered the same path.
1204      */
1205     if (path1->fp_type != path2->fp_type)
1206     {
1207         res = (path1->fp_type - path2->fp_type);
1208     }
1209     else if (path1->fp_nh_proto != path2->fp_nh_proto)
1210     {
1211         res = (path1->fp_nh_proto - path2->fp_nh_proto);
1212     }
1213     else
1214     {
1215         /*
1216          * both paths are of the same type.
1217          * consider each type and its attributes in turn.
1218          */
1219         switch (path1->fp_type)
1220         {
1221         case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1222             res = ip46_address_cmp(&path1->attached_next_hop.fp_nh,
1223                                    &path2->attached_next_hop.fp_nh);
1224             if (0 == res) {
1225                 res = vnet_sw_interface_compare(
1226                           vnet_get_main(),
1227                           path1->attached_next_hop.fp_interface,
1228                           path2->attached_next_hop.fp_interface);
1229             }
1230             break;
1231         case FIB_PATH_TYPE_ATTACHED:
1232             res = vnet_sw_interface_compare(
1233                       vnet_get_main(),
1234                       path1->attached.fp_interface,
1235                       path2->attached.fp_interface);
1236             break;
1237         case FIB_PATH_TYPE_RECURSIVE:
1238             res = ip46_address_cmp(&path1->recursive.fp_nh,
1239                                    &path2->recursive.fp_nh);
1240  
1241             if (0 == res)
1242             {
1243                 res = (path1->recursive.fp_tbl_id - path2->recursive.fp_tbl_id);
1244             }
1245             break;
1246         case FIB_PATH_TYPE_DEAG:
1247             res = (path1->deag.fp_tbl_id - path2->deag.fp_tbl_id);
1248             break;
1249         case FIB_PATH_TYPE_SPECIAL:
1250         case FIB_PATH_TYPE_RECEIVE:
1251         case FIB_PATH_TYPE_EXCLUSIVE:
1252             res = 0;
1253             break;
1254         }
1255     }
1256     return (res);
1257 }
1258
1259 /*
1260  * fib_path_cmp_for_sort
1261  *
1262  * Compare two paths for equivalence. Used during path sorting.
1263  * As usual 0 means equal.
1264  */
1265 int
1266 fib_path_cmp_for_sort (void * v1,
1267                        void * v2)
1268 {
1269     fib_node_index_t *pi1 = v1, *pi2 = v2;
1270     fib_path_t *path1, *path2;
1271
1272     path1 = fib_path_get(*pi1);
1273     path2 = fib_path_get(*pi2);
1274
1275     return (fib_path_cmp_i(path1, path2));
1276 }
1277
1278 /*
1279  * fib_path_cmp
1280  *
1281  * Compare two paths for equivalence.
1282  */
1283 int
1284 fib_path_cmp (fib_node_index_t pi1,
1285               fib_node_index_t pi2)
1286 {
1287     fib_path_t *path1, *path2;
1288
1289     path1 = fib_path_get(pi1);
1290     path2 = fib_path_get(pi2);
1291
1292     return (fib_path_cmp_i(path1, path2));
1293 }
1294
1295 int
1296 fib_path_cmp_w_route_path (fib_node_index_t path_index,
1297                            const fib_route_path_t *rpath)
1298 {
1299     fib_path_t *path;
1300     int res;
1301
1302     path = fib_path_get(path_index);
1303
1304     res = 1;
1305
1306     if (path->fp_weight != rpath->frp_weight)
1307     {
1308         res = (path->fp_weight - rpath->frp_weight);
1309     }
1310     else
1311     {
1312         /*
1313          * compare the path against the route-path on a per-type basis;
1314          * consider each type and its attributes in turn.
1315          */
1316         switch (path->fp_type)
1317         {
1318         case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1319             res = ip46_address_cmp(&path->attached_next_hop.fp_nh,
1320                                    &rpath->frp_addr);
1321             if (0 == res)
1322             {
1323                 res = vnet_sw_interface_compare(
1324                           vnet_get_main(),
1325                           path->attached_next_hop.fp_interface,
1326                           rpath->frp_sw_if_index);
1327             }
1328             break;
1329         case FIB_PATH_TYPE_ATTACHED:
1330             res = vnet_sw_interface_compare(
1331                       vnet_get_main(),
1332                       path->attached.fp_interface,
1333                       rpath->frp_sw_if_index);
1334             break;
1335         case FIB_PATH_TYPE_RECURSIVE:
1336             if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
1337             {
1338                 res = path->recursive.fp_nh.fp_local_label - rpath->frp_local_label;
1339             }
1340             else
1341             {
1342                 res = ip46_address_cmp(&path->recursive.fp_nh.fp_ip,
1343                                        &rpath->frp_addr);
1344             }
1345
1346             if (0 == res)
1347             {
1348                 res = (path->recursive.fp_tbl_id - rpath->frp_fib_index);
1349             }
1350             break;
1351         case FIB_PATH_TYPE_DEAG:
1352             res = (path->deag.fp_tbl_id - rpath->frp_fib_index);
1353             break;
1354         case FIB_PATH_TYPE_SPECIAL:
1355         case FIB_PATH_TYPE_RECEIVE:
1356         case FIB_PATH_TYPE_EXCLUSIVE:
1357             res = 0;
1358             break;
1359         }
1360     }
1361     return (res);
1362 }
1363
1364 /*
1365  * fib_path_recursive_loop_detect
1366  *
1367  * A forward walk of the FIB object graph to detect a cycle/loop. This
1368  * walk is initiated when an entry is linking to a new path list or from an old.
1369  * The entry vector passed contains all the FIB entries that are children of this
1370  * path (it is all the entries encountered on the walk so far). If this vector
1371  * contains the entry this path resolves via, then a loop is about to form.
1372  * The loop must be allowed to form, since we need the dependencies in place
1373  * so that we can track when the loop breaks.
1374  * However, we MUST not produce a loop in the forwarding graph (else packets
1375  * would loop around the switch path until the loop breaks), so we mark recursive
1376  * paths as looped so that they do not contribute forwarding information.
1377  * By marking the path as looped, an entry such as:
1378  *    X/Y
1379  *     via a.a.a.a (looped)
1380  *     via b.b.b.b (not looped)
1381  * can still forward using the info provided by b.b.b.b only
1382  * can still forward using the info provided by b.b.b.b only
 */
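/*
 * Editor's sketch of a configuration that forms such a loop
 * (CLI syntax approximate):
 *     ip route 1.1.1.1/32 via 2.2.2.2
 *     ip route 2.2.2.2/32 via 1.1.1.1
 * each recursive path resolves via the other's entry; the walk below finds
 * the cycle and sets FIB_PATH_OPER_FLAG_RECURSIVE_LOOP on the paths so they
 * contribute the drop DPO rather than a forwarding loop.
 */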
1383 int
1384 fib_path_recursive_loop_detect (fib_node_index_t path_index,
1385                                 fib_node_index_t **entry_indicies)
1386 {
1387     fib_path_t *path;
1388
1389     path = fib_path_get(path_index);
1390
1391     /*
1392      * the forced drop path is never looped, cos it is never resolved.
1393      */
1394     if (fib_path_is_permanent_drop(path))
1395     {
1396         return (0);
1397     }
1398
1399     switch (path->fp_type)
1400     {
1401     case FIB_PATH_TYPE_RECURSIVE:
1402     {
1403         fib_node_index_t *entry_index, *entries;
1404         int looped = 0;
1405         entries = *entry_indicies;
1406
1407         vec_foreach(entry_index, entries) {
1408             if (*entry_index == path->fp_via_fib)
1409             {
1410                 /*
1411                  * the entry that is about to link to this path-list (or
1412                  * one of this path-list's children) is the same entry that
1413                  * this recursive path resolves through. this is a cycle.
1414                  * abort the walk.
1415                  */
1416                 looped = 1;
1417                 break;
1418             }
1419         }
1420
1421         if (looped)
1422         {
1423             FIB_PATH_DBG(path, "recursive loop formed");
1424             path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
1425
1426             dpo_copy(&path->fp_dpo,
1427                     drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
1428         }
1429         else
1430         {
1431             /*
1432              * no loop here yet. keep forward walking the graph.
1433              */     
1434             if (fib_entry_recursive_loop_detect(path->fp_via_fib, entry_indicies))
1435             {
1436                 FIB_PATH_DBG(path, "recursive loop formed");
1437                 path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
1438             }
1439             else
1440             {
1441                 FIB_PATH_DBG(path, "recursive loop cleared");
1442                 path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
1443             }
1444         }
1445         break;
1446     }
1447     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1448     case FIB_PATH_TYPE_ATTACHED:
1449     case FIB_PATH_TYPE_SPECIAL:
1450     case FIB_PATH_TYPE_DEAG:
1451     case FIB_PATH_TYPE_RECEIVE:
1452     case FIB_PATH_TYPE_EXCLUSIVE:
1453         /*
1454          * these path types cannot be part of a loop, since they are the leaves
1455          * of the graph.
1456          */
1457         break;
1458     }
1459
1460     return (fib_path_is_looped(path_index));
1461 }
1462
1463 int
1464 fib_path_resolve (fib_node_index_t path_index)
1465 {
1466     fib_path_t *path;
1467
1468     path = fib_path_get(path_index);
1469
1470     /*
1471      * hope for the best.
1472      */
1473     path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
1474
1475     /*
1476      * the forced drop path resolves via the drop adj
1477      */
1478     if (fib_path_is_permanent_drop(path))
1479     {
1480         dpo_copy(&path->fp_dpo,
1481                  drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
1482         path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
1483         return (fib_path_is_resolved(path_index));
1484     }
1485
1486     switch (path->fp_type)
1487     {
1488     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1489         fib_path_attached_next_hop_set(path);
1490         break;
1491     case FIB_PATH_TYPE_ATTACHED:
1492         /*
1493          * path->attached.fp_interface
1494          */
1495         if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
1496                                            path->attached.fp_interface))
1497         {
1498             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
1499         }
1500         if (vnet_sw_interface_is_p2p(vnet_get_main(),
1501                                      path->attached.fp_interface))
1502         {
1503             /*
1504              * point-2-point interfaces do not require a glean, since
1505              * there is nothing to ARP. Install a rewrite/nbr adj instead
1506              */
1507             dpo_set(&path->fp_dpo,
1508                     DPO_ADJACENCY,
1509                     fib_proto_to_dpo(path->fp_nh_proto),
1510                     adj_nbr_add_or_lock(
1511                         path->fp_nh_proto,
1512                         fib_proto_to_link(path->fp_nh_proto),
1513                         &zero_addr,
1514                         path->attached.fp_interface));
1515         }
1516         else
1517         {
1518             dpo_set(&path->fp_dpo,
1519                     DPO_ADJACENCY_GLEAN,
1520                     fib_proto_to_dpo(path->fp_nh_proto),
1521                     adj_glean_add_or_lock(path->fp_nh_proto,
1522                                           path->attached.fp_interface,
1523                                           NULL));
1524         }
1525         /*
1526          * become a child of the adjacency so we receive updates
1527          * when the interface state changes
1528          */
1529         path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
1530                                          FIB_NODE_TYPE_PATH,
1531                                          fib_path_get_index(path));
1532
1533         break;
1534     case FIB_PATH_TYPE_RECURSIVE:
1535     {
1536         /*
1537          * Create a RR source entry in the table for the address
1538          * that this path recurses through.
1539          * This resolve action is recursive, hence we may create
1540          * more paths in the process. more creates can realloc the pool
1541          * and hence move this path.
1542          */
1543         fib_node_index_t fei;
1544         fib_prefix_t pfx;
1545
1546         ASSERT(FIB_NODE_INDEX_INVALID == path->fp_via_fib);
1547
1548         if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
1549         {
1550             fib_prefix_from_mpls_label(path->recursive.fp_nh.fp_local_label, &pfx);
1551         }
1552         else
1553         {
1554             fib_prefix_from_ip46_addr(&path->recursive.fp_nh.fp_ip, &pfx);
1555         }
1556
1557         fei = fib_table_entry_special_add(path->recursive.fp_tbl_id,
1558                                           &pfx,
1559                                           FIB_SOURCE_RR,
1560                                           FIB_ENTRY_FLAG_NONE,
1561                                           ADJ_INDEX_INVALID);
1562
1563         path = fib_path_get(path_index);
1564         path->fp_via_fib = fei;
1565
1566         /*
1567          * become a dependent child of the entry so the path is 
1568          * informed when the forwarding for the entry changes.
1569          */
1570         path->fp_sibling = fib_entry_child_add(path->fp_via_fib,
1571                                                FIB_NODE_TYPE_PATH,
1572                                                fib_path_get_index(path));
1573
1574         /*
1575          * create and configure the IP DPO
1576          */
1577         fib_path_recursive_adj_update(
1578             path,
1579             fib_path_proto_to_chain_type(path->fp_nh_proto),
1580             &path->fp_dpo);
1581
1582         break;
1583     }
1584     case FIB_PATH_TYPE_SPECIAL:
1585         /*
1586          * Resolve via the drop
1587          */
1588         dpo_copy(&path->fp_dpo,
1589                  drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
1590         break;
1591     case FIB_PATH_TYPE_DEAG:
1592         /*
1593          * Resolve via a lookup DPO.
1594          * FIXME. control plane should add routes with a table ID
1595          */
1596         lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id,
1597                                           fib_proto_to_dpo(path->fp_nh_proto),
1598                                           LOOKUP_INPUT_DST_ADDR,
1599                                           LOOKUP_TABLE_FROM_CONFIG,
1600                                           &path->fp_dpo);
1601         break;
1602     case FIB_PATH_TYPE_RECEIVE:
1603         /*
1604          * Resolve via a receive DPO.
1605          */
1606         receive_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto),
1607                                 path->receive.fp_interface,
1608                                 &path->receive.fp_addr,
1609                                 &path->fp_dpo);
1610         break;
1611     case FIB_PATH_TYPE_EXCLUSIVE:
1612         /*
1613          * Resolve via the user provided DPO
1614          */
1615         dpo_copy(&path->fp_dpo, &path->exclusive.fp_ex_dpo);
1616         break;
1617     }
1618
1619     return (fib_path_is_resolved(path_index));
1620 }
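/*
 * Editor's note on the recursive case above: the 'path' pointer is
 * deliberately re-fetched with fib_path_get(path_index) after
 * fib_table_entry_special_add(), because that add can recurse into creating
 * further paths and so may realloc the path pool, invalidating any pointer
 * held across the call.
 */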
1621
1622 u32
1623 fib_path_get_resolving_interface (fib_node_index_t path_index)
1624 {
1625     fib_path_t *path;
1626
1627     path = fib_path_get(path_index);
1628
1629     switch (path->fp_type)
1630     {
1631     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1632         return (path->attached_next_hop.fp_interface);
1633     case FIB_PATH_TYPE_ATTACHED:
1634         return (path->attached.fp_interface);
1635     case FIB_PATH_TYPE_RECEIVE:
1636         return (path->receive.fp_interface);
1637     case FIB_PATH_TYPE_RECURSIVE:
1638         return (fib_entry_get_resolving_interface(path->fp_via_fib));    
1639     case FIB_PATH_TYPE_SPECIAL:
1640     case FIB_PATH_TYPE_DEAG:
1641     case FIB_PATH_TYPE_EXCLUSIVE:
1642         break;
1643     }
1644     return (~0);
1645 }
1646
1647 adj_index_t
1648 fib_path_get_adj (fib_node_index_t path_index)
1649 {
1650     fib_path_t *path;
1651
1652     path = fib_path_get(path_index);
1653
1654     ASSERT(dpo_is_adj(&path->fp_dpo));
1655     if (dpo_is_adj(&path->fp_dpo))
1656     {
1657         return (path->fp_dpo.dpoi_index);
1658     }
1659     return (ADJ_INDEX_INVALID);
1660 }
1661
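/**
 * @brief Return the weight configured on the path.
 */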
1662 int
1663 fib_path_get_weight (fib_node_index_t path_index)
1664 {
1665     fib_path_t *path;
1666
1667     path = fib_path_get(path_index);
1668
1669     ASSERT(path);
1670
1671     return (path->fp_weight);
1672 }
1673
1674 /**
1675  * @brief Contribute the path's adjacency to the list passed.
1676  * By calling this function over all paths, recursively, a child
1677  * can construct its full set of forwarding adjacencies, and hence its
1678  * uRPF list.
1679  */
1680 void
1681 fib_path_contribute_urpf (fib_node_index_t path_index,
1682                           index_t urpf)
1683 {
1684     fib_path_t *path;
1685
1686     if (!fib_path_is_resolved(path_index))
1687         return;
1688
1689     path = fib_path_get(path_index);
1690
1691     switch (path->fp_type)
1692     {
1693     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1694         fib_urpf_list_append(urpf, path->attached_next_hop.fp_interface);
1695         break;
1696
1697     case FIB_PATH_TYPE_ATTACHED:
1698         fib_urpf_list_append(urpf, path->attached.fp_interface);
1699         break;
1700
1701     case FIB_PATH_TYPE_RECURSIVE:
1702         fib_entry_contribute_urpf(path->fp_via_fib, urpf);
1703         break;
1704
1705     case FIB_PATH_TYPE_EXCLUSIVE:
1706     case FIB_PATH_TYPE_SPECIAL:
1707         /*
1708          * these path types may link to an adj, if that's what
1709          * the client gave
1710          */
1711         if (dpo_is_adj(&path->fp_dpo))
1712         {
1713             ip_adjacency_t *adj;
1714
1715             adj = adj_get(path->fp_dpo.dpoi_index);
1716
1717             fib_urpf_list_append(urpf, adj->rewrite_header.sw_if_index);
1718         }
1719         break;
1720
1721     case FIB_PATH_TYPE_DEAG:
1722     case FIB_PATH_TYPE_RECEIVE:
1723         /*
1724          * these path types don't link to an adj
1725          */
1726         break;
1727     }
1728 }
1729
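/**
 * @brief Contribute the path's forwarding to the DPO passed.
 * If the requested chain type matches the path's native protocol, the
 * DPO cached at resolve time is copied; otherwise a DPO suited to the
 * chain type is constructed, e.g. a link-type specific adjacency or an
 * MPLS lookup.
 */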
1730 void
1731 fib_path_contribute_forwarding (fib_node_index_t path_index,
1732                                 fib_forward_chain_type_t fct,
1733                                 dpo_id_t *dpo)
1734 {
1735     fib_path_t *path;
1736
1737     path = fib_path_get(path_index);
1738
1739     ASSERT(path);
1740     ASSERT(FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct);
1741
1742     FIB_PATH_DBG(path, "contribute");
1743
1744     /*
1745      * The DPO stored in the path was created when the path was resolved.
1746      * It therefore represents the path's 'native' protocol, i.e. IP.
1747      * For all other chain types we need to construct something else.
1748      */
1749     if (fib_path_proto_to_chain_type(path->fp_nh_proto) == fct)
1750     {
1751         dpo_copy(dpo, &path->fp_dpo);
1752     }
1753     else
1754     {
1755         switch (path->fp_type)
1756         {
1757         case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1758             switch (fct)
1759             {
1760             case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
1761             case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
1762             case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
1763             case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
1764             case FIB_FORW_CHAIN_TYPE_ETHERNET:
1765             case FIB_FORW_CHAIN_TYPE_NSH:
1766             {
1767                 adj_index_t ai;
1768
1769                 /*
1770                  * get an appropriate link-type adj.
1771                  */
1772                 ai = fib_path_attached_next_hop_get_adj(
1773                          path,
1774                          fib_forw_chain_type_to_link_type(fct));
1775                 dpo_set(dpo, DPO_ADJACENCY,
1776                         fib_forw_chain_type_to_dpo_proto(fct), ai);
1777                 adj_unlock(ai);
1778
1779                 break;
1780             }
1781             case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
1782             case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
1783                 break;
1784             }
1785             break;
1786         case FIB_PATH_TYPE_RECURSIVE:
1787             switch (fct)
1788             {
1789             case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
1790             case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
1791             case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
1792             case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
1793                 fib_path_recursive_adj_update(path, fct, dpo);
1794                 break;
1795             case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
1796             case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
1797             case FIB_FORW_CHAIN_TYPE_ETHERNET:
1798             case FIB_FORW_CHAIN_TYPE_NSH:
1799                 ASSERT(0);
1800                 break;
1801             }
1802             break;
1803         case FIB_PATH_TYPE_DEAG:
1804             switch (fct)
1805             {
1806             case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
1807                 lookup_dpo_add_or_lock_w_table_id(MPLS_FIB_DEFAULT_TABLE_ID,
1808                                                   DPO_PROTO_MPLS,
1809                                                   LOOKUP_INPUT_DST_ADDR,
1810                                                   LOOKUP_TABLE_FROM_CONFIG,
1811                                                   dpo);
1812                 break;
1813             case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
1814             case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
1815             case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
1816                 dpo_copy(dpo, &path->fp_dpo);
1817                 break;
1818             case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
1819             case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
1820             case FIB_FORW_CHAIN_TYPE_ETHERNET:
1821             case FIB_FORW_CHAIN_TYPE_NSH:
1822                 ASSERT(0);
1823                 break;
1824             }
1825             break;
1826         case FIB_PATH_TYPE_EXCLUSIVE:
1827             dpo_copy(dpo, &path->exclusive.fp_ex_dpo);
1828             break;
1829         case FIB_PATH_TYPE_ATTACHED:
1830             switch (fct)
1831             {
1832             case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
1833             case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
1834             case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
1835             case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
1836             case FIB_FORW_CHAIN_TYPE_ETHERNET:
1837             case FIB_FORW_CHAIN_TYPE_NSH:
1838                 break;
1839             case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
1840             case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
1841                 {
1842                     adj_index_t ai;
1843
1844                     /*
1845                      * Create the adj needed for sending IP multicast traffic
1846                      */
1847                     ai = adj_mcast_add_or_lock(path->fp_nh_proto,
1848                                                fib_forw_chain_type_to_link_type(fct),
1849                                                path->attached.fp_interface);
1850                     dpo_set(dpo, DPO_ADJACENCY_MCAST,
1851                             fib_forw_chain_type_to_dpo_proto(fct),
1852                             ai);
1853                     adj_unlock(ai);
1854                 }
1855                 break;
1856             }
1857             break;
1858         case FIB_PATH_TYPE_RECEIVE:
1859         case FIB_PATH_TYPE_SPECIAL:
1860             dpo_copy(dpo, &path->fp_dpo);
1861             break;
1862         }
1863     }
1864 }
1865
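/**
 * @brief If the path is resolved, append a choice for it (weight, index
 * and forwarding DPO) to the load-balance path vector passed, and return
 * the (possibly reallocated) vector.
 */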
1866 load_balance_path_t *
1867 fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index,
1868                                        fib_forward_chain_type_t fct,
1869                                        load_balance_path_t *hash_key)
1870 {
1871     load_balance_path_t *mnh;
1872     fib_path_t *path;
1873
1874     path = fib_path_get(path_index);
1875
1876     ASSERT(path);
1877
1878     if (fib_path_is_resolved(path_index))
1879     {
1880         vec_add2(hash_key, mnh, 1);
1881
1882         mnh->path_weight = path->fp_weight;
1883         mnh->path_index = path_index;
1884         fib_path_contribute_forwarding(path_index, fct, &mnh->path_dpo);
1885     }
1886
1887     return (hash_key);
1888 }
1889
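/**
 * @brief Return 1 if the path is of type recursive, 0 otherwise.
 */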
1890 int
1891 fib_path_is_recursive (fib_node_index_t path_index)
1892 {
1893     fib_path_t *path;
1894
1895     path = fib_path_get(path_index);
1896
1897     return (FIB_PATH_TYPE_RECURSIVE == path->fp_type);
1898 }
1899
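/**
 * @brief Return 1 if the path uses a client-provided (exclusive) DPO,
 * 0 otherwise.
 */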
1900 int
1901 fib_path_is_exclusive (fib_node_index_t path_index)
1902 {
1903     fib_path_t *path;
1904
1905     path = fib_path_get(path_index);
1906
1907     return (FIB_PATH_TYPE_EXCLUSIVE == path->fp_type);
1908 }
1909
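/**
 * @brief Return 1 if the path is a deaggregation (lookup) path, 0 otherwise.
 */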
1910 int
1911 fib_path_is_deag (fib_node_index_t path_index)
1912 {
1913     fib_path_t *path;
1914
1915     path = fib_path_get(path_index);
1916
1917     return (FIB_PATH_TYPE_DEAG == path->fp_type);
1918 }
1919
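/**
 * @brief Return 1 if the path is resolved: it has a valid DPO, the
 * RESOLVED operational flag is set, and it is neither part of a
 * recursive loop nor a permanent drop.
 */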
1920 int
1921 fib_path_is_resolved (fib_node_index_t path_index)
1922 {
1923     fib_path_t *path;
1924
1925     path = fib_path_get(path_index);
1926
1927     return (dpo_id_is_valid(&path->fp_dpo) &&
1928             (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED) &&
1929             !fib_path_is_looped(path_index) &&
1930             !fib_path_is_permanent_drop(path));
1931 }
1932
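/**
 * @brief Return non-zero if the path has been flagged as part of a
 * recursive loop.
 */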
1933 int
1934 fib_path_is_looped (fib_node_index_t path_index)
1935 {
1936     fib_path_t *path;
1937
1938     path = fib_path_get(path_index);
1939
1940     return (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP);
1941 }
1942
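/**
 * @brief Append an encoding of the path to the fib_route_path_encode_t
 * vector passed via the opaque context. Returns 0 if the path does not
 * exist, 1 otherwise.
 */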
1943 int
1944 fib_path_encode (fib_node_index_t path_list_index,
1945                  fib_node_index_t path_index,
1946                  void *ctx)
1947 {
1948     fib_route_path_encode_t **api_rpaths = ctx;
1949     fib_route_path_encode_t *api_rpath;
1950     fib_path_t *path;
1951
1952     path = fib_path_get(path_index);
1953     if (!path)
1954       return (0);
1955     vec_add2(*api_rpaths, api_rpath, 1);
1956     api_rpath->rpath.frp_weight = path->fp_weight;
1957     api_rpath->rpath.frp_proto = path->fp_nh_proto;
1958     api_rpath->rpath.frp_sw_if_index = ~0;
1959     api_rpath->dpo = path->exclusive.fp_ex_dpo;
1960     switch (path->fp_type)
1961       {
1962       case FIB_PATH_TYPE_RECEIVE:
1963         api_rpath->rpath.frp_addr = path->receive.fp_addr;
1964         api_rpath->rpath.frp_sw_if_index = path->receive.fp_interface;
1965         break;
1966       case FIB_PATH_TYPE_ATTACHED:
1967         api_rpath->rpath.frp_sw_if_index = path->attached.fp_interface;
1968         break;
1969       case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1970         api_rpath->rpath.frp_sw_if_index = path->attached_next_hop.fp_interface;
1971         api_rpath->rpath.frp_addr = path->attached_next_hop.fp_nh;
1972         break;
1973       case FIB_PATH_TYPE_SPECIAL:
1974         break;
1975       case FIB_PATH_TYPE_DEAG:
1976         break;
1977       case FIB_PATH_TYPE_RECURSIVE:
1978         api_rpath->rpath.frp_addr = path->recursive.fp_nh.fp_ip;
1979         break;
1980       default:
1981         break;
1982       }
1983     return (1);
1984 }
1985
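/**
 * @brief Return the protocol of the path's next-hop.
 */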
1986 fib_protocol_t
1987 fib_path_get_proto (fib_node_index_t path_index)
1988 {
1989     fib_path_t *path;
1990
1991     path = fib_path_get(path_index);
1992
1993     return (path->fp_nh_proto);
1994 }
1995
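/**
 * @brief Module initialisation; register the path node type with the
 * FIB node graph.
 */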
1996 void
1997 fib_path_module_init (void)
1998 {
1999     fib_node_register_type (FIB_NODE_TYPE_PATH, &fib_path_vft);
2000 }
2001
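/**
 * @brief CLI handler for 'show fib paths'. With no argument every path
 * in the pool is listed; given a path index, e.g. 'show fib paths 12',
 * that path is shown in detail along with its children.
 */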
2002 static clib_error_t *
2003 show_fib_path_command (vlib_main_t * vm,
2004                         unformat_input_t * input,
2005                         vlib_cli_command_t * cmd)
2006 {
2007     fib_node_index_t pi;
2008     fib_path_t *path;
2009
2010     if (unformat (input, "%d", &pi))
2011     {
2012         /*
2013          * show one in detail
2014          */
2015         if (!pool_is_free_index(fib_path_pool, pi))
2016         {
2017             path = fib_path_get(pi);
2018             u8 *s = fib_path_format(pi, NULL);
2019             s = format(s, "children:");
2020             s = fib_node_children_format(path->fp_node.fn_children, s);
2021             vlib_cli_output (vm, "%s", s);
2022             vec_free(s);
2023         }
2024         else
2025         {
2026             vlib_cli_output (vm, "path %d invalid", pi);
2027         }
2028     }
2029     else
2030     {
2031         vlib_cli_output (vm, "FIB Paths");
2032         pool_foreach(path, fib_path_pool,
2033         ({
2034             vlib_cli_output (vm, "%U", format_fib_path, path);
2035         }));
2036     }
2037
2038     return (NULL);
2039 }
2040
2041 VLIB_CLI_COMMAND (show_fib_path, static) = {
2042   .path = "show fib paths",
2043   .function = show_fib_path_command,
2044   .short_help = "show fib paths",
2045 };