FIB path (fib_path)
[vpp.git] / src / vnet / fib / fib_path.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/ip/format.h>
19 #include <vnet/ip/ip.h>
20 #include <vnet/dpo/drop_dpo.h>
21 #include <vnet/dpo/receive_dpo.h>
22 #include <vnet/dpo/load_balance_map.h>
23 #include <vnet/dpo/lookup_dpo.h>
24
25 #include <vnet/adj/adj.h>
26 #include <vnet/adj/adj_mcast.h>
27
28 #include <vnet/fib/fib_path.h>
29 #include <vnet/fib/fib_node.h>
30 #include <vnet/fib/fib_table.h>
31 #include <vnet/fib/fib_entry.h>
32 #include <vnet/fib/fib_path_list.h>
33 #include <vnet/fib/fib_internal.h>
34 #include <vnet/fib/fib_urpf_list.h>
35
/**
 * Enumeration of path types
 */
typedef enum fib_path_type_t_ {
    /**
     * Marker. Add new types after this one.
     */
    FIB_PATH_TYPE_FIRST = 0,
    /**
     * Attached-nexthop. An interface and a nexthop are known.
     */
    FIB_PATH_TYPE_ATTACHED_NEXT_HOP = FIB_PATH_TYPE_FIRST,
    /**
     * attached. Only the interface is known.
     */
    FIB_PATH_TYPE_ATTACHED,
    /**
     * recursive. Only the next-hop is known.
     */
    FIB_PATH_TYPE_RECURSIVE,
    /**
     * special. nothing is known. so we drop.
     */
    FIB_PATH_TYPE_SPECIAL,
    /**
     * exclusive. user provided adj.
     */
    FIB_PATH_TYPE_EXCLUSIVE,
    /**
     * deag. Link to a lookup adj in the next table
     */
    FIB_PATH_TYPE_DEAG,
    /**
     * receive. it's for-us.
     */
    FIB_PATH_TYPE_RECEIVE,
    /**
     * Marker. Add new types before this one, then update it.
     */
    FIB_PATH_TYPE_LAST = FIB_PATH_TYPE_RECEIVE,
} __attribute__ ((packed)) fib_path_type_t;
77
/**
 * The maximum number of path_types
 */
#define FIB_PATH_TYPE_MAX (FIB_PATH_TYPE_LAST + 1)

/* Human-readable names, indexed by fib_path_type_t, used by the format
 * functions below. Must stay in sync with the enum. */
#define FIB_PATH_TYPES {                                        \
    [FIB_PATH_TYPE_ATTACHED_NEXT_HOP] = "attached-nexthop",     \
    [FIB_PATH_TYPE_ATTACHED]          = "attached",             \
    [FIB_PATH_TYPE_RECURSIVE]         = "recursive",            \
    [FIB_PATH_TYPE_SPECIAL]           = "special",              \
    [FIB_PATH_TYPE_EXCLUSIVE]         = "exclusive",            \
    [FIB_PATH_TYPE_DEAG]              = "deag",                 \
    [FIB_PATH_TYPE_RECEIVE]           = "receive",              \
}

/* Iterate _item over all path types, first to last inclusive. */
#define FOR_EACH_FIB_PATH_TYPE(_item) \
    for (_item = FIB_PATH_TYPE_FIRST; _item <= FIB_PATH_TYPE_LAST; _item++)
95
/**
 * Enumeration of path operational (i.e. derived) attributes
 */
typedef enum fib_path_oper_attribute_t_ {
    /**
     * Marker. Add new types after this one.
     */
    FIB_PATH_OPER_ATTRIBUTE_FIRST = 0,
    /**
     * The path forms part of a recursive loop.
     */
    FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP = FIB_PATH_OPER_ATTRIBUTE_FIRST,
    /**
     * The path is resolved
     */
    FIB_PATH_OPER_ATTRIBUTE_RESOLVED,
    /**
     * The path has become a permanent drop.
     */
    FIB_PATH_OPER_ATTRIBUTE_DROP,
    /**
     * Marker. Add new types before this one, then update it.
     */
    FIB_PATH_OPER_ATTRIBUTE_LAST = FIB_PATH_OPER_ATTRIBUTE_DROP,
} __attribute__ ((packed)) fib_path_oper_attribute_t;

/**
 * The maximum number of path operational attributes
 */
#define FIB_PATH_OPER_ATTRIBUTE_MAX (FIB_PATH_OPER_ATTRIBUTE_LAST + 1)

/* Human-readable names, indexed by fib_path_oper_attribute_t. */
#define FIB_PATH_OPER_ATTRIBUTES {                                      \
    [FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP] = "recursive-loop",        \
    [FIB_PATH_OPER_ATTRIBUTE_RESOLVED]       = "resolved",              \
    [FIB_PATH_OPER_ATTRIBUTE_DROP]           = "drop",                  \
}

/* Iterate _item over all operational attributes, first to last inclusive. */
#define FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(_item) \
    for (_item = FIB_PATH_OPER_ATTRIBUTE_FIRST; \
         _item <= FIB_PATH_OPER_ATTRIBUTE_LAST; \
         _item++)

/**
 * Path flags from the attributes; each flag is (1 << attribute).
 */
typedef enum fib_path_oper_flags_t_ {
    FIB_PATH_OPER_FLAG_NONE = 0,
    FIB_PATH_OPER_FLAG_RECURSIVE_LOOP = (1 << FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP),
    FIB_PATH_OPER_FLAG_DROP = (1 << FIB_PATH_OPER_ATTRIBUTE_DROP),
    FIB_PATH_OPER_FLAG_RESOLVED = (1 << FIB_PATH_OPER_ATTRIBUTE_RESOLVED),
} __attribute__ ((packed)) fib_path_oper_flags_t;
147
/**
 * A FIB path
 *
 * The region between the STRUCT_MARKs is the key used to hash and
 * compare paths; everything after path_hash_end is derived state.
 */
typedef struct fib_path_t_ {
    /**
     * A path is a node in the FIB graph.
     */
    fib_node_t fp_node;

    /**
     * The index of the path-list to which this path belongs
     */
    u32 fp_pl_index;

    /**
     * This marks the start of the memory area used to hash
     * the path
     */
    STRUCT_MARK(path_hash_start);

    /**
     * Configuration Flags
     */
    fib_path_cfg_flags_t fp_cfg_flags;

    /**
     * The type of the path. This is the selector for the union
     */
    fib_path_type_t fp_type;

    /**
     * The protocol of the next-hop, i.e. the address family of the
     * next-hop's address. We can't derive this from the address itself
     * since the address can be all zeros
     */
    fib_protocol_t fp_nh_proto;

    /**
     * UCMP [unnormalised] weight
     */
    u32 fp_weight;

    /**
     * per-type union of the data required to resolve the path
     */
    union {
        struct {
            /**
             * The next-hop
             */
            ip46_address_t fp_nh;
            /**
             * The interface
             */
            u32 fp_interface;
        } attached_next_hop;
        struct {
            /**
             * The interface
             */
            u32 fp_interface;
        } attached;
        struct {
            union
            {
                /**
                 * The next-hop
                 */
                ip46_address_t fp_ip;
                /**
                 * The local label to resolve through.
                 */
                mpls_label_t fp_local_label;
            } fp_nh;
            /**
             * The FIB table index in which to find the next-hop.
             * This needs to be fixed. We should lookup the adjacencies in
             * a separate table of adjacencies, rather than from the FIB.
             * Two reasons I can think of:
             *   - consider:
             *       int ip addr Gig0 10.0.0.1/24
             *       ip route 10.0.0.2/32 via Gig1 192.168.1.2
             *       ip route 1.1.1.1/32 via Gig0 10.0.0.2
             *     this is perfectly valid.
             *     Packets addressed to 10.0.0.2 should be sent via Gig1.
             *     Packets addressed to 1.1.1.1 should be sent via Gig0.
             *    when we perform the adj resolution from the FIB for the path
             *    "via Gig0 10.0.0.2" the lookup will result in the route via Gig1
             *    and so we will pick up the adj via Gig1 - which was not what the
             *    operator wanted.
             *  - we can only return link-type IPv4 and so not the link-type MPLS.
             *    more on this in a later commit.
             *
             * The table ID should only belong to a recursive path and indicate
             * which FIB should be used to resolve the next-hop.
             */
            fib_node_index_t fp_tbl_id;
        } recursive;
        struct {
            /**
             * The FIB index in which to perform the next lookup
             */
            fib_node_index_t fp_tbl_id;
        } deag;
        struct {
            /* special paths carry no per-type data - they always drop */
        } special;
        struct {
            /**
             * The user provided 'exclusive' DPO
             */
            dpo_id_t fp_ex_dpo;
        } exclusive;
        struct {
            /**
             * The interface on which the local address is configured
             */
            u32 fp_interface;
            /**
             * The next-hop
             */
            ip46_address_t fp_addr;
        } receive;
    };
    STRUCT_MARK(path_hash_end);

    /**
     * Members in this last section represent information that is
     * derived during resolution. It should not be copied to new paths
     * nor compared.
     */

    /**
     * Operational Flags
     */
    fib_path_oper_flags_t fp_oper_flags;

    /**
     * the resolving via fib. not part of the union, since it is not part
     * of the path's hash.
     */
    fib_node_index_t fp_via_fib;

    /**
     * The Data-path objects through which this path resolves for IP.
     */
    dpo_id_t fp_dpo;

    /**
     * the index of this path in the parent's child list.
     */
    u32 fp_sibling;
} fib_path_t;
300
/*
 * Array of strings/names for the path types and attributes
 */
static const char *fib_path_type_names[] = FIB_PATH_TYPES;
static const char *fib_path_oper_attribute_names[] = FIB_PATH_OPER_ATTRIBUTES;
static const char *fib_path_cfg_attribute_names[]  = FIB_PATH_CFG_ATTRIBUTES;

/*
 * The memory pool from which we allocate all the paths
 */
static fib_path_t *fib_path_pool;

/*
 * Debug macro: logs the path's index and its formatted state alongside
 * the caller's message. Compiles to nothing unless FIB_DEBUG is defined.
 */
#ifdef FIB_DEBUG
#define FIB_PATH_DBG(_p, _fmt, _args...)                        \
{                                                               \
    u8 *_tmp = NULL;                                            \
    _tmp = fib_path_format(fib_path_get_index(_p), _tmp);       \
    clib_warning("path:[%d:%s]:" _fmt,                          \
                 fib_path_get_index(_p), _tmp,                  \
                 ##_args);                                      \
    vec_free(_tmp);                                             \
}
#else
#define FIB_PATH_DBG(_p, _fmt, _args...)
#endif
329
/*
 * Return the pool element for the path with the given index.
 * pool_elt_at_index asserts (in debug builds) that the index is in use.
 */
static fib_path_t *
fib_path_get (fib_node_index_t index)
{
    return (pool_elt_at_index(fib_path_pool, index));
}
335
/*
 * Return the pool index of a path; the inverse of fib_path_get.
 * Valid only for pointers into fib_path_pool.
 */
static fib_node_index_t 
fib_path_get_index (fib_path_t *path)
{
    return (path - fib_path_pool);
}
341
/*
 * Get the path's embedded graph node (fp_node is the struct's first
 * member, so the cast is safe). Used by the node VFT below.
 */
static fib_node_t *
fib_path_get_node (fib_node_index_t index)
{
    return ((fib_node_t*)fib_path_get(index));
}
347
/*
 * Downcast a graph node back to the path that contains it.
 * In debug builds, verify the node really is a path.
 */
static fib_path_t*
fib_path_from_fib_node (fib_node_t *node)
{
#if CLIB_DEBUG > 0
    ASSERT(FIB_NODE_TYPE_PATH == node->fn_type);
#endif
    return ((fib_path_t*)node);
}
356
357 u8 *
358 format_fib_path (u8 * s, va_list * args)
359 {
360     fib_path_t *path = va_arg (*args, fib_path_t *);
361     vnet_main_t * vnm = vnet_get_main();
362     fib_path_oper_attribute_t oattr;
363     fib_path_cfg_attribute_t cattr;
364
365     s = format (s, "      index:%d ", fib_path_get_index(path));
366     s = format (s, "pl-index:%d ", path->fp_pl_index);
367     s = format (s, "%U ", format_fib_protocol, path->fp_nh_proto);
368     s = format (s, "weight=%d ", path->fp_weight);
369     s = format (s, "%s: ", fib_path_type_names[path->fp_type]);
370     if (FIB_PATH_OPER_FLAG_NONE != path->fp_oper_flags) {
371         s = format(s, " oper-flags:");
372         FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(oattr) {
373             if ((1<<oattr) & path->fp_oper_flags) {
374                 s = format (s, "%s,", fib_path_oper_attribute_names[oattr]);
375             }
376         }
377     }
378     if (FIB_PATH_CFG_FLAG_NONE != path->fp_cfg_flags) {
379         s = format(s, " cfg-flags:");
380         FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(cattr) {
381             if ((1<<cattr) & path->fp_cfg_flags) {
382                 s = format (s, "%s,", fib_path_cfg_attribute_names[cattr]);
383             }
384         }
385     }
386     s = format(s, "\n       ");
387
388     switch (path->fp_type)
389     {
390     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
391         s = format (s, "%U", format_ip46_address,
392                     &path->attached_next_hop.fp_nh,
393                     IP46_TYPE_ANY);
394         if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)
395         {
396             s = format (s, " if_index:%d", path->attached_next_hop.fp_interface);
397         }
398         else
399         {
400             s = format (s, " %U",
401                         format_vnet_sw_interface_name,
402                         vnm,
403                         vnet_get_sw_interface(
404                             vnm,
405                             path->attached_next_hop.fp_interface));
406             if (vnet_sw_interface_is_p2p(vnet_get_main(),
407                                          path->attached_next_hop.fp_interface))
408             {
409                 s = format (s, " (p2p)");
410             }
411         }
412         if (!dpo_id_is_valid(&path->fp_dpo))
413         {
414             s = format(s, "\n          unresolved");
415         }
416         else
417         {
418             s = format(s, "\n          %U",
419                        format_dpo_id,
420                        &path->fp_dpo, 13);
421         }
422         break;
423     case FIB_PATH_TYPE_ATTACHED:
424         if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)
425         {
426             s = format (s, " if_index:%d", path->attached_next_hop.fp_interface);
427         }
428         else
429         {
430             s = format (s, " %U",
431                         format_vnet_sw_interface_name,
432                         vnm,
433                         vnet_get_sw_interface(
434                             vnm,
435                             path->attached.fp_interface));
436         }
437         break;
438     case FIB_PATH_TYPE_RECURSIVE:
439         if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
440         {
441             s = format (s, "via %U",
442                         format_mpls_unicast_label,
443                         path->recursive.fp_nh.fp_local_label);
444         }
445         else
446         {
447             s = format (s, "via %U",
448                         format_ip46_address,
449                         &path->recursive.fp_nh.fp_ip,
450                         IP46_TYPE_ANY);
451         }
452         s = format (s, " in fib:%d",
453                     path->recursive.fp_tbl_id,
454                     path->fp_via_fib); 
455         s = format (s, " via-fib:%d", path->fp_via_fib); 
456         s = format (s, " via-dpo:[%U:%d]",
457                     format_dpo_type, path->fp_dpo.dpoi_type, 
458                     path->fp_dpo.dpoi_index);
459
460         break;
461     case FIB_PATH_TYPE_RECEIVE:
462     case FIB_PATH_TYPE_SPECIAL:
463     case FIB_PATH_TYPE_DEAG:
464     case FIB_PATH_TYPE_EXCLUSIVE:
465         if (dpo_id_is_valid(&path->fp_dpo))
466         {
467             s = format(s, "%U", format_dpo_id,
468                        &path->fp_dpo, 2);
469         }
470         break;
471     }
472     return (s);
473 }
474
475 u8 *
476 fib_path_format (fib_node_index_t pi, u8 *s)
477 {
478     fib_path_t *path;
479
480     path = fib_path_get(pi);
481     ASSERT(NULL != path);
482
483     return (format (s, "%U", format_fib_path, path));
484 }
485
/**
 * Append a short rendering of the path's resolved DPO to 's'.
 *
 * NOTE(review): the 'indent' parameter is currently unused - the DPO is
 * always formatted at a fixed indent of 2. Confirm whether callers
 * expect it to be honoured.
 */
u8 *
fib_path_adj_format (fib_node_index_t pi,
                     u32 indent,
                     u8 *s)
{
    fib_path_t *path;

    path = fib_path_get(pi);
    ASSERT(NULL != path);

    if (!dpo_id_is_valid(&path->fp_dpo))
    {
        s = format(s, " unresolved");
    }
    else
    {
        s = format(s, "%U", format_dpo_id,
                   &path->fp_dpo, 2);
    }

    return (s);
}
508
/*
 * fib_path_last_lock_gone
 *
 * We don't share paths, we share path lists, so the [un]lock functions
 * are no-ops. Reaching this function indicates a reference-counting
 * bug elsewhere, hence the assert.
 */
static void
fib_path_last_lock_gone (fib_node_t *node)
{
    ASSERT(0);
}
520
521 static const adj_index_t
522 fib_path_attached_next_hop_get_adj (fib_path_t *path,
523                                     vnet_link_t link)
524 {
525     if (vnet_sw_interface_is_p2p(vnet_get_main(),
526                                  path->attached_next_hop.fp_interface))
527     {
528         /*
529          * if the interface is p2p then the adj for the specific
530          * neighbour on that link will never exist. on p2p links
531          * the subnet address (the attached route) links to the
532          * auto-adj (see below), we want that adj here too.
533          */
534         return (adj_nbr_add_or_lock(path->fp_nh_proto,
535                                     link,
536                                     &zero_addr,
537                                     path->attached_next_hop.fp_interface));
538     }
539     else
540     {
541         return (adj_nbr_add_or_lock(path->fp_nh_proto,
542                                     link,
543                                     &path->attached_next_hop.fp_nh,
544                                     path->attached_next_hop.fp_interface));
545     }
546 }
547
/*
 * Resolve an attached-nexthop path: point its DPO at the adjacency for
 * (interface, next-hop) and register as a child of that adjacency.
 */
static void
fib_path_attached_next_hop_set (fib_path_t *path)
{
    /*
     * resolve directly via the adjacency described by the
     * interface and next-hop
     */
    if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
                                      path->attached_next_hop.fp_interface))
    {
        /* interface is admin-down: the path cannot forward */
        path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
    }

    dpo_set(&path->fp_dpo,
            DPO_ADJACENCY,
            fib_proto_to_dpo(path->fp_nh_proto),
            fib_path_attached_next_hop_get_adj(
                 path,
                 fib_proto_to_link(path->fp_nh_proto)));

    /*
     * become a child of the adjacency so we receive updates
     * when its rewrite changes
     */
    path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
                                     FIB_NODE_TYPE_PATH,
                                     fib_path_get_index(path));
}
576
577 /*
578  * create of update the paths recursive adj
579  */
580 static void
581 fib_path_recursive_adj_update (fib_path_t *path,
582                                fib_forward_chain_type_t fct,
583                                dpo_id_t *dpo)
584 {
585     dpo_id_t via_dpo = DPO_INVALID;
586
587     /*
588      * get the DPO to resolve through from the via-entry
589      */
590     fib_entry_contribute_forwarding(path->fp_via_fib,
591                                     fct,
592                                     &via_dpo);
593
594
595     /*
596      * hope for the best - clear if restrictions apply.
597      */
598     path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
599
600     /*
601      * Validate any recursion constraints and over-ride the via
602      * adj if not met
603      */
604     if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP)
605     {
606         path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
607         dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
608     }
609     else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_HOST)
610     {
611         /*
612          * the via FIB must be a host route.
613          * note the via FIB just added will always be a host route
614          * since it is an RR source added host route. So what we need to
615          * check is whether the route has other sources. If it does then
616          * some other source has added it as a host route. If it doesn't
617          * then it was added only here and inherits forwarding from a cover.
618          * the cover is not a host route.
619          * The RR source is the lowest priority source, so we check if it
620          * is the best. if it is there are no other sources.
621          */
622         if (fib_entry_get_best_source(path->fp_via_fib) >= FIB_SOURCE_RR)
623         {
624             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
625             dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
626
627             /*
628              * PIC edge trigger. let the load-balance maps know
629              */
630             load_balance_map_path_state_change(fib_path_get_index(path));
631         }
632     }
633     else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED)
634     {
635         /*
636          * RR source entries inherit the flags from the cover, so
637          * we can check the via directly
638          */
639         if (!(FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags(path->fp_via_fib)))
640         {
641             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
642             dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
643
644             /*
645              * PIC edge trigger. let the load-balance maps know
646              */
647             load_balance_map_path_state_change(fib_path_get_index(path));
648         }
649     }
650
651     /*
652      * update the path's contributed DPO
653      */
654     dpo_copy(dpo, &via_dpo);
655
656     FIB_PATH_DBG(path, "recursive update: %U",
657                  fib_get_lookup_main(path->fp_nh_proto),
658                  &path->fp_dpo, 2);
659
660     dpo_reset(&via_dpo);
661 }
662
663 /*
664  * fib_path_is_permanent_drop
665  *
666  * Return !0 if the path is configured to permanently drop,
667  * despite other attributes.
668  */
669 static int
670 fib_path_is_permanent_drop (fib_path_t *path)
671 {
672     return ((path->fp_cfg_flags & FIB_PATH_CFG_FLAG_DROP) ||
673             (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP));
674 }
675
/*
 * fib_path_unresolve
 *
 * Remove our dependency on the resolution target: detach from the
 * parent (via-entry or adjacency) per path type, then reset the
 * path's DPO and clear the resolved flag.
 */
static void
fib_path_unresolve (fib_path_t *path)
{
    /*
     * the forced drop path does not need unresolving
     */
    if (fib_path_is_permanent_drop(path))
    {
        return;
    }

    switch (path->fp_type)
    {
    case FIB_PATH_TYPE_RECURSIVE:
        if (FIB_NODE_INDEX_INVALID != path->fp_via_fib)
        {
            fib_prefix_t pfx;

            /* detach from the via-entry, then drop the RR source we
             * added to track it when the path was resolved */
            fib_entry_get_prefix(path->fp_via_fib, &pfx);
            fib_entry_child_remove(path->fp_via_fib,
                                   path->fp_sibling);
            fib_table_entry_special_remove(path->recursive.fp_tbl_id,
                                           &pfx,
                                           FIB_SOURCE_RR);
            path->fp_via_fib = FIB_NODE_INDEX_INVALID;
        }
        break;
    case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
    case FIB_PATH_TYPE_ATTACHED:
        /* detach from and release the adjacency we resolved through */
        adj_child_remove(path->fp_dpo.dpoi_index,
                         path->fp_sibling);
        adj_unlock(path->fp_dpo.dpoi_index);
        break;
    case FIB_PATH_TYPE_EXCLUSIVE:
        dpo_reset(&path->exclusive.fp_ex_dpo);
        break;
    case FIB_PATH_TYPE_SPECIAL:
    case FIB_PATH_TYPE_RECEIVE:
    case FIB_PATH_TYPE_DEAG:
        /*
         * these hold only the path's DPO, which is reset below.
         */
        break;
    }

    /*
     * release the adj we were holding and pick up the
     * drop just in case.
     */
    dpo_reset(&path->fp_dpo);
    path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;

    return;
}
735
736 static fib_forward_chain_type_t
737 fib_path_proto_to_chain_type (fib_protocol_t proto)
738 {
739     switch (proto)
740     {
741     case FIB_PROTOCOL_IP4:
742         return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
743     case FIB_PROTOCOL_IP6:
744         return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
745     case FIB_PROTOCOL_MPLS:
746         return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
747     }
748     return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
749 }
750
/*
 * fib_path_back_walk_notify
 *
 * A back walk has reached this path. Update the path's resolution
 * state according to the walk reason and path type, then propagate
 * the walk onward to the owning path-list.
 */
static fib_node_back_walk_rc_t
fib_path_back_walk_notify (fib_node_t *node,
                           fib_node_back_walk_ctx_t *ctx)
{
    fib_path_t *path;

    path = fib_path_from_fib_node(node);

    switch (path->fp_type)
    {
    case FIB_PATH_TYPE_RECURSIVE:
        if (FIB_NODE_BW_REASON_FLAG_EVALUATE & ctx->fnbw_reason)
        {
            /*
             * modify the recursive adjacency to use the new forwarding
             * of the via-fib.
             * this update is visible to packets in flight in the DP.
             */
            fib_path_recursive_adj_update(
                path,
                fib_path_proto_to_chain_type(path->fp_nh_proto),
                &path->fp_dpo);
        }
        if ((FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason) ||
            (FIB_NODE_BW_REASON_FLAG_ADJ_DOWN   & ctx->fnbw_reason))
        {
            /*
             * ADJ updates (complete<->incomplete) do not need to propagate to
             * recursive entries.
             * The only reason its needed as far back as here, is that the adj
             * and the incomplete adj are a different DPO type, so the LBs need
             * to re-stack.
             * If this walk was quashed in the fib_entry, then any non-fib_path
             * children (like tunnels that collapse out the LB when they stack)
             * would not see the update.
             */
            return (FIB_NODE_BACK_WALK_CONTINUE);
        }
        break;
    case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
        /*
         * ADJ_UPDATE backwalks pass silently through here and up to
         * the path-list when the multipath adj collapse occurs.
         * The reason we do this is that the assumption is that VPP
         * runs in an environment where the Control-Plane is remote
         * and hence reacts slowly to link up down. In order to remove
         * this down link from the ECMP set quickly, we back-walk.
         * VPP also has dedicated CPUs, so we are not stealing resources
         * from the CP to do so.
         */
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason)
        {
            if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED)
            {
                /*
                 * already resolved. no need to walk back again
                 */
                return (FIB_NODE_BACK_WALK_CONTINUE);
            }
            path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
        }
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason)
        {
            if (!(path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED))
            {
                /*
                 * already unresolved. no need to walk back again
                 */
                return (FIB_NODE_BACK_WALK_CONTINUE);
            }
            path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
        }
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
        {
            /*
             * The interface this path resolves through has been deleted.
             * This will leave the path in a permanent drop state. The route
             * needs to be removed and readded (and hence the path-list deleted)
             * before it can forward again.
             */
            fib_path_unresolve(path);
            path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
        }
        if (FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason)
        {
            /*
             * restack the DPO to pick up the correct DPO sub-type
             */
            uword if_is_up;
            adj_index_t ai;

            if_is_up = vnet_sw_interface_is_admin_up(
                           vnet_get_main(),
                           path->attached_next_hop.fp_interface);

            if (if_is_up)
            {
                path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
            }

            /* get_adj locks the adj; dpo_set takes its own reference,
             * so release the lock from get_adj afterwards */
            ai = fib_path_attached_next_hop_get_adj(
                     path,
                     fib_proto_to_link(path->fp_nh_proto));

            dpo_set(&path->fp_dpo, DPO_ADJACENCY,
                    fib_proto_to_dpo(path->fp_nh_proto),
                    ai);
            adj_unlock(ai);

            if (!if_is_up)
            {
                /*
                 * If the interface is not up there is no reason to walk
                 * back to children. if we did they would only evaluate
                 * that this path is unresolved and hence it would
                 * not contribute the adjacency - so it would be wasted
                 * CPU time.
                 */
                return (FIB_NODE_BACK_WALK_CONTINUE);
            }
        }
        if (FIB_NODE_BW_REASON_FLAG_ADJ_DOWN & ctx->fnbw_reason)
        {
            if (!(path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED))
            {
                /*
                 * already unresolved. no need to walk back again
                 */
                return (FIB_NODE_BACK_WALK_CONTINUE);
            }
            /*
             * the adj has gone down. the path is no longer resolved.
             */
            path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
        }
        break;
    case FIB_PATH_TYPE_ATTACHED:
        /*
         * FIXME; this could schedule a lower priority walk, since attached
         * routes are not usually in ECMP configurations so the backwalk to
         * the FIB entry does not need to be high priority
         */
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason)
        {
            path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
        }
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason)
        {
            path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
        }
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
        {
            fib_path_unresolve(path);
            path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
        }
        break;
    case FIB_PATH_TYPE_DEAG:
        /*
         * FIXME When VRF delete is allowed this will need a poke.
         */
    case FIB_PATH_TYPE_SPECIAL:
    case FIB_PATH_TYPE_RECEIVE:
    case FIB_PATH_TYPE_EXCLUSIVE:
        /*
         * these path types have no parents. so to be
         * walked from one is unexpected.
         */
        ASSERT(0);
        break;
    }

    /*
     * propagate the backwalk further to the path-list
     */
    fib_path_list_back_walk(path->fp_pl_index, ctx);

    return (FIB_NODE_BACK_WALK_CONTINUE);
}
935
936 static void
937 fib_path_memory_show (void)
938 {
939     fib_show_memory_usage("Path",
940                           pool_elts(fib_path_pool),
941                           pool_len(fib_path_pool),
942                           sizeof(fib_path_t));
943 }
944
945 /*
946  * The FIB path's graph node virtual function table
947  */
948 static const fib_node_vft_t fib_path_vft = {
949     .fnv_get = fib_path_get_node,
950     .fnv_last_lock = fib_path_last_lock_gone,
951     .fnv_back_walk = fib_path_back_walk_notify,
952     .fnv_mem_show = fib_path_memory_show,
953 };
954
955 static fib_path_cfg_flags_t
956 fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath)
957 {
958     fib_path_cfg_flags_t cfg_flags = FIB_PATH_CFG_FLAG_NONE;
959
960     if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_HOST)
961         cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_HOST;
962     if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED)
963         cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED;
964     if (rpath->frp_flags & FIB_ROUTE_PATH_LOCAL)
965         cfg_flags |= FIB_PATH_CFG_FLAG_LOCAL;
966
967     return (cfg_flags);
968 }
969
970 /*
971  * fib_path_create
972  *
973  * Create and initialise a new path object.
974  * return the index of the path.
975  */
976 fib_node_index_t
977 fib_path_create (fib_node_index_t pl_index,
978                  fib_protocol_t nh_proto,
979                  fib_path_cfg_flags_t flags,
980                  const fib_route_path_t *rpath)
981 {
982     fib_path_t *path;
983
984     pool_get(fib_path_pool, path);
985     memset(path, 0, sizeof(*path));
986
987     fib_node_init(&path->fp_node,
988                   FIB_NODE_TYPE_PATH);
989
990     dpo_reset(&path->fp_dpo);
991     path->fp_pl_index = pl_index;
992     path->fp_nh_proto = nh_proto;
993     path->fp_via_fib = FIB_NODE_INDEX_INVALID;
994     path->fp_weight = rpath->frp_weight;
995     if (0 == path->fp_weight)
996     {
997         /*
998          * a weight of 0 is a meaningless value. We could either reject it, and thus force
999          * clients to always use 1, or we can accept it and fixup approrpiately.
1000          */
1001         path->fp_weight = 1;
1002     }
1003     path->fp_cfg_flags = flags;
1004     path->fp_cfg_flags |= fib_path_route_flags_to_cfg_flags(rpath);
1005
1006     /*
1007      * deduce the path's tpye from the parementers and save what is needed.
1008      */
1009     if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_LOCAL)
1010     {
1011         path->fp_type = FIB_PATH_TYPE_RECEIVE;
1012         path->receive.fp_interface = rpath->frp_sw_if_index;
1013         path->receive.fp_addr = rpath->frp_addr;
1014     }
1015     else if (~0 != rpath->frp_sw_if_index)
1016     {
1017         if (ip46_address_is_zero(&rpath->frp_addr))
1018         {
1019             path->fp_type = FIB_PATH_TYPE_ATTACHED;
1020             path->attached.fp_interface = rpath->frp_sw_if_index;
1021         }
1022         else
1023         {
1024             path->fp_type = FIB_PATH_TYPE_ATTACHED_NEXT_HOP;
1025             path->attached_next_hop.fp_interface = rpath->frp_sw_if_index;
1026             path->attached_next_hop.fp_nh = rpath->frp_addr;
1027         }
1028     }
1029     else
1030     {
1031         if (ip46_address_is_zero(&rpath->frp_addr))
1032         {
1033             if (~0 == rpath->frp_fib_index)
1034             {
1035                 path->fp_type = FIB_PATH_TYPE_SPECIAL;
1036             }
1037             else
1038             {
1039                 path->fp_type = FIB_PATH_TYPE_DEAG;
1040                 path->deag.fp_tbl_id = rpath->frp_fib_index;
1041             }           
1042         }
1043         else
1044         {
1045             path->fp_type = FIB_PATH_TYPE_RECURSIVE;
1046             if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
1047             {
1048                 path->recursive.fp_nh.fp_local_label = rpath->frp_local_label;
1049             }
1050             else
1051             {
1052                 path->recursive.fp_nh.fp_ip = rpath->frp_addr;
1053             }
1054             path->recursive.fp_tbl_id = rpath->frp_fib_index;
1055         }
1056     }
1057
1058     FIB_PATH_DBG(path, "create");
1059
1060     return (fib_path_get_index(path));
1061 }
1062
1063 /*
1064  * fib_path_create_special
1065  *
1066  * Create and initialise a new path object.
1067  * return the index of the path.
1068  */
1069 fib_node_index_t
1070 fib_path_create_special (fib_node_index_t pl_index,
1071                          fib_protocol_t nh_proto,
1072                          fib_path_cfg_flags_t flags,
1073                          const dpo_id_t *dpo)
1074 {
1075     fib_path_t *path;
1076
1077     pool_get(fib_path_pool, path);
1078     memset(path, 0, sizeof(*path));
1079
1080     fib_node_init(&path->fp_node,
1081                   FIB_NODE_TYPE_PATH);
1082     dpo_reset(&path->fp_dpo);
1083
1084     path->fp_pl_index = pl_index;
1085     path->fp_weight = 1;
1086     path->fp_nh_proto = nh_proto;
1087     path->fp_via_fib = FIB_NODE_INDEX_INVALID;
1088     path->fp_cfg_flags = flags;
1089
1090     if (FIB_PATH_CFG_FLAG_DROP & flags)
1091     {
1092         path->fp_type = FIB_PATH_TYPE_SPECIAL;
1093     }
1094     else if (FIB_PATH_CFG_FLAG_LOCAL & flags)
1095     {
1096         path->fp_type = FIB_PATH_TYPE_RECEIVE;
1097         path->attached.fp_interface = FIB_NODE_INDEX_INVALID;
1098     }
1099     else
1100     {
1101         path->fp_type = FIB_PATH_TYPE_EXCLUSIVE;
1102         ASSERT(NULL != dpo);
1103         dpo_copy(&path->exclusive.fp_ex_dpo, dpo);
1104     }
1105
1106     return (fib_path_get_index(path));
1107 }
1108
1109 /*
1110  * fib_path_copy
1111  *
1112  * Copy a path. return index of new path.
1113  */
1114 fib_node_index_t
1115 fib_path_copy (fib_node_index_t path_index,
1116                fib_node_index_t path_list_index)
1117 {
1118     fib_path_t *path, *orig_path;
1119
1120     pool_get(fib_path_pool, path);
1121
1122     orig_path = fib_path_get(path_index);
1123     ASSERT(NULL != orig_path);
1124
1125     memcpy(path, orig_path, sizeof(*path));
1126
1127     FIB_PATH_DBG(path, "create-copy:%d", path_index);
1128
1129     /*
1130      * reset the dynamic section
1131      */
1132     fib_node_init(&path->fp_node, FIB_NODE_TYPE_PATH);
1133     path->fp_oper_flags     = FIB_PATH_OPER_FLAG_NONE;
1134     path->fp_pl_index  = path_list_index;
1135     path->fp_via_fib   = FIB_NODE_INDEX_INVALID;
1136     memset(&path->fp_dpo, 0, sizeof(path->fp_dpo));
1137     dpo_reset(&path->fp_dpo);
1138
1139     return (fib_path_get_index(path));
1140 }
1141
1142 /*
1143  * fib_path_destroy
1144  *
1145  * destroy a path that is no longer required
1146  */
1147 void
1148 fib_path_destroy (fib_node_index_t path_index)
1149 {
1150     fib_path_t *path;
1151
1152     path = fib_path_get(path_index);
1153
1154     ASSERT(NULL != path);
1155     FIB_PATH_DBG(path, "destroy");
1156
1157     fib_path_unresolve(path);
1158
1159     fib_node_deinit(&path->fp_node);
1160     pool_put(fib_path_pool, path);
1161 }
1162
1163 /*
1164  * fib_path_destroy
1165  *
1166  * destroy a path that is no longer required
1167  */
1168 uword
1169 fib_path_hash (fib_node_index_t path_index)
1170 {
1171     fib_path_t *path;
1172
1173     path = fib_path_get(path_index);
1174
1175     return (hash_memory(STRUCT_MARK_PTR(path, path_hash_start),
1176                         (STRUCT_OFFSET_OF(fib_path_t, path_hash_end) -
1177                          STRUCT_OFFSET_OF(fib_path_t, path_hash_start)),
1178                         0));
1179 }
1180
1181 /*
1182  * fib_path_cmp_i
1183  *
1184  * Compare two paths for equivalence.
1185  */
1186 static int
1187 fib_path_cmp_i (const fib_path_t *path1,
1188                 const fib_path_t *path2)
1189 {
1190     int res;
1191
1192     res = 1;
1193
1194     /*
1195      * paths of different types and protocol are not equal.
1196      * different weights only are the same path.
1197      */
1198     if (path1->fp_type != path2->fp_type)
1199     {
1200         res = (path1->fp_type - path2->fp_type);
1201     }
1202     else if (path1->fp_nh_proto != path2->fp_nh_proto)
1203     {
1204         res = (path1->fp_nh_proto - path2->fp_nh_proto);
1205     }
1206     else
1207     {
1208         /*
1209          * both paths are of the same type.
1210          * consider each type and its attributes in turn.
1211          */
1212         switch (path1->fp_type)
1213         {
1214         case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1215             res = ip46_address_cmp(&path1->attached_next_hop.fp_nh,
1216                                    &path2->attached_next_hop.fp_nh);
1217             if (0 == res) {
1218                 res = vnet_sw_interface_compare(
1219                           vnet_get_main(),
1220                           path1->attached_next_hop.fp_interface,
1221                           path2->attached_next_hop.fp_interface);
1222             }
1223             break;
1224         case FIB_PATH_TYPE_ATTACHED:
1225             res = vnet_sw_interface_compare(
1226                       vnet_get_main(),
1227                       path1->attached.fp_interface,
1228                       path2->attached.fp_interface);
1229             break;
1230         case FIB_PATH_TYPE_RECURSIVE:
1231             res = ip46_address_cmp(&path1->recursive.fp_nh,
1232                                    &path2->recursive.fp_nh);
1233  
1234             if (0 == res)
1235             {
1236                 res = (path1->recursive.fp_tbl_id - path2->recursive.fp_tbl_id);
1237             }
1238             break;
1239         case FIB_PATH_TYPE_DEAG:
1240             res = (path1->deag.fp_tbl_id - path2->deag.fp_tbl_id);
1241             break;
1242         case FIB_PATH_TYPE_SPECIAL:
1243         case FIB_PATH_TYPE_RECEIVE:
1244         case FIB_PATH_TYPE_EXCLUSIVE:
1245             res = 0;
1246             break;
1247         }
1248     }
1249     return (res);
1250 }
1251
1252 /*
1253  * fib_path_cmp_for_sort
1254  *
1255  * Compare two paths for equivalence. Used during path sorting.
1256  * As usual 0 means equal.
1257  */
1258 int
1259 fib_path_cmp_for_sort (void * v1,
1260                        void * v2)
1261 {
1262     fib_node_index_t *pi1 = v1, *pi2 = v2;
1263     fib_path_t *path1, *path2;
1264
1265     path1 = fib_path_get(*pi1);
1266     path2 = fib_path_get(*pi2);
1267
1268     return (fib_path_cmp_i(path1, path2));
1269 }
1270
1271 /*
1272  * fib_path_cmp
1273  *
1274  * Compare two paths for equivalence.
1275  */
1276 int
1277 fib_path_cmp (fib_node_index_t pi1,
1278               fib_node_index_t pi2)
1279 {
1280     fib_path_t *path1, *path2;
1281
1282     path1 = fib_path_get(pi1);
1283     path2 = fib_path_get(pi2);
1284
1285     return (fib_path_cmp_i(path1, path2));
1286 }
1287
/**
 * Compare a path object with a client supplied route-path description.
 * 0 means they match.
 * Unlike fib_path_cmp_i, weight IS compared here.
 */
int
fib_path_cmp_w_route_path (fib_node_index_t path_index,
                           const fib_route_path_t *rpath)
{
    fib_path_t *path;
    int res;

    path = fib_path_get(path_index);

    res = 1;

    if (path->fp_weight != rpath->frp_weight)
    {
        res = (path->fp_weight - rpath->frp_weight);
    }
    else
    {
        /*
         * both paths are of the same type.
         * consider each type and its attributes in turn.
         */
        switch (path->fp_type)
        {
        case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
            res = ip46_address_cmp(&path->attached_next_hop.fp_nh,
                                   &rpath->frp_addr);
            if (0 == res)
            {
                res = vnet_sw_interface_compare(
                          vnet_get_main(),
                          path->attached_next_hop.fp_interface,
                          rpath->frp_sw_if_index);
            }
            break;
        case FIB_PATH_TYPE_ATTACHED:
            res = vnet_sw_interface_compare(
                      vnet_get_main(),
                      path->attached.fp_interface,
                      rpath->frp_sw_if_index);
            break;
        case FIB_PATH_TYPE_RECURSIVE:
            /* the next-hop union holds a label for MPLS, an address otherwise */
            if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
            {
                res = path->recursive.fp_nh.fp_local_label - rpath->frp_local_label;
            }
            else
            {
                res = ip46_address_cmp(&path->recursive.fp_nh.fp_ip,
                                       &rpath->frp_addr);
            }

            if (0 == res)
            {
                res = (path->recursive.fp_tbl_id - rpath->frp_fib_index);
            }
            break;
        case FIB_PATH_TYPE_DEAG:
            res = (path->deag.fp_tbl_id - rpath->frp_fib_index);
            break;
        case FIB_PATH_TYPE_SPECIAL:
        case FIB_PATH_TYPE_RECEIVE:
        case FIB_PATH_TYPE_EXCLUSIVE:
            /* no type-specific attributes to compare */
            res = 0;
            break;
        }
    }
    return (res);
}
1356
1357 /*
1358  * fib_path_recursive_loop_detect
1359  *
1360  * A forward walk of the FIB object graph to detect for a cycle/loop. This
1361  * walk is initiated when an entry is linking to a new path list or from an old.
1362  * The entry vector passed contains all the FIB entrys that are children of this
1363  * path (it is all the entries encountered on the walk so far). If this vector
1364  * contains the entry this path resolve via, then a loop is about to form.
1365  * The loop must be allowed to form, since we need the dependencies in place
1366  * so that we can track when the loop breaks.
1367  * However, we MUST not produce a loop in the forwarding graph (else packets
1368  * would loop around the switch path until the loop breaks), so we mark recursive
1369  * paths as looped so that they do not contribute forwarding information.
1370  * By marking the path as looped, an etry such as;
1371  *    X/Y
1372  *     via a.a.a.a (looped)
1373  *     via b.b.b.b (not looped)
1374  * can still forward using the info provided by b.b.b.b only
1375  */
1376 int
1377 fib_path_recursive_loop_detect (fib_node_index_t path_index,
1378                                 fib_node_index_t **entry_indicies)
1379 {
1380     fib_path_t *path;
1381
1382     path = fib_path_get(path_index);
1383
1384     /*
1385      * the forced drop path is never looped, cos it is never resolved.
1386      */
1387     if (fib_path_is_permanent_drop(path))
1388     {
1389         return (0);
1390     }
1391
1392     switch (path->fp_type)
1393     {
1394     case FIB_PATH_TYPE_RECURSIVE:
1395     {
1396         fib_node_index_t *entry_index, *entries;
1397         int looped = 0;
1398         entries = *entry_indicies;
1399
1400         vec_foreach(entry_index, entries) {
1401             if (*entry_index == path->fp_via_fib)
1402             {
1403                 /*
1404                  * the entry that is about to link to this path-list (or
1405                  * one of this path-list's children) is the same entry that
1406                  * this recursive path resolves through. this is a cycle.
1407                  * abort the walk.
1408                  */
1409                 looped = 1;
1410                 break;
1411             }
1412         }
1413
1414         if (looped)
1415         {
1416             FIB_PATH_DBG(path, "recursive loop formed");
1417             path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
1418
1419             dpo_copy(&path->fp_dpo,
1420                     drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
1421         }
1422         else
1423         {
1424             /*
1425              * no loop here yet. keep forward walking the graph.
1426              */     
1427             if (fib_entry_recursive_loop_detect(path->fp_via_fib, entry_indicies))
1428             {
1429                 FIB_PATH_DBG(path, "recursive loop formed");
1430                 path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
1431             }
1432             else
1433             {
1434                 FIB_PATH_DBG(path, "recursive loop cleared");
1435                 path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
1436             }
1437         }
1438         break;
1439     }
1440     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1441     case FIB_PATH_TYPE_ATTACHED:
1442     case FIB_PATH_TYPE_SPECIAL:
1443     case FIB_PATH_TYPE_DEAG:
1444     case FIB_PATH_TYPE_RECEIVE:
1445     case FIB_PATH_TYPE_EXCLUSIVE:
1446         /*
1447          * these path types cannot be part of a loop, since they are the leaves
1448          * of the graph.
1449          */
1450         break;
1451     }
1452
1453     return (fib_path_is_looped(path_index));
1454 }
1455
/**
 * fib_path_resolve
 *
 * Resolve the path so it can contribute forwarding; the resolution
 * action (adjacency, recursion via another entry, lookup DPO, etc.)
 * depends on the path's type.
 *
 * @return whether the path ended up resolved (see fib_path_is_resolved).
 */
int
fib_path_resolve (fib_node_index_t path_index)
{
    fib_path_t *path;

    path = fib_path_get(path_index);

    /*
     * hope for the best.
     */
    path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;

    /*
     * the forced drop path resolves via the drop adj
     */
    if (fib_path_is_permanent_drop(path))
    {
        dpo_copy(&path->fp_dpo,
                 drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
        path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
        return (fib_path_is_resolved(path_index));
    }

    switch (path->fp_type)
    {
    case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
        fib_path_attached_next_hop_set(path);
        break;
    case FIB_PATH_TYPE_ATTACHED:
        /*
         * path->attached.fp_interface
         */
        if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
                                           path->attached.fp_interface))
        {
            path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
        }
        if (vnet_sw_interface_is_p2p(vnet_get_main(),
                                     path->attached.fp_interface))
        {
            /*
             * point-2-point interfaces do not require a glean, since
             * there is nothing to ARP. Install a rewrite/nbr adj instead
             */
            dpo_set(&path->fp_dpo,
                    DPO_ADJACENCY,
                    fib_proto_to_dpo(path->fp_nh_proto),
                    adj_nbr_add_or_lock(
                        path->fp_nh_proto,
                        fib_proto_to_link(path->fp_nh_proto),
                        &zero_addr,
                        path->attached.fp_interface));
        }
        else
        {
            dpo_set(&path->fp_dpo,
                    DPO_ADJACENCY_GLEAN,
                    fib_proto_to_dpo(path->fp_nh_proto),
                    adj_glean_add_or_lock(path->fp_nh_proto,
                                          path->attached.fp_interface,
                                          NULL));
        }
        /*
         * become a child of the adjacency so we receive updates
         * when the interface state changes
         */
        path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
                                         FIB_NODE_TYPE_PATH,
                                         fib_path_get_index(path));

        break;
    case FIB_PATH_TYPE_RECURSIVE:
    {
        /*
         * Create a RR source entry in the table for the address
         * that this path recurses through.
         * This resolve action is recursive, hence we may create
         * more paths in the process. more creates mean maybe realloc
         * of this path.
         */
        fib_node_index_t fei;
        fib_prefix_t pfx;

        ASSERT(FIB_NODE_INDEX_INVALID == path->fp_via_fib);

        /* the recursion target is a label for MPLS, an address otherwise */
        if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
        {
            fib_prefix_from_mpls_label(path->recursive.fp_nh.fp_local_label, &pfx);
        }
        else
        {
            fib_prefix_from_ip46_addr(&path->recursive.fp_nh.fp_ip, &pfx);
        }

        fei = fib_table_entry_special_add(path->recursive.fp_tbl_id,
                                          &pfx,
                                          FIB_SOURCE_RR,
                                          FIB_ENTRY_FLAG_NONE,
                                          ADJ_INDEX_INVALID);

        /*
         * re-fetch the path: the add above may have created more paths
         * and hence realloc'd the pool, invalidating the old pointer.
         */
        path = fib_path_get(path_index);
        path->fp_via_fib = fei;

        /*
         * become a dependent child of the entry so the path is 
         * informed when the forwarding for the entry changes.
         */
        path->fp_sibling = fib_entry_child_add(path->fp_via_fib,
                                               FIB_NODE_TYPE_PATH,
                                               fib_path_get_index(path));

        /*
         * create and configure the IP DPO
         */
        fib_path_recursive_adj_update(
            path,
            fib_path_proto_to_chain_type(path->fp_nh_proto),
            &path->fp_dpo);

        break;
    }
    case FIB_PATH_TYPE_SPECIAL:
        /*
         * Resolve via the drop
         */
        dpo_copy(&path->fp_dpo,
                 drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
        break;
    case FIB_PATH_TYPE_DEAG:
        /*
         * Resolve via a lookup DPO.
         * FIXME. control plane should add routes with a table ID
         */
        lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id,
                                          fib_proto_to_dpo(path->fp_nh_proto),
                                          LOOKUP_INPUT_DST_ADDR,
                                          LOOKUP_TABLE_FROM_CONFIG,
                                          &path->fp_dpo);
        break;
    case FIB_PATH_TYPE_RECEIVE:
        /*
         * Resolve via a receive DPO.
         */
        receive_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto),
                                path->receive.fp_interface,
                                &path->receive.fp_addr,
                                &path->fp_dpo);
        break;
    case FIB_PATH_TYPE_EXCLUSIVE:
        /*
         * Resolve via the user provided DPO
         */
        dpo_copy(&path->fp_dpo, &path->exclusive.fp_ex_dpo);
        break;
    }

    return (fib_path_is_resolved(path_index));
}
1614
1615 u32
1616 fib_path_get_resolving_interface (fib_node_index_t path_index)
1617 {
1618     fib_path_t *path;
1619
1620     path = fib_path_get(path_index);
1621
1622     switch (path->fp_type)
1623     {
1624     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1625         return (path->attached_next_hop.fp_interface);
1626     case FIB_PATH_TYPE_ATTACHED:
1627         return (path->attached.fp_interface);
1628     case FIB_PATH_TYPE_RECEIVE:
1629         return (path->receive.fp_interface);
1630     case FIB_PATH_TYPE_RECURSIVE:
1631         return (fib_entry_get_resolving_interface(path->fp_via_fib));    
1632     case FIB_PATH_TYPE_SPECIAL:
1633     case FIB_PATH_TYPE_DEAG:
1634     case FIB_PATH_TYPE_EXCLUSIVE:
1635         break;
1636     }
1637     return (~0);
1638 }
1639
1640 adj_index_t
1641 fib_path_get_adj (fib_node_index_t path_index)
1642 {
1643     fib_path_t *path;
1644
1645     path = fib_path_get(path_index);
1646
1647     ASSERT(dpo_is_adj(&path->fp_dpo));
1648     if (dpo_is_adj(&path->fp_dpo))
1649     {
1650         return (path->fp_dpo.dpoi_index);
1651     }
1652     return (ADJ_INDEX_INVALID);
1653 }
1654
1655 int
1656 fib_path_get_weight (fib_node_index_t path_index)
1657 {
1658     fib_path_t *path;
1659
1660     path = fib_path_get(path_index);
1661
1662     ASSERT(path);
1663
1664     return (path->fp_weight);
1665 }
1666
1667 /**
1668  * @brief Contribute the path's adjacency to the list passed.
1669  * By calling this function over all paths, recursively, a child
1670  * can construct its full set of forwarding adjacencies, and hence its
1671  * uRPF list.
1672  */
1673 void
1674 fib_path_contribute_urpf (fib_node_index_t path_index,
1675                           index_t urpf)
1676 {
1677     fib_path_t *path;
1678
1679     if (!fib_path_is_resolved(path_index))
1680         return;
1681
1682     path = fib_path_get(path_index);
1683
1684     switch (path->fp_type)
1685     {
1686     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1687         fib_urpf_list_append(urpf, path->attached_next_hop.fp_interface);
1688         break;
1689
1690     case FIB_PATH_TYPE_ATTACHED:
1691         fib_urpf_list_append(urpf, path->attached.fp_interface);
1692         break;
1693
1694     case FIB_PATH_TYPE_RECURSIVE:
1695         fib_entry_contribute_urpf(path->fp_via_fib, urpf);
1696         break;
1697
1698     case FIB_PATH_TYPE_EXCLUSIVE:
1699     case FIB_PATH_TYPE_SPECIAL:
1700         /*
1701          * these path types may link to an adj, if that's what
1702          * the clinet gave
1703          */
1704         if (dpo_is_adj(&path->fp_dpo))
1705         {
1706             ip_adjacency_t *adj;
1707
1708             adj = adj_get(path->fp_dpo.dpoi_index);
1709
1710             fib_urpf_list_append(urpf, adj->rewrite_header.sw_if_index);
1711         }
1712         break;
1713
1714     case FIB_PATH_TYPE_DEAG:
1715     case FIB_PATH_TYPE_RECEIVE:
1716         /*
1717          * these path types don't link to an adj
1718          */
1719         break;
1720     }
1721 }
1722
/**
 * fib_path_contribute_forwarding
 *
 * Fill in the DPO this path contributes for the requested forwarding
 * chain type. If the requested chain matches the path's native protocol
 * the DPO built at resolve time is used; otherwise a chain-specific DPO
 * is constructed per path type.
 */
void
fib_path_contribute_forwarding (fib_node_index_t path_index,
                                fib_forward_chain_type_t fct,
                                dpo_id_t *dpo)
{
    fib_path_t *path;

    path = fib_path_get(path_index);

    ASSERT(path);
    /*
     * NOTE(review): this ASSERT excludes MPLS_EOS, yet several switch
     * arms below still carry FIB_FORW_CHAIN_TYPE_MPLS_EOS case labels
     * (making them dead in debug builds) — confirm which is intended.
     */
    ASSERT(FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct);

    FIB_PATH_DBG(path, "contribute");

    /*
     * The DPO stored in the path was created when the path was resolved.
     * This then represents the path's 'native' protocol; IP.
     * For all others will need to go find something else.
     */
    if (fib_path_proto_to_chain_type(path->fp_nh_proto) == fct)
    {
        dpo_copy(dpo, &path->fp_dpo);
    }
    else
    {
        switch (path->fp_type)
        {
        case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
            switch (fct)
            {
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
            case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
            case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
            case FIB_FORW_CHAIN_TYPE_ETHERNET:
            {
                adj_index_t ai;

                /*
                 * get a appropriate link type adj.
                 */
                ai = fib_path_attached_next_hop_get_adj(
                         path,
                         fib_forw_chain_type_to_link_type(fct));
                dpo_set(dpo, DPO_ADJACENCY,
                        fib_forw_chain_type_to_dpo_proto(fct), ai);
                /* dpo_set took its own lock on the adj; drop ours */
                adj_unlock(ai);

                break;
            }
            case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
            case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
            break;
            }
            break;
        case FIB_PATH_TYPE_RECURSIVE:
            switch (fct)
            {
            case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
            case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
                fib_path_recursive_adj_update(path, fct, dpo);
                break;
            case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
            case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
            case FIB_FORW_CHAIN_TYPE_ETHERNET:
                /* a recursive path cannot supply these chain types */
                ASSERT(0);
                break;
            }
            break;
        case FIB_PATH_TYPE_DEAG:
            switch (fct)
            {
            case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
                lookup_dpo_add_or_lock_w_table_id(MPLS_FIB_DEFAULT_TABLE_ID,
                                                  DPO_PROTO_MPLS,
                                                  LOOKUP_INPUT_DST_ADDR,
                                                  LOOKUP_TABLE_FROM_CONFIG,
                                                  dpo);
                break;
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
            case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
                dpo_copy(dpo, &path->fp_dpo);
                break;
            case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
            case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
            case FIB_FORW_CHAIN_TYPE_ETHERNET:
                /* a deag path cannot supply these chain types */
                ASSERT(0);
                break;
            }
            break;
        case FIB_PATH_TYPE_EXCLUSIVE:
            dpo_copy(dpo, &path->exclusive.fp_ex_dpo);
            break;
        case FIB_PATH_TYPE_ATTACHED:
            switch (fct)
            {
            case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
            case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
            case FIB_FORW_CHAIN_TYPE_ETHERNET:
                break;
            case FIB_FORW_CHAIN_TYPE_MCAST_IP4:
            case FIB_FORW_CHAIN_TYPE_MCAST_IP6:
                {
                    adj_index_t ai;

                    /*
                     * Create the adj needed for sending IP multicast traffic
                     */
                    ai = adj_mcast_add_or_lock(path->fp_nh_proto,
                                               fib_forw_chain_type_to_link_type(fct),
                                               path->attached.fp_interface);
                    dpo_set(dpo, DPO_ADJACENCY_MCAST,
                            fib_forw_chain_type_to_dpo_proto(fct),
                            ai);
                    /* dpo_set took its own lock on the adj; drop ours */
                    adj_unlock(ai);
                }
                break;
            }
            break;
        case FIB_PATH_TYPE_RECEIVE:
        case FIB_PATH_TYPE_SPECIAL:
            dpo_copy(dpo, &path->fp_dpo);
            break;
        }
    }
}
1854
1855 load_balance_path_t *
1856 fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index,
1857                                        fib_forward_chain_type_t fct,
1858                                        load_balance_path_t *hash_key)
1859 {
1860     load_balance_path_t *mnh;
1861     fib_path_t *path;
1862
1863     path = fib_path_get(path_index);
1864
1865     ASSERT(path);
1866
1867     if (fib_path_is_resolved(path_index))
1868     {
1869         vec_add2(hash_key, mnh, 1);
1870
1871         mnh->path_weight = path->fp_weight;
1872         mnh->path_index = path_index;
1873         fib_path_contribute_forwarding(path_index, fct, &mnh->path_dpo);
1874     }
1875
1876     return (hash_key);
1877 }
1878
1879 int
1880 fib_path_is_recursive (fib_node_index_t path_index)
1881 {
1882     fib_path_t *path;
1883
1884     path = fib_path_get(path_index);
1885
1886     return (FIB_PATH_TYPE_RECURSIVE == path->fp_type);
1887 }
1888
1889 int
1890 fib_path_is_exclusive (fib_node_index_t path_index)
1891 {
1892     fib_path_t *path;
1893
1894     path = fib_path_get(path_index);
1895
1896     return (FIB_PATH_TYPE_EXCLUSIVE == path->fp_type);
1897 }
1898
1899 int
1900 fib_path_is_deag (fib_node_index_t path_index)
1901 {
1902     fib_path_t *path;
1903
1904     path = fib_path_get(path_index);
1905
1906     return (FIB_PATH_TYPE_DEAG == path->fp_type);
1907 }
1908
1909 int
1910 fib_path_is_resolved (fib_node_index_t path_index)
1911 {
1912     fib_path_t *path;
1913
1914     path = fib_path_get(path_index);
1915
1916     return (dpo_id_is_valid(&path->fp_dpo) &&
1917             (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED) &&
1918             !fib_path_is_looped(path_index) &&
1919             !fib_path_is_permanent_drop(path));
1920 }
1921
1922 int
1923 fib_path_is_looped (fib_node_index_t path_index)
1924 {
1925     fib_path_t *path;
1926
1927     path = fib_path_get(path_index);
1928
1929     return (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP);
1930 }
1931
1932 int
1933 fib_path_encode (fib_node_index_t path_list_index,
1934                  fib_node_index_t path_index,
1935                  void *ctx)
1936 {
1937     fib_route_path_encode_t **api_rpaths = ctx;
1938     fib_route_path_encode_t *api_rpath;
1939     fib_path_t *path;
1940
1941     path = fib_path_get(path_index);
1942     if (!path)
1943       return (0);
1944     vec_add2(*api_rpaths, api_rpath, 1);
1945     api_rpath->rpath.frp_weight = path->fp_weight;
1946     api_rpath->rpath.frp_proto = path->fp_nh_proto;
1947     api_rpath->rpath.frp_sw_if_index = ~0;
1948     api_rpath->dpo = path->exclusive.fp_ex_dpo;
1949     switch (path->fp_type)
1950       {
1951       case FIB_PATH_TYPE_RECEIVE:
1952         api_rpath->rpath.frp_addr = path->receive.fp_addr;
1953         api_rpath->rpath.frp_sw_if_index = path->receive.fp_interface;
1954         break;
1955       case FIB_PATH_TYPE_ATTACHED:
1956         api_rpath->rpath.frp_sw_if_index = path->attached.fp_interface;
1957         break;
1958       case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1959         api_rpath->rpath.frp_sw_if_index = path->attached_next_hop.fp_interface;
1960         api_rpath->rpath.frp_addr = path->attached_next_hop.fp_nh;
1961         break;
1962       case FIB_PATH_TYPE_SPECIAL:
1963         break;
1964       case FIB_PATH_TYPE_DEAG:
1965         break;
1966       case FIB_PATH_TYPE_RECURSIVE:
1967         api_rpath->rpath.frp_addr = path->recursive.fp_nh.fp_ip;
1968         break;
1969       default:
1970         break;
1971       }
1972     return (1);
1973 }
1974
1975 fib_protocol_t
1976 fib_path_get_proto (fib_node_index_t path_index)
1977 {
1978     fib_path_t *path;
1979
1980     path = fib_path_get(path_index);
1981
1982     return (path->fp_nh_proto);
1983 }
1984
/*
 * Module initialisation: register the path node type and its virtual
 * function table with the FIB node infrastructure.
 */
void
fib_path_module_init (void)
{
    fib_node_register_type (FIB_NODE_TYPE_PATH, &fib_path_vft);
}
1990
1991 static clib_error_t *
1992 show_fib_path_command (vlib_main_t * vm,
1993                         unformat_input_t * input,
1994                         vlib_cli_command_t * cmd)
1995 {
1996     fib_node_index_t pi;
1997     fib_path_t *path;
1998
1999     if (unformat (input, "%d", &pi))
2000     {
2001         /*
2002          * show one in detail
2003          */
2004         if (!pool_is_free_index(fib_path_pool, pi))
2005         {
2006             path = fib_path_get(pi);
2007             u8 *s = fib_path_format(pi, NULL);
2008             s = format(s, "children:");
2009             s = fib_node_children_format(path->fp_node.fn_children, s);
2010             vlib_cli_output (vm, "%s", s);
2011             vec_free(s);
2012         }
2013         else
2014         {
2015             vlib_cli_output (vm, "path %d invalid", pi);
2016         }
2017     }
2018     else
2019     {
2020         vlib_cli_output (vm, "FIB Paths");
2021         pool_foreach(path, fib_path_pool,
2022         ({
2023             vlib_cli_output (vm, "%U", format_fib_path, path);
2024         }));
2025     }
2026
2027     return (NULL);
2028 }
2029
/*
 * Register the "show fib paths" CLI command, handled by
 * show_fib_path_command above.
 */
VLIB_CLI_COMMAND (show_fib_path, static) = {
  .path = "show fib paths",
  .function = show_fib_path_command,
  .short_help = "show fib paths",
};