/*
 * vpp: src/vnet/fib/fib_path.c
 */
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/ip/format.h>
19 #include <vnet/ip/ip.h>
20 #include <vnet/dpo/drop_dpo.h>
21 #include <vnet/dpo/receive_dpo.h>
22 #include <vnet/dpo/load_balance_map.h>
23 #include <vnet/dpo/lookup_dpo.h>
24
25 #include <vnet/adj/adj.h>
26
27 #include <vnet/fib/fib_path.h>
28 #include <vnet/fib/fib_node.h>
29 #include <vnet/fib/fib_table.h>
30 #include <vnet/fib/fib_entry.h>
31 #include <vnet/fib/fib_path_list.h>
32 #include <vnet/fib/fib_internal.h>
33 #include <vnet/fib/fib_urpf_list.h>
34
35 /**
36  * Enurmeration of path types
37  */
38 typedef enum fib_path_type_t_ {
39     /**
40      * Marker. Add new types after this one.
41      */
42     FIB_PATH_TYPE_FIRST = 0,
43     /**
44      * Attached-nexthop. An interface and a nexthop are known.
45      */
46     FIB_PATH_TYPE_ATTACHED_NEXT_HOP = FIB_PATH_TYPE_FIRST,
47     /**
48      * attached. Only the interface is known.
49      */
50     FIB_PATH_TYPE_ATTACHED,
51     /**
52      * recursive. Only the next-hop is known.
53      */
54     FIB_PATH_TYPE_RECURSIVE,
55     /**
56      * special. nothing is known. so we drop.
57      */
58     FIB_PATH_TYPE_SPECIAL,
59     /**
60      * exclusive. user provided adj.
61      */
62     FIB_PATH_TYPE_EXCLUSIVE,
63     /**
64      * deag. Link to a lookup adj in the next table
65      */
66     FIB_PATH_TYPE_DEAG,
67     /**
68      * receive. it's for-us.
69      */
70     FIB_PATH_TYPE_RECEIVE,
71     /**
72      * Marker. Add new types before this one, then update it.
73      */
74     FIB_PATH_TYPE_LAST = FIB_PATH_TYPE_RECEIVE,
75 } __attribute__ ((packed)) fib_path_type_t;
76
77 /**
78  * The maximum number of path_types
79  */
80 #define FIB_PATH_TYPE_MAX (FIB_PATH_TYPE_LAST + 1)
81
82 #define FIB_PATH_TYPES {                                        \
83     [FIB_PATH_TYPE_ATTACHED_NEXT_HOP] = "attached-nexthop",     \
84     [FIB_PATH_TYPE_ATTACHED]          = "attached",             \
85     [FIB_PATH_TYPE_RECURSIVE]         = "recursive",            \
86     [FIB_PATH_TYPE_SPECIAL]           = "special",              \
87     [FIB_PATH_TYPE_EXCLUSIVE]         = "exclusive",            \
88     [FIB_PATH_TYPE_DEAG]              = "deag",                 \
89     [FIB_PATH_TYPE_RECEIVE]           = "receive",              \
90 }
91
92 #define FOR_EACH_FIB_PATH_TYPE(_item) \
93     for (_item = FIB_PATH_TYPE_FIRST; _item <= FIB_PATH_TYPE_LAST; _item++)
94
95 /**
96  * Enurmeration of path operational (i.e. derived) attributes
97  */
98 typedef enum fib_path_oper_attribute_t_ {
99     /**
100      * Marker. Add new types after this one.
101      */
102     FIB_PATH_OPER_ATTRIBUTE_FIRST = 0,
103     /**
104      * The path forms part of a recursive loop.
105      */
106     FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP = FIB_PATH_OPER_ATTRIBUTE_FIRST,
107     /**
108      * The path is resolved
109      */
110     FIB_PATH_OPER_ATTRIBUTE_RESOLVED,
111     /**
112      * The path has become a permanent drop.
113      */
114     FIB_PATH_OPER_ATTRIBUTE_DROP,
115     /**
116      * Marker. Add new types before this one, then update it.
117      */
118     FIB_PATH_OPER_ATTRIBUTE_LAST = FIB_PATH_OPER_ATTRIBUTE_DROP,
119 } __attribute__ ((packed)) fib_path_oper_attribute_t;
120
121 /**
122  * The maximum number of path operational attributes
123  */
124 #define FIB_PATH_OPER_ATTRIBUTE_MAX (FIB_PATH_OPER_ATTRIBUTE_LAST + 1)
125
126 #define FIB_PATH_OPER_ATTRIBUTES {                                      \
127     [FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP] = "recursive-loop",        \
128     [FIB_PATH_OPER_ATTRIBUTE_RESOLVED]       = "resolved",              \
129     [FIB_PATH_OPER_ATTRIBUTE_DROP]           = "drop",                  \
130 }
131
132 #define FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(_item) \
133     for (_item = FIB_PATH_OPER_ATTRIBUTE_FIRST; \
134          _item <= FIB_PATH_OPER_ATTRIBUTE_LAST; \
135          _item++)
136
137 /**
138  * Path flags from the attributes
139  */
140 typedef enum fib_path_oper_flags_t_ {
141     FIB_PATH_OPER_FLAG_NONE = 0,
142     FIB_PATH_OPER_FLAG_RECURSIVE_LOOP = (1 << FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP),
143     FIB_PATH_OPER_FLAG_DROP = (1 << FIB_PATH_OPER_ATTRIBUTE_DROP),
144     FIB_PATH_OPER_FLAG_RESOLVED = (1 << FIB_PATH_OPER_ATTRIBUTE_RESOLVED),
145 } __attribute__ ((packed)) fib_path_oper_flags_t;
146
147 /**
148  * A FIB path
149  */
150 typedef struct fib_path_t_ {
151     /**
152      * A path is a node in the FIB graph.
153      */
154     fib_node_t fp_node;
155
156     /**
157      * The index of the path-list to which this path belongs
158      */
159     u32 fp_pl_index;
160
161     /**
162      * This marks the start of the memory area used to hash
163      * the path
164      */
165     STRUCT_MARK(path_hash_start);
166
167     /**
168      * Configuration Flags
169      */
170     fib_path_cfg_flags_t fp_cfg_flags;
171
172     /**
173      * The type of the path. This is the selector for the union
174      */
175     fib_path_type_t fp_type;
176
177     /**
178      * The protocol of the next-hop, i.e. the address family of the
179      * next-hop's address. We can't derive this from the address itself
180      * since the address can be all zeros
181      */
182     fib_protocol_t fp_nh_proto;
183
184     /**
185      * UCMP [unnormalised] weigt
186      */
187     u32 fp_weight;
188
189     /**
190      * per-type union of the data required to resolve the path
191      */
192     union {
193         struct {
194             /**
195              * The next-hop
196              */
197             ip46_address_t fp_nh;
198             /**
199              * The interface
200              */
201             u32 fp_interface;
202         } attached_next_hop;
203         struct {
204             /**
205              * The interface
206              */
207             u32 fp_interface;
208         } attached;
209         struct {
210             union
211             {
212                 /**
213                  * The next-hop
214                  */
215                 ip46_address_t fp_ip;
216                 /**
217                  * The local label to resolve through.
218                  */
219                 mpls_label_t fp_local_label;
220             } fp_nh;
221             /**
222              * The FIB table index in which to find the next-hop.
223              * This needs to be fixed. We should lookup the adjacencies in
224              * a separate table of adjacencies, rather than from the FIB.
225              * Two reasons I can think of:
226              *   - consider:
227              *       int ip addr Gig0 10.0.0.1/24
228              *       ip route 10.0.0.2/32 via Gig1 192.168.1.2
229              *       ip route 1.1.1.1/32 via Gig0 10.0.0.2
230              *     this is perfectly valid.
231              *     Packets addressed to 10.0.0.2 should be sent via Gig1.
232              *     Packets address to 1.1.1.1 should be sent via Gig0.
233              *    when we perform the adj resolution from the FIB for the path
234              *    "via Gig0 10.0.0.2" the lookup will result in the route via Gig1
235              *    and so we will pick up the adj via Gig1 - which was not what the
236              *    operator wanted.
237              *  - we can only return link-type IPv4 and so not the link-type MPLS.
238              *    more on this in a later commit.
239              *
240              * The table ID should only belong to a recursive path and indicate
241              * which FIB should be used to resolve the next-hop.
242              */
243             fib_node_index_t fp_tbl_id;
244         } recursive;
245         struct {
246             /**
247              * The FIB index in which to perfom the next lookup
248              */
249             fib_node_index_t fp_tbl_id;
250         } deag;
251         struct {
252         } special;
253         struct {
254             /**
255              * The user provided 'exclusive' DPO
256              */
257             dpo_id_t fp_ex_dpo;
258         } exclusive;
259         struct {
260             /**
261              * The interface on which the local address is configured
262              */
263             u32 fp_interface;
264             /**
265              * The next-hop
266              */
267             ip46_address_t fp_addr;
268         } receive;
269     };
270     STRUCT_MARK(path_hash_end);
271
272     /**
273      * Memebers in this last section represent information that is
274      * dervied during resolution. It should not be copied to new paths
275      * nor compared.
276      */
277
278     /**
279      * Operational Flags
280      */
281     fib_path_oper_flags_t fp_oper_flags;
282
283     /**
284      * the resolving via fib. not part of the union, since it it not part
285      * of the path's hash.
286      */
287     fib_node_index_t fp_via_fib;
288
289     /**
290      * The Data-path objects through which this path resolves for IP.
291      */
292     dpo_id_t fp_dpo;
293
294     /**
295      * the index of this path in the parent's child list.
296      */
297     u32 fp_sibling;
298 } fib_path_t;
299
300 /*
301  * Array of strings/names for the path types and attributes
302  */
303 static const char *fib_path_type_names[] = FIB_PATH_TYPES;
304 static const char *fib_path_oper_attribute_names[] = FIB_PATH_OPER_ATTRIBUTES;
305 static const char *fib_path_cfg_attribute_names[]  = FIB_PATH_CFG_ATTRIBUTES;
306
307 /*
308  * The memory pool from which we allocate all the paths
309  */
310 static fib_path_t *fib_path_pool;
311
312 /*
313  * Debug macro
314  */
315 #ifdef FIB_DEBUG
316 #define FIB_PATH_DBG(_p, _fmt, _args...)                        \
317 {                                                               \
318     u8 *_tmp = NULL;                                            \
319     _tmp = fib_path_format(fib_path_get_index(_p), _tmp);       \
320     clib_warning("path:[%d:%s]:" _fmt,                          \
321                  fib_path_get_index(_p), _tmp,                  \
322                  ##_args);                                      \
323     vec_free(_tmp);                                             \
324 }
325 #else
326 #define FIB_PATH_DBG(_p, _fmt, _args...)
327 #endif
328
329 static fib_path_t *
330 fib_path_get (fib_node_index_t index)
331 {
332     return (pool_elt_at_index(fib_path_pool, index));
333 }
334
335 static fib_node_index_t 
336 fib_path_get_index (fib_path_t *path)
337 {
338     return (path - fib_path_pool);
339 }
340
341 static fib_node_t *
342 fib_path_get_node (fib_node_index_t index)
343 {
344     return ((fib_node_t*)fib_path_get(index));
345 }
346
/*
 * Downcast a generic graph node back to the path that embeds it.
 * The fib_node_t is the first member of fib_path_t, so this is a
 * plain pointer re-interpretation; debug builds assert the node
 * really is of path type.
 */
static fib_path_t*
fib_path_from_fib_node (fib_node_t *node)
{
#if CLIB_DEBUG > 0
    ASSERT(FIB_NODE_TYPE_PATH == node->fn_type);
#endif
    return ((fib_path_t*)node);
}
355
356 u8 *
357 format_fib_path (u8 * s, va_list * args)
358 {
359     fib_path_t *path = va_arg (*args, fib_path_t *);
360     vnet_main_t * vnm = vnet_get_main();
361     fib_path_oper_attribute_t oattr;
362     fib_path_cfg_attribute_t cattr;
363
364     s = format (s, "      index:%d ", fib_path_get_index(path));
365     s = format (s, "pl-index:%d ", path->fp_pl_index);
366     s = format (s, "%U ", format_fib_protocol, path->fp_nh_proto);
367     s = format (s, "weight=%d ", path->fp_weight);
368     s = format (s, "%s: ", fib_path_type_names[path->fp_type]);
369     if (FIB_PATH_OPER_FLAG_NONE != path->fp_oper_flags) {
370         s = format(s, " oper-flags:");
371         FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(oattr) {
372             if ((1<<oattr) & path->fp_oper_flags) {
373                 s = format (s, "%s,", fib_path_oper_attribute_names[oattr]);
374             }
375         }
376     }
377     if (FIB_PATH_CFG_FLAG_NONE != path->fp_cfg_flags) {
378         s = format(s, " cfg-flags:");
379         FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(cattr) {
380             if ((1<<cattr) & path->fp_cfg_flags) {
381                 s = format (s, "%s,", fib_path_cfg_attribute_names[cattr]);
382             }
383         }
384     }
385     s = format(s, "\n       ");
386
387     switch (path->fp_type)
388     {
389     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
390         s = format (s, "%U", format_ip46_address,
391                     &path->attached_next_hop.fp_nh,
392                     IP46_TYPE_ANY);
393         if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)
394         {
395             s = format (s, " if_index:%d", path->attached_next_hop.fp_interface);
396         }
397         else
398         {
399             s = format (s, " %U",
400                         format_vnet_sw_interface_name,
401                         vnm,
402                         vnet_get_sw_interface(
403                             vnm,
404                             path->attached_next_hop.fp_interface));
405             if (vnet_sw_interface_is_p2p(vnet_get_main(),
406                                          path->attached_next_hop.fp_interface))
407             {
408                 s = format (s, " (p2p)");
409             }
410         }
411         if (!dpo_id_is_valid(&path->fp_dpo))
412         {
413             s = format(s, "\n          unresolved");
414         }
415         else
416         {
417             s = format(s, "\n          %U",
418                        format_dpo_id,
419                        &path->fp_dpo, 13);
420         }
421         break;
422     case FIB_PATH_TYPE_ATTACHED:
423         if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)
424         {
425             s = format (s, " if_index:%d", path->attached_next_hop.fp_interface);
426         }
427         else
428         {
429             s = format (s, " %U",
430                         format_vnet_sw_interface_name,
431                         vnm,
432                         vnet_get_sw_interface(
433                             vnm,
434                             path->attached.fp_interface));
435         }
436         break;
437     case FIB_PATH_TYPE_RECURSIVE:
438         if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
439         {
440             s = format (s, "via %U",
441                         format_mpls_unicast_label,
442                         path->recursive.fp_nh.fp_local_label);
443         }
444         else
445         {
446             s = format (s, "via %U",
447                         format_ip46_address,
448                         &path->recursive.fp_nh.fp_ip,
449                         IP46_TYPE_ANY);
450         }
451         s = format (s, " in fib:%d",
452                     path->recursive.fp_tbl_id,
453                     path->fp_via_fib); 
454         s = format (s, " via-fib:%d", path->fp_via_fib); 
455         s = format (s, " via-dpo:[%U:%d]",
456                     format_dpo_type, path->fp_dpo.dpoi_type, 
457                     path->fp_dpo.dpoi_index);
458
459         break;
460     case FIB_PATH_TYPE_RECEIVE:
461     case FIB_PATH_TYPE_SPECIAL:
462     case FIB_PATH_TYPE_DEAG:
463     case FIB_PATH_TYPE_EXCLUSIVE:
464         if (dpo_id_is_valid(&path->fp_dpo))
465         {
466             s = format(s, "%U", format_dpo_id,
467                        &path->fp_dpo, 2);
468         }
469         break;
470     }
471     return (s);
472 }
473
474 u8 *
475 fib_path_format (fib_node_index_t pi, u8 *s)
476 {
477     fib_path_t *path;
478
479     path = fib_path_get(pi);
480     ASSERT(NULL != path);
481
482     return (format (s, "%U", format_fib_path, path));
483 }
484
485 u8 *
486 fib_path_adj_format (fib_node_index_t pi,
487                      u32 indent,
488                      u8 *s)
489 {
490     fib_path_t *path;
491
492     path = fib_path_get(pi);
493     ASSERT(NULL != path);
494
495     if (!dpo_id_is_valid(&path->fp_dpo))
496     {
497         s = format(s, " unresolved");
498     }
499     else
500     {
501         s = format(s, "%U", format_dpo_id,
502                    &path->fp_dpo, 2);
503     }
504
505     return (s);
506 }
507
508 /*
509  * fib_path_last_lock_gone
510  *
511  * We don't share paths, we share path lists, so the [un]lock functions
512  * are no-ops
513  */
514 static void
515 fib_path_last_lock_gone (fib_node_t *node)
516 {
517     ASSERT(0);
518 }
519
520 static const adj_index_t
521 fib_path_attached_next_hop_get_adj (fib_path_t *path,
522                                     vnet_link_t link)
523 {
524     if (vnet_sw_interface_is_p2p(vnet_get_main(),
525                                  path->attached_next_hop.fp_interface))
526     {
527         /*
528          * if the interface is p2p then the adj for the specific
529          * neighbour on that link will never exist. on p2p links
530          * the subnet address (the attached route) links to the
531          * auto-adj (see below), we want that adj here too.
532          */
533         return (adj_nbr_add_or_lock(path->fp_nh_proto,
534                                     link,
535                                     &zero_addr,
536                                     path->attached_next_hop.fp_interface));
537     }
538     else
539     {
540         return (adj_nbr_add_or_lock(path->fp_nh_proto,
541                                     link,
542                                     &path->attached_next_hop.fp_nh,
543                                     path->attached_next_hop.fp_interface));
544     }
545 }
546
547 static void
548 fib_path_attached_next_hop_set (fib_path_t *path)
549 {
550     /*
551      * resolve directly via the adjacnecy discribed by the
552      * interface and next-hop
553      */
554     if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
555                                       path->attached_next_hop.fp_interface))
556     {
557         path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
558     }
559
560     dpo_set(&path->fp_dpo,
561             DPO_ADJACENCY,
562             fib_proto_to_dpo(path->fp_nh_proto),
563             fib_path_attached_next_hop_get_adj(
564                  path,
565                  fib_proto_to_link(path->fp_nh_proto)));
566
567     /*
568      * become a child of the adjacency so we receive updates
569      * when its rewrite changes
570      */
571     path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
572                                      FIB_NODE_TYPE_PATH,
573                                      fib_path_get_index(path));
574 }
575
576 /*
577  * create of update the paths recursive adj
578  */
579 static void
580 fib_path_recursive_adj_update (fib_path_t *path,
581                                fib_forward_chain_type_t fct,
582                                dpo_id_t *dpo)
583 {
584     dpo_id_t via_dpo = DPO_INVALID;
585
586     /*
587      * get the DPO to resolve through from the via-entry
588      */
589     fib_entry_contribute_forwarding(path->fp_via_fib,
590                                     fct,
591                                     &via_dpo);
592
593
594     /*
595      * hope for the best - clear if restrictions apply.
596      */
597     path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
598
599     /*
600      * Validate any recursion constraints and over-ride the via
601      * adj if not met
602      */
603     if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP)
604     {
605         path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
606         dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
607     }
608     else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_HOST)
609     {
610         /*
611          * the via FIB must be a host route.
612          * note the via FIB just added will always be a host route
613          * since it is an RR source added host route. So what we need to
614          * check is whether the route has other sources. If it does then
615          * some other source has added it as a host route. If it doesn't
616          * then it was added only here and inherits forwarding from a cover.
617          * the cover is not a host route.
618          * The RR source is the lowest priority source, so we check if it
619          * is the best. if it is there are no other sources.
620          */
621         if (fib_entry_get_best_source(path->fp_via_fib) >= FIB_SOURCE_RR)
622         {
623             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
624             dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
625
626             /*
627              * PIC edge trigger. let the load-balance maps know
628              */
629             load_balance_map_path_state_change(fib_path_get_index(path));
630         }
631     }
632     else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED)
633     {
634         /*
635          * RR source entries inherit the flags from the cover, so
636          * we can check the via directly
637          */
638         if (!(FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags(path->fp_via_fib)))
639         {
640             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
641             dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
642
643             /*
644              * PIC edge trigger. let the load-balance maps know
645              */
646             load_balance_map_path_state_change(fib_path_get_index(path));
647         }
648     }
649
650     /*
651      * update the path's contributed DPO
652      */
653     dpo_copy(dpo, &via_dpo);
654
655     FIB_PATH_DBG(path, "recursive update: %U",
656                  fib_get_lookup_main(path->fp_nh_proto),
657                  &path->fp_dpo, 2);
658
659     dpo_reset(&via_dpo);
660 }
661
662 /*
663  * fib_path_is_permanent_drop
664  *
665  * Return !0 if the path is configured to permanently drop,
666  * despite other attributes.
667  */
668 static int
669 fib_path_is_permanent_drop (fib_path_t *path)
670 {
671     return ((path->fp_cfg_flags & FIB_PATH_CFG_FLAG_DROP) ||
672             (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP));
673 }
674
675 /*
676  * fib_path_unresolve
677  *
678  * Remove our dependency on the resolution target
679  */
680 static void
681 fib_path_unresolve (fib_path_t *path)
682 {
683     /*
684      * the forced drop path does not need unresolving
685      */
686     if (fib_path_is_permanent_drop(path))
687     {
688         return;
689     }
690
691     switch (path->fp_type)
692     {
693     case FIB_PATH_TYPE_RECURSIVE:
694         if (FIB_NODE_INDEX_INVALID != path->fp_via_fib)
695         {
696             fib_prefix_t pfx;
697
698             fib_entry_get_prefix(path->fp_via_fib, &pfx);
699             fib_entry_child_remove(path->fp_via_fib,
700                                    path->fp_sibling);
701             fib_table_entry_special_remove(path->recursive.fp_tbl_id,
702                                            &pfx,
703                                            FIB_SOURCE_RR);
704             path->fp_via_fib = FIB_NODE_INDEX_INVALID;
705         }
706         break;
707     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
708     case FIB_PATH_TYPE_ATTACHED:
709         adj_child_remove(path->fp_dpo.dpoi_index,
710                          path->fp_sibling);
711         adj_unlock(path->fp_dpo.dpoi_index);
712         break;
713     case FIB_PATH_TYPE_EXCLUSIVE:
714         dpo_reset(&path->exclusive.fp_ex_dpo);
715         break;
716     case FIB_PATH_TYPE_SPECIAL:
717     case FIB_PATH_TYPE_RECEIVE:
718     case FIB_PATH_TYPE_DEAG:
719         /*
720          * these hold only the path's DPO, which is reset below.
721          */
722         break;
723     }
724
725     /*
726      * release the adj we were holding and pick up the
727      * drop just in case.
728      */
729     dpo_reset(&path->fp_dpo);
730     path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
731
732     return;
733 }
734
735 static fib_forward_chain_type_t
736 fib_path_proto_to_chain_type (fib_protocol_t proto)
737 {
738     switch (proto)
739     {
740     case FIB_PROTOCOL_IP4:
741         return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
742     case FIB_PROTOCOL_IP6:
743         return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
744     case FIB_PROTOCOL_MPLS:
745         return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
746     }
747     return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
748 }
749
750 /*
751  * fib_path_back_walk_notify
752  *
753  * A back walk has reach this path.
754  */
755 static fib_node_back_walk_rc_t
756 fib_path_back_walk_notify (fib_node_t *node,
757                            fib_node_back_walk_ctx_t *ctx)
758 {
759     fib_path_t *path;
760
761     path = fib_path_from_fib_node(node);
762
763     switch (path->fp_type)
764     {
765     case FIB_PATH_TYPE_RECURSIVE:
766         if (FIB_NODE_BW_REASON_FLAG_EVALUATE & ctx->fnbw_reason)
767         {
768             /*
769              * modify the recursive adjacency to use the new forwarding
770              * of the via-fib.
771              * this update is visible to packets in flight in the DP.
772              */
773             fib_path_recursive_adj_update(
774                 path,
775                 fib_path_proto_to_chain_type(path->fp_nh_proto),
776                 &path->fp_dpo);
777         }
778         if ((FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason) ||
779             (FIB_NODE_BW_REASON_FLAG_ADJ_DOWN   & ctx->fnbw_reason))
780         {
781             /*
782              * ADJ updates (complete<->incomplete) do not need to propagate to
783              * recursive entries.
784              * The only reason its needed as far back as here, is that the adj
785              * and the incomplete adj are a different DPO type, so the LBs need
786              * to re-stack.
787              * If this walk was quashed in the fib_entry, then any non-fib_path
788              * children (like tunnels that collapse out the LB when they stack)
789              * would not see the update.
790              */
791             return (FIB_NODE_BACK_WALK_CONTINUE);
792         }
793         break;
794     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
795         /*
796 FIXME comment
797          * ADJ_UPDATE backwalk pass silently through here and up to
798          * the path-list when the multipath adj collapse occurs.
799          * The reason we do this is that the assumtption is that VPP
800          * runs in an environment where the Control-Plane is remote
801          * and hence reacts slowly to link up down. In order to remove
802          * this down link from the ECMP set quickly, we back-walk.
803          * VPP also has dedicated CPUs, so we are not stealing resources
804          * from the CP to do so.
805          */
806         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason)
807         {
808             if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED)
809             {
810                 /*
811                  * alreday resolved. no need to walk back again
812                  */
813                 return (FIB_NODE_BACK_WALK_CONTINUE);
814             }
815             path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
816         }
817         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason)
818         {
819             if (!(path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED))
820             {
821                 /*
822                  * alreday unresolved. no need to walk back again
823                  */
824                 return (FIB_NODE_BACK_WALK_CONTINUE);
825             }
826             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
827         }
828         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
829         {
830             /*
831              * The interface this path resolves through has been deleted.
832              * This will leave the path in a permanent drop state. The route
833              * needs to be removed and readded (and hence the path-list deleted)
834              * before it can forward again.
835              */
836             fib_path_unresolve(path);
837             path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
838         }
839         if (FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason)
840         {
841             /*
842              * restack the DPO to pick up the correct DPO sub-type
843              */
844             uword if_is_up;
845             adj_index_t ai;
846
847             if_is_up = vnet_sw_interface_is_admin_up(
848                            vnet_get_main(),
849                            path->attached_next_hop.fp_interface);
850
851             if (if_is_up)
852             {
853                 path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
854             }
855
856             ai = fib_path_attached_next_hop_get_adj(
857                      path,
858                      fib_proto_to_link(path->fp_nh_proto));
859
860             dpo_set(&path->fp_dpo, DPO_ADJACENCY,
861                     fib_proto_to_dpo(path->fp_nh_proto),
862                     ai);
863             adj_unlock(ai);
864
865             if (!if_is_up)
866             {
867                 /*
868                  * If the interface is not up there is no reason to walk
869                  * back to children. if we did they would only evalute
870                  * that this path is unresolved and hence it would
871                  * not contribute the adjacency - so it would be wasted
872                  * CPU time.
873                  */
874                 return (FIB_NODE_BACK_WALK_CONTINUE);
875             }
876         }
877         if (FIB_NODE_BW_REASON_FLAG_ADJ_DOWN & ctx->fnbw_reason)
878         {
879             if (!(path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED))
880             {
881                 /*
882                  * alreday unresolved. no need to walk back again
883                  */
884                 return (FIB_NODE_BACK_WALK_CONTINUE);
885             }
886             /*
887              * the adj has gone down. the path is no longer resolved.
888              */
889             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
890         }
891         break;
892     case FIB_PATH_TYPE_ATTACHED:
893         /*
894          * FIXME; this could schedule a lower priority walk, since attached
895          * routes are not usually in ECMP configurations so the backwalk to
896          * the FIB entry does not need to be high priority
897          */
898         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason)
899         {
900             path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
901         }
902         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason)
903         {
904             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
905         }
906         if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
907         {
908             fib_path_unresolve(path);
909             path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
910         }
911         break;
912     case FIB_PATH_TYPE_DEAG:
913         /*
914          * FIXME When VRF delete is allowed this will need a poke.
915          */
916     case FIB_PATH_TYPE_SPECIAL:
917     case FIB_PATH_TYPE_RECEIVE:
918     case FIB_PATH_TYPE_EXCLUSIVE:
919         /*
920          * these path types have no parents. so to be
921          * walked from one is unexpected.
922          */
923         ASSERT(0);
924         break;
925     }
926
927     /*
928      * propagate the backwalk further to the path-list
929      */
930     fib_path_list_back_walk(path->fp_pl_index, ctx);
931
932     return (FIB_NODE_BACK_WALK_CONTINUE);
933 }
934
static void
fib_path_memory_show (void)
{
    /*
     * Report the path pool's element count, pool length and per-element
     * size. Registered as the fnv_mem_show callback in fib_path_vft.
     */
    fib_show_memory_usage("Path",
                          pool_elts(fib_path_pool),
                          pool_len(fib_path_pool),
                          sizeof(fib_path_t));
}
943
/*
 * The FIB path's graph node virtual function table.
 * Registered with the fib_node module so that paths participate in the
 * FIB object graph: lookup, last-lock cleanup, back-walk notification
 * and memory reporting.
 */
static const fib_node_vft_t fib_path_vft = {
    .fnv_get = fib_path_get_node,
    .fnv_last_lock = fib_path_last_lock_gone,
    .fnv_back_walk = fib_path_back_walk_notify,
    .fnv_mem_show = fib_path_memory_show,
};
953
954 static fib_path_cfg_flags_t
955 fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath)
956 {
957     fib_path_cfg_flags_t cfg_flags = FIB_PATH_CFG_FLAG_NONE;
958
959     if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_HOST)
960         cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_HOST;
961     if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED)
962         cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED;
963
964     return (cfg_flags);
965 }
966
/*
 * fib_path_create
 *
 * Create and initialise a new path object.
 * The path's type is deduced from the combination of interface,
 * next-hop address and FIB index present in the rpath.
 * return the index of the path.
 */
fib_node_index_t
fib_path_create (fib_node_index_t pl_index,
                 fib_protocol_t nh_proto,
                 fib_path_cfg_flags_t flags,
                 const fib_route_path_t *rpath)
{
    fib_path_t *path;

    pool_get(fib_path_pool, path);
    memset(path, 0, sizeof(*path));

    fib_node_init(&path->fp_node,
                  FIB_NODE_TYPE_PATH);

    dpo_reset(&path->fp_dpo);
    path->fp_pl_index = pl_index;
    path->fp_nh_proto = nh_proto;
    path->fp_via_fib = FIB_NODE_INDEX_INVALID;
    path->fp_weight = rpath->frp_weight;
    if (0 == path->fp_weight)
    {
        /*
         * a weight of 0 is a meaningless value. We could either reject it, and thus force
         * clients to always use 1, or we can accept it and fixup appropriately.
         */
        path->fp_weight = 1;
    }
    path->fp_cfg_flags = flags;
    path->fp_cfg_flags |= fib_path_route_flags_to_cfg_flags(rpath);

    /*
     * deduce the path's type from the parameters and save what is needed.
     */
    if (~0 != rpath->frp_sw_if_index)
    {
        if (flags & FIB_PATH_CFG_FLAG_LOCAL)
        {
            /* a for-us path: traffic is received on this interface */
            path->fp_type = FIB_PATH_TYPE_RECEIVE;
            path->receive.fp_interface = rpath->frp_sw_if_index;
            path->receive.fp_addr = rpath->frp_addr;
        }
        else
        {
            if (ip46_address_is_zero(&rpath->frp_addr))
            {
                /* interface but no next-hop address => attached */
                path->fp_type = FIB_PATH_TYPE_ATTACHED;
                path->attached.fp_interface = rpath->frp_sw_if_index;
            }
            else
            {
                /* interface and next-hop address => attached next-hop */
                path->fp_type = FIB_PATH_TYPE_ATTACHED_NEXT_HOP;
                path->attached_next_hop.fp_interface = rpath->frp_sw_if_index;
                path->attached_next_hop.fp_nh = rpath->frp_addr;
            }
        }
    }
    else
    {
        if (ip46_address_is_zero(&rpath->frp_addr))
        {
            if (~0 == rpath->frp_fib_index)
            {
                /* no interface, no next-hop, no table => special (drop) */
                path->fp_type = FIB_PATH_TYPE_SPECIAL;
            }
            else
            {
                /* only a table to look up in => de-aggregation */
                path->fp_type = FIB_PATH_TYPE_DEAG;
                path->deag.fp_tbl_id = rpath->frp_fib_index;
            }
        }
        else
        {
            /* next-hop only => recursive resolution via that next-hop */
            path->fp_type = FIB_PATH_TYPE_RECURSIVE;
            if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
            {
                path->recursive.fp_nh.fp_local_label = rpath->frp_local_label;
            }
            else
            {
                path->recursive.fp_nh.fp_ip = rpath->frp_addr;
            }
            path->recursive.fp_tbl_id = rpath->frp_fib_index;
        }
    }

    FIB_PATH_DBG(path, "create");

    return (fib_path_get_index(path));
}
1062
1063 /*
1064  * fib_path_create_special
1065  *
1066  * Create and initialise a new path object.
1067  * return the index of the path.
1068  */
1069 fib_node_index_t
1070 fib_path_create_special (fib_node_index_t pl_index,
1071                          fib_protocol_t nh_proto,
1072                          fib_path_cfg_flags_t flags,
1073                          const dpo_id_t *dpo)
1074 {
1075     fib_path_t *path;
1076
1077     pool_get(fib_path_pool, path);
1078     memset(path, 0, sizeof(*path));
1079
1080     fib_node_init(&path->fp_node,
1081                   FIB_NODE_TYPE_PATH);
1082     dpo_reset(&path->fp_dpo);
1083
1084     path->fp_pl_index = pl_index;
1085     path->fp_weight = 1;
1086     path->fp_nh_proto = nh_proto;
1087     path->fp_via_fib = FIB_NODE_INDEX_INVALID;
1088     path->fp_cfg_flags = flags;
1089
1090     if (FIB_PATH_CFG_FLAG_DROP & flags)
1091     {
1092         path->fp_type = FIB_PATH_TYPE_SPECIAL;
1093     }
1094     else if (FIB_PATH_CFG_FLAG_LOCAL & flags)
1095     {
1096         path->fp_type = FIB_PATH_TYPE_RECEIVE;
1097         path->attached.fp_interface = FIB_NODE_INDEX_INVALID;
1098     }
1099     else
1100     {
1101         path->fp_type = FIB_PATH_TYPE_EXCLUSIVE;
1102         ASSERT(NULL != dpo);
1103         dpo_copy(&path->exclusive.fp_ex_dpo, dpo);
1104     }
1105
1106     return (fib_path_get_index(path));
1107 }
1108
/*
 * fib_path_copy
 *
 * Copy a path. The copy is linked to the given path-list and has its
 * dynamic state (graph node, oper flags, via-entry, DPO) reset.
 * return index of new path.
 */
fib_node_index_t
fib_path_copy (fib_node_index_t path_index,
               fib_node_index_t path_list_index)
{
    fib_path_t *path, *orig_path;

    /*
     * get the new pool element before looking up the source; pool_get
     * may reallocate the pool and invalidate prior pointers into it.
     */
    pool_get(fib_path_pool, path);

    orig_path = fib_path_get(path_index);
    ASSERT(NULL != orig_path);

    memcpy(path, orig_path, sizeof(*path));

    FIB_PATH_DBG(path, "create-copy:%d", path_index);

    /*
     * reset the dynamic section
     */
    fib_node_init(&path->fp_node, FIB_NODE_TYPE_PATH);
    path->fp_oper_flags     = FIB_PATH_OPER_FLAG_NONE;
    path->fp_pl_index  = path_list_index;
    path->fp_via_fib   = FIB_NODE_INDEX_INVALID;
    memset(&path->fp_dpo, 0, sizeof(path->fp_dpo));
    dpo_reset(&path->fp_dpo);

    return (fib_path_get_index(path));
}
1141
/*
 * fib_path_destroy
 *
 * destroy a path that is no longer required.
 * unresolves the path (releasing its DPO and graph linkage), deinits
 * the graph node and returns the element to the pool.
 */
void
fib_path_destroy (fib_node_index_t path_index)
{
    fib_path_t *path;

    path = fib_path_get(path_index);

    ASSERT(NULL != path);
    FIB_PATH_DBG(path, "destroy");

    fib_path_unresolve(path);

    fib_node_deinit(&path->fp_node);
    pool_put(fib_path_pool, path);
}
1162
/*
 * fib_path_hash
 *
 * Compute a hash over the path's configuration data.
 */
uword
fib_path_hash (fib_node_index_t path_index)
{
    fib_path_t *path;

    path = fib_path_get(path_index);

    /*
     * hash only the region between the struct's path_hash_start and
     * path_hash_end markers, i.e. the configuration, not dynamic state.
     */
    return (hash_memory(STRUCT_MARK_PTR(path, path_hash_start),
                        (STRUCT_OFFSET_OF(fib_path_t, path_hash_end) -
                         STRUCT_OFFSET_OF(fib_path_t, path_hash_start)),
                        0));
}
1180
1181 /*
1182  * fib_path_cmp_i
1183  *
1184  * Compare two paths for equivalence.
1185  */
1186 static int
1187 fib_path_cmp_i (const fib_path_t *path1,
1188                 const fib_path_t *path2)
1189 {
1190     int res;
1191
1192     res = 1;
1193
1194     /*
1195      * paths of different types and protocol are not equal.
1196      * different weights only are the same path.
1197      */
1198     if (path1->fp_type != path2->fp_type)
1199     {
1200         res = (path1->fp_type - path2->fp_type);
1201     }
1202     if (path1->fp_nh_proto != path2->fp_nh_proto)
1203     {
1204         res = (path1->fp_nh_proto - path2->fp_nh_proto);
1205     }
1206     else
1207     {
1208         /*
1209          * both paths are of the same type.
1210          * consider each type and its attributes in turn.
1211          */
1212         switch (path1->fp_type)
1213         {
1214         case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1215             res = ip46_address_cmp(&path1->attached_next_hop.fp_nh,
1216                                    &path2->attached_next_hop.fp_nh);
1217             if (0 == res) {
1218                 res = vnet_sw_interface_compare(
1219                           vnet_get_main(),
1220                           path1->attached_next_hop.fp_interface,
1221                           path2->attached_next_hop.fp_interface);
1222             }
1223             break;
1224         case FIB_PATH_TYPE_ATTACHED:
1225             res = vnet_sw_interface_compare(
1226                       vnet_get_main(),
1227                       path1->attached.fp_interface,
1228                       path2->attached.fp_interface);
1229             break;
1230         case FIB_PATH_TYPE_RECURSIVE:
1231             res = ip46_address_cmp(&path1->recursive.fp_nh,
1232                                    &path2->recursive.fp_nh);
1233  
1234             if (0 == res)
1235             {
1236                 res = (path1->recursive.fp_tbl_id - path2->recursive.fp_tbl_id);
1237             }
1238             break;
1239         case FIB_PATH_TYPE_DEAG:
1240             res = (path1->deag.fp_tbl_id - path2->deag.fp_tbl_id);
1241             break;
1242         case FIB_PATH_TYPE_SPECIAL:
1243         case FIB_PATH_TYPE_RECEIVE:
1244         case FIB_PATH_TYPE_EXCLUSIVE:
1245             res = 0;
1246             break;
1247         }
1248     }
1249     return (res);
1250 }
1251
1252 /*
1253  * fib_path_cmp_for_sort
1254  *
1255  * Compare two paths for equivalence. Used during path sorting.
1256  * As usual 0 means equal.
1257  */
1258 int
1259 fib_path_cmp_for_sort (void * v1,
1260                        void * v2)
1261 {
1262     fib_node_index_t *pi1 = v1, *pi2 = v2;
1263     fib_path_t *path1, *path2;
1264
1265     path1 = fib_path_get(*pi1);
1266     path2 = fib_path_get(*pi2);
1267
1268     return (fib_path_cmp_i(path1, path2));
1269 }
1270
1271 /*
1272  * fib_path_cmp
1273  *
1274  * Compare two paths for equivalence.
1275  */
1276 int
1277 fib_path_cmp (fib_node_index_t pi1,
1278               fib_node_index_t pi2)
1279 {
1280     fib_path_t *path1, *path2;
1281
1282     path1 = fib_path_get(pi1);
1283     path2 = fib_path_get(pi2);
1284
1285     return (fib_path_cmp_i(path1, path2));
1286 }
1287
/*
 * fib_path_cmp_w_route_path
 *
 * Compare an existing path against a client-supplied route-path
 * description. 0 means equivalent. Used to find an existing path
 * matching an add/remove request.
 */
int
fib_path_cmp_w_route_path (fib_node_index_t path_index,
                           const fib_route_path_t *rpath)
{
    fib_path_t *path;
    int res;

    path = fib_path_get(path_index);

    res = 1;

    if (path->fp_weight != rpath->frp_weight)
    {
        res = (path->fp_weight - rpath->frp_weight);
    }
    else
    {
        /*
         * compare the path's type-specific attributes against the
         * corresponding fields of the rpath.
         */
        switch (path->fp_type)
        {
        case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
            res = ip46_address_cmp(&path->attached_next_hop.fp_nh,
                                   &rpath->frp_addr);
            if (0 == res)
            {
                res = vnet_sw_interface_compare(
                          vnet_get_main(),
                          path->attached_next_hop.fp_interface,
                          rpath->frp_sw_if_index);
            }
            break;
        case FIB_PATH_TYPE_ATTACHED:
            res = vnet_sw_interface_compare(
                      vnet_get_main(),
                      path->attached.fp_interface,
                      rpath->frp_sw_if_index);
            break;
        case FIB_PATH_TYPE_RECURSIVE:
            /* MPLS paths recurse via a label, others via an IP address */
            if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
            {
                res = path->recursive.fp_nh.fp_local_label - rpath->frp_local_label;
            }
            else
            {
                res = ip46_address_cmp(&path->recursive.fp_nh.fp_ip,
                                       &rpath->frp_addr);
            }

            if (0 == res)
            {
                res = (path->recursive.fp_tbl_id - rpath->frp_fib_index);
            }
            break;
        case FIB_PATH_TYPE_DEAG:
            res = (path->deag.fp_tbl_id - rpath->frp_fib_index);
            break;
        case FIB_PATH_TYPE_SPECIAL:
        case FIB_PATH_TYPE_RECEIVE:
        case FIB_PATH_TYPE_EXCLUSIVE:
            /* these types carry no attributes to compare against an rpath */
            res = 0;
            break;
        }
    }
    return (res);
}
1356
/*
 * fib_path_recursive_loop_detect
 *
 * A forward walk of the FIB object graph to detect for a cycle/loop. This
 * walk is initiated when an entry is linking to a new path list or from an old.
 * The entry vector passed contains all the FIB entries that are children of this
 * path (it is all the entries encountered on the walk so far). If this vector
 * contains the entry this path resolves via, then a loop is about to form.
 * The loop must be allowed to form, since we need the dependencies in place
 * so that we can track when the loop breaks.
 * However, we MUST not produce a loop in the forwarding graph (else packets
 * would loop around the switch path until the loop breaks), so we mark recursive
 * paths as looped so that they do not contribute forwarding information.
 * By marking the path as looped, an entry such as;
 *    X/Y
 *     via a.a.a.a (looped)
 *     via b.b.b.b (not looped)
 * can still forward using the info provided by b.b.b.b only
 */
int
fib_path_recursive_loop_detect (fib_node_index_t path_index,
                                fib_node_index_t **entry_indicies)
{
    fib_path_t *path;

    path = fib_path_get(path_index);

    /*
     * the forced drop path is never looped, cos it is never resolved.
     */
    if (fib_path_is_permanent_drop(path))
    {
        return (0);
    }

    switch (path->fp_type)
    {
    case FIB_PATH_TYPE_RECURSIVE:
    {
        fib_node_index_t *entry_index, *entries;
        int looped = 0;
        entries = *entry_indicies;

        vec_foreach(entry_index, entries) {
            if (*entry_index == path->fp_via_fib)
            {
                /*
                 * the entry that is about to link to this path-list (or
                 * one of this path-list's children) is the same entry that
                 * this recursive path resolves through. this is a cycle.
                 * abort the walk.
                 */
                looped = 1;
                break;
            }
        }

        if (looped)
        {
            FIB_PATH_DBG(path, "recursive loop formed");
            path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;

            /* a looped path must not contribute forwarding; drop instead */
            dpo_copy(&path->fp_dpo,
                    drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
        }
        else
        {
            /*
             * no loop here yet. keep forward walking the graph.
             */
            if (fib_entry_recursive_loop_detect(path->fp_via_fib, entry_indicies))
            {
                FIB_PATH_DBG(path, "recursive loop formed");
                path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
            }
            else
            {
                FIB_PATH_DBG(path, "recursive loop cleared");
                path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
            }
        }
        break;
    }
    case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
    case FIB_PATH_TYPE_ATTACHED:
    case FIB_PATH_TYPE_SPECIAL:
    case FIB_PATH_TYPE_DEAG:
    case FIB_PATH_TYPE_RECEIVE:
    case FIB_PATH_TYPE_EXCLUSIVE:
        /*
         * these path types cannot be part of a loop, since they are the leaves
         * of the graph.
         */
        break;
    }

    return (fib_path_is_looped(path_index));
}
1455
/*
 * fib_path_resolve
 *
 * Resolve the path according to its type, i.e. construct the DPO
 * through which it forwards and link into the object graph where
 * required. returns non-zero if the path ends up resolved.
 */
int
fib_path_resolve (fib_node_index_t path_index)
{
    fib_path_t *path;

    path = fib_path_get(path_index);

    /*
     * hope for the best.
     */
    path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;

    /*
     * the forced drop path resolves via the drop adj
     */
    if (fib_path_is_permanent_drop(path))
    {
        dpo_copy(&path->fp_dpo,
                 drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
        path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
        return (fib_path_is_resolved(path_index));
    }

    switch (path->fp_type)
    {
    case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
        fib_path_attached_next_hop_set(path);
        break;
    case FIB_PATH_TYPE_ATTACHED:
        /*
         * path->attached.fp_interface
         */
        if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
                                           path->attached.fp_interface))
        {
            path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
        }
        if (vnet_sw_interface_is_p2p(vnet_get_main(),
                                     path->attached.fp_interface))
        {
            /*
             * point-2-point interfaces do not require a glean, since
             * there is nothing to ARP. Install a rewrite/nbr adj instead
             */
            dpo_set(&path->fp_dpo,
                    DPO_ADJACENCY,
                    fib_proto_to_dpo(path->fp_nh_proto),
                    adj_nbr_add_or_lock(
                        path->fp_nh_proto,
                        fib_proto_to_link(path->fp_nh_proto),
                        &zero_addr,
                        path->attached.fp_interface));
        }
        else
        {
            dpo_set(&path->fp_dpo,
                    DPO_ADJACENCY_GLEAN,
                    fib_proto_to_dpo(path->fp_nh_proto),
                    adj_glean_add_or_lock(path->fp_nh_proto,
                                          path->attached.fp_interface,
                                          NULL));
        }
        /*
         * become a child of the adjacency so we receive updates
         * when the interface state changes
         */
        path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
                                         FIB_NODE_TYPE_PATH,
                                         fib_path_get_index(path));

        break;
    case FIB_PATH_TYPE_RECURSIVE:
    {
        /*
         * Create a RR source entry in the table for the address
         * that this path recurses through.
         * This resolve action is recursive, hence we may create
         * more paths in the process. more creates mean maybe realloc
         * of this path.
         */
        fib_node_index_t fei;
        fib_prefix_t pfx;

        ASSERT(FIB_NODE_INDEX_INVALID == path->fp_via_fib);

        if (FIB_PROTOCOL_MPLS == path->fp_nh_proto)
        {
            fib_prefix_from_mpls_label(path->recursive.fp_nh.fp_local_label, &pfx);
        }
        else
        {
            fib_prefix_from_ip46_addr(&path->recursive.fp_nh.fp_ip, &pfx);
        }

        fei = fib_table_entry_special_add(path->recursive.fp_tbl_id,
                                          &pfx,
                                          FIB_SOURCE_RR,
                                          FIB_ENTRY_FLAG_NONE,
                                          ADJ_INDEX_INVALID);

        /* re-fetch; the entry add above may have reallocated the path pool */
        path = fib_path_get(path_index);
        path->fp_via_fib = fei;

        /*
         * become a dependent child of the entry so the path is
         * informed when the forwarding for the entry changes.
         */
        path->fp_sibling = fib_entry_child_add(path->fp_via_fib,
                                               FIB_NODE_TYPE_PATH,
                                               fib_path_get_index(path));

        /*
         * create and configure the IP DPO
         */
        fib_path_recursive_adj_update(
            path,
            fib_path_proto_to_chain_type(path->fp_nh_proto),
            &path->fp_dpo);

        break;
    }
    case FIB_PATH_TYPE_SPECIAL:
        /*
         * Resolve via the drop
         */
        dpo_copy(&path->fp_dpo,
                 drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
        break;
    case FIB_PATH_TYPE_DEAG:
        /*
         * Resolve via a lookup DPO.
         * FIXME. control plane should add routes with a table ID
         */
        lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id,
                                          fib_proto_to_dpo(path->fp_nh_proto),
                                          LOOKUP_INPUT_DST_ADDR,
                                          LOOKUP_TABLE_FROM_CONFIG,
                                          &path->fp_dpo);
        break;
    case FIB_PATH_TYPE_RECEIVE:
        /*
         * Resolve via a receive DPO.
         */
        receive_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto),
                                path->receive.fp_interface,
                                &path->receive.fp_addr,
                                &path->fp_dpo);
        break;
    case FIB_PATH_TYPE_EXCLUSIVE:
        /*
         * Resolve via the user provided DPO
         */
        dpo_copy(&path->fp_dpo, &path->exclusive.fp_ex_dpo);
        break;
    }

    return (fib_path_is_resolved(path_index));
}
1614
1615 u32
1616 fib_path_get_resolving_interface (fib_node_index_t path_index)
1617 {
1618     fib_path_t *path;
1619
1620     path = fib_path_get(path_index);
1621
1622     switch (path->fp_type)
1623     {
1624     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1625         return (path->attached_next_hop.fp_interface);
1626     case FIB_PATH_TYPE_ATTACHED:
1627         return (path->attached.fp_interface);
1628     case FIB_PATH_TYPE_RECEIVE:
1629         return (path->receive.fp_interface);
1630     case FIB_PATH_TYPE_RECURSIVE:
1631         return (fib_entry_get_resolving_interface(path->fp_via_fib));    
1632     case FIB_PATH_TYPE_SPECIAL:
1633     case FIB_PATH_TYPE_DEAG:
1634     case FIB_PATH_TYPE_EXCLUSIVE:
1635         break;
1636     }
1637     return (~0);
1638 }
1639
1640 adj_index_t
1641 fib_path_get_adj (fib_node_index_t path_index)
1642 {
1643     fib_path_t *path;
1644
1645     path = fib_path_get(path_index);
1646
1647     ASSERT(dpo_is_adj(&path->fp_dpo));
1648     if (dpo_is_adj(&path->fp_dpo))
1649     {
1650         return (path->fp_dpo.dpoi_index);
1651     }
1652     return (ADJ_INDEX_INVALID);
1653 }
1654
1655 int
1656 fib_path_get_weight (fib_node_index_t path_index)
1657 {
1658     fib_path_t *path;
1659
1660     path = fib_path_get(path_index);
1661
1662     ASSERT(path);
1663
1664     return (path->fp_weight);
1665 }
1666
/**
 * @brief Contribute the path's adjacency to the list passed.
 * By calling this function over all paths, recursively, a child
 * can construct its full set of forwarding adjacencies, and hence its
 * uRPF list.
 */
void
fib_path_contribute_urpf (fib_node_index_t path_index,
                          index_t urpf)
{
    fib_path_t *path;

    /* unresolved paths contribute nothing */
    if (!fib_path_is_resolved(path_index))
        return;

    path = fib_path_get(path_index);

    switch (path->fp_type)
    {
    case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
        fib_urpf_list_append(urpf, path->attached_next_hop.fp_interface);
        break;

    case FIB_PATH_TYPE_ATTACHED:
        fib_urpf_list_append(urpf, path->attached.fp_interface);
        break;

    case FIB_PATH_TYPE_RECURSIVE:
        /* recurse into the entry this path resolves through */
        fib_entry_contribute_urpf(path->fp_via_fib, urpf);
        break;

    case FIB_PATH_TYPE_EXCLUSIVE:
    case FIB_PATH_TYPE_SPECIAL:
        /*
         * these path types may link to an adj, if that's what
         * the client gave
         */
        if (dpo_is_adj(&path->fp_dpo))
        {
            ip_adjacency_t *adj;

            adj = adj_get(path->fp_dpo.dpoi_index);

            fib_urpf_list_append(urpf, adj->rewrite_header.sw_if_index);
        }
        break;

    case FIB_PATH_TYPE_DEAG:
    case FIB_PATH_TYPE_RECEIVE:
        /*
         * these path types don't link to an adj
         */
        break;
    }
}
1722
/*
 * fib_path_contribute_forwarding
 *
 * Fill the dpo passed with the DPO this path contributes for the
 * requested forwarding chain type.
 */
void
fib_path_contribute_forwarding (fib_node_index_t path_index,
                                fib_forward_chain_type_t fct,
                                dpo_id_t *dpo)
{
    fib_path_t *path;

    path = fib_path_get(path_index);

    ASSERT(path);
    ASSERT(FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct);

    FIB_PATH_DBG(path, "contribute");

    /*
     * The DPO stored in the path was created when the path was resolved.
     * This then represents the path's 'native' protocol; IP.
     * For all others will need to go find something else.
     */
    if (fib_path_proto_to_chain_type(path->fp_nh_proto) == fct)
    {
        /* the requested chain matches the path's native protocol */
        dpo_copy(dpo, &path->fp_dpo);
    }
    else
    {
        switch (path->fp_type)
        {
        case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
            switch (fct)
            {
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
            case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
            case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
            case FIB_FORW_CHAIN_TYPE_ETHERNET:
            {
                adj_index_t ai;

                /*
                 * get a appropriate link type adj.
                 */
                ai = fib_path_attached_next_hop_get_adj(
                         path,
                         fib_forw_chain_type_to_link_type(fct));
                /* dpo_set takes its own lock on the adj */
                dpo_set(dpo, DPO_ADJACENCY,
                        fib_forw_chain_type_to_dpo_proto(fct), ai);
                adj_unlock(ai);

                break;
            }
            }
            break;
        case FIB_PATH_TYPE_RECURSIVE:
            switch (fct)
            {
            case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
            case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
                fib_path_recursive_adj_update(path, fct, dpo);
                break;
            case FIB_FORW_CHAIN_TYPE_ETHERNET:
                ASSERT(0);
                break;
            }
            break;
        case FIB_PATH_TYPE_DEAG:
            switch (fct)
            {
            case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
                lookup_dpo_add_or_lock_w_table_id(MPLS_FIB_DEFAULT_TABLE_ID,
                                                  DPO_PROTO_MPLS,
                                                  LOOKUP_INPUT_DST_ADDR,
                                                  LOOKUP_TABLE_FROM_CONFIG,
                                                  dpo);
                break;
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
            case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
                dpo_copy(dpo, &path->fp_dpo);
                break;
            case FIB_FORW_CHAIN_TYPE_ETHERNET:
                ASSERT(0);
                break;
            }
            break;
        case FIB_PATH_TYPE_EXCLUSIVE:
            dpo_copy(dpo, &path->exclusive.fp_ex_dpo);
            break;
        case FIB_PATH_TYPE_ATTACHED:
        case FIB_PATH_TYPE_RECEIVE:
        case FIB_PATH_TYPE_SPECIAL:
            /* these types only contribute their native chain type */
            ASSERT(0);
            break;
        }

    }
}
1821
1822 load_balance_path_t *
1823 fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index,
1824                                        fib_forward_chain_type_t fct,
1825                                        load_balance_path_t *hash_key)
1826 {
1827     load_balance_path_t *mnh;
1828     fib_path_t *path;
1829
1830     path = fib_path_get(path_index);
1831
1832     ASSERT(path);
1833
1834     if (fib_path_is_resolved(path_index))
1835     {
1836         vec_add2(hash_key, mnh, 1);
1837
1838         mnh->path_weight = path->fp_weight;
1839         mnh->path_index = path_index;
1840         fib_path_contribute_forwarding(path_index, fct, &mnh->path_dpo);
1841     }
1842
1843     return (hash_key);
1844 }
1845
1846 int
1847 fib_path_is_recursive (fib_node_index_t path_index)
1848 {
1849     fib_path_t *path;
1850
1851     path = fib_path_get(path_index);
1852
1853     return (FIB_PATH_TYPE_RECURSIVE == path->fp_type);
1854 }
1855
1856 int
1857 fib_path_is_exclusive (fib_node_index_t path_index)
1858 {
1859     fib_path_t *path;
1860
1861     path = fib_path_get(path_index);
1862
1863     return (FIB_PATH_TYPE_EXCLUSIVE == path->fp_type);
1864 }
1865
1866 int
1867 fib_path_is_deag (fib_node_index_t path_index)
1868 {
1869     fib_path_t *path;
1870
1871     path = fib_path_get(path_index);
1872
1873     return (FIB_PATH_TYPE_DEAG == path->fp_type);
1874 }
1875
1876 int
1877 fib_path_is_resolved (fib_node_index_t path_index)
1878 {
1879     fib_path_t *path;
1880
1881     path = fib_path_get(path_index);
1882
1883     return (dpo_id_is_valid(&path->fp_dpo) &&
1884             (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED) &&
1885             !fib_path_is_looped(path_index) &&
1886             !fib_path_is_permanent_drop(path));
1887 }
1888
1889 int
1890 fib_path_is_looped (fib_node_index_t path_index)
1891 {
1892     fib_path_t *path;
1893
1894     path = fib_path_get(path_index);
1895
1896     return (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP);
1897 }
1898
1899 int
1900 fib_path_encode (fib_node_index_t path_list_index,
1901                  fib_node_index_t path_index,
1902                  void *ctx)
1903 {
1904     fib_route_path_encode_t **api_rpaths = ctx;
1905     fib_route_path_encode_t *api_rpath;
1906     fib_path_t *path;
1907
1908     path = fib_path_get(path_index);
1909     if (!path)
1910       return (0);
1911     vec_add2(*api_rpaths, api_rpath, 1);
1912     api_rpath->rpath.frp_weight = path->fp_weight;
1913     api_rpath->rpath.frp_proto = path->fp_nh_proto;
1914     api_rpath->rpath.frp_sw_if_index = ~0;
1915     api_rpath->dpo = path->exclusive.fp_ex_dpo;
1916     switch (path->fp_type)
1917       {
1918       case FIB_PATH_TYPE_RECEIVE:
1919         api_rpath->rpath.frp_addr = path->receive.fp_addr;
1920         api_rpath->rpath.frp_sw_if_index = path->receive.fp_interface;
1921         break;
1922       case FIB_PATH_TYPE_ATTACHED:
1923         api_rpath->rpath.frp_sw_if_index = path->attached.fp_interface;
1924         break;
1925       case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1926         api_rpath->rpath.frp_sw_if_index = path->attached_next_hop.fp_interface;
1927         api_rpath->rpath.frp_addr = path->attached_next_hop.fp_nh;
1928         break;
1929       case FIB_PATH_TYPE_SPECIAL:
1930         break;
1931       case FIB_PATH_TYPE_DEAG:
1932         break;
1933       case FIB_PATH_TYPE_RECURSIVE:
1934         api_rpath->rpath.frp_addr = path->recursive.fp_nh.fp_ip;
1935         break;
1936       default:
1937         break;
1938       }
1939     return (1);
1940 }
1941
1942 fib_protocol_t
1943 fib_path_get_proto (fib_node_index_t path_index)
1944 {
1945     fib_path_t *path;
1946
1947     path = fib_path_get(path_index);
1948
1949     return (path->fp_nh_proto);
1950 }
1951
/**
 * Module initialisation: register the path type and its virtual
 * function table with the FIB node graph.
 */
void
fib_path_module_init (void)
{
    fib_node_register_type (FIB_NODE_TYPE_PATH, &fib_path_vft);
}
1957
1958 static clib_error_t *
1959 show_fib_path_command (vlib_main_t * vm,
1960                         unformat_input_t * input,
1961                         vlib_cli_command_t * cmd)
1962 {
1963     fib_node_index_t pi;
1964     fib_path_t *path;
1965
1966     if (unformat (input, "%d", &pi))
1967     {
1968         /*
1969          * show one in detail
1970          */
1971         if (!pool_is_free_index(fib_path_pool, pi))
1972         {
1973             path = fib_path_get(pi);
1974             u8 *s = fib_path_format(pi, NULL);
1975             s = format(s, "children:");
1976             s = fib_node_children_format(path->fp_node.fn_children, s);
1977             vlib_cli_output (vm, "%s", s);
1978             vec_free(s);
1979         }
1980         else
1981         {
1982             vlib_cli_output (vm, "path %d invalid", pi);
1983         }
1984     }
1985     else
1986     {
1987         vlib_cli_output (vm, "FIB Paths");
1988         pool_foreach(path, fib_path_pool,
1989         ({
1990             vlib_cli_output (vm, "%U", format_fib_path, path);
1991         }));
1992     }
1993
1994     return (NULL);
1995 }
1996
/*
 * Register the "show fib paths" CLI command with vlib.
 */
VLIB_CLI_COMMAND (show_fib_path, static) = {
  .path = "show fib paths",
  .function = show_fib_path_command,
  .short_help = "show fib paths",
};