5796539b19aa159469e60e89b5813a4cd9c282d4
[vpp.git] / vnet / vnet / fib / fib_path.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/ip/format.h>
19 #include <vnet/ip/ip.h>
20 #include <vnet/dpo/drop_dpo.h>
21 #include <vnet/dpo/receive_dpo.h>
22 #include <vnet/dpo/load_balance_map.h>
23 #include <vnet/dpo/lookup_dpo.h>
24
25 #include <vnet/adj/adj.h>
26
27 #include <vnet/fib/fib_path.h>
28 #include <vnet/fib/fib_node.h>
29 #include <vnet/fib/fib_table.h>
30 #include <vnet/fib/fib_entry.h>
31 #include <vnet/fib/fib_path_list.h>
32 #include <vnet/fib/fib_internal.h>
33 #include <vnet/fib/fib_urpf_list.h>
34
/**
 * Enumeration of path types
 */
typedef enum fib_path_type_t_ {
    /**
     * Marker. Add new types after this one.
     */
    FIB_PATH_TYPE_FIRST = 0,
    /**
     * Attached-nexthop. An interface and a nexthop are known.
     */
    FIB_PATH_TYPE_ATTACHED_NEXT_HOP = FIB_PATH_TYPE_FIRST,
    /**
     * attached. Only the interface is known.
     */
    FIB_PATH_TYPE_ATTACHED,
    /**
     * recursive. Only the next-hop is known.
     */
    FIB_PATH_TYPE_RECURSIVE,
    /**
     * special. nothing is known. so we drop.
     */
    FIB_PATH_TYPE_SPECIAL,
    /**
     * exclusive. user provided adj.
     */
    FIB_PATH_TYPE_EXCLUSIVE,
    /**
     * deag. Link to a lookup adj in the next table
     */
    FIB_PATH_TYPE_DEAG,
    /**
     * receive. it's for-us.
     */
    FIB_PATH_TYPE_RECEIVE,
    /**
     * Marker. Add new types before this one, then update it.
     */
    FIB_PATH_TYPE_LAST = FIB_PATH_TYPE_RECEIVE,
} __attribute__ ((packed)) fib_path_type_t;
76
/**
 * The maximum number of path_types
 */
#define FIB_PATH_TYPE_MAX (FIB_PATH_TYPE_LAST + 1)

/**
 * Designated-initialiser table of human-readable names, indexed by
 * fib_path_type_t. Used by the show/format functions below.
 */
#define FIB_PATH_TYPES {                                        \
    [FIB_PATH_TYPE_ATTACHED_NEXT_HOP] = "attached-nexthop",     \
    [FIB_PATH_TYPE_ATTACHED]          = "attached",             \
    [FIB_PATH_TYPE_RECURSIVE]         = "recursive",            \
    [FIB_PATH_TYPE_SPECIAL]           = "special",              \
    [FIB_PATH_TYPE_EXCLUSIVE]         = "exclusive",            \
    [FIB_PATH_TYPE_DEAG]              = "deag",                 \
    [FIB_PATH_TYPE_RECEIVE]           = "receive",              \
}

/**
 * Iterate _item over every path type, FIRST through LAST inclusive.
 */
#define FOR_EACH_FIB_PATH_TYPE(_item) \
    for (_item = FIB_PATH_TYPE_FIRST; _item <= FIB_PATH_TYPE_LAST; _item++)
94
/**
 * Enumeration of path operational (i.e. derived) attributes
 */
typedef enum fib_path_oper_attribute_t_ {
    /**
     * Marker. Add new types after this one.
     */
    FIB_PATH_OPER_ATTRIBUTE_FIRST = 0,
    /**
     * The path forms part of a recursive loop.
     */
    FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP = FIB_PATH_OPER_ATTRIBUTE_FIRST,
    /**
     * The path is resolved
     */
    FIB_PATH_OPER_ATTRIBUTE_RESOLVED,
    /**
     * The path has become a permanent drop.
     */
    FIB_PATH_OPER_ATTRIBUTE_DROP,
    /**
     * Marker. Add new types before this one, then update it.
     */
    FIB_PATH_OPER_ATTRIBUTE_LAST = FIB_PATH_OPER_ATTRIBUTE_DROP,
} __attribute__ ((packed)) fib_path_oper_attribute_t;
120
/**
 * The maximum number of path operational attributes
 */
#define FIB_PATH_OPER_ATTRIBUTE_MAX (FIB_PATH_OPER_ATTRIBUTE_LAST + 1)

/**
 * Human-readable names, indexed by fib_path_oper_attribute_t,
 * used when formatting a path's oper-flags.
 */
#define FIB_PATH_OPER_ATTRIBUTES {                                      \
    [FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP] = "recursive-loop",        \
    [FIB_PATH_OPER_ATTRIBUTE_RESOLVED]       = "resolved",              \
    [FIB_PATH_OPER_ATTRIBUTE_DROP]           = "drop",                  \
}

/**
 * Iterate _item over every operational attribute, FIRST through LAST inclusive.
 */
#define FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(_item) \
    for (_item = FIB_PATH_OPER_ATTRIBUTE_FIRST; \
         _item <= FIB_PATH_OPER_ATTRIBUTE_LAST; \
         _item++)
136
/**
 * Path flags derived from the operational attributes: each flag is the
 * attribute's bit position, so flags and attributes stay in sync.
 */
typedef enum fib_path_oper_flags_t_ {
    FIB_PATH_OPER_FLAG_NONE = 0,
    /** The path forms part of a recursive loop */
    FIB_PATH_OPER_FLAG_RECURSIVE_LOOP = (1 << FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP),
    /** The path has become a permanent drop */
    FIB_PATH_OPER_FLAG_DROP = (1 << FIB_PATH_OPER_ATTRIBUTE_DROP),
    /** The path is resolved */
    FIB_PATH_OPER_FLAG_RESOLVED = (1 << FIB_PATH_OPER_ATTRIBUTE_RESOLVED),
} __attribute__ ((packed)) fib_path_oper_flags_t;
146
/**
 * A FIB path
 *
 * The fields between the path_hash_start and path_hash_end markers are
 * the path's identity: they are hashed and compared when searching a
 * path-list for an equivalent path. Everything after path_hash_end is
 * derived state and must not be copied or compared.
 */
typedef struct fib_path_t_ {
    /**
     * A path is a node in the FIB graph.
     */
    fib_node_t fp_node;

    /**
     * The index of the path-list to which this path belongs
     */
    u32 fp_pl_index;

    /**
     * This marks the start of the memory area used to hash
     * the path
     */
    STRUCT_MARK(path_hash_start);

    /**
     * Configuration Flags
     */
    fib_path_cfg_flags_t fp_cfg_flags;

    /**
     * The type of the path. This is the selector for the union
     */
    fib_path_type_t fp_type;

    /**
     * The protocol of the next-hop, i.e. the address family of the
     * next-hop's address. We can't derive this from the address itself
     * since the address can be all zeros
     */
    fib_protocol_t fp_nh_proto;

    /**
     * UCMP [unnormalised] weight
     */
    u32 fp_weight;

    /**
     * per-type union of the data required to resolve the path.
     * fp_type selects the active member.
     */
    union {
        struct {
            /**
             * The next-hop
             */
            ip46_address_t fp_nh;
            /**
             * The interface
             */
            u32 fp_interface;
        } attached_next_hop;
        struct {
            /**
             * The interface
             */
            u32 fp_interface;
        } attached;
        struct {
            /**
             * The next-hop
             */
            ip46_address_t fp_nh;
            /**
             * The FIB table index in which to find the next-hop.
             * This needs to be fixed. We should lookup the adjacencies in
             * a separate table of adjacencies, rather than from the FIB.
             * Two reasons I can think of:
             *   - consider:
             *       int ip addr Gig0 10.0.0.1/24
             *       ip route 10.0.0.2/32 via Gig1 192.168.1.2
             *       ip route 1.1.1.1/32 via Gig0 10.0.0.2
             *     this is perfectly valid.
             *     Packets addressed to 10.0.0.2 should be sent via Gig1.
             *     Packets address to 1.1.1.1 should be sent via Gig0.
             *    when we perform the adj resolution from the FIB for the path
             *    "via Gig0 10.0.0.2" the lookup will result in the route via Gig1
             *    and so we will pick up the adj via Gig1 - which was not what the
             *    operator wanted.
             *  - we can only return link-type IPv4 and so not the link-type MPLS.
             *    more on this in a later commit.
             *
             * The table ID should only belong to a recursive path and indicate
             * which FIB should be used to resolve the next-hop.
             */
            fib_node_index_t fp_tbl_id;
        } recursive;
        struct {
            /**
             * The FIB index in which to perform the next lookup
             */
            fib_node_index_t fp_tbl_id;
        } deag;
        struct {
        } special;
        struct {
            /**
             * The user provided 'exclusive' DPO
             */
            dpo_id_t fp_ex_dpo;
        } exclusive;
        struct {
            /**
             * The interface on which the local address is configured
             */
            u32 fp_interface;
            /**
             * The next-hop
             */
            ip46_address_t fp_addr;
        } receive;
    };
    STRUCT_MARK(path_hash_end);

    /**
     * Members in this last section represent information that is
     * derived during resolution. It should not be copied to new paths
     * nor compared.
     */

    /**
     * Operational Flags
     */
    fib_path_oper_flags_t fp_oper_flags;

    /**
     * the resolving via fib. not part of the union, since it is not part
     * of the path's hash.
     */
    fib_node_index_t fp_via_fib;

    /**
     * The Data-path objects through which this path resolves for IP.
     */
    dpo_id_t fp_dpo;

    /**
     * the index of this path in the parent's child list.
     */
    u32 fp_sibling;
} fib_path_t;
292
/*
 * Array of strings/names for the path types and attributes,
 * populated from the designated-initialiser macros above.
 */
static const char *fib_path_type_names[] = FIB_PATH_TYPES;
static const char *fib_path_oper_attribute_names[] = FIB_PATH_OPER_ATTRIBUTES;
static const char *fib_path_cfg_attribute_names[]  = FIB_PATH_CFG_ATTRIBUTES;

/*
 * The memory pool from which we allocate all the paths.
 * A path's index is its position in this pool.
 */
static fib_path_t *fib_path_pool;
304
/*
 * Debug macro: when FIB_DEBUG is defined, format the path and log it
 * via clib_warning; otherwise it compiles to nothing.
 */
#ifdef FIB_DEBUG
#define FIB_PATH_DBG(_p, _fmt, _args...)                        \
{                                                               \
    u8 *_tmp = NULL;                                            \
    _tmp = fib_path_format(fib_path_get_index(_p), _tmp);       \
    clib_warning("path:[%d:%s]:" _fmt,                          \
                 fib_path_get_index(_p), _tmp,                  \
                 ##_args);                                      \
    vec_free(_tmp);                                             \
}
#else
#define FIB_PATH_DBG(_p, _fmt, _args...)
#endif
321
322 static fib_path_t *
323 fib_path_get (fib_node_index_t index)
324 {
325     return (pool_elt_at_index(fib_path_pool, index));
326 }
327
328 static fib_node_index_t 
329 fib_path_get_index (fib_path_t *path)
330 {
331     return (path - fib_path_pool);
332 }
333
334 static fib_node_t *
335 fib_path_get_node (fib_node_index_t index)
336 {
337     return ((fib_node_t*)fib_path_get(index));
338 }
339
340 static fib_path_t*
341 fib_path_from_fib_node (fib_node_t *node)
342 {
343 #if CLIB_DEBUG > 0
344     ASSERT(FIB_NODE_TYPE_PATH == node->fn_type);
345 #endif
346     return ((fib_path_t*)node);
347 }
348
349 u8 *
350 format_fib_path (u8 * s, va_list * args)
351 {
352     fib_path_t *path = va_arg (*args, fib_path_t *);
353     vnet_main_t * vnm = vnet_get_main();
354     fib_path_oper_attribute_t oattr;
355     fib_path_cfg_attribute_t cattr;
356
357     s = format (s, "      index:%d ", fib_path_get_index(path));
358     s = format (s, "pl-index:%d ", path->fp_pl_index);
359     s = format (s, "%U ", format_fib_protocol, path->fp_nh_proto);
360     s = format (s, "weight=%d ", path->fp_weight);
361     s = format (s, "%s: ", fib_path_type_names[path->fp_type]);
362     if (FIB_PATH_OPER_FLAG_NONE != path->fp_oper_flags) {
363         s = format(s, " oper-flags:");
364         FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(oattr) {
365             if ((1<<oattr) & path->fp_oper_flags) {
366                 s = format (s, "%s,", fib_path_oper_attribute_names[oattr]);
367             }
368         }
369     }
370     if (FIB_PATH_CFG_FLAG_NONE != path->fp_cfg_flags) {
371         s = format(s, " cfg-flags:");
372         FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(cattr) {
373             if ((1<<cattr) & path->fp_cfg_flags) {
374                 s = format (s, "%s,", fib_path_cfg_attribute_names[cattr]);
375             }
376         }
377     }
378     s = format(s, "\n       ");
379
380     switch (path->fp_type)
381     {
382     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
383         s = format (s, "%U", format_ip46_address,
384                     &path->attached_next_hop.fp_nh,
385                     IP46_TYPE_ANY);
386         if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)
387         {
388             s = format (s, " if_index:%d", path->attached_next_hop.fp_interface);
389         }
390         else
391         {
392             s = format (s, " %U",
393                         format_vnet_sw_interface_name,
394                         vnm,
395                         vnet_get_sw_interface(
396                             vnm,
397                             path->attached_next_hop.fp_interface));
398             if (vnet_sw_interface_is_p2p(vnet_get_main(),
399                                          path->attached_next_hop.fp_interface))
400             {
401                 s = format (s, " (p2p)");
402             }
403         }
404         if (!dpo_id_is_valid(&path->fp_dpo))
405         {
406             s = format(s, "\n          unresolved");
407         }
408         else
409         {
410             s = format(s, "\n          %U",
411                        format_dpo_id,
412                        &path->fp_dpo, 13);
413         }
414         break;
415     case FIB_PATH_TYPE_ATTACHED:
416         if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)
417         {
418             s = format (s, " if_index:%d", path->attached_next_hop.fp_interface);
419         }
420         else
421         {
422             s = format (s, " %U",
423                         format_vnet_sw_interface_name,
424                         vnm,
425                         vnet_get_sw_interface(
426                             vnm,
427                             path->attached.fp_interface));
428         }
429         break;
430     case FIB_PATH_TYPE_RECURSIVE:
431         s = format (s, "via %U",
432                     format_ip46_address,
433                     &path->recursive.fp_nh,
434                     IP46_TYPE_ANY);
435         s = format (s, " in fib:%d", path->recursive.fp_tbl_id, path->fp_via_fib); 
436         s = format (s, " via-fib:%d", path->fp_via_fib); 
437         s = format (s, " via-dpo:[%U:%d]",
438                     format_dpo_type, path->fp_dpo.dpoi_type, 
439                     path->fp_dpo.dpoi_index);
440
441         break;
442     case FIB_PATH_TYPE_RECEIVE:
443     case FIB_PATH_TYPE_SPECIAL:
444     case FIB_PATH_TYPE_DEAG:
445     case FIB_PATH_TYPE_EXCLUSIVE:
446         if (dpo_id_is_valid(&path->fp_dpo))
447         {
448             s = format(s, "%U", format_dpo_id,
449                        &path->fp_dpo, 2);
450         }
451         break;
452     }
453     return (s);
454 }
455
456 u8 *
457 fib_path_format (fib_node_index_t pi, u8 *s)
458 {
459     fib_path_t *path;
460
461     path = fib_path_get(pi);
462     ASSERT(NULL != path);
463
464     return (format (s, "%U", format_fib_path, path));
465 }
466
467 u8 *
468 fib_path_adj_format (fib_node_index_t pi,
469                      u32 indent,
470                      u8 *s)
471 {
472     fib_path_t *path;
473
474     path = fib_path_get(pi);
475     ASSERT(NULL != path);
476
477     if (!dpo_id_is_valid(&path->fp_dpo))
478     {
479         s = format(s, " unresolved");
480     }
481     else
482     {
483         s = format(s, "%U", format_dpo_id,
484                    &path->fp_dpo, 2);
485     }
486
487     return (s);
488 }
489
/*
 * fib_path_last_lock_gone
 *
 * We don't share paths, we share path lists, so the [un]lock functions
 * are no-ops. Reaching this callback therefore indicates a bug in
 * reference counting, hence the hard assert.
 */
static void
fib_path_last_lock_gone (fib_node_t *node)
{
    ASSERT(0);
}
501
502 static const adj_index_t
503 fib_path_attached_next_hop_get_adj (fib_path_t *path,
504                                     vnet_link_t link)
505 {
506     if (vnet_sw_interface_is_p2p(vnet_get_main(),
507                                  path->attached_next_hop.fp_interface))
508     {
509         /*
510          * if the interface is p2p then the adj for the specific
511          * neighbour on that link will never exist. on p2p links
512          * the subnet address (the attached route) links to the
513          * auto-adj (see below), we want that adj here too.
514          */
515         return (adj_nbr_add_or_lock(path->fp_nh_proto,
516                                     link,
517                                     &zero_addr,
518                                     path->attached_next_hop.fp_interface));
519     }
520     else
521     {
522         return (adj_nbr_add_or_lock(path->fp_nh_proto,
523                                     link,
524                                     &path->attached_next_hop.fp_nh,
525                                     path->attached_next_hop.fp_interface));
526     }
527 }
528
/**
 * Resolve an attached-next-hop path directly via the adjacency described
 * by the interface and next-hop, and register as a child of that
 * adjacency for rewrite updates.
 */
static void
fib_path_attached_next_hop_set (fib_path_t *path)
{
    /*
     * resolve directly via the adjacency described by the
     * interface and next-hop
     */
    if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
                                      path->attached_next_hop.fp_interface))
    {
        /* interface is admin-down: mark unresolved. NOTE(review): the
         * RESOLVED flag is only cleared here, never set — presumably the
         * caller sets it beforehand; confirm against callers. */
        path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
    }

    dpo_set(&path->fp_dpo,
            DPO_ADJACENCY,
            fib_proto_to_dpo(path->fp_nh_proto),
            fib_path_attached_next_hop_get_adj(
                 path,
                 fib_proto_to_link(path->fp_nh_proto)));

    /*
     * become a child of the adjacency so we receive updates
     * when its rewrite changes
     */
    path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
                                     FIB_NODE_TYPE_PATH,
                                     fib_path_get_index(path));
}
557
558 /*
559  * create of update the paths recursive adj
560  */
561 static void
562 fib_path_recursive_adj_update (fib_path_t *path,
563                                fib_forward_chain_type_t fct,
564                                dpo_id_t *dpo)
565 {
566     dpo_id_t via_dpo = DPO_INVALID;
567
568     /*
569      * get the DPO to resolve through from the via-entry
570      */
571     fib_entry_contribute_forwarding(path->fp_via_fib,
572                                     fct,
573                                     &via_dpo);
574
575
576     /*
577      * hope for the best - clear if restrictions apply.
578      */
579     path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
580
581     /*
582      * Validate any recursion constraints and over-ride the via
583      * adj if not met
584      */
585     if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP)
586     {
587         path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
588         dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
589     }
590     else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_HOST)
591     {
592         /*
593          * the via FIB must be a host route.
594          * note the via FIB just added will always be a host route
595          * since it is an RR source added host route. So what we need to
596          * check is whether the route has other sources. If it does then
597          * some other source has added it as a host route. If it doesn't
598          * then it was added only here and inherits forwarding from a cover.
599          * the cover is not a host route.
600          * The RR source is the lowest priority source, so we check if it
601          * is the best. if it is there are no other sources.
602          */
603         if (fib_entry_get_best_source(path->fp_via_fib) >= FIB_SOURCE_RR)
604         {
605             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
606             dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
607
608             /*
609              * PIC edge trigger. let the load-balance maps know
610              */
611             load_balance_map_path_state_change(fib_path_get_index(path));
612         }
613     }
614     else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED)
615     {
616         /*
617          * RR source entries inherit the flags from the cover, so
618          * we can check the via directly
619          */
620         if (!(FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags(path->fp_via_fib)))
621         {
622             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
623             dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
624
625             /*
626              * PIC edge trigger. let the load-balance maps know
627              */
628             load_balance_map_path_state_change(fib_path_get_index(path));
629         }
630     }
631
632     /*
633      * update the path's contributed DPO
634      */
635     dpo_copy(dpo, &via_dpo);
636
637     FIB_PATH_DBG(path, "recursive update: %U",
638                  fib_get_lookup_main(path->fp_nh_proto),
639                  &path->fp_dpo, 2);
640
641     dpo_reset(&via_dpo);
642 }
643
644 /*
645  * fib_path_is_permanent_drop
646  *
647  * Return !0 if the path is configured to permanently drop,
648  * despite other attributes.
649  */
650 static int
651 fib_path_is_permanent_drop (fib_path_t *path)
652 {
653     return ((path->fp_cfg_flags & FIB_PATH_CFG_FLAG_DROP) ||
654             (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP));
655 }
656
/*
 * fib_path_unresolve
 *
 * Remove our dependency on the resolution target: detach from the
 * parent object (via-entry, adjacency or exclusive DPO) according to
 * the path type, then reset the contributed DPO and clear RESOLVED.
 */
static void
fib_path_unresolve (fib_path_t *path)
{
    /*
     * the forced drop path does not need unresolving
     */
    if (fib_path_is_permanent_drop(path))
    {
        return;
    }

    switch (path->fp_type)
    {
    case FIB_PATH_TYPE_RECURSIVE:
        if (FIB_NODE_INDEX_INVALID != path->fp_via_fib)
        {
            fib_prefix_t pfx;

            /* detach from the via-entry, then remove the RR-sourced
             * entry we added to track it when the path resolved */
            fib_prefix_from_ip46_addr(&path->recursive.fp_nh, &pfx);
            fib_entry_child_remove(path->fp_via_fib,
                                   path->fp_sibling);
            fib_table_entry_special_remove(path->recursive.fp_tbl_id,
                                           &pfx,
                                           FIB_SOURCE_RR);
            path->fp_via_fib = FIB_NODE_INDEX_INVALID;
        }
        break;
    case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
    case FIB_PATH_TYPE_ATTACHED:
        /* drop the child link and the lock taken on the adjacency
         * when the path resolved */
        adj_child_remove(path->fp_dpo.dpoi_index,
                         path->fp_sibling);
        adj_unlock(path->fp_dpo.dpoi_index);
        break;
    case FIB_PATH_TYPE_EXCLUSIVE:
        dpo_reset(&path->exclusive.fp_ex_dpo);
        break;
    case FIB_PATH_TYPE_SPECIAL:
    case FIB_PATH_TYPE_RECEIVE:
    case FIB_PATH_TYPE_DEAG:
        /*
         * these hold only the path's DPO, which is reset below.
         */
        break;
    }

    /*
     * release the adj we were holding and pick up the
     * drop just in case.
     */
    dpo_reset(&path->fp_dpo);
    path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;

    return;
}
716
717 static fib_forward_chain_type_t
718 fib_path_proto_to_chain_type (fib_protocol_t proto)
719 {
720     switch (proto)
721     {
722     case FIB_PROTOCOL_IP4:
723         return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
724     case FIB_PROTOCOL_IP6:
725         return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
726     case FIB_PROTOCOL_MPLS:
727         return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
728     }
729     return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
730 }
731
/*
 * fib_path_back_walk_notify
 *
 * A back walk has reached this path. Update the path's resolution state
 * according to the walk reason and the path type, then propagate the
 * walk on to the owning path-list.
 */
static fib_node_back_walk_rc_t
fib_path_back_walk_notify (fib_node_t *node,
                           fib_node_back_walk_ctx_t *ctx)
{
    fib_path_t *path;

    path = fib_path_from_fib_node(node);

    switch (path->fp_type)
    {
    case FIB_PATH_TYPE_RECURSIVE:
        if (FIB_NODE_BW_REASON_FLAG_EVALUATE & ctx->fnbw_reason)
        {
            /*
             * modify the recursive adjacency to use the new forwarding
             * of the via-fib.
             * this update is visible to packets in flight in the DP.
             */
            fib_path_recursive_adj_update(
                path,
                fib_path_proto_to_chain_type(path->fp_nh_proto),
                &path->fp_dpo);
        }
        if ((FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason) ||
            (FIB_NODE_BW_REASON_FLAG_ADJ_DOWN   & ctx->fnbw_reason))
        {
            /*
             * ADJ updates (complete<->incomplete) do not need to propagate to
             * recursive entries.
             * The only reason its needed as far back as here, is that the adj
             * and the incomplete adj are a different DPO type, so the LBs need
             * to re-stack.
             * If this walk was quashed in the fib_entry, then any non-fib_path
             * children (like tunnels that collapse out the LB when they stack)
             * would not see the update.
             */
            return (FIB_NODE_BACK_WALK_CONTINUE);
        }
        break;
    case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
        /*
         * ADJ_UPDATE backwalks pass silently through here and up to
         * the path-list when the multipath adj collapse occurs.
         * The reason we do this is that the assumption is that VPP
         * runs in an environment where the Control-Plane is remote
         * and hence reacts slowly to link up down. In order to remove
         * this down link from the ECMP set quickly, we back-walk.
         * VPP also has dedicated CPUs, so we are not stealing resources
         * from the CP to do so.
         */
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason)
        {
            path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
        }
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason)
        {
            path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
        }
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
        {
            /*
             * The interface this path resolves through has been deleted.
             * This will leave the path in a permanent drop state. The route
             * needs to be removed and readded (and hence the path-list deleted)
             * before it can forward again.
             */
            fib_path_unresolve(path);
            path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
        }
        if (FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason)
        {
            /*
             * restack the DPO to pick up the correct DPO sub-type
             */
            adj_index_t ai;

            if (vnet_sw_interface_is_admin_up(vnet_get_main(),
                                              path->attached_next_hop.fp_interface))
            {
                path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
            }

            /* get_adj locks the adj; dpo_set takes its own reference,
             * so release the lock taken above once stacked */
            ai = fib_path_attached_next_hop_get_adj(
                     path,
                     fib_proto_to_link(path->fp_nh_proto));

            dpo_set(&path->fp_dpo, DPO_ADJACENCY,
                    fib_proto_to_dpo(path->fp_nh_proto),
                    ai);
            adj_unlock(ai);
        }
        if (FIB_NODE_BW_REASON_FLAG_ADJ_DOWN & ctx->fnbw_reason)
        {
            /*
             * the adj has gone down. the path is no longer resolved.
             */
            path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
        }
        break;
    case FIB_PATH_TYPE_ATTACHED:
        /*
         * FIXME; this could schedule a lower priority walk, since attached
         * routes are not usually in ECMP configurations so the backwalk to
         * the FIB entry does not need to be high priority
         */
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason)
        {
            path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
        }
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason)
        {
            path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
        }
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
        {
            fib_path_unresolve(path);
            path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
        }
        break;
    case FIB_PATH_TYPE_DEAG:
        /*
         * FIXME When VRF delete is allowed this will need a poke.
         */
    case FIB_PATH_TYPE_SPECIAL:
    case FIB_PATH_TYPE_RECEIVE:
    case FIB_PATH_TYPE_EXCLUSIVE:
        /*
         * these path types have no parents. so to be
         * walked from one is unexpected.
         */
        ASSERT(0);
        break;
    }

    /*
     * propagate the backwalk further to the path-list
     */
    fib_path_list_back_walk(path->fp_pl_index, ctx);

    return (FIB_NODE_BACK_WALK_CONTINUE);
}
879
/**
 * Report the path pool's memory usage ('show fib memory').
 */
static void
fib_path_memory_show (void)
{
    fib_show_memory_usage("Path",
                          pool_elts(fib_path_pool),
                          pool_len(fib_path_pool),
                          sizeof(fib_path_t));
}
888
/*
 * The FIB path's graph node virtual function table, registered with the
 * fib_node machinery so walks and locks dispatch to the functions above.
 */
static const fib_node_vft_t fib_path_vft = {
    .fnv_get = fib_path_get_node,
    .fnv_last_lock = fib_path_last_lock_gone,
    .fnv_back_walk = fib_path_back_walk_notify,
    .fnv_mem_show = fib_path_memory_show,
};
898
899 static fib_path_cfg_flags_t
900 fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath)
901 {
902     fib_path_cfg_flags_t cfg_flags = FIB_PATH_CFG_FLAG_NONE;
903
904     if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_HOST)
905         cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_HOST;
906     if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED)
907         cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED;
908
909     return (cfg_flags);
910 }
911
912 /*
913  * fib_path_create
914  *
915  * Create and initialise a new path object.
916  * return the index of the path.
917  */
918 fib_node_index_t
919 fib_path_create (fib_node_index_t pl_index,
920                  fib_protocol_t nh_proto,
921                  fib_path_cfg_flags_t flags,
922                  const fib_route_path_t *rpath)
923 {
924     fib_path_t *path;
925
926     pool_get(fib_path_pool, path);
927     memset(path, 0, sizeof(*path));
928
929     fib_node_init(&path->fp_node,
930                   FIB_NODE_TYPE_PATH);
931
932     dpo_reset(&path->fp_dpo);
933     path->fp_pl_index = pl_index;
934     path->fp_nh_proto = nh_proto;
935     path->fp_via_fib = FIB_NODE_INDEX_INVALID;
936     path->fp_weight = rpath->frp_weight;
937     if (0 == path->fp_weight)
938     {
939         /*
940          * a weight of 0 is a meaningless value. We could either reject it, and thus force
941          * clients to always use 1, or we can accept it and fixup approrpiately.
942          */
943         path->fp_weight = 1;
944     }
945     path->fp_cfg_flags = flags;
946     path->fp_cfg_flags |= fib_path_route_flags_to_cfg_flags(rpath);
947
948     /*
949      * deduce the path's tpye from the parementers and save what is needed.
950      */
951     if (~0 != rpath->frp_sw_if_index)
952     {
953         if (flags & FIB_PATH_CFG_FLAG_LOCAL)
954         {
955             path->fp_type = FIB_PATH_TYPE_RECEIVE;
956             path->receive.fp_interface = rpath->frp_sw_if_index;
957             path->receive.fp_addr = rpath->frp_addr;
958         }
959         else
960         {
961             if (ip46_address_is_zero(&rpath->frp_addr))
962             {
963                 path->fp_type = FIB_PATH_TYPE_ATTACHED;
964                 path->attached.fp_interface = rpath->frp_sw_if_index;
965             }
966             else
967             {
968                 path->fp_type = FIB_PATH_TYPE_ATTACHED_NEXT_HOP;
969                 path->attached_next_hop.fp_interface = rpath->frp_sw_if_index;
970                 path->attached_next_hop.fp_nh = rpath->frp_addr;
971             }
972         }
973     }
974     else
975     {
976         if (ip46_address_is_zero(&rpath->frp_addr))
977         {
978             if (~0 == rpath->frp_fib_index)
979             {
980                 path->fp_type = FIB_PATH_TYPE_SPECIAL;
981             }
982             else
983             {
984                 path->fp_type = FIB_PATH_TYPE_DEAG;
985                 path->deag.fp_tbl_id = rpath->frp_fib_index;
986             }           
987         }
988         else
989         {
990             path->fp_type = FIB_PATH_TYPE_RECURSIVE;
991             path->recursive.fp_nh = rpath->frp_addr;
992             path->recursive.fp_tbl_id = rpath->frp_fib_index;
993         }
994     }
995
996     FIB_PATH_DBG(path, "create");
997
998     return (fib_path_get_index(path));
999 }
1000
1001 /*
1002  * fib_path_create_special
1003  *
1004  * Create and initialise a new path object.
1005  * return the index of the path.
1006  */
1007 fib_node_index_t
1008 fib_path_create_special (fib_node_index_t pl_index,
1009                          fib_protocol_t nh_proto,
1010                          fib_path_cfg_flags_t flags,
1011                          const dpo_id_t *dpo)
1012 {
1013     fib_path_t *path;
1014
1015     pool_get(fib_path_pool, path);
1016     memset(path, 0, sizeof(*path));
1017
1018     fib_node_init(&path->fp_node,
1019                   FIB_NODE_TYPE_PATH);
1020     dpo_reset(&path->fp_dpo);
1021
1022     path->fp_pl_index = pl_index;
1023     path->fp_weight = 1;
1024     path->fp_nh_proto = nh_proto;
1025     path->fp_via_fib = FIB_NODE_INDEX_INVALID;
1026     path->fp_cfg_flags = flags;
1027
1028     if (FIB_PATH_CFG_FLAG_DROP & flags)
1029     {
1030         path->fp_type = FIB_PATH_TYPE_SPECIAL;
1031     }
1032     else if (FIB_PATH_CFG_FLAG_LOCAL & flags)
1033     {
1034         path->fp_type = FIB_PATH_TYPE_RECEIVE;
1035         path->attached.fp_interface = FIB_NODE_INDEX_INVALID;
1036     }
1037     else
1038     {
1039         path->fp_type = FIB_PATH_TYPE_EXCLUSIVE;
1040         ASSERT(NULL != dpo);
1041         dpo_copy(&path->exclusive.fp_ex_dpo, dpo);
1042     }
1043
1044     return (fib_path_get_index(path));
1045 }
1046
1047 /*
1048  * fib_path_copy
1049  *
1050  * Copy a path. return index of new path.
1051  */
1052 fib_node_index_t
1053 fib_path_copy (fib_node_index_t path_index,
1054                fib_node_index_t path_list_index)
1055 {
1056     fib_path_t *path, *orig_path;
1057
1058     pool_get(fib_path_pool, path);
1059
1060     orig_path = fib_path_get(path_index);
1061     ASSERT(NULL != orig_path);
1062
1063     memcpy(path, orig_path, sizeof(*path));
1064
1065     FIB_PATH_DBG(path, "create-copy:%d", path_index);
1066
1067     /*
1068      * reset the dynamic section
1069      */
1070     fib_node_init(&path->fp_node, FIB_NODE_TYPE_PATH);
1071     path->fp_oper_flags     = FIB_PATH_OPER_FLAG_NONE;
1072     path->fp_pl_index  = path_list_index;
1073     path->fp_via_fib   = FIB_NODE_INDEX_INVALID;
1074     memset(&path->fp_dpo, 0, sizeof(path->fp_dpo));
1075     dpo_reset(&path->fp_dpo);
1076
1077     return (fib_path_get_index(path));
1078 }
1079
1080 /*
1081  * fib_path_destroy
1082  *
1083  * destroy a path that is no longer required
1084  */
1085 void
1086 fib_path_destroy (fib_node_index_t path_index)
1087 {
1088     fib_path_t *path;
1089
1090     path = fib_path_get(path_index);
1091
1092     ASSERT(NULL != path);
1093     FIB_PATH_DBG(path, "destroy");
1094
1095     fib_path_unresolve(path);
1096
1097     fib_node_deinit(&path->fp_node);
1098     pool_put(fib_path_pool, path);
1099 }
1100
/*
 * fib_path_hash
 *
 * Compute a hash over the path's configuration members, i.e. those
 * between the path_hash_start and path_hash_end markers in fib_path_t.
 * (The previous header comment here was copied from fib_path_destroy.)
 */
uword
fib_path_hash (fib_node_index_t path_index)
{
    fib_path_t *path;

    path = fib_path_get(path_index);

    /* hash only the invariant region delimited by the struct markers */
    return (hash_memory(STRUCT_MARK_PTR(path, path_hash_start),
                        (STRUCT_OFFSET_OF(fib_path_t, path_hash_end) -
                         STRUCT_OFFSET_OF(fib_path_t, path_hash_start)),
                        0));
}
1118
1119 /*
1120  * fib_path_cmp_i
1121  *
1122  * Compare two paths for equivalence.
1123  */
1124 static int
1125 fib_path_cmp_i (const fib_path_t *path1,
1126                 const fib_path_t *path2)
1127 {
1128     int res;
1129
1130     res = 1;
1131
1132     /*
1133      * paths of different types and protocol are not equal.
1134      * different weights only are the same path.
1135      */
1136     if (path1->fp_type != path2->fp_type)
1137     {
1138         res = (path1->fp_type - path2->fp_type);
1139     }
1140     if (path1->fp_nh_proto != path2->fp_nh_proto)
1141     {
1142         res = (path1->fp_nh_proto - path2->fp_nh_proto);
1143     }
1144     else
1145     {
1146         /*
1147          * both paths are of the same type.
1148          * consider each type and its attributes in turn.
1149          */
1150         switch (path1->fp_type)
1151         {
1152         case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1153             res = ip46_address_cmp(&path1->attached_next_hop.fp_nh,
1154                                    &path2->attached_next_hop.fp_nh);
1155             if (0 == res) {
1156                 res = vnet_sw_interface_compare(
1157                           vnet_get_main(),
1158                           path1->attached_next_hop.fp_interface,
1159                           path2->attached_next_hop.fp_interface);
1160             }
1161             break;
1162         case FIB_PATH_TYPE_ATTACHED:
1163             res = vnet_sw_interface_compare(
1164                       vnet_get_main(),
1165                       path1->attached.fp_interface,
1166                       path2->attached.fp_interface);
1167             break;
1168         case FIB_PATH_TYPE_RECURSIVE:
1169             res = ip46_address_cmp(&path1->recursive.fp_nh,
1170                                    &path2->recursive.fp_nh);
1171  
1172             if (0 == res)
1173             {
1174                 res = (path1->recursive.fp_tbl_id - path2->recursive.fp_tbl_id);
1175             }
1176             break;
1177         case FIB_PATH_TYPE_DEAG:
1178             res = (path1->deag.fp_tbl_id - path2->deag.fp_tbl_id);
1179             break;
1180         case FIB_PATH_TYPE_SPECIAL:
1181         case FIB_PATH_TYPE_RECEIVE:
1182         case FIB_PATH_TYPE_EXCLUSIVE:
1183             res = 0;
1184             break;
1185         }
1186     }
1187     return (res);
1188 }
1189
1190 /*
1191  * fib_path_cmp_for_sort
1192  *
1193  * Compare two paths for equivalence. Used during path sorting.
1194  * As usual 0 means equal.
1195  */
1196 int
1197 fib_path_cmp_for_sort (void * v1,
1198                        void * v2)
1199 {
1200     fib_node_index_t *pi1 = v1, *pi2 = v2;
1201     fib_path_t *path1, *path2;
1202
1203     path1 = fib_path_get(*pi1);
1204     path2 = fib_path_get(*pi2);
1205
1206     return (fib_path_cmp_i(path1, path2));
1207 }
1208
1209 /*
1210  * fib_path_cmp
1211  *
1212  * Compare two paths for equivalence.
1213  */
1214 int
1215 fib_path_cmp (fib_node_index_t pi1,
1216               fib_node_index_t pi2)
1217 {
1218     fib_path_t *path1, *path2;
1219
1220     path1 = fib_path_get(pi1);
1221     path2 = fib_path_get(pi2);
1222
1223     return (fib_path_cmp_i(path1, path2));
1224 }
1225
1226 int
1227 fib_path_cmp_w_route_path (fib_node_index_t path_index,
1228                            const fib_route_path_t *rpath)
1229 {
1230     fib_path_t *path;
1231     int res;
1232
1233     path = fib_path_get(path_index);
1234
1235     res = 1;
1236
1237     if (path->fp_weight != rpath->frp_weight)
1238     {
1239         res = (path->fp_weight - rpath->frp_weight);
1240     }
1241     else
1242     {
1243         /*
1244          * both paths are of the same type.
1245          * consider each type and its attributes in turn.
1246          */
1247         switch (path->fp_type)
1248         {
1249         case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1250             res = ip46_address_cmp(&path->attached_next_hop.fp_nh,
1251                                    &rpath->frp_addr);
1252             if (0 == res)
1253             {
1254                 res = vnet_sw_interface_compare(
1255                           vnet_get_main(),
1256                           path->attached_next_hop.fp_interface,
1257                           rpath->frp_sw_if_index);
1258             }
1259             break;
1260         case FIB_PATH_TYPE_ATTACHED:
1261             res = vnet_sw_interface_compare(
1262                       vnet_get_main(),
1263                       path->attached.fp_interface,
1264                       rpath->frp_sw_if_index);
1265             break;
1266         case FIB_PATH_TYPE_RECURSIVE:
1267             res = ip46_address_cmp(&path->recursive.fp_nh,
1268                                    &rpath->frp_addr);
1269  
1270             if (0 == res)
1271             {
1272                 res = (path->recursive.fp_tbl_id - rpath->frp_fib_index);
1273             }
1274             break;
1275         case FIB_PATH_TYPE_DEAG:
1276             res = (path->deag.fp_tbl_id - rpath->frp_fib_index);
1277             break;
1278         case FIB_PATH_TYPE_SPECIAL:
1279         case FIB_PATH_TYPE_RECEIVE:
1280         case FIB_PATH_TYPE_EXCLUSIVE:
1281             res = 0;
1282             break;
1283         }
1284     }
1285     return (res);
1286 }
1287
/*
 * fib_path_recursive_loop_detect
 *
 * A forward walk of the FIB object graph to detect for a cycle/loop. This
 * walk is initiated when an entry is linking to a new path list or from an old.
 * The entry vector passed contains all the FIB entrys that are children of this
 * path (it is all the entries encountered on the walk so far). If this vector
 * contains the entry this path resolve via, then a loop is about to form.
 * The loop must be allowed to form, since we need the dependencies in place
 * so that we can track when the loop breaks.
 * However, we MUST not produce a loop in the forwarding graph (else packets
 * would loop around the switch path until the loop breaks), so we mark recursive
 * paths as looped so that they do not contribute forwarding information.
 * By marking the path as looped, an entry such as;
 *    X/Y
 *     via a.a.a.a (looped)
 *     via b.b.b.b (not looped)
 * can still forward using the info provided by b.b.b.b only
 *
 * Returns non-zero if the path ends up marked looped.
 */
int
fib_path_recursive_loop_detect (fib_node_index_t path_index,
                                fib_node_index_t **entry_indicies)
{
    fib_path_t *path;

    path = fib_path_get(path_index);

    /*
     * the forced drop path is never looped, cos it is never resolved.
     */
    if (fib_path_is_permanent_drop(path))
    {
        return (0);
    }

    switch (path->fp_type)
    {
    case FIB_PATH_TYPE_RECURSIVE:
    {
        fib_node_index_t *entry_index, *entries;
        int looped = 0;
        entries = *entry_indicies;

        /* is the entry this path resolves through already on the walk? */
        vec_foreach(entry_index, entries) {
            if (*entry_index == path->fp_via_fib)
            {
                /*
                 * the entry that is about to link to this path-list (or
                 * one of this path-list's children) is the same entry that
                 * this recursive path resolves through. this is a cycle.
                 * abort the walk.
                 */
                looped = 1;
                break;
            }
        }

        if (looped)
        {
            FIB_PATH_DBG(path, "recursive loop formed");
            path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;

            /*
             * a looped path must not contribute forwarding; swap its
             * DPO for the drop so packets cannot cycle.
             */
            dpo_copy(&path->fp_dpo,
                    drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
        }
        else
        {
            /*
             * no loop here yet. keep forward walking the graph.
             */
            if (fib_entry_recursive_loop_detect(path->fp_via_fib, entry_indicies))
            {
                FIB_PATH_DBG(path, "recursive loop formed");
                path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
            }
            else
            {
                FIB_PATH_DBG(path, "recursive loop cleared");
                path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
            }
        }
        break;
    }
    case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
    case FIB_PATH_TYPE_ATTACHED:
    case FIB_PATH_TYPE_SPECIAL:
    case FIB_PATH_TYPE_DEAG:
    case FIB_PATH_TYPE_RECEIVE:
    case FIB_PATH_TYPE_EXCLUSIVE:
        /*
         * these path types cannot be part of a loop, since they are the leaves
         * of the graph.
         */
        break;
    }

    return (fib_path_is_looped(path_index));
}
1386
/*
 * fib_path_resolve
 *
 * Resolve the path according to its type so that it can contribute a
 * DPO for forwarding. Returns non-zero if the path is resolved on
 * exit (see fib_path_is_resolved).
 */
int
fib_path_resolve (fib_node_index_t path_index)
{
    fib_path_t *path;

    path = fib_path_get(path_index);

    /*
     * hope for the best.
     */
    path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;

    /*
     * the forced drop path resolves via the drop adj
     */
    if (fib_path_is_permanent_drop(path))
    {
        dpo_copy(&path->fp_dpo,
                 drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
        path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
        return (fib_path_is_resolved(path_index));
    }

    switch (path->fp_type)
    {
    case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
        fib_path_attached_next_hop_set(path);
        break;
    case FIB_PATH_TYPE_ATTACHED:
        /*
         * path->attached.fp_interface
         */
        if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
                                           path->attached.fp_interface))
        {
            /* interface is admin-down; path cannot forward */
            path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
        }
        if (vnet_sw_interface_is_p2p(vnet_get_main(),
                                     path->attached.fp_interface))
        {
            /*
             * point-2-point interfaces do not require a glean, since
             * there is nothing to ARP. Install a rewrite/nbr adj instead
             */
            dpo_set(&path->fp_dpo,
                    DPO_ADJACENCY,
                    fib_proto_to_dpo(path->fp_nh_proto),
                    adj_nbr_add_or_lock(
                        path->fp_nh_proto,
                        fib_proto_to_link(path->fp_nh_proto),
                        &zero_addr,
                        path->attached.fp_interface));
        }
        else
        {
            dpo_set(&path->fp_dpo,
                    DPO_ADJACENCY_GLEAN,
                    fib_proto_to_dpo(path->fp_nh_proto),
                    adj_glean_add_or_lock(path->fp_nh_proto,
                                          path->attached.fp_interface,
                                          NULL));
        }
        /*
         * become a child of the adjacency so we receive updates
         * when the interface state changes
         */
        path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
                                         FIB_NODE_TYPE_PATH,
                                         fib_path_get_index(path));

        break;
    case FIB_PATH_TYPE_RECURSIVE:
    {
        /*
         * Create a RR source entry in the table for the address
         * that this path recurses through.
         * This resolve action is recursive, hence we may create
         * more paths in the process. more creates mean maybe realloc
         * of this path.
         */
        fib_node_index_t fei;
        fib_prefix_t pfx;

        ASSERT(FIB_NODE_INDEX_INVALID == path->fp_via_fib);

        fib_prefix_from_ip46_addr(&path->recursive.fp_nh, &pfx);

        fei = fib_table_entry_special_add(path->recursive.fp_tbl_id,
                                          &pfx,
                                          FIB_SOURCE_RR,
                                          FIB_ENTRY_FLAG_NONE,
                                          ADJ_INDEX_INVALID);

        /* re-fetch: the add above may have reallocated the path pool */
        path = fib_path_get(path_index);
        path->fp_via_fib = fei;

        /*
         * become a dependent child of the entry so the path is
         * informed when the forwarding for the entry changes.
         */
        path->fp_sibling = fib_entry_child_add(path->fp_via_fib,
                                               FIB_NODE_TYPE_PATH,
                                               fib_path_get_index(path));

        /*
         * create and configure the IP DPO
         */
        fib_path_recursive_adj_update(
            path,
            fib_path_proto_to_chain_type(path->fp_nh_proto),
            &path->fp_dpo);

        break;
    }
    case FIB_PATH_TYPE_SPECIAL:
        /*
         * Resolve via the drop
         */
        dpo_copy(&path->fp_dpo,
                 drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
        break;
    case FIB_PATH_TYPE_DEAG:
        /*
         * Resolve via a lookup DPO.
         * FIXME. control plane should add routes with a table ID
         */
        lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id,
                                          fib_proto_to_dpo(path->fp_nh_proto),
                                          LOOKUP_INPUT_DST_ADDR,
                                          LOOKUP_TABLE_FROM_CONFIG,
                                          &path->fp_dpo);
        break;
    case FIB_PATH_TYPE_RECEIVE:
        /*
         * Resolve via a receive DPO.
         */
        receive_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto),
                                path->receive.fp_interface,
                                &path->receive.fp_addr,
                                &path->fp_dpo);
        break;
    case FIB_PATH_TYPE_EXCLUSIVE:
        /*
         * Resolve via the user provided DPO
         */
        dpo_copy(&path->fp_dpo, &path->exclusive.fp_ex_dpo);
        break;
    }

    return (fib_path_is_resolved(path_index));
}
1538
1539 u32
1540 fib_path_get_resolving_interface (fib_node_index_t path_index)
1541 {
1542     fib_path_t *path;
1543
1544     path = fib_path_get(path_index);
1545
1546     switch (path->fp_type)
1547     {
1548     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1549         return (path->attached_next_hop.fp_interface);
1550     case FIB_PATH_TYPE_ATTACHED:
1551         return (path->attached.fp_interface);
1552     case FIB_PATH_TYPE_RECEIVE:
1553         return (path->receive.fp_interface);
1554     case FIB_PATH_TYPE_RECURSIVE:
1555         return (fib_entry_get_resolving_interface(path->fp_via_fib));    
1556     case FIB_PATH_TYPE_SPECIAL:
1557     case FIB_PATH_TYPE_DEAG:
1558     case FIB_PATH_TYPE_EXCLUSIVE:
1559         break;
1560     }
1561     return (~0);
1562 }
1563
1564 adj_index_t
1565 fib_path_get_adj (fib_node_index_t path_index)
1566 {
1567     fib_path_t *path;
1568
1569     path = fib_path_get(path_index);
1570
1571     ASSERT(dpo_is_adj(&path->fp_dpo));
1572     if (dpo_is_adj(&path->fp_dpo))
1573     {
1574         return (path->fp_dpo.dpoi_index);
1575     }
1576     return (ADJ_INDEX_INVALID);
1577 }
1578
1579 int
1580 fib_path_get_weight (fib_node_index_t path_index)
1581 {
1582     fib_path_t *path;
1583
1584     path = fib_path_get(path_index);
1585
1586     ASSERT(path);
1587
1588     return (path->fp_weight);
1589 }
1590
1591 /**
1592  * @brief Contribute the path's adjacency to the list passed.
1593  * By calling this function over all paths, recursively, a child
1594  * can construct its full set of forwarding adjacencies, and hence its
1595  * uRPF list.
1596  */
1597 void
1598 fib_path_contribute_urpf (fib_node_index_t path_index,
1599                           index_t urpf)
1600 {
1601     fib_path_t *path;
1602
1603     if (!fib_path_is_resolved(path_index))
1604         return;
1605
1606     path = fib_path_get(path_index);
1607
1608     switch (path->fp_type)
1609     {
1610     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1611         fib_urpf_list_append(urpf, path->attached_next_hop.fp_interface);
1612         break;
1613
1614     case FIB_PATH_TYPE_ATTACHED:
1615         fib_urpf_list_append(urpf, path->attached.fp_interface);
1616         break;
1617
1618     case FIB_PATH_TYPE_RECURSIVE:
1619         fib_entry_contribute_urpf(path->fp_via_fib, urpf);
1620         break;
1621
1622     case FIB_PATH_TYPE_EXCLUSIVE:
1623     case FIB_PATH_TYPE_SPECIAL:
1624         /*
1625          * these path types may link to an adj, if that's what
1626          * the clinet gave
1627          */
1628         if (dpo_is_adj(&path->fp_dpo))
1629         {
1630             ip_adjacency_t *adj;
1631
1632             adj = adj_get(path->fp_dpo.dpoi_index);
1633
1634             fib_urpf_list_append(urpf, adj->rewrite_header.sw_if_index);
1635         }
1636         break;
1637
1638     case FIB_PATH_TYPE_DEAG:
1639     case FIB_PATH_TYPE_RECEIVE:
1640         /*
1641          * these path types don't link to an adj
1642          */
1643         break;
1644     }
1645 }
1646
/*
 * fib_path_contribute_forwarding
 *
 * Fill 'dpo' with the DPO this path contributes for the requested
 * forwarding chain type. When the requested chain matches the path's
 * native protocol the cached fp_dpo is copied; otherwise a
 * chain-type-specific DPO is constructed per path type.
 */
void
fib_path_contribute_forwarding (fib_node_index_t path_index,
                                fib_forward_chain_type_t fct,
                                dpo_id_t *dpo)
{
    fib_path_t *path;

    path = fib_path_get(path_index);

    ASSERT(path);
    /* NOTE(review): MPLS_EOS is asserted against here, yet the
     * recursive and deag cases below have MPLS_EOS arms - confirm
     * which is intended */
    ASSERT(FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct);

    FIB_PATH_DBG(path, "contribute");

    /*
     * The DPO stored in the path was created when the path was resolved.
     * This then represents the path's 'native' protocol; IP.
     * For all others will need to go find something else.
     */
    if (fib_path_proto_to_chain_type(path->fp_nh_proto) == fct)
    {
        dpo_copy(dpo, &path->fp_dpo);
    }
    else
    {
        switch (path->fp_type)
        {
        case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
            switch (fct)
            {
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
            case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
            case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
            case FIB_FORW_CHAIN_TYPE_ETHERNET:
            {
                adj_index_t ai;

                /*
                 * get an adjacency for the link type that matches the
                 * requested chain type.
                 */
                ai = fib_path_attached_next_hop_get_adj(
                         path,
                         fib_forw_chain_type_to_link_type(fct));
                /* dpo_set takes its own lock; release ours */
                dpo_set(dpo, DPO_ADJACENCY,
                        fib_forw_chain_type_to_dpo_proto(fct), ai);
                adj_unlock(ai);

                break;
            }
            }
            break;
        case FIB_PATH_TYPE_RECURSIVE:
            switch (fct)
            {
            case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
                /*
                 * Assume that EOS and IP forwarding is the same.
                 * revisit for ieBGP
                 */
                dpo_copy(dpo, &path->fp_dpo);
                break;
            case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
                fib_path_recursive_adj_update(path, fct, dpo);
                break;
            case FIB_FORW_CHAIN_TYPE_ETHERNET:
                ASSERT(0);
                break;
            }
            break;
        case FIB_PATH_TYPE_DEAG:
            switch (fct)
            {
            case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
                /* non-eos MPLS requires a lookup in the MPLS table */
                lookup_dpo_add_or_lock_w_table_id(MPLS_FIB_DEFAULT_TABLE_ID,
                                                  DPO_PROTO_MPLS,
                                                  LOOKUP_INPUT_DST_ADDR,
                                                  LOOKUP_TABLE_FROM_CONFIG,
                                                  dpo);
                break;
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
            case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
                dpo_copy(dpo, &path->fp_dpo);
                break;
            case FIB_FORW_CHAIN_TYPE_ETHERNET:
                ASSERT(0);
                break;
            }
            break;
        case FIB_PATH_TYPE_EXCLUSIVE:
            dpo_copy(dpo, &path->exclusive.fp_ex_dpo);
            break;
        case FIB_PATH_TYPE_ATTACHED:
        case FIB_PATH_TYPE_RECEIVE:
        case FIB_PATH_TYPE_SPECIAL:
            /* no chain-type-specific forwarding for these types */
            ASSERT(0);
            break;
        }

    }
}
1751
1752 load_balance_path_t *
1753 fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index,
1754                                        fib_forward_chain_type_t fct,
1755                                        load_balance_path_t *hash_key)
1756 {
1757     load_balance_path_t *mnh;
1758     fib_path_t *path;
1759
1760     path = fib_path_get(path_index);
1761
1762     ASSERT(path);
1763
1764     if (fib_path_is_resolved(path_index))
1765     {
1766         vec_add2(hash_key, mnh, 1);
1767
1768         mnh->path_weight = path->fp_weight;
1769         mnh->path_index = path_index;
1770         fib_path_contribute_forwarding(path_index, fct, &mnh->path_dpo);
1771     }
1772
1773     return (hash_key);
1774 }
1775
1776 int
1777 fib_path_is_recursive (fib_node_index_t path_index)
1778 {
1779     fib_path_t *path;
1780
1781     path = fib_path_get(path_index);
1782
1783     return (FIB_PATH_TYPE_RECURSIVE == path->fp_type);
1784 }
1785
1786 int
1787 fib_path_is_exclusive (fib_node_index_t path_index)
1788 {
1789     fib_path_t *path;
1790
1791     path = fib_path_get(path_index);
1792
1793     return (FIB_PATH_TYPE_EXCLUSIVE == path->fp_type);
1794 }
1795
1796 int
1797 fib_path_is_deag (fib_node_index_t path_index)
1798 {
1799     fib_path_t *path;
1800
1801     path = fib_path_get(path_index);
1802
1803     return (FIB_PATH_TYPE_DEAG == path->fp_type);
1804 }
1805
1806 int
1807 fib_path_is_resolved (fib_node_index_t path_index)
1808 {
1809     fib_path_t *path;
1810
1811     path = fib_path_get(path_index);
1812
1813     return (dpo_id_is_valid(&path->fp_dpo) &&
1814             (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED) &&
1815             !fib_path_is_looped(path_index) &&
1816             !fib_path_is_permanent_drop(path));
1817 }
1818
1819 int
1820 fib_path_is_looped (fib_node_index_t path_index)
1821 {
1822     fib_path_t *path;
1823
1824     path = fib_path_get(path_index);
1825
1826     return (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP);
1827 }
1828
1829 int
1830 fib_path_encode (fib_node_index_t path_list_index,
1831                  fib_node_index_t path_index,
1832                  void *ctx)
1833 {
1834     fib_route_path_encode_t **api_rpaths = ctx;
1835     fib_route_path_encode_t *api_rpath;
1836     fib_path_t *path;
1837
1838     path = fib_path_get(path_index);
1839     if (!path)
1840       return (0);
1841     vec_add2(*api_rpaths, api_rpath, 1);
1842     api_rpath->rpath.frp_weight = path->fp_weight;
1843     api_rpath->rpath.frp_proto = path->fp_nh_proto;
1844     api_rpath->rpath.frp_sw_if_index = ~0;
1845     api_rpath->dpo = path->exclusive.fp_ex_dpo;
1846     switch (path->fp_type)
1847       {
1848       case FIB_PATH_TYPE_RECEIVE:
1849         api_rpath->rpath.frp_addr = path->receive.fp_addr;
1850         api_rpath->rpath.frp_sw_if_index = path->receive.fp_interface;
1851         break;
1852       case FIB_PATH_TYPE_ATTACHED:
1853         api_rpath->rpath.frp_sw_if_index = path->attached.fp_interface;
1854         break;
1855       case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1856         api_rpath->rpath.frp_sw_if_index = path->attached_next_hop.fp_interface;
1857         api_rpath->rpath.frp_addr = path->attached_next_hop.fp_nh;
1858         break;
1859       case FIB_PATH_TYPE_SPECIAL:
1860         break;
1861       case FIB_PATH_TYPE_DEAG:
1862         break;
1863       case FIB_PATH_TYPE_RECURSIVE:
1864         api_rpath->rpath.frp_addr = path->recursive.fp_nh;
1865         break;
1866       default:
1867         break;
1868       }
1869     return (1);
1870 }
1871
/*
 * fib_path_module_init
 *
 * Register the path node type (and its virtual function table) with
 * the FIB graph.
 */
void
fib_path_module_init (void)
{
    fib_node_register_type (FIB_NODE_TYPE_PATH, &fib_path_vft);
}
1877
/*
 * CLI handler for "show fib paths". With no argument, prints one
 * summary line per path in the pool; with a numeric argument, prints
 * that path in detail together with its graph children.
 */
static clib_error_t *
show_fib_path_command (vlib_main_t * vm,
                        unformat_input_t * input,
                        vlib_cli_command_t * cmd)
{
    fib_node_index_t pi;
    fib_path_t *path;

    /* an optional numeric argument selects a single path by index */
    if (unformat (input, "%d", &pi))
    {
        /*
         * show one in detail
         */
        if (!pool_is_free_index(fib_path_pool, pi))
        {
            path = fib_path_get(pi);
            u8 *s = fib_path_format(pi, NULL);
            s = format(s, "children:");
            s = fib_node_children_format(path->fp_node.fn_children, s);
            vlib_cli_output (vm, "%s", s);
            vec_free(s);
        }
        else
        {
            vlib_cli_output (vm, "path %d invalid", pi);
        }
    }
    else
    {
        /* no argument: summarise every path in the pool */
        vlib_cli_output (vm, "FIB Paths");
        pool_foreach(path, fib_path_pool,
        ({
            vlib_cli_output (vm, "%U", format_fib_path, path);
        }));
    }

    return (NULL);
}
1916
/*
 * Register the "show fib paths [index]" CLI command.
 */
VLIB_CLI_COMMAND (show_fib_path, static) = {
  .path = "show fib paths",
  .function = show_fib_path_command,
  .short_help = "show fib paths",
};