988f689b10263e7436bb509d12db2f655e95588b
[vpp.git] / vnet / vnet / fib / fib_path.c
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/ip/format.h>
19 #include <vnet/ip/ip.h>
20 #include <vnet/dpo/drop_dpo.h>
21 #include <vnet/dpo/receive_dpo.h>
22 #include <vnet/dpo/load_balance_map.h>
23 #include <vnet/dpo/lookup_dpo.h>
24
25 #include <vnet/adj/adj.h>
26
27 #include <vnet/fib/fib_path.h>
28 #include <vnet/fib/fib_node.h>
29 #include <vnet/fib/fib_table.h>
30 #include <vnet/fib/fib_entry.h>
31 #include <vnet/fib/fib_path_list.h>
32 #include <vnet/fib/fib_internal.h>
33 #include <vnet/fib/fib_urpf_list.h>
34
/**
 * Enumeration of path types
 */
typedef enum fib_path_type_t_ {
    /**
     * Marker. Add new types after this one.
     */
    FIB_PATH_TYPE_FIRST = 0,
    /**
     * Attached-nexthop. An interface and a nexthop are known.
     */
    FIB_PATH_TYPE_ATTACHED_NEXT_HOP = FIB_PATH_TYPE_FIRST,
    /**
     * attached. Only the interface is known.
     */
    FIB_PATH_TYPE_ATTACHED,
    /**
     * recursive. Only the next-hop is known.
     */
    FIB_PATH_TYPE_RECURSIVE,
    /**
     * special. nothing is known. so we drop.
     */
    FIB_PATH_TYPE_SPECIAL,
    /**
     * exclusive. user provided adj.
     */
    FIB_PATH_TYPE_EXCLUSIVE,
    /**
     * deag. Link to a lookup adj in the next table
     */
    FIB_PATH_TYPE_DEAG,
    /**
     * receive. it's for-us.
     */
    FIB_PATH_TYPE_RECEIVE,
    /**
     * Marker. Add new types before this one, then update it.
     */
    FIB_PATH_TYPE_LAST = FIB_PATH_TYPE_RECEIVE,
} __attribute__ ((packed)) fib_path_type_t;  /* packed: stored per-path, keeps fib_path_t small */

/**
 * The maximum number of path_types
 */
#define FIB_PATH_TYPE_MAX (FIB_PATH_TYPE_LAST + 1)

/* Designated-initializer table of human-readable names, indexed by
 * fib_path_type_t; used by format_fib_path() */
#define FIB_PATH_TYPES {                                        \
    [FIB_PATH_TYPE_ATTACHED_NEXT_HOP] = "attached-nexthop",     \
    [FIB_PATH_TYPE_ATTACHED]          = "attached",             \
    [FIB_PATH_TYPE_RECURSIVE]         = "recursive",            \
    [FIB_PATH_TYPE_SPECIAL]           = "special",              \
    [FIB_PATH_TYPE_EXCLUSIVE]         = "exclusive",            \
    [FIB_PATH_TYPE_DEAG]              = "deag",                 \
    [FIB_PATH_TYPE_RECEIVE]           = "receive",              \
}

/* Iterate _item over every path type, first to last inclusive */
#define FOR_EACH_FIB_PATH_TYPE(_item) \
    for (_item = FIB_PATH_TYPE_FIRST; _item <= FIB_PATH_TYPE_LAST; _item++)
94
/**
 * Enumeration of path operational (i.e. derived) attributes
 */
typedef enum fib_path_oper_attribute_t_ {
    /**
     * Marker. Add new types after this one.
     */
    FIB_PATH_OPER_ATTRIBUTE_FIRST = 0,
    /**
     * The path forms part of a recursive loop.
     */
    FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP = FIB_PATH_OPER_ATTRIBUTE_FIRST,
    /**
     * The path is resolved
     */
    FIB_PATH_OPER_ATTRIBUTE_RESOLVED,
    /**
     * The path has become a permanent drop.
     */
    FIB_PATH_OPER_ATTRIBUTE_DROP,
    /**
     * Marker. Add new types before this one, then update it.
     */
    FIB_PATH_OPER_ATTRIBUTE_LAST = FIB_PATH_OPER_ATTRIBUTE_DROP,
} __attribute__ ((packed)) fib_path_oper_attribute_t;

/**
 * The maximum number of path operational attributes
 */
#define FIB_PATH_OPER_ATTRIBUTE_MAX (FIB_PATH_OPER_ATTRIBUTE_LAST + 1)

/* Names for the operational attributes, indexed by attribute value */
#define FIB_PATH_OPER_ATTRIBUTES {                                      \
    [FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP] = "recursive-loop",        \
    [FIB_PATH_OPER_ATTRIBUTE_RESOLVED]       = "resolved",              \
    [FIB_PATH_OPER_ATTRIBUTE_DROP]           = "drop",                  \
}

/* Iterate _item over every operational attribute, first to last inclusive */
#define FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(_item) \
    for (_item = FIB_PATH_OPER_ATTRIBUTE_FIRST; \
         _item <= FIB_PATH_OPER_ATTRIBUTE_LAST; \
         _item++)

/**
 * Path flags from the attributes; each flag is (1 << attribute)
 */
typedef enum fib_path_oper_flags_t_ {
    FIB_PATH_OPER_FLAG_NONE = 0,
    FIB_PATH_OPER_FLAG_RECURSIVE_LOOP = (1 << FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP),
    FIB_PATH_OPER_FLAG_DROP = (1 << FIB_PATH_OPER_ATTRIBUTE_DROP),
    FIB_PATH_OPER_FLAG_RESOLVED = (1 << FIB_PATH_OPER_ATTRIBUTE_RESOLVED),
} __attribute__ ((packed)) fib_path_oper_flags_t;
146
/**
 * A FIB path
 *
 * The fields between the two STRUCT_MARKs are hashed to identify the
 * path; everything after path_hash_end is derived state and excluded
 * from hashing/comparison.
 */
typedef struct fib_path_t_ {
    /**
     * A path is a node in the FIB graph.
     */
    fib_node_t fp_node;

    /**
     * The index of the path-list to which this path belongs
     */
    u32 fp_pl_index;

    /**
     * This marks the start of the memory area used to hash
     * the path
     */
    STRUCT_MARK(path_hash_start);

    /**
     * Configuration Flags
     */
    fib_path_cfg_flags_t fp_cfg_flags;

    /**
     * The type of the path. This is the selector for the union
     */
    fib_path_type_t fp_type;

    /**
     * The protocol of the next-hop, i.e. the address family of the
     * next-hop's address. We can't derive this from the address itself
     * since the address can be all zeros
     */
    fib_protocol_t fp_nh_proto;

    /**
     * UCMP [unnormalised] weight
     */
    u32 fp_weight;

    /**
     * per-type union of the data required to resolve the path
     */
    union {
        struct {
            /**
             * The next-hop
             */
            ip46_address_t fp_nh;
            /**
             * The interface
             */
            u32 fp_interface;
        } attached_next_hop;
        struct {
            /**
             * The interface
             */
            u32 fp_interface;
        } attached;
        struct {
            /**
             * The next-hop
             */
            ip46_address_t fp_nh;
            /**
             * The FIB table index in which to find the next-hop.
             * This needs to be fixed. We should lookup the adjacencies in
             * a separate table of adjacencies, rather than from the FIB.
             * Two reasons I can think of:
             *   - consider:
             *       int ip addr Gig0 10.0.0.1/24
             *       ip route 10.0.0.2/32 via Gig1 192.168.1.2
             *       ip route 1.1.1.1/32 via Gig0 10.0.0.2
             *     this is perfectly valid.
             *     Packets addressed to 10.0.0.2 should be sent via Gig1.
             *     Packets address to 1.1.1.1 should be sent via Gig0.
             *    when we perform the adj resolution from the FIB for the path
             *    "via Gig0 10.0.0.2" the lookup will result in the route via Gig1
             *    and so we will pick up the adj via Gig1 - which was not what the
             *    operator wanted.
             *  - we can only return link-type IPv4 and so not the link-type MPLS.
             *    more on this in a later commit.
             *
             * The table ID should only belong to a recursive path and indicate
             * which FIB should be used to resolve the next-hop.
             */
            fib_node_index_t fp_tbl_id;
        } recursive;
        struct {
            /**
             * The FIB index in which to perform the next lookup
             */
            fib_node_index_t fp_tbl_id;
        } deag;
        struct {
        } special;
        struct {
            /**
             * The user provided 'exclusive' DPO
             */
            dpo_id_t fp_ex_dpo;
        } exclusive;
        struct {
            /**
             * The interface on which the local address is configured
             */
            u32 fp_interface;
            /**
             * The next-hop
             */
            ip46_address_t fp_addr;
        } receive;
    };
    STRUCT_MARK(path_hash_end);

    /**
     * Members in this last section represent information that is
     * derived during resolution. It should not be copied to new paths
     * nor compared.
     */

    /**
     * Operational Flags
     */
    fib_path_oper_flags_t fp_oper_flags;

    /**
     * the resolving via fib. not part of the union, since it is not part
     * of the path's hash.
     */
    fib_node_index_t fp_via_fib;

    /**
     * The Data-path objects through which this path resolves for IP.
     */
    dpo_id_t fp_dpo;

    /**
     * the index of this path in the parent's child list.
     */
    u32 fp_sibling;
} fib_path_t;
292
/*
 * Array of strings/names for the path types and attributes
 */
static const char *fib_path_type_names[] = FIB_PATH_TYPES;
static const char *fib_path_oper_attribute_names[] = FIB_PATH_OPER_ATTRIBUTES;
static const char *fib_path_cfg_attribute_names[]  = FIB_PATH_CFG_ATTRIBUTES;

/*
 * The memory pool from which we allocate all the paths
 */
static fib_path_t *fib_path_pool;

/*
 * Debug macro: formats the path and logs via clib_warning.
 * Compiled out unless FIB_DEBUG is defined.
 * NOTE: evaluates _p twice - do not pass an expression with side effects.
 */
#ifdef FIB_DEBUG
#define FIB_PATH_DBG(_p, _fmt, _args...)                        \
{                                                               \
    u8 *_tmp = NULL;                                            \
    _tmp = fib_path_format(fib_path_get_index(_p), _tmp);       \
    clib_warning("path:[%d:%s]:" _fmt,                          \
                 fib_path_get_index(_p), _tmp,                  \
                 ##_args);                                      \
    vec_free(_tmp);                                             \
}
#else
#define FIB_PATH_DBG(_p, _fmt, _args...)
#endif
321
322 static fib_path_t *
323 fib_path_get (fib_node_index_t index)
324 {
325     return (pool_elt_at_index(fib_path_pool, index));
326 }
327
328 static fib_node_index_t 
329 fib_path_get_index (fib_path_t *path)
330 {
331     return (path - fib_path_pool);
332 }
333
334 static fib_node_t *
335 fib_path_get_node (fib_node_index_t index)
336 {
337     return ((fib_node_t*)fib_path_get(index));
338 }
339
340 static fib_path_t*
341 fib_path_from_fib_node (fib_node_t *node)
342 {
343 #if CLIB_DEBUG > 0
344     ASSERT(FIB_NODE_TYPE_PATH == node->fn_type);
345 #endif
346     return ((fib_path_t*)node);
347 }
348
349 u8 *
350 format_fib_path (u8 * s, va_list * args)
351 {
352     fib_path_t *path = va_arg (*args, fib_path_t *);
353     vnet_main_t * vnm = vnet_get_main();
354     fib_path_oper_attribute_t oattr;
355     fib_path_cfg_attribute_t cattr;
356
357     s = format (s, "      index:%d ", fib_path_get_index(path));
358     s = format (s, "pl-index:%d ", path->fp_pl_index);
359     s = format (s, "%U ", format_fib_protocol, path->fp_nh_proto);
360     s = format (s, "weight=%d ", path->fp_weight);
361     s = format (s, "%s: ", fib_path_type_names[path->fp_type]);
362     if (FIB_PATH_OPER_FLAG_NONE != path->fp_oper_flags) {
363         s = format(s, " oper-flags:");
364         FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(oattr) {
365             if ((1<<oattr) & path->fp_oper_flags) {
366                 s = format (s, "%s,", fib_path_oper_attribute_names[oattr]);
367             }
368         }
369     }
370     if (FIB_PATH_CFG_FLAG_NONE != path->fp_cfg_flags) {
371         s = format(s, " cfg-flags:");
372         FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(cattr) {
373             if ((1<<cattr) & path->fp_cfg_flags) {
374                 s = format (s, "%s,", fib_path_cfg_attribute_names[cattr]);
375             }
376         }
377     }
378     s = format(s, "\n       ");
379
380     switch (path->fp_type)
381     {
382     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
383         s = format (s, "%U", format_ip46_address,
384                     &path->attached_next_hop.fp_nh,
385                     IP46_TYPE_ANY);
386         if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)
387         {
388             s = format (s, " if_index:%d", path->attached_next_hop.fp_interface);
389         }
390         else
391         {
392             s = format (s, " %U",
393                         format_vnet_sw_interface_name,
394                         vnm,
395                         vnet_get_sw_interface(
396                             vnm,
397                             path->attached_next_hop.fp_interface));
398             if (vnet_sw_interface_is_p2p(vnet_get_main(),
399                                          path->attached_next_hop.fp_interface))
400             {
401                 s = format (s, " (p2p)");
402             }
403         }
404         if (!dpo_id_is_valid(&path->fp_dpo))
405         {
406             s = format(s, "\n          unresolved");
407         }
408         else
409         {
410             s = format(s, "\n          %U",
411                        format_dpo_id,
412                        &path->fp_dpo, 13);
413         }
414         break;
415     case FIB_PATH_TYPE_ATTACHED:
416         if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)
417         {
418             s = format (s, " if_index:%d", path->attached_next_hop.fp_interface);
419         }
420         else
421         {
422             s = format (s, " %U",
423                         format_vnet_sw_interface_name,
424                         vnm,
425                         vnet_get_sw_interface(
426                             vnm,
427                             path->attached.fp_interface));
428         }
429         break;
430     case FIB_PATH_TYPE_RECURSIVE:
431         s = format (s, "via %U",
432                     format_ip46_address,
433                     &path->recursive.fp_nh,
434                     IP46_TYPE_ANY);
435         s = format (s, " in fib:%d", path->recursive.fp_tbl_id, path->fp_via_fib); 
436         s = format (s, " via-fib:%d", path->fp_via_fib); 
437         s = format (s, " via-dpo:[%U:%d]",
438                     format_dpo_type, path->fp_dpo.dpoi_type, 
439                     path->fp_dpo.dpoi_index);
440
441         break;
442     case FIB_PATH_TYPE_RECEIVE:
443     case FIB_PATH_TYPE_SPECIAL:
444     case FIB_PATH_TYPE_DEAG:
445     case FIB_PATH_TYPE_EXCLUSIVE:
446         if (dpo_id_is_valid(&path->fp_dpo))
447         {
448             s = format(s, "%U", format_dpo_id,
449                        &path->fp_dpo, 2);
450         }
451         break;
452     }
453     return (s);
454 }
455
456 u8 *
457 fib_path_format (fib_node_index_t pi, u8 *s)
458 {
459     fib_path_t *path;
460
461     path = fib_path_get(pi);
462     ASSERT(NULL != path);
463
464     return (format (s, "%U", format_fib_path, path));
465 }
466
467 u8 *
468 fib_path_adj_format (fib_node_index_t pi,
469                      u32 indent,
470                      u8 *s)
471 {
472     fib_path_t *path;
473
474     path = fib_path_get(pi);
475     ASSERT(NULL != path);
476
477     if (!dpo_id_is_valid(&path->fp_dpo))
478     {
479         s = format(s, " unresolved");
480     }
481     else
482     {
483         s = format(s, "%U", format_dpo_id,
484                    &path->fp_dpo, 2);
485     }
486
487     return (s);
488 }
489
/*
 * fib_path_last_lock_gone
 *
 * We don't share paths, we share path lists, so the [un]lock functions
 * are no-ops
 */
static void
fib_path_last_lock_gone (fib_node_t *node)
{
    /* paths are never individually locked; reaching a last-lock event
     * here indicates a reference-counting bug elsewhere */
    ASSERT(0);
}
501
502 static const adj_index_t
503 fib_path_attached_next_hop_get_adj (fib_path_t *path,
504                                     vnet_link_t link)
505 {
506     if (vnet_sw_interface_is_p2p(vnet_get_main(),
507                                  path->attached_next_hop.fp_interface))
508     {
509         /*
510          * if the interface is p2p then the adj for the specific
511          * neighbour on that link will never exist. on p2p links
512          * the subnet address (the attached route) links to the
513          * auto-adj (see below), we want that adj here too.
514          */
515         return (adj_nbr_add_or_lock(path->fp_nh_proto,
516                                     link,
517                                     &zero_addr,
518                                     path->attached_next_hop.fp_interface));
519     }
520     else
521     {
522         return (adj_nbr_add_or_lock(path->fp_nh_proto,
523                                     link,
524                                     &path->attached_next_hop.fp_nh,
525                                     path->attached_next_hop.fp_interface));
526     }
527 }
528
/*
 * Resolve an attached-next-hop path: point its DPO at the neighbour
 * adjacency and register as a child of that adjacency.
 */
static void
fib_path_attached_next_hop_set (fib_path_t *path)
{
    /*
     * resolve directly via the adjacency described by the
     * interface and next-hop
     */
    if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
                                      path->attached_next_hop.fp_interface))
    {
        path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
    }

    /*
     * NOTE(review): adj_nbr_add_or_lock() returns with a reference held
     * and dpo_set() takes its own; the extra reference appears to be
     * balanced by the explicit adj_unlock() in fib_path_unresolve() -
     * confirm before changing either site.
     */
    dpo_set(&path->fp_dpo,
            DPO_ADJACENCY,
            fib_proto_to_dpo(path->fp_nh_proto),
            fib_path_attached_next_hop_get_adj(
                 path,
                 fib_proto_to_link(path->fp_nh_proto)));

    /*
     * become a child of the adjacency so we receive updates
     * when its rewrite changes
     */
    path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
                                     FIB_NODE_TYPE_PATH,
                                     fib_path_get_index(path));
}
557
558 /*
559  * create of update the paths recursive adj
560  */
561 static void
562 fib_path_recursive_adj_update (fib_path_t *path,
563                                fib_forward_chain_type_t fct,
564                                dpo_id_t *dpo)
565 {
566     dpo_id_t via_dpo = DPO_INVALID;
567
568     /*
569      * get the DPO to resolve through from the via-entry
570      */
571     fib_entry_contribute_forwarding(path->fp_via_fib,
572                                     fct,
573                                     &via_dpo);
574
575
576     /*
577      * hope for the best - clear if restrictions apply.
578      */
579     path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
580
581     /*
582      * Validate any recursion constraints and over-ride the via
583      * adj if not met
584      */
585     if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP)
586     {
587         path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
588         dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
589     }
590     else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_HOST)
591     {
592         /*
593          * the via FIB must be a host route.
594          * note the via FIB just added will always be a host route
595          * since it is an RR source added host route. So what we need to
596          * check is whether the route has other sources. If it does then
597          * some other source has added it as a host route. If it doesn't
598          * then it was added only here and inherits forwarding from a cover.
599          * the cover is not a host route.
600          * The RR source is the lowest priority source, so we check if it
601          * is the best. if it is there are no other sources.
602          */
603         if (fib_entry_get_best_source(path->fp_via_fib) >= FIB_SOURCE_RR)
604         {
605             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
606             dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
607
608             /*
609              * PIC edge trigger. let the load-balance maps know
610              */
611             load_balance_map_path_state_change(fib_path_get_index(path));
612         }
613     }
614     else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED)
615     {
616         /*
617          * RR source entries inherit the flags from the cover, so
618          * we can check the via directly
619          */
620         if (!(FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags(path->fp_via_fib)))
621         {
622             path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
623             dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
624
625             /*
626              * PIC edge trigger. let the load-balance maps know
627              */
628             load_balance_map_path_state_change(fib_path_get_index(path));
629         }
630     }
631
632     /*
633      * update the path's contributed DPO
634      */
635     dpo_copy(dpo, &via_dpo);
636
637     FIB_PATH_DBG(path, "recursive update: %U",
638                  fib_get_lookup_main(path->fp_nh_proto),
639                  &path->fp_dpo, 2);
640
641     dpo_reset(&via_dpo);
642 }
643
644 /*
645  * fib_path_is_permanent_drop
646  *
647  * Return !0 if the path is configured to permanently drop,
648  * despite other attributes.
649  */
650 static int
651 fib_path_is_permanent_drop (fib_path_t *path)
652 {
653     return ((path->fp_cfg_flags & FIB_PATH_CFG_FLAG_DROP) ||
654             (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP));
655 }
656
/*
 * fib_path_unresolve
 *
 * Remove our dependency on the resolution target
 */
static void
fib_path_unresolve (fib_path_t *path)
{
    /*
     * the forced drop path does not need unresolving
     */
    if (fib_path_is_permanent_drop(path))
    {
        return;
    }

    switch (path->fp_type)
    {
    case FIB_PATH_TYPE_RECURSIVE:
        if (FIB_NODE_INDEX_INVALID != path->fp_via_fib)
        {
            fib_prefix_t pfx;

            /* stop tracking the via-entry and withdraw the RR-sourced
             * host entry this path added to resolve its next-hop */
            fib_prefix_from_ip46_addr(&path->recursive.fp_nh, &pfx);
            fib_entry_child_remove(path->fp_via_fib,
                                   path->fp_sibling);
            fib_table_entry_special_remove(path->recursive.fp_tbl_id,
                                           &pfx,
                                           FIB_SOURCE_RR);
            path->fp_via_fib = FIB_NODE_INDEX_INVALID;
        }
        break;
    case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
    case FIB_PATH_TYPE_ATTACHED:
        /* NOTE(review): the explicit adj_unlock here appears to pair
         * with the extra reference taken by adj_nbr_add_or_lock() at
         * resolve time; the dpo_reset() below drops the DPO's own
         * reference - confirm before reordering or removing either. */
        adj_child_remove(path->fp_dpo.dpoi_index,
                         path->fp_sibling);
        adj_unlock(path->fp_dpo.dpoi_index);
        break;
    case FIB_PATH_TYPE_EXCLUSIVE:
        dpo_reset(&path->exclusive.fp_ex_dpo);
        break;
    case FIB_PATH_TYPE_SPECIAL:
    case FIB_PATH_TYPE_RECEIVE:
    case FIB_PATH_TYPE_DEAG:
        /*
         * these hold only the path's DPO, which is reset below.
         */
        break;
    }

    /*
     * release the adj we were holding and pick up the
     * drop just in case.
     */
    dpo_reset(&path->fp_dpo);
    path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;

    return;
}
716
717 static fib_forward_chain_type_t
718 fib_path_proto_to_chain_type (fib_protocol_t proto)
719 {
720     switch (proto)
721     {
722     case FIB_PROTOCOL_IP4:
723         return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
724     case FIB_PROTOCOL_IP6:
725         return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6);
726     case FIB_PROTOCOL_MPLS:
727         return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS);
728     }
729     return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
730 }
731
/*
 * fib_path_back_walk_notify
 *
 * A back walk has reached this path. Update the path's resolution state
 * according to the walk reason, then propagate the walk to the owning
 * path-list.
 */
static fib_node_back_walk_rc_t
fib_path_back_walk_notify (fib_node_t *node,
                           fib_node_back_walk_ctx_t *ctx)
{
    fib_path_t *path;

    path = fib_path_from_fib_node(node);

    switch (path->fp_type)
    {
    case FIB_PATH_TYPE_RECURSIVE:
        if (FIB_NODE_BW_REASON_FLAG_EVALUATE & ctx->fnbw_reason)
        {
            /*
             * modify the recursive adjacency to use the new forwarding
             * of the via-fib.
             * this update is visible to packets in flight in the DP.
             */
            fib_path_recursive_adj_update(
                path,
                fib_path_proto_to_chain_type(path->fp_nh_proto),
                &path->fp_dpo);
        }
        if ((FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason) ||
            (FIB_NODE_BW_REASON_FLAG_ADJ_DOWN   & ctx->fnbw_reason))
        {
            /*
             * ADJ updates (complete<->incomplete) do not need to propagate to
             * recursive entries.
             * The only reason its needed as far back as here, is that the adj
             * and the incomplete adj are a different DPO type, so the LBs need
             * to re-stack.
             * If this walk was quashed in the fib_entry, then any non-fib_path
             * children (like tunnels that collapse out the LB when they stack)
             * would not see the update.
             */
            return (FIB_NODE_BACK_WALK_CONTINUE);
        }
        break;
    case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
        /*
         * ADJ_UPDATE backwalks pass silently through here and up to
         * the path-list when the multipath adj collapse occurs.
         * The reason we do this is that the assumption is that VPP
         * runs in an environment where the Control-Plane is remote
         * and hence reacts slowly to link up down. In order to remove
         * this down link from the ECMP set quickly, we back-walk.
         * VPP also has dedicated CPUs, so we are not stealing resources
         * from the CP to do so.
         */
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason)
        {
            if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED)
            {
                /*
                 * already resolved. no need to walk back again
                 */
                return (FIB_NODE_BACK_WALK_CONTINUE);
            }
            path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
        }
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason)
        {
            if (!(path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED))
            {
                /*
                 * already unresolved. no need to walk back again
                 */
                return (FIB_NODE_BACK_WALK_CONTINUE);
            }
            path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
        }
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
        {
            /*
             * The interface this path resolves through has been deleted.
             * This will leave the path in a permanent drop state. The route
             * needs to be removed and readded (and hence the path-list deleted)
             * before it can forward again.
             */
            fib_path_unresolve(path);
            path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
        }
        if (FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason)
        {
            /*
             * restack the DPO to pick up the correct DPO sub-type
             */
            uword if_is_up;
            adj_index_t ai;

            if_is_up = vnet_sw_interface_is_admin_up(
                           vnet_get_main(),
                           path->attached_next_hop.fp_interface);

            if (if_is_up)
            {
                path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
            }

            /* get_adj returns a locked adj; dpo_set takes its own lock,
             * so the caller's reference is released immediately after */
            ai = fib_path_attached_next_hop_get_adj(
                     path,
                     fib_proto_to_link(path->fp_nh_proto));

            dpo_set(&path->fp_dpo, DPO_ADJACENCY,
                    fib_proto_to_dpo(path->fp_nh_proto),
                    ai);
            adj_unlock(ai);

            if (!if_is_up)
            {
                /*
                 * If the interface is not up there is no reason to walk
                 * back to children. if we did they would only evaluate
                 * that this path is unresolved and hence it would
                 * not contribute the adjacency - so it would be wasted
                 * CPU time.
                 */
                return (FIB_NODE_BACK_WALK_CONTINUE);
            }
        }
        if (FIB_NODE_BW_REASON_FLAG_ADJ_DOWN & ctx->fnbw_reason)
        {
            if (!(path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED))
            {
                /*
                 * already unresolved. no need to walk back again
                 */
                return (FIB_NODE_BACK_WALK_CONTINUE);
            }
            /*
             * the adj has gone down. the path is no longer resolved.
             */
            path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
        }
        break;
    case FIB_PATH_TYPE_ATTACHED:
        /*
         * FIXME; this could schedule a lower priority walk, since attached
         * routes are not usually in ECMP configurations so the backwalk to
         * the FIB entry does not need to be high priority
         */
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason)
        {
            path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;
        }
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason)
        {
            path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
        }
        if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
        {
            fib_path_unresolve(path);
            path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
        }
        break;
    case FIB_PATH_TYPE_DEAG:
        /*
         * FIXME When VRF delete is allowed this will need a poke.
         */
    case FIB_PATH_TYPE_SPECIAL:
    case FIB_PATH_TYPE_RECEIVE:
    case FIB_PATH_TYPE_EXCLUSIVE:
        /*
         * these path types have no parents. so to be
         * walked from one is unexpected.
         */
        ASSERT(0);
        break;
    }

    /*
     * propagate the backwalk further to the path-list
     */
    fib_path_list_back_walk(path->fp_pl_index, ctx);

    return (FIB_NODE_BACK_WALK_CONTINUE);
}
916
/*
 * Report the path pool's element count, capacity and element size
 * (invoked via the node vft for 'show fib memory').
 */
static void
fib_path_memory_show (void)
{
    fib_show_memory_usage("Path",
                          pool_elts(fib_path_pool),
                          pool_len(fib_path_pool),
                          sizeof(fib_path_t));
}
925
926 /*
927  * The FIB path's graph node virtual function table
928  */
929 static const fib_node_vft_t fib_path_vft = {
930     .fnv_get = fib_path_get_node,
931     .fnv_last_lock = fib_path_last_lock_gone,
932     .fnv_back_walk = fib_path_back_walk_notify,
933     .fnv_mem_show = fib_path_memory_show,
934 };
935
936 static fib_path_cfg_flags_t
937 fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath)
938 {
939     fib_path_cfg_flags_t cfg_flags = FIB_PATH_CFG_FLAG_NONE;
940
941     if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_HOST)
942         cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_HOST;
943     if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED)
944         cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED;
945
946     return (cfg_flags);
947 }
948
949 /*
950  * fib_path_create
951  *
952  * Create and initialise a new path object.
953  * return the index of the path.
954  */
955 fib_node_index_t
956 fib_path_create (fib_node_index_t pl_index,
957                  fib_protocol_t nh_proto,
958                  fib_path_cfg_flags_t flags,
959                  const fib_route_path_t *rpath)
960 {
961     fib_path_t *path;
962
963     pool_get(fib_path_pool, path);
964     memset(path, 0, sizeof(*path));
965
966     fib_node_init(&path->fp_node,
967                   FIB_NODE_TYPE_PATH);
968
969     dpo_reset(&path->fp_dpo);
970     path->fp_pl_index = pl_index;
971     path->fp_nh_proto = nh_proto;
972     path->fp_via_fib = FIB_NODE_INDEX_INVALID;
973     path->fp_weight = rpath->frp_weight;
974     if (0 == path->fp_weight)
975     {
976         /*
977          * a weight of 0 is a meaningless value. We could either reject it, and thus force
978          * clients to always use 1, or we can accept it and fixup approrpiately.
979          */
980         path->fp_weight = 1;
981     }
982     path->fp_cfg_flags = flags;
983     path->fp_cfg_flags |= fib_path_route_flags_to_cfg_flags(rpath);
984
985     /*
986      * deduce the path's tpye from the parementers and save what is needed.
987      */
988     if (~0 != rpath->frp_sw_if_index)
989     {
990         if (flags & FIB_PATH_CFG_FLAG_LOCAL)
991         {
992             path->fp_type = FIB_PATH_TYPE_RECEIVE;
993             path->receive.fp_interface = rpath->frp_sw_if_index;
994             path->receive.fp_addr = rpath->frp_addr;
995         }
996         else
997         {
998             if (ip46_address_is_zero(&rpath->frp_addr))
999             {
1000                 path->fp_type = FIB_PATH_TYPE_ATTACHED;
1001                 path->attached.fp_interface = rpath->frp_sw_if_index;
1002             }
1003             else
1004             {
1005                 path->fp_type = FIB_PATH_TYPE_ATTACHED_NEXT_HOP;
1006                 path->attached_next_hop.fp_interface = rpath->frp_sw_if_index;
1007                 path->attached_next_hop.fp_nh = rpath->frp_addr;
1008             }
1009         }
1010     }
1011     else
1012     {
1013         if (ip46_address_is_zero(&rpath->frp_addr))
1014         {
1015             if (~0 == rpath->frp_fib_index)
1016             {
1017                 path->fp_type = FIB_PATH_TYPE_SPECIAL;
1018             }
1019             else
1020             {
1021                 path->fp_type = FIB_PATH_TYPE_DEAG;
1022                 path->deag.fp_tbl_id = rpath->frp_fib_index;
1023             }           
1024         }
1025         else
1026         {
1027             path->fp_type = FIB_PATH_TYPE_RECURSIVE;
1028             path->recursive.fp_nh = rpath->frp_addr;
1029             path->recursive.fp_tbl_id = rpath->frp_fib_index;
1030         }
1031     }
1032
1033     FIB_PATH_DBG(path, "create");
1034
1035     return (fib_path_get_index(path));
1036 }
1037
1038 /*
1039  * fib_path_create_special
1040  *
1041  * Create and initialise a new path object.
1042  * return the index of the path.
1043  */
1044 fib_node_index_t
1045 fib_path_create_special (fib_node_index_t pl_index,
1046                          fib_protocol_t nh_proto,
1047                          fib_path_cfg_flags_t flags,
1048                          const dpo_id_t *dpo)
1049 {
1050     fib_path_t *path;
1051
1052     pool_get(fib_path_pool, path);
1053     memset(path, 0, sizeof(*path));
1054
1055     fib_node_init(&path->fp_node,
1056                   FIB_NODE_TYPE_PATH);
1057     dpo_reset(&path->fp_dpo);
1058
1059     path->fp_pl_index = pl_index;
1060     path->fp_weight = 1;
1061     path->fp_nh_proto = nh_proto;
1062     path->fp_via_fib = FIB_NODE_INDEX_INVALID;
1063     path->fp_cfg_flags = flags;
1064
1065     if (FIB_PATH_CFG_FLAG_DROP & flags)
1066     {
1067         path->fp_type = FIB_PATH_TYPE_SPECIAL;
1068     }
1069     else if (FIB_PATH_CFG_FLAG_LOCAL & flags)
1070     {
1071         path->fp_type = FIB_PATH_TYPE_RECEIVE;
1072         path->attached.fp_interface = FIB_NODE_INDEX_INVALID;
1073     }
1074     else
1075     {
1076         path->fp_type = FIB_PATH_TYPE_EXCLUSIVE;
1077         ASSERT(NULL != dpo);
1078         dpo_copy(&path->exclusive.fp_ex_dpo, dpo);
1079     }
1080
1081     return (fib_path_get_index(path));
1082 }
1083
1084 /*
1085  * fib_path_copy
1086  *
1087  * Copy a path. return index of new path.
1088  */
1089 fib_node_index_t
1090 fib_path_copy (fib_node_index_t path_index,
1091                fib_node_index_t path_list_index)
1092 {
1093     fib_path_t *path, *orig_path;
1094
1095     pool_get(fib_path_pool, path);
1096
1097     orig_path = fib_path_get(path_index);
1098     ASSERT(NULL != orig_path);
1099
1100     memcpy(path, orig_path, sizeof(*path));
1101
1102     FIB_PATH_DBG(path, "create-copy:%d", path_index);
1103
1104     /*
1105      * reset the dynamic section
1106      */
1107     fib_node_init(&path->fp_node, FIB_NODE_TYPE_PATH);
1108     path->fp_oper_flags     = FIB_PATH_OPER_FLAG_NONE;
1109     path->fp_pl_index  = path_list_index;
1110     path->fp_via_fib   = FIB_NODE_INDEX_INVALID;
1111     memset(&path->fp_dpo, 0, sizeof(path->fp_dpo));
1112     dpo_reset(&path->fp_dpo);
1113
1114     return (fib_path_get_index(path));
1115 }
1116
1117 /*
1118  * fib_path_destroy
1119  *
1120  * destroy a path that is no longer required
1121  */
1122 void
1123 fib_path_destroy (fib_node_index_t path_index)
1124 {
1125     fib_path_t *path;
1126
1127     path = fib_path_get(path_index);
1128
1129     ASSERT(NULL != path);
1130     FIB_PATH_DBG(path, "destroy");
1131
1132     fib_path_unresolve(path);
1133
1134     fib_node_deinit(&path->fp_node);
1135     pool_put(fib_path_pool, path);
1136 }
1137
1138 /*
1139  * fib_path_destroy
1140  *
1141  * destroy a path that is no longer required
1142  */
1143 uword
1144 fib_path_hash (fib_node_index_t path_index)
1145 {
1146     fib_path_t *path;
1147
1148     path = fib_path_get(path_index);
1149
1150     return (hash_memory(STRUCT_MARK_PTR(path, path_hash_start),
1151                         (STRUCT_OFFSET_OF(fib_path_t, path_hash_end) -
1152                          STRUCT_OFFSET_OF(fib_path_t, path_hash_start)),
1153                         0));
1154 }
1155
1156 /*
1157  * fib_path_cmp_i
1158  *
1159  * Compare two paths for equivalence.
1160  */
1161 static int
1162 fib_path_cmp_i (const fib_path_t *path1,
1163                 const fib_path_t *path2)
1164 {
1165     int res;
1166
1167     res = 1;
1168
1169     /*
1170      * paths of different types and protocol are not equal.
1171      * different weights only are the same path.
1172      */
1173     if (path1->fp_type != path2->fp_type)
1174     {
1175         res = (path1->fp_type - path2->fp_type);
1176     }
1177     if (path1->fp_nh_proto != path2->fp_nh_proto)
1178     {
1179         res = (path1->fp_nh_proto - path2->fp_nh_proto);
1180     }
1181     else
1182     {
1183         /*
1184          * both paths are of the same type.
1185          * consider each type and its attributes in turn.
1186          */
1187         switch (path1->fp_type)
1188         {
1189         case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1190             res = ip46_address_cmp(&path1->attached_next_hop.fp_nh,
1191                                    &path2->attached_next_hop.fp_nh);
1192             if (0 == res) {
1193                 res = vnet_sw_interface_compare(
1194                           vnet_get_main(),
1195                           path1->attached_next_hop.fp_interface,
1196                           path2->attached_next_hop.fp_interface);
1197             }
1198             break;
1199         case FIB_PATH_TYPE_ATTACHED:
1200             res = vnet_sw_interface_compare(
1201                       vnet_get_main(),
1202                       path1->attached.fp_interface,
1203                       path2->attached.fp_interface);
1204             break;
1205         case FIB_PATH_TYPE_RECURSIVE:
1206             res = ip46_address_cmp(&path1->recursive.fp_nh,
1207                                    &path2->recursive.fp_nh);
1208  
1209             if (0 == res)
1210             {
1211                 res = (path1->recursive.fp_tbl_id - path2->recursive.fp_tbl_id);
1212             }
1213             break;
1214         case FIB_PATH_TYPE_DEAG:
1215             res = (path1->deag.fp_tbl_id - path2->deag.fp_tbl_id);
1216             break;
1217         case FIB_PATH_TYPE_SPECIAL:
1218         case FIB_PATH_TYPE_RECEIVE:
1219         case FIB_PATH_TYPE_EXCLUSIVE:
1220             res = 0;
1221             break;
1222         }
1223     }
1224     return (res);
1225 }
1226
1227 /*
1228  * fib_path_cmp_for_sort
1229  *
1230  * Compare two paths for equivalence. Used during path sorting.
1231  * As usual 0 means equal.
1232  */
1233 int
1234 fib_path_cmp_for_sort (void * v1,
1235                        void * v2)
1236 {
1237     fib_node_index_t *pi1 = v1, *pi2 = v2;
1238     fib_path_t *path1, *path2;
1239
1240     path1 = fib_path_get(*pi1);
1241     path2 = fib_path_get(*pi2);
1242
1243     return (fib_path_cmp_i(path1, path2));
1244 }
1245
1246 /*
1247  * fib_path_cmp
1248  *
1249  * Compare two paths for equivalence.
1250  */
1251 int
1252 fib_path_cmp (fib_node_index_t pi1,
1253               fib_node_index_t pi2)
1254 {
1255     fib_path_t *path1, *path2;
1256
1257     path1 = fib_path_get(pi1);
1258     path2 = fib_path_get(pi2);
1259
1260     return (fib_path_cmp_i(path1, path2));
1261 }
1262
/*
 * fib_path_cmp_w_route_path
 *
 * Compare a path object against a client-supplied route-path
 * description. Returns 0 when equivalent, non-zero otherwise.
 * NOTE(review): unlike fib_path_cmp_i, the weight IS significant here,
 * and the path's type is not cross-checked against the rpath - it is
 * assumed the rpath's fields imply the same type; confirm callers
 * guarantee this.
 */
int
fib_path_cmp_w_route_path (fib_node_index_t path_index,
                           const fib_route_path_t *rpath)
{
    fib_path_t *path;
    int res;

    path = fib_path_get(path_index);

    res = 1;

    if (path->fp_weight != rpath->frp_weight)
    {
	res = (path->fp_weight - rpath->frp_weight);
    }
    else
    {
	/*
	 * compare the path's type-specific attributes against the
	 * corresponding fields of the route-path.
	 */
	switch (path->fp_type)
	{
	case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
	    res = ip46_address_cmp(&path->attached_next_hop.fp_nh,
				   &rpath->frp_addr);
	    if (0 == res)
	    {
		res = vnet_sw_interface_compare(
			  vnet_get_main(),
			  path->attached_next_hop.fp_interface,
			  rpath->frp_sw_if_index);
	    }
	    break;
	case FIB_PATH_TYPE_ATTACHED:
	    res = vnet_sw_interface_compare(
		      vnet_get_main(),
		      path->attached.fp_interface,
		      rpath->frp_sw_if_index);
	    break;
	case FIB_PATH_TYPE_RECURSIVE:
	    res = ip46_address_cmp(&path->recursive.fp_nh,
				   &rpath->frp_addr);

	    if (0 == res)
	    {
		res = (path->recursive.fp_tbl_id - rpath->frp_fib_index);
	    }
	    break;
	case FIB_PATH_TYPE_DEAG:
	    res = (path->deag.fp_tbl_id - rpath->frp_fib_index);
	    break;
	case FIB_PATH_TYPE_SPECIAL:
	case FIB_PATH_TYPE_RECEIVE:
	case FIB_PATH_TYPE_EXCLUSIVE:
	    /* no per-type attributes to compare */
	    res = 0;
	    break;
	}
    }
    return (res);
}
1324
1325 /*
1326  * fib_path_recursive_loop_detect
1327  *
1328  * A forward walk of the FIB object graph to detect for a cycle/loop. This
1329  * walk is initiated when an entry is linking to a new path list or from an old.
1330  * The entry vector passed contains all the FIB entrys that are children of this
1331  * path (it is all the entries encountered on the walk so far). If this vector
1332  * contains the entry this path resolve via, then a loop is about to form.
1333  * The loop must be allowed to form, since we need the dependencies in place
1334  * so that we can track when the loop breaks.
1335  * However, we MUST not produce a loop in the forwarding graph (else packets
1336  * would loop around the switch path until the loop breaks), so we mark recursive
1337  * paths as looped so that they do not contribute forwarding information.
1338  * By marking the path as looped, an etry such as;
1339  *    X/Y
1340  *     via a.a.a.a (looped)
1341  *     via b.b.b.b (not looped)
1342  * can still forward using the info provided by b.b.b.b only
1343  */
1344 int
1345 fib_path_recursive_loop_detect (fib_node_index_t path_index,
1346                                 fib_node_index_t **entry_indicies)
1347 {
1348     fib_path_t *path;
1349
1350     path = fib_path_get(path_index);
1351
1352     /*
1353      * the forced drop path is never looped, cos it is never resolved.
1354      */
1355     if (fib_path_is_permanent_drop(path))
1356     {
1357         return (0);
1358     }
1359
1360     switch (path->fp_type)
1361     {
1362     case FIB_PATH_TYPE_RECURSIVE:
1363     {
1364         fib_node_index_t *entry_index, *entries;
1365         int looped = 0;
1366         entries = *entry_indicies;
1367
1368         vec_foreach(entry_index, entries) {
1369             if (*entry_index == path->fp_via_fib)
1370             {
1371                 /*
1372                  * the entry that is about to link to this path-list (or
1373                  * one of this path-list's children) is the same entry that
1374                  * this recursive path resolves through. this is a cycle.
1375                  * abort the walk.
1376                  */
1377                 looped = 1;
1378                 break;
1379             }
1380         }
1381
1382         if (looped)
1383         {
1384             FIB_PATH_DBG(path, "recursive loop formed");
1385             path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
1386
1387             dpo_copy(&path->fp_dpo,
1388                     drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
1389         }
1390         else
1391         {
1392             /*
1393              * no loop here yet. keep forward walking the graph.
1394              */     
1395             if (fib_entry_recursive_loop_detect(path->fp_via_fib, entry_indicies))
1396             {
1397                 FIB_PATH_DBG(path, "recursive loop formed");
1398                 path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
1399             }
1400             else
1401             {
1402                 FIB_PATH_DBG(path, "recursive loop cleared");
1403                 path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RECURSIVE_LOOP;
1404             }
1405         }
1406         break;
1407     }
1408     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1409     case FIB_PATH_TYPE_ATTACHED:
1410     case FIB_PATH_TYPE_SPECIAL:
1411     case FIB_PATH_TYPE_DEAG:
1412     case FIB_PATH_TYPE_RECEIVE:
1413     case FIB_PATH_TYPE_EXCLUSIVE:
1414         /*
1415          * these path types cannot be part of a loop, since they are the leaves
1416          * of the graph.
1417          */
1418         break;
1419     }
1420
1421     return (fib_path_is_looped(path_index));
1422 }
1423
/*
 * fib_path_resolve
 *
 * Resolve the path: construct the DPO through which it will forward
 * and, where applicable, register the path as a child of its parent
 * object (adjacency or via-entry) so it hears about state changes.
 * Returns non-zero if the path ends up resolved.
 */
int
fib_path_resolve (fib_node_index_t path_index)
{
    fib_path_t *path;

    path = fib_path_get(path_index);

    /*
     * hope for the best.
     */
    path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED;

    /*
     * the forced drop path resolves via the drop adj
     */
    if (fib_path_is_permanent_drop(path))
    {
	dpo_copy(&path->fp_dpo,
		 drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
	path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
	return (fib_path_is_resolved(path_index));
    }

    switch (path->fp_type)
    {
    case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
	fib_path_attached_next_hop_set(path);
	break;
    case FIB_PATH_TYPE_ATTACHED:
	/*
	 * an attached path is resolved only while its interface is
	 * admin up
	 */
	if (!vnet_sw_interface_is_admin_up(vnet_get_main(),
					   path->attached.fp_interface))
	{
	    path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
	}
	if (vnet_sw_interface_is_p2p(vnet_get_main(),
				     path->attached.fp_interface))
	{
	    /*
	     * point-2-point interfaces do not require a glean, since
	     * there is nothing to ARP. Install a rewrite/nbr adj instead
	     */
	    dpo_set(&path->fp_dpo,
		    DPO_ADJACENCY,
		    fib_proto_to_dpo(path->fp_nh_proto),
		    adj_nbr_add_or_lock(
			path->fp_nh_proto,
			fib_proto_to_link(path->fp_nh_proto),
			&zero_addr,
			path->attached.fp_interface));
	}
	else
	{
	    /* broadcast media: install a glean adj to trigger ARP/ND */
	    dpo_set(&path->fp_dpo,
		    DPO_ADJACENCY_GLEAN,
		    fib_proto_to_dpo(path->fp_nh_proto),
		    adj_glean_add_or_lock(path->fp_nh_proto,
					  path->attached.fp_interface,
					  NULL));
	}
	/*
	 * become a child of the adjacency so we receive updates
	 * when the interface state changes
	 */
	path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index,
					 FIB_NODE_TYPE_PATH,
					 fib_path_get_index(path));

	break;
    case FIB_PATH_TYPE_RECURSIVE:
    {
	/*
	 * Create a RR source entry in the table for the address
	 * that this path recurses through.
	 * This resolve action is recursive, hence we may create
	 * more paths in the process. more creates mean maybe realloc
	 * of this path.
	 */
	fib_node_index_t fei;
	fib_prefix_t pfx;

	ASSERT(FIB_NODE_INDEX_INVALID == path->fp_via_fib);

	fib_prefix_from_ip46_addr(&path->recursive.fp_nh, &pfx);

	fei = fib_table_entry_special_add(path->recursive.fp_tbl_id,
					  &pfx,
					  FIB_SOURCE_RR,
					  FIB_ENTRY_FLAG_NONE,
					  ADJ_INDEX_INVALID);

	/* re-fetch: the special_add above may have moved the path pool */
	path = fib_path_get(path_index);
	path->fp_via_fib = fei;

	/*
	 * become a dependent child of the entry so the path is 
	 * informed when the forwarding for the entry changes.
	 */
	path->fp_sibling = fib_entry_child_add(path->fp_via_fib,
					       FIB_NODE_TYPE_PATH,
					       fib_path_get_index(path));

	/*
	 * create and configure the IP DPO
	 */
	fib_path_recursive_adj_update(
	    path,
	    fib_path_proto_to_chain_type(path->fp_nh_proto),
	    &path->fp_dpo);

	break;
    }
    case FIB_PATH_TYPE_SPECIAL:
	/*
	 * Resolve via the drop
	 */
	dpo_copy(&path->fp_dpo,
		 drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto)));
	break;
    case FIB_PATH_TYPE_DEAG:
	/*
	 * Resolve via a lookup DPO.
         * FIXME. control plane should add routes with a table ID
	 */
	lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id,
                                          fib_proto_to_dpo(path->fp_nh_proto),
                                          LOOKUP_INPUT_DST_ADDR,
                                          LOOKUP_TABLE_FROM_CONFIG,
                                          &path->fp_dpo);
	break;
    case FIB_PATH_TYPE_RECEIVE:
	/*
	 * Resolve via a receive DPO.
	 */
	receive_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto),
                                path->receive.fp_interface,
                                &path->receive.fp_addr,
                                &path->fp_dpo);
	break;
    case FIB_PATH_TYPE_EXCLUSIVE:
	/*
	 * Resolve via the user provided DPO
	 */
	dpo_copy(&path->fp_dpo, &path->exclusive.fp_ex_dpo);
	break;
    }

    return (fib_path_is_resolved(path_index));
}
1575
1576 u32
1577 fib_path_get_resolving_interface (fib_node_index_t path_index)
1578 {
1579     fib_path_t *path;
1580
1581     path = fib_path_get(path_index);
1582
1583     switch (path->fp_type)
1584     {
1585     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1586         return (path->attached_next_hop.fp_interface);
1587     case FIB_PATH_TYPE_ATTACHED:
1588         return (path->attached.fp_interface);
1589     case FIB_PATH_TYPE_RECEIVE:
1590         return (path->receive.fp_interface);
1591     case FIB_PATH_TYPE_RECURSIVE:
1592         return (fib_entry_get_resolving_interface(path->fp_via_fib));    
1593     case FIB_PATH_TYPE_SPECIAL:
1594     case FIB_PATH_TYPE_DEAG:
1595     case FIB_PATH_TYPE_EXCLUSIVE:
1596         break;
1597     }
1598     return (~0);
1599 }
1600
1601 adj_index_t
1602 fib_path_get_adj (fib_node_index_t path_index)
1603 {
1604     fib_path_t *path;
1605
1606     path = fib_path_get(path_index);
1607
1608     ASSERT(dpo_is_adj(&path->fp_dpo));
1609     if (dpo_is_adj(&path->fp_dpo))
1610     {
1611         return (path->fp_dpo.dpoi_index);
1612     }
1613     return (ADJ_INDEX_INVALID);
1614 }
1615
1616 int
1617 fib_path_get_weight (fib_node_index_t path_index)
1618 {
1619     fib_path_t *path;
1620
1621     path = fib_path_get(path_index);
1622
1623     ASSERT(path);
1624
1625     return (path->fp_weight);
1626 }
1627
1628 /**
1629  * @brief Contribute the path's adjacency to the list passed.
1630  * By calling this function over all paths, recursively, a child
1631  * can construct its full set of forwarding adjacencies, and hence its
1632  * uRPF list.
1633  */
1634 void
1635 fib_path_contribute_urpf (fib_node_index_t path_index,
1636                           index_t urpf)
1637 {
1638     fib_path_t *path;
1639
1640     if (!fib_path_is_resolved(path_index))
1641         return;
1642
1643     path = fib_path_get(path_index);
1644
1645     switch (path->fp_type)
1646     {
1647     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
1648         fib_urpf_list_append(urpf, path->attached_next_hop.fp_interface);
1649         break;
1650
1651     case FIB_PATH_TYPE_ATTACHED:
1652         fib_urpf_list_append(urpf, path->attached.fp_interface);
1653         break;
1654
1655     case FIB_PATH_TYPE_RECURSIVE:
1656         fib_entry_contribute_urpf(path->fp_via_fib, urpf);
1657         break;
1658
1659     case FIB_PATH_TYPE_EXCLUSIVE:
1660     case FIB_PATH_TYPE_SPECIAL:
1661         /*
1662          * these path types may link to an adj, if that's what
1663          * the clinet gave
1664          */
1665         if (dpo_is_adj(&path->fp_dpo))
1666         {
1667             ip_adjacency_t *adj;
1668
1669             adj = adj_get(path->fp_dpo.dpoi_index);
1670
1671             fib_urpf_list_append(urpf, adj->rewrite_header.sw_if_index);
1672         }
1673         break;
1674
1675     case FIB_PATH_TYPE_DEAG:
1676     case FIB_PATH_TYPE_RECEIVE:
1677         /*
1678          * these path types don't link to an adj
1679          */
1680         break;
1681     }
1682 }
1683
/*
 * fib_path_contribute_forwarding
 *
 * Fill 'dpo' with the forwarding object this path contributes for the
 * requested forwarding-chain type. The path's stored DPO is used when
 * the chain type matches the path's native protocol; otherwise a
 * type-specific object is constructed.
 */
void
fib_path_contribute_forwarding (fib_node_index_t path_index,
                                fib_forward_chain_type_t fct,
                                dpo_id_t *dpo)
{
    fib_path_t *path;

    path = fib_path_get(path_index);

    ASSERT(path);
    /*
     * NOTE(review): this asserts MPLS_EOS is never requested, yet the
     * switches below still carry FIB_FORW_CHAIN_TYPE_MPLS_EOS case
     * labels - confirm which reflects the intended contract.
     */
    ASSERT(FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct);

    FIB_PATH_DBG(path, "contribute");

    /*
     * The DPO stored in the path was created when the path was resolved.
     * This then represents the path's 'native' protocol; IP.
     * For all others will need to go find something else.
     */
    if (fib_path_proto_to_chain_type(path->fp_nh_proto) == fct)
    {
        dpo_copy(dpo, &path->fp_dpo);
    }
    else
    {
        switch (path->fp_type)
        {
        case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
            switch (fct)
            {
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
            case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
            case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
            case FIB_FORW_CHAIN_TYPE_ETHERNET:
            {
                adj_index_t ai;

                /*
                 * get an adj of the link type matching the requested
                 * chain. The adj_unlock drops the get's reference;
                 * presumably dpo_set holds its own - confirm.
                 */
                ai = fib_path_attached_next_hop_get_adj(
                         path,
                         fib_forw_chain_type_to_link_type(fct));
                dpo_set(dpo, DPO_ADJACENCY,
                        fib_forw_chain_type_to_dpo_proto(fct), ai);
                adj_unlock(ai);

                break;
            }
            }
            break;
        case FIB_PATH_TYPE_RECURSIVE:
            switch (fct)
            {
            case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
                /*
                 * Assume that EOS and IP forwarding is the same.
                 * revisit for ieBGP
                 */
                dpo_copy(dpo, &path->fp_dpo);
                break;
            case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
                fib_path_recursive_adj_update(path, fct, dpo);
                break;
            case FIB_FORW_CHAIN_TYPE_ETHERNET:
                ASSERT(0);
                break;
            }
            break;
        case FIB_PATH_TYPE_DEAG:
            switch (fct)
            {
            case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
                /* non-EOS MPLS: look up in the default MPLS table */
                lookup_dpo_add_or_lock_w_table_id(MPLS_FIB_DEFAULT_TABLE_ID,
                                                  DPO_PROTO_MPLS,
                                                  LOOKUP_INPUT_DST_ADDR,
                                                  LOOKUP_TABLE_FROM_CONFIG,
                                                  dpo);
                break;
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
            case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
            case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
                dpo_copy(dpo, &path->fp_dpo);
                break;
            case FIB_FORW_CHAIN_TYPE_ETHERNET:
                ASSERT(0);
                break;
            }
            break;
        case FIB_PATH_TYPE_EXCLUSIVE:
            dpo_copy(dpo, &path->exclusive.fp_ex_dpo);
            break;
        case FIB_PATH_TYPE_ATTACHED:
        case FIB_PATH_TYPE_RECEIVE:
        case FIB_PATH_TYPE_SPECIAL:
            /* no cross-protocol contribution expected for these types */
            ASSERT(0);
            break;
        }

    }
}
1788
1789 load_balance_path_t *
1790 fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index,
1791                                        fib_forward_chain_type_t fct,
1792                                        load_balance_path_t *hash_key)
1793 {
1794     load_balance_path_t *mnh;
1795     fib_path_t *path;
1796
1797     path = fib_path_get(path_index);
1798
1799     ASSERT(path);
1800
1801     if (fib_path_is_resolved(path_index))
1802     {
1803         vec_add2(hash_key, mnh, 1);
1804
1805         mnh->path_weight = path->fp_weight;
1806         mnh->path_index = path_index;
1807         fib_path_contribute_forwarding(path_index, fct, &mnh->path_dpo);
1808     }
1809
1810     return (hash_key);
1811 }
1812
1813 int
1814 fib_path_is_recursive (fib_node_index_t path_index)
1815 {
1816     fib_path_t *path;
1817
1818     path = fib_path_get(path_index);
1819
1820     return (FIB_PATH_TYPE_RECURSIVE == path->fp_type);
1821 }
1822
1823 int
1824 fib_path_is_exclusive (fib_node_index_t path_index)
1825 {
1826     fib_path_t *path;
1827
1828     path = fib_path_get(path_index);
1829
1830     return (FIB_PATH_TYPE_EXCLUSIVE == path->fp_type);
1831 }
1832
1833 int
1834 fib_path_is_deag (fib_node_index_t path_index)
1835 {
1836     fib_path_t *path;
1837
1838     path = fib_path_get(path_index);
1839
1840     return (FIB_PATH_TYPE_DEAG == path->fp_type);
1841 }
1842
1843 int
1844 fib_path_is_resolved (fib_node_index_t path_index)
1845 {
1846     fib_path_t *path;
1847
1848     path = fib_path_get(path_index);
1849
1850     return (dpo_id_is_valid(&path->fp_dpo) &&
1851             (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED) &&
1852             !fib_path_is_looped(path_index) &&
1853             !fib_path_is_permanent_drop(path));
1854 }
1855
1856 int
1857 fib_path_is_looped (fib_node_index_t path_index)
1858 {
1859     fib_path_t *path;
1860
1861     path = fib_path_get(path_index);
1862
1863     return (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP);
1864 }
1865
/*
 * fib_path_encode
 *
 * Walk callback: encode one path into the route-path representation
 * used by the API, appending it to the vector passed via ctx.
 * Returns 1 to continue the walk, 0 if the path index is invalid.
 */
int
fib_path_encode (fib_node_index_t path_list_index,
                 fib_node_index_t path_index,
                 void *ctx)
{
    fib_route_path_encode_t **api_rpaths = ctx;
    fib_route_path_encode_t *api_rpath;
    fib_path_t *path;

    path = fib_path_get(path_index);
    if (!path)
      return (0);
    vec_add2(*api_rpaths, api_rpath, 1);
    api_rpath->rpath.frp_weight = path->fp_weight;
    api_rpath->rpath.frp_proto = path->fp_nh_proto;
    api_rpath->rpath.frp_sw_if_index = ~0;
    /*
     * NOTE(review): this reads the 'exclusive' union member regardless
     * of the path's type - harmless only if callers ignore 'dpo' for
     * non-exclusive paths; confirm.
     */
    api_rpath->dpo = path->exclusive.fp_ex_dpo;
    switch (path->fp_type)
      {
      case FIB_PATH_TYPE_RECEIVE:
        api_rpath->rpath.frp_addr = path->receive.fp_addr;
        api_rpath->rpath.frp_sw_if_index = path->receive.fp_interface;
        break;
      case FIB_PATH_TYPE_ATTACHED:
        api_rpath->rpath.frp_sw_if_index = path->attached.fp_interface;
        break;
      case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
        api_rpath->rpath.frp_sw_if_index = path->attached_next_hop.fp_interface;
        api_rpath->rpath.frp_addr = path->attached_next_hop.fp_nh;
        break;
      case FIB_PATH_TYPE_SPECIAL:
        break;
      case FIB_PATH_TYPE_DEAG:
        break;
      case FIB_PATH_TYPE_RECURSIVE:
        api_rpath->rpath.frp_addr = path->recursive.fp_nh;
        break;
      default:
        break;
      }
    return (1);
}
1908
1909 void
1910 fib_path_module_init (void)
1911 {
1912     fib_node_register_type (FIB_NODE_TYPE_PATH, &fib_path_vft);
1913 }
1914
1915 static clib_error_t *
1916 show_fib_path_command (vlib_main_t * vm,
1917                         unformat_input_t * input,
1918                         vlib_cli_command_t * cmd)
1919 {
1920     fib_node_index_t pi;
1921     fib_path_t *path;
1922
1923     if (unformat (input, "%d", &pi))
1924     {
1925         /*
1926          * show one in detail
1927          */
1928         if (!pool_is_free_index(fib_path_pool, pi))
1929         {
1930             path = fib_path_get(pi);
1931             u8 *s = fib_path_format(pi, NULL);
1932             s = format(s, "children:");
1933             s = fib_node_children_format(path->fp_node.fn_children, s);
1934             vlib_cli_output (vm, "%s", s);
1935             vec_free(s);
1936         }
1937         else
1938         {
1939             vlib_cli_output (vm, "path %d invalid", pi);
1940         }
1941     }
1942     else
1943     {
1944         vlib_cli_output (vm, "FIB Paths");
1945         pool_foreach(path, fib_path_pool,
1946         ({
1947             vlib_cli_output (vm, "%U", format_fib_path, path);
1948         }));
1949     }
1950
1951     return (NULL);
1952 }
1953
1954 VLIB_CLI_COMMAND (show_fib_path, static) = {
1955   .path = "show fib paths",
1956   .function = show_fib_path_command,
1957   .short_help = "show fib paths",
1958 };