2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <vnet/fib/fib_walk.h>
17 #include <vnet/fib/fib_node_list.h>
22 typedef enum fib_walk_flags_t_
26 * This walk will run to completion, i.e. visit ALL the children.
27 * It is a depth first traversal of the graph.
29 FIB_WALK_FLAG_SYNC = (1 << 0),
31 * An asynchronous walk.
32 * This walk will be scheduled to run in the background. It will thus visit
33 * the children at a later point in time.
34 * It is a depth first traversal of the graph.
36 FIB_WALK_FLAG_ASYNC = (1 << 1),
38 * An indication that the walk is currently executing.
40 FIB_WALK_FLAG_EXECUTING = (1 << 2),
44 * A representation of a graph walk from a parent object to its children
46 typedef struct fib_walk_t_
49 * FIB node linkage. This object is not in the FIB object graph,
50 * but it is present in other node's dependency lists, so it needs to
58 fib_walk_flags_t fw_flags;
61 * Sibling index in the dependency list
66 * Sibling index in the list of all walks
71 * Pointer to the node whose dependants this walk is walking
73 fib_node_ptr_t fw_parent;
76 * Number of nodes visited by this walk. saved for debugging purposes.
81 * The reasons this walk is occurring.
82 * This is a vector ordered in time. The reasons at the front were started
83 * first, and so should be acted on first when a node is visited.
85 fib_node_back_walk_ctx_t *fw_ctx;
89 * @brief The pool of all walk objects
91 static fib_walk_t *fib_walk_pool;
94 * @brief There's only one event type sent to the walk process
96 #define FIB_WALK_EVENT 0
99 * Statistics maintained per-walk queue
101 typedef enum fib_walk_queue_stats_t_
105 } fib_walk_queue_stats_t;
106 #define FIB_WALK_QUEUE_STATS_NUM (FIB_WALK_COMPLETED+1)
108 #define FIB_WALK_QUEUE_STATS { \
109 [FIB_WALK_SCHEDULED] = "scheduled", \
110 [FIB_WALK_COMPLETED] = "completed", \
113 #define FOR_EACH_FIB_WALK_QUEUE_STATS(_wqs) \
114 for ((_wqs) = FIB_WALK_SCHEDULED; \
115 (_wqs) < FIB_WALK_QUEUE_STATS_NUM; \
119 * The names of the walk stats
121 static const char * const fib_walk_queue_stats_names[] = FIB_WALK_QUEUE_STATS;
124 * A representation of one queue of walks
126 typedef struct fib_walk_queue_t_
131 u64 fwq_stats[FIB_WALK_QUEUE_STATS_NUM];
134 * The node list which acts as the queue
136 fib_node_list_t fwq_queue;
140 * A set of priority queues for outstanding walks
142 typedef struct fib_walk_queues_t_
144 fib_walk_queue_t fwqs_queues[FIB_WALK_PRIORITY_NUM];
148 * The global queues of outstanding walks
150 static fib_walk_queues_t fib_walk_queues;
153 * The names of the walk priorities
155 static const char * const fib_walk_priority_names[] = FIB_WALK_PRIORITIES;
158 format_fib_walk_priority (u8 *s, va_list ap)
160 fib_walk_priority_t prio = va_arg(ap, fib_walk_priority_t);
162 ASSERT(prio < FIB_WALK_PRIORITY_NUM);
164 return (format(s, "%s", fib_walk_priority_names[prio]));
167 format_fib_walk_queue_stats (u8 *s, va_list ap)
169 fib_walk_queue_stats_t wqs = va_arg(ap, fib_walk_queue_stats_t);
171 ASSERT(wqs < FIB_WALK_QUEUE_STATS_NUM);
173 return (format(s, "%s", fib_walk_queue_stats_names[wqs]));
177 fib_walk_get_index (fib_walk_t *fwalk)
179 return (fwalk - fib_walk_pool);
183 fib_walk_get (index_t fwi)
185 return (pool_elt_at_index(fib_walk_pool, fwi));
189 * not static so it can be used in the unit tests
192 fib_walk_queue_get_size (fib_walk_priority_t prio)
194 return (fib_node_list_get_size(fib_walk_queues.fwqs_queues[prio].fwq_queue));
197 static fib_node_index_t
198 fib_walk_queue_get_front (fib_walk_priority_t prio)
202 fib_node_list_get_front(fib_walk_queues.fwqs_queues[prio].fwq_queue, &wp);
204 return (wp.fnp_index);
208 fib_walk_destroy (fib_walk_t *fwalk)
210 if (FIB_NODE_INDEX_INVALID != fwalk->fw_prio_sibling)
212 fib_node_list_elt_remove(fwalk->fw_prio_sibling);
214 fib_node_child_remove(fwalk->fw_parent.fnp_type,
215 fwalk->fw_parent.fnp_index,
216 fwalk->fw_dep_sibling);
218 fib_node_deinit(&fwalk->fw_node);
219 pool_put(fib_walk_pool, fwalk);
223 * return code when advancing a walk
225 typedef enum fib_walk_advance_rc_t_
228 * The walk is complete
230 FIB_WALK_ADVANCE_DONE,
232 * the walk has more work
234 FIB_WALK_ADVANCE_MORE,
236 * The walk merged with the one in front
238 FIB_WALK_ADVANCE_MERGE,
239 } fib_walk_advance_rc_t;
242 * @brief Advance the walk one element in its work list
244 static fib_walk_advance_rc_t
245 fib_walk_advance (fib_node_index_t fwi)
247 fib_node_back_walk_ctx_t *ctx;
248 fib_node_back_walk_rc_t wrc;
249 fib_node_ptr_t sibling;
254 * this walk function is re-entrant - walks can spawn walks.
255 * fib_walk_t objects come from a pool, so they can realloc. we need
256 * to re-fetch from said pool at the appropriate times.
258 fwalk = fib_walk_get(fwi);
260 more_elts = fib_node_list_elt_get_next(fwalk->fw_dep_sibling, &sibling);
264 vec_foreach(ctx, fwalk->fw_ctx)
266 wrc = fib_node_back_walk_one(&sibling, ctx);
268 fwalk = fib_walk_get(fwi);
269 fwalk->fw_n_visits++;
271 if (FIB_NODE_BACK_WALK_MERGE == wrc)
274 * this walk has merged with the one further along the node's
277 return (FIB_WALK_ADVANCE_MERGE);
281 * move forward to the next node to visit
283 more_elts = fib_node_list_advance(fwalk->fw_dep_sibling);
288 return (FIB_WALK_ADVANCE_MORE);
291 return (FIB_WALK_ADVANCE_DONE);
295 * First guesses as to good values
297 #define SHORT_SLEEP 1e-8
298 #define LONG_SLEEP 1e-3
302 * @brief Service the queues
303 * This is not declared static so that it can be unit tested - i know i know...
306 fib_walk_process_queues (vlib_main_t * vm,
309 fib_walk_priority_t prio;
310 fib_walk_advance_rc_t rc;
311 fib_node_index_t fwi;
314 f64 sleep_time, start_time; // , vector_rate;
316 start_time = vlib_time_now(vm);
318 FOR_EACH_FIB_WALK_PRIORITY(prio)
320 while (0 != fib_walk_queue_get_size(prio))
322 fwi = fib_walk_queue_get_front(prio);
325 * set this walk as executing
327 fwalk = fib_walk_get(fwi);
328 fwalk->fw_flags |= FIB_WALK_FLAG_EXECUTING;
332 rc = fib_walk_advance(fwi);
333 } while (((vlib_time_now(vm) - start_time) < quota) &&
334 (FIB_WALK_ADVANCE_MORE == rc));
337 * if this walk has no more work then pop it from the queue
338 * and move on to the next.
340 if (FIB_WALK_ADVANCE_MORE != rc)
342 fwalk = fib_walk_get(fwi);
343 fib_walk_destroy(fwalk);
344 fib_walk_queues.fwqs_queues[prio].fwq_stats[FIB_WALK_COMPLETED]++;
349 * passed our work quota. sleep time.
351 fwalk = fib_walk_get(fwi);
352 fwalk->fw_flags &= ~FIB_WALK_FLAG_EXECUTING;
353 sleep_time = SHORT_SLEEP;
354 goto that_will_do_for_now;
359 * got to the end of all the work
361 sleep_time = LONG_SLEEP;
363 that_will_do_for_now:
368 * @brief The 'fib-walk' process's main loop.
371 fib_walk_process (vlib_main_t * vm,
372 vlib_node_runtime_t * node,
377 sleep_time = SHORT_SLEEP;
381 vlib_process_wait_for_event_or_clock(vm, sleep_time);
384 * there may be lots of events queued between the processes,
385 * but the walks we want to schedule are in the priority queues,
386 * so we ignore the process events.
388 vlib_process_get_events(vm, NULL);
390 sleep_time = fib_walk_process_queues(vm, QUOTA);
401 VLIB_REGISTER_NODE (fib_walk_process_node,static) = {
402 .function = fib_walk_process,
403 .type = VLIB_NODE_TYPE_PROCESS,
409 * @brief Allocate a new walk object
412 fib_walk_alloc (fib_node_type_t parent_type,
413 fib_node_index_t parent_index,
414 fib_walk_flags_t flags,
415 fib_node_back_walk_ctx_t *ctx)
419 pool_get(fib_walk_pool, fwalk);
421 fib_node_init(&fwalk->fw_node, FIB_NODE_TYPE_WALK);
423 fwalk->fw_flags = flags;
424 fwalk->fw_dep_sibling = FIB_NODE_INDEX_INVALID;
425 fwalk->fw_prio_sibling = FIB_NODE_INDEX_INVALID;
426 fwalk->fw_parent.fnp_index = parent_index;
427 fwalk->fw_parent.fnp_type = parent_type;
428 fwalk->fw_ctx = NULL;
431 * make a copy of the backwalk context so the depth count remains
432 * the same for each sibling visited. This is important in the case
433 * where a parent has a loop via one child, but not via the others.
434 * if the looped child were visited first, the depth count would exceed the
435 * max and the walk would terminate before it reached the other siblings.
437 vec_add1(fwalk->fw_ctx, *ctx);
443 * @brief Enqueue a walk onto the appropriate priority queue. Then signal
444 * the background process there is work to do.
447 fib_walk_prio_queue_enquue (fib_walk_priority_t prio,
452 sibling = fib_node_list_push_front(fib_walk_queues.fwqs_queues[prio].fwq_queue,
455 fib_walk_get_index(fwalk));
456 fib_walk_queues.fwqs_queues[prio].fwq_stats[FIB_WALK_SCHEDULED]++;
459 * poke the fib-walk process to perform the async walk.
460 * we are not passing it specific data, hence the last two args,
461 * the process will drain the queues
463 vlib_process_signal_event(vlib_get_main(),
464 fib_walk_process_node.index,
472 fib_walk_async (fib_node_type_t parent_type,
473 fib_node_index_t parent_index,
474 fib_walk_priority_t prio,
475 fib_node_back_walk_ctx_t *ctx)
479 if (FIB_NODE_GRAPH_MAX_DEPTH < ++ctx->fnbw_depth)
482 * The walk has reached the maximum depth. there is a loop in the graph.
488 fwalk = fib_walk_alloc(parent_type,
493 fwalk->fw_dep_sibling = fib_node_child_add(parent_type,
496 fib_walk_get_index(fwalk));
498 fwalk->fw_prio_sibling = fib_walk_prio_queue_enquue(prio, fwalk);
502 * @brief Back walk all the children of a FIB node.
504 * note this is a synchronous depth first walk. Children visited may propagate
505 * the walk to their children. Other children node types may not propagate
506 * synchronously but instead queue the walk for later async completion.
509 fib_walk_sync (fib_node_type_t parent_type,
510 fib_node_index_t parent_index,
511 fib_node_back_walk_ctx_t *ctx)
513 fib_walk_advance_rc_t rc;
514 fib_node_index_t fwi;
517 if (FIB_NODE_GRAPH_MAX_DEPTH < ++ctx->fnbw_depth)
520 * The walk has reached the maximum depth. there is a loop in the graph.
526 fwalk = fib_walk_alloc(parent_type,
531 fwalk->fw_dep_sibling = fib_node_child_add(parent_type,
534 fib_walk_get_index(fwalk));
535 fwi = fib_walk_get_index(fwalk);
540 * set this walk as executing
542 fwalk->fw_flags |= FIB_WALK_FLAG_EXECUTING;
546 rc = fib_walk_advance(fwi);
547 } while (FIB_WALK_ADVANCE_MORE == rc);
551 * this walk function is re-entrant - walks can spawn walks.
552 * fib_walk_t objects come from a pool, so they can realloc. we need
553 * to re-fetch from said pool at the appropriate times.
555 fwalk = fib_walk_get(fwi);
557 if (FIB_WALK_ADVANCE_MERGE == rc)
560 * this sync walk merged with a walk in front.
561 * by requesting a sync walk the client wanted all children walked,
562 * so we ditch the walk object in hand and continue with the one
565 fib_node_ptr_t merged_walk;
567 fib_node_list_elt_get_next(fwalk->fw_dep_sibling, &merged_walk);
569 ASSERT(FIB_NODE_INDEX_INVALID != merged_walk.fnp_index);
570 ASSERT(FIB_NODE_TYPE_WALK == merged_walk.fnp_type);
572 fib_walk_destroy(fwalk);
574 fwi = merged_walk.fnp_index;
575 fwalk = fib_walk_get(fwi);
577 if (FIB_WALK_FLAG_EXECUTING & fwalk->fw_flags)
580 * we are executing a sync walk, and we have met with another
581 * walk that is also executing. since only one walk executes at once
582 * (there is no multi-threading) this implies we have met ourselves
583 * and hence there is a loop in the graph.
584 * This function is re-entrant, so the walk object we met is being
585 * acted on in a stack frame below this one. We must therefore not
586 * continue with it now, but let the stack unwind and allow the
587 * appropriate frame to read the depth count and bail.
596 * the walk reached the end of the dependency list.
604 fib_walk_destroy(fwalk);
609 fib_walk_get_node (fib_node_index_t index)
613 fwalk = fib_walk_get(index);
615 return (&(fwalk->fw_node));
619 * Walk objects are not parents, nor are they locked.
623 fib_walk_last_lock_gone (fib_node_t *node)
629 fib_walk_get_from_node (fib_node_t *node)
631 return ((fib_walk_t*)(((char*)node) -
632 STRUCT_OFFSET_OF(fib_walk_t, fw_node)));
636 * @brief Another back walk has reached this walk.
637 * Merge them so there is only one left. It is this node being
638 * visited that will remain, so copy or merge the context onto it.
640 static fib_node_back_walk_rc_t
641 fib_walk_back_walk_notify (fib_node_t *node,
642 fib_node_back_walk_ctx_t *ctx)
644 fib_node_back_walk_ctx_t *old;
647 fwalk = fib_walk_get_from_node(node);
650 * check whether the walk context can be merged with another,
651 * or whether it needs to be appended.
653 vec_foreach(old, fwalk->fw_ctx)
656 * we can merge walks if the reason for the walk is the same.
658 if (old->fnbw_reason == ctx->fnbw_reason)
661 * copy the largest of the depth values. in the presence of a loop,
662 * the same walk will merge with itself. if we take the smaller depth
663 * then it will never end.
665 old->fnbw_depth = ((old->fnbw_depth >= ctx->fnbw_depth) ?
673 * walks could not be merged, this means that the walk in front needs to
674 * perform a different action to this one that has caught up. the one in front
675 * was scheduled first so append the new walk context to the back of the list.
677 vec_add1(fwalk->fw_ctx, *ctx);
680 return (FIB_NODE_BACK_WALK_MERGE);
684 * The FIB walk's graph node virtual function table
686 static const fib_node_vft_t fib_walk_vft = {
687 .fnv_get = fib_walk_get_node,
688 .fnv_last_lock = fib_walk_last_lock_gone,
689 .fnv_back_walk = fib_walk_back_walk_notify,
693 fib_walk_module_init (void)
695 fib_walk_priority_t prio;
697 FOR_EACH_FIB_WALK_PRIORITY(prio)
699 fib_walk_queues.fwqs_queues[prio].fwq_queue = fib_node_list_create();
702 fib_node_register_type(FIB_NODE_TYPE_WALK, &fib_walk_vft);
706 format_fib_walk (u8* s, va_list ap)
708 fib_node_index_t fwi = va_arg(ap, fib_node_index_t);
711 fwalk = fib_walk_get(fwi);
713 return (format(s, " parent:{%s:%d} visits:%d flags:%d",
714 fib_node_type_get_name(fwalk->fw_parent.fnp_type),
715 fwalk->fw_parent.fnp_index,
720 static clib_error_t *
721 fib_walk_show (vlib_main_t * vm,
722 unformat_input_t * input,
723 vlib_cli_command_t * cmd)
725 fib_walk_queue_stats_t wqs;
726 fib_walk_priority_t prio;
727 fib_node_ptr_t sibling;
728 fib_node_index_t fwi;
732 vlib_cli_output(vm, "FIB Walk queues:");
734 FOR_EACH_FIB_WALK_PRIORITY(prio)
736 vlib_cli_output(vm, " %U priority queue:",
737 format_fib_walk_priority, prio);
738 vlib_cli_output(vm, " Stats: ");
740 FOR_EACH_FIB_WALK_QUEUE_STATS(wqs)
742 vlib_cli_output(vm, " %U:%d",
743 format_fib_walk_queue_stats, wqs,
744 fib_walk_queues.fwqs_queues[prio].fwq_stats[wqs]);
746 vlib_cli_output(vm, " Occupancy:%d",
747 fib_node_list_get_size(
748 fib_walk_queues.fwqs_queues[prio].fwq_queue));
750 more_elts = fib_node_list_get_front(
751 fib_walk_queues.fwqs_queues[prio].fwq_queue,
756 ASSERT(FIB_NODE_INDEX_INVALID != sibling.fnp_index);
757 ASSERT(FIB_NODE_TYPE_WALK == sibling.fnp_type);
759 fwi = sibling.fnp_index;
760 fwalk = fib_walk_get(fwi);
762 vlib_cli_output(vm, " %U", format_fib_walk, fwi);
764 more_elts = fib_node_list_elt_get_next(fwalk->fw_prio_sibling,
771 VLIB_CLI_COMMAND (fib_walk_show_command, static) = {
772 .path = "show fib walk",
773 .short_help = "show fib walk",
774 .function = fib_walk_show,