aboutsummaryrefslogtreecommitdiffstats
path: root/vnet/vnet/fib/fib_walk.c
diff options
context:
space:
mode:
Diffstat (limited to 'vnet/vnet/fib/fib_walk.c')
-rw-r--r--vnet/vnet/fib/fib_walk.c775
1 files changed, 775 insertions, 0 deletions
diff --git a/vnet/vnet/fib/fib_walk.c b/vnet/vnet/fib/fib_walk.c
new file mode 100644
index 00000000000..79e3ad0b242
--- /dev/null
+++ b/vnet/vnet/fib/fib_walk.c
@@ -0,0 +1,775 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_walk.h>
+#include <vnet/fib/fib_node_list.h>
+
+/**
+ * The flags on a walk
+ */
+typedef enum fib_walk_flags_t_
+{
+ /**
+ * A synchronous walk.
+ * This walk will run to completion, i.e. visit ALL the children.
+ * It is a depth first traversal of the graph.
+ */
+ FIB_WALK_FLAG_SYNC = (1 << 0),
+ /**
+ * An asynchronous walk.
+ * This walk will be scheduled to run in the background. It will thus visits
+ * the children at a later point in time.
+ * It is a depth first traversal of the graph.
+ */
+ FIB_WALK_FLAG_ASYNC = (1 << 1),
+ /**
+ * An indication that the walk is currently executing.
+ */
+ FIB_WALK_FLAG_EXECUTING = (1 << 2),
+} fib_walk_flags_t;
+
+/**
+ * A representation of a graph walk from a parent object to its children
+ */
+typedef struct fib_walk_t_
+{
+ /**
+ * FIB node linkage. This object is not in the FIB object graph,
+ * but it is present in other node's dependency lists, so it needs to
+ * be pointerable to.
+ */
+ fib_node_t fw_node;
+
+ /**
+ * the walk's flags
+ */
+ fib_walk_flags_t fw_flags;
+
+ /**
+ * Sibling index in the dependency list
+ */
+ u32 fw_dep_sibling;
+
+ /**
+ * Sibling index in the list of all walks
+ */
+ u32 fw_prio_sibling;
+
+ /**
+ * Pointer to the node whose dependants this walk is walking
+ */
+ fib_node_ptr_t fw_parent;
+
+ /**
+ * Number of nodes visited by this walk. saved for debugging purposes.
+ */
+ u32 fw_n_visits;
+
+ /**
+ * The reasons this walk is occuring.
+ * This is a vector ordered in time. The reasons and the front were started
+ * first, and so should be acted first when a node is visisted.
+ */
+ fib_node_back_walk_ctx_t *fw_ctx;
+} fib_walk_t;
+
+/**
+ * @brief The pool of all walk objects
+ */
+static fib_walk_t *fib_walk_pool;
+
+/**
+ * @brief There's only one event type sent to the walk process
+ */
+#define FIB_WALK_EVENT 0
+
+/**
+ * Statistics maintained per-walk queue
+ */
+typedef enum fib_walk_queue_stats_t_
+{
+ FIB_WALK_SCHEDULED,
+ FIB_WALK_COMPLETED,
+} fib_walk_queue_stats_t;
+#define FIB_WALK_QUEUE_STATS_NUM (FIB_WALK_COMPLETED+1)
+
+#define FIB_WALK_QUEUE_STATS { \
+ [FIB_WALK_SCHEDULED] = "scheduled", \
+ [FIB_WALK_COMPLETED] = "completed", \
+}
+
+#define FOR_EACH_FIB_WALK_QUEUE_STATS(_wqs) \
+ for ((_wqs) = FIB_WALK_SCHEDULED; \
+ (_wqs) < FIB_WALK_QUEUE_STATS_NUM; \
+ (_wqs)++)
+
+/**
+ * The names of the walk stats
+ */
+static const char * const fib_walk_queue_stats_names[] = FIB_WALK_QUEUE_STATS;
+
+/**
+ * A represenation of one queue of walk
+ */
+typedef struct fib_walk_queue_t_
+{
+ /**
+ * Qeuee stats
+ */
+ u64 fwq_stats[FIB_WALK_QUEUE_STATS_NUM];
+
+ /**
+ * The node list which acts as the queue
+ */
+ fib_node_list_t fwq_queue;
+} fib_walk_queue_t;
+
+/**
+ * A set of priority queues for outstanding walks
+ */
+typedef struct fib_walk_queues_t_
+{
+ fib_walk_queue_t fwqs_queues[FIB_WALK_PRIORITY_NUM];
+} fib_walk_queues_t;
+
+/**
+ * The global queues of outstanding walks
+ */
+static fib_walk_queues_t fib_walk_queues;
+
+/**
+ * The names of the walk priorities
+ */
+static const char * const fib_walk_priority_names[] = FIB_WALK_PRIORITIES;
+
+u8*
+format_fib_walk_priority (u8 *s, va_list ap)
+{
+ fib_walk_priority_t prio = va_arg(ap, fib_walk_priority_t);
+
+ ASSERT(prio < FIB_WALK_PRIORITY_NUM);
+
+ return (format(s, "%s", fib_walk_priority_names[prio]));
+}
+static u8*
+format_fib_walk_queue_stats (u8 *s, va_list ap)
+{
+ fib_walk_queue_stats_t wqs = va_arg(ap, fib_walk_queue_stats_t);
+
+ ASSERT(wqs < FIB_WALK_QUEUE_STATS_NUM);
+
+ return (format(s, "%s", fib_walk_queue_stats_names[wqs]));
+}
+
+static index_t
+fib_walk_get_index (fib_walk_t *fwalk)
+{
+ return (fwalk - fib_walk_pool);
+}
+
+static fib_walk_t *
+fib_walk_get (index_t fwi)
+{
+ return (pool_elt_at_index(fib_walk_pool, fwi));
+}
+
+/*
+ * not static so it can be used in the unit tests
+ */
+u32
+fib_walk_queue_get_size (fib_walk_priority_t prio)
+{
+ return (fib_node_list_get_size(fib_walk_queues.fwqs_queues[prio].fwq_queue));
+}
+
+static fib_node_index_t
+fib_walk_queue_get_front (fib_walk_priority_t prio)
+{
+ fib_node_ptr_t wp;
+
+ fib_node_list_get_front(fib_walk_queues.fwqs_queues[prio].fwq_queue, &wp);
+
+ return (wp.fnp_index);
+}
+
+static void
+fib_walk_destroy (fib_walk_t *fwalk)
+{
+ if (FIB_NODE_INDEX_INVALID != fwalk->fw_prio_sibling)
+ {
+ fib_node_list_elt_remove(fwalk->fw_prio_sibling);
+ }
+ fib_node_child_remove(fwalk->fw_parent.fnp_type,
+ fwalk->fw_parent.fnp_index,
+ fwalk->fw_dep_sibling);
+
+ fib_node_deinit(&fwalk->fw_node);
+ pool_put(fib_walk_pool, fwalk);
+}
+
+/**
+ * return code when advancing a walk
+ */
+typedef enum fib_walk_advance_rc_t_
+{
+ /**
+ * The walk is complete
+ */
+ FIB_WALK_ADVANCE_DONE,
+ /**
+ * the walk has more work
+ */
+ FIB_WALK_ADVANCE_MORE,
+ /**
+ * The walk merged with the one in front
+ */
+ FIB_WALK_ADVANCE_MERGE,
+} fib_walk_advance_rc_t;
+
+/**
+ * @brief Advance the walk one element in its work list
+ */
+static fib_walk_advance_rc_t
+fib_walk_advance (fib_node_index_t fwi)
+{
+ fib_node_back_walk_ctx_t *ctx;
+ fib_node_back_walk_rc_t wrc;
+ fib_node_ptr_t sibling;
+ fib_walk_t *fwalk;
+ int more_elts;
+
+ /*
+ * this walk function is re-entrant - walks acan spawn walks.
+ * fib_walk_t objects come from a pool, so they can realloc. we need
+ * to retch from said pool at the appropriate times.
+ */
+ fwalk = fib_walk_get(fwi);
+
+ more_elts = fib_node_list_elt_get_next(fwalk->fw_dep_sibling, &sibling);
+
+ if (more_elts)
+ {
+ vec_foreach(ctx, fwalk->fw_ctx)
+ {
+ wrc = fib_node_back_walk_one(&sibling, ctx);
+
+ fwalk = fib_walk_get(fwi);
+ fwalk->fw_n_visits++;
+
+ if (FIB_NODE_BACK_WALK_MERGE == wrc)
+ {
+ /*
+ * this walk has merged with the one further along the node's
+ * dependecy list.
+ */
+ return (FIB_WALK_ADVANCE_MERGE);
+ }
+ }
+ /*
+ * move foward to the next node to visit
+ */
+ more_elts = fib_node_list_advance(fwalk->fw_dep_sibling);
+ }
+
+ if (more_elts)
+ {
+ return (FIB_WALK_ADVANCE_MORE);
+ }
+
+ return (FIB_WALK_ADVANCE_DONE);
+}
+
+/**
+ * First guesses as to good values
+ */
+#define SHORT_SLEEP 1e-8
+#define LONG_SLEEP 1e-3
+#define QUOTA 1e-4
+
+/**
+ * @brief Service the queues
+ * This is not declared static so that it can be unit tested - i know i know...
+ */
+f64
+fib_walk_process_queues (vlib_main_t * vm,
+ const f64 quota)
+{
+ fib_walk_priority_t prio;
+ fib_walk_advance_rc_t rc;
+ fib_node_index_t fwi;
+ fib_walk_t *fwalk;
+
+ f64 sleep_time, start_time; // , vector_rate;
+
+ start_time = vlib_time_now(vm);
+
+ FOR_EACH_FIB_WALK_PRIORITY(prio)
+ {
+ while (0 != fib_walk_queue_get_size(prio))
+ {
+ fwi = fib_walk_queue_get_front(prio);
+
+ /*
+ * set this walk as executing
+ */
+ fwalk = fib_walk_get(fwi);
+ fwalk->fw_flags |= FIB_WALK_FLAG_EXECUTING;
+
+ do
+ {
+ rc = fib_walk_advance(fwi);
+ } while (((vlib_time_now(vm) - start_time) < quota) &&
+ (FIB_WALK_ADVANCE_MORE == rc));
+
+ /*
+ * if this walk has no more work then pop it from the queue
+ * and move on to the next.
+ */
+ if (FIB_WALK_ADVANCE_MORE != rc)
+ {
+ fwalk = fib_walk_get(fwi);
+ fib_walk_destroy(fwalk);
+ fib_walk_queues.fwqs_queues[prio].fwq_stats[FIB_WALK_COMPLETED]++;
+ }
+ else
+ {
+ /*
+ * passed our work quota. sleep time.
+ */
+ fwalk = fib_walk_get(fwi);
+ fwalk->fw_flags &= ~FIB_WALK_FLAG_EXECUTING;
+ sleep_time = SHORT_SLEEP;
+ goto that_will_do_for_now;
+ }
+ }
+ }
+ /*
+ * got to the end of all the work
+ */
+ sleep_time = LONG_SLEEP;
+
+that_will_do_for_now:
+ return (sleep_time);
+}
+
+/**
+ * @brief The 'fib-walk' process's main loop.
+ */
+static uword
+fib_walk_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * f)
+{
+ f64 sleep_time;
+
+ sleep_time = SHORT_SLEEP;
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock(vm, sleep_time);
+
+ /*
+ * there may be lots of event queued between the processes,
+ * but the walks we want to schedule are in the priority queues,
+ * so we ignore the process events.
+ */
+ vlib_process_get_events(vm, NULL);
+
+ sleep_time = fib_walk_process_queues(vm, QUOTA);
+ }
+
+ /*
+ * Unreached
+ */
+ ASSERT(!"WTF");
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (fib_walk_process_node,static) = {
+ .function = fib_walk_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "fib-walk",
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Allocate a new walk object
+ */
+static fib_walk_t *
+fib_walk_alloc (fib_node_type_t parent_type,
+ fib_node_index_t parent_index,
+ fib_walk_flags_t flags,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ fib_walk_t *fwalk;
+
+ pool_get(fib_walk_pool, fwalk);
+
+ fib_node_init(&fwalk->fw_node, FIB_NODE_TYPE_WALK);
+
+ fwalk->fw_flags = flags;
+ fwalk->fw_dep_sibling = FIB_NODE_INDEX_INVALID;
+ fwalk->fw_prio_sibling = FIB_NODE_INDEX_INVALID;
+ fwalk->fw_parent.fnp_index = parent_index;
+ fwalk->fw_parent.fnp_type = parent_type;
+ fwalk->fw_ctx = NULL;
+
+ /*
+ * make a copy of the backwalk context so the depth count remains
+ * the same for each sibling visitsed. This is important in the case
+ * where a parents has a loop via one child, but all the others are not.
+ * if the looped child were visited first, the depth count would exceed, the
+ * max and the walk would terminate before it reached the other siblings.
+ */
+ vec_add1(fwalk->fw_ctx, *ctx);
+
+ return (fwalk);
+}
+
+/**
+ * @brief Enqueue a walk onto the appropriate priority queue. Then signal
+ * the background process there is work to do.
+ */
+static index_t
+fib_walk_prio_queue_enquue (fib_walk_priority_t prio,
+ fib_walk_t *fwalk)
+{
+ index_t sibling;
+
+ sibling = fib_node_list_push_front(fib_walk_queues.fwqs_queues[prio].fwq_queue,
+ 0,
+ FIB_NODE_TYPE_WALK,
+ fib_walk_get_index(fwalk));
+ fib_walk_queues.fwqs_queues[prio].fwq_stats[FIB_WALK_SCHEDULED]++;
+
+ /*
+ * poke the fib-walk process to perform the async walk.
+ * we are not passing it specific data, hence the last two args,
+ * the process will drain the queues
+ */
+ vlib_process_signal_event(vlib_get_main(),
+ fib_walk_process_node.index,
+ FIB_WALK_EVENT,
+ FIB_WALK_EVENT);
+
+ return (sibling);
+}
+
+void
+fib_walk_async (fib_node_type_t parent_type,
+ fib_node_index_t parent_index,
+ fib_walk_priority_t prio,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ fib_walk_t *fwalk;
+
+ if (FIB_NODE_GRAPH_MAX_DEPTH < ++ctx->fnbw_depth)
+ {
+ /*
+ * The walk has reached the maximum depth. there is a loop in the graph.
+ * bail.
+ */
+ return;
+ }
+
+ fwalk = fib_walk_alloc(parent_type,
+ parent_index,
+ FIB_WALK_FLAG_ASYNC,
+ ctx);
+
+ fwalk->fw_dep_sibling = fib_node_child_add(parent_type,
+ parent_index,
+ FIB_NODE_TYPE_WALK,
+ fib_walk_get_index(fwalk));
+
+ fwalk->fw_prio_sibling = fib_walk_prio_queue_enquue(prio, fwalk);
+}
+
+/**
+ * @brief Back walk all the children of a FIB node.
+ *
+ * note this is a synchronous depth first walk. Children visited may propagate
+ * the walk to thier children. Other children node types may not propagate,
+ * synchronously but instead queue the walk for later async completion.
+ */
+void
+fib_walk_sync (fib_node_type_t parent_type,
+ fib_node_index_t parent_index,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ fib_walk_advance_rc_t rc;
+ fib_node_index_t fwi;
+ fib_walk_t *fwalk;
+
+ if (FIB_NODE_GRAPH_MAX_DEPTH < ++ctx->fnbw_depth)
+ {
+ /*
+ * The walk has reached the maximum depth. there is a loop in the graph.
+ * bail.
+ */
+ return;
+ }
+
+ fwalk = fib_walk_alloc(parent_type,
+ parent_index,
+ FIB_WALK_FLAG_SYNC,
+ ctx);
+
+ fwalk->fw_dep_sibling = fib_node_child_add(parent_type,
+ parent_index,
+ FIB_NODE_TYPE_WALK,
+ fib_walk_get_index(fwalk));
+ fwi = fib_walk_get_index(fwalk);
+
+ while (1)
+ {
+ /*
+ * set this walk as executing
+ */
+ fwalk->fw_flags |= FIB_WALK_FLAG_EXECUTING;
+
+ do
+ {
+ rc = fib_walk_advance(fwi);
+ } while (FIB_WALK_ADVANCE_MORE == rc);
+
+
+ /*
+ * this walk function is re-entrant - walks can spawn walks.
+ * fib_walk_t objects come from a pool, so they can realloc. we need
+ * to re-fetch from said pool at the appropriate times.
+ */
+ fwalk = fib_walk_get(fwi);
+
+ if (FIB_WALK_ADVANCE_MERGE == rc)
+ {
+ /*
+ * this sync walk merged with an walk in front.
+ * by reqeusting a sync walk the client wanted all children walked,
+ * so we ditch the walk object in hand and continue with the one
+ * we merged into
+ */
+ fib_node_ptr_t merged_walk;
+
+ fib_node_list_elt_get_next(fwalk->fw_dep_sibling, &merged_walk);
+
+ ASSERT(FIB_NODE_INDEX_INVALID != merged_walk.fnp_index);
+ ASSERT(FIB_NODE_TYPE_WALK == merged_walk.fnp_type);
+
+ fib_walk_destroy(fwalk);
+
+ fwi = merged_walk.fnp_index;
+ fwalk = fib_walk_get(fwi);
+
+ if (FIB_WALK_FLAG_EXECUTING & fwalk->fw_flags)
+ {
+ /*
+ * we are executing a sync walk, and we have met with another
+ * walk that is also executing. since only one walk executs at once
+ * (there is no multi-threading) this implies we have met ourselves
+ * and hence the is a loop in the graph.
+ * This function is re-entrant, so the walk object we met is being
+ * acted on in a stack frame below this one. We must therefore not
+ * continue with it now, but let the stack unwind and along the
+ * appropriate frame to read the depth count and bail.
+ */
+ fwalk = NULL;
+ break;
+ }
+ }
+ else
+ {
+ /*
+ * the walk reached the end of the depdency list.
+ */
+ break;
+ }
+ }
+
+ if (NULL != fwalk)
+ {
+ fib_walk_destroy(fwalk);
+ }
+}
+
+static fib_node_t *
+fib_walk_get_node (fib_node_index_t index)
+{
+ fib_walk_t *fwalk;
+
+ fwalk = fib_walk_get(index);
+
+ return (&(fwalk->fw_node));
+}
+
+/**
+ * Walk objects are not parents, nor are they locked.
+ * are no-ops
+ */
+static void
+fib_walk_last_lock_gone (fib_node_t *node)
+{
+ ASSERT(0);
+}
+
+static fib_walk_t*
+fib_walk_get_from_node (fib_node_t *node)
+{
+ return ((fib_walk_t*)(((char*)node) -
+ STRUCT_OFFSET_OF(fib_walk_t, fw_node)));
+}
+
+/**
+ * @brief Another back walk has reach this walk.
+ * Megre them so there is only one left. It is this node being
+ * visited that will remain, so copy or merge the context onto it.
+ */
+static fib_node_back_walk_rc_t
+fib_walk_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ fib_node_back_walk_ctx_t *old;
+ fib_walk_t *fwalk;
+
+ fwalk = fib_walk_get_from_node(node);
+
+ /*
+ * check whether the walk context can be merge with another,
+ * or whether it needs to be appended.
+ */
+ vec_foreach(old, fwalk->fw_ctx)
+ {
+ /*
+ * we can merge walks if the reason for the walk is the same.
+ */
+ if (old->fnbw_reason == ctx->fnbw_reason)
+ {
+ /*
+ * copy the largest of the depth values. in the presence of a loop,
+ * the same walk will merge with itself. if we take the smaller depth
+ * then it will never end.
+ */
+ old->fnbw_depth = ((old->fnbw_depth >= ctx->fnbw_depth) ?
+ old->fnbw_depth :
+ ctx->fnbw_depth);
+ goto out;
+ }
+ }
+
+ /*
+ * walks could not be merged, this means that the walk infront needs to
+ * perform different action to this one that has caught up. the one in front
+ * was scheduled first so append the new walk context to the back of the list.
+ */
+ vec_add1(fwalk->fw_ctx, *ctx);
+
+out:
+ return (FIB_NODE_BACK_WALK_MERGE);
+}
+
+/**
+ * The FIB walk's graph node virtual function table
+ */
+static const fib_node_vft_t fib_walk_vft = {
+ .fnv_get = fib_walk_get_node,
+ .fnv_last_lock = fib_walk_last_lock_gone,
+ .fnv_back_walk = fib_walk_back_walk_notify,
+};
+
+void
+fib_walk_module_init (void)
+{
+ fib_walk_priority_t prio;
+
+ FOR_EACH_FIB_WALK_PRIORITY(prio)
+ {
+ fib_walk_queues.fwqs_queues[prio].fwq_queue = fib_node_list_create();
+ }
+
+ fib_node_register_type(FIB_NODE_TYPE_WALK, &fib_walk_vft);
+}
+
+static u8*
+format_fib_walk (u8* s, va_list ap)
+{
+ fib_node_index_t fwi = va_arg(ap, fib_node_index_t);
+ fib_walk_t *fwalk;
+
+ fwalk = fib_walk_get(fwi);
+
+ return (format(s, " parent:{%s:%d} visits:%d flags:%d",
+ fib_node_type_get_name(fwalk->fw_parent.fnp_type),
+ fwalk->fw_parent.fnp_index,
+ fwalk->fw_n_visits,
+ fwalk->fw_flags));
+}
+
+static clib_error_t *
+fib_walk_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ fib_walk_queue_stats_t wqs;
+ fib_walk_priority_t prio;
+ fib_node_ptr_t sibling;
+ fib_node_index_t fwi;
+ fib_walk_t *fwalk;
+ int more_elts;
+
+ vlib_cli_output(vm, "FIB Walk queues:");
+
+ FOR_EACH_FIB_WALK_PRIORITY(prio)
+ {
+ vlib_cli_output(vm, " %U priority queue:",
+ format_fib_walk_priority, prio);
+ vlib_cli_output(vm, " Stats: ");
+
+ FOR_EACH_FIB_WALK_QUEUE_STATS(wqs)
+ {
+ vlib_cli_output(vm, " %U:%d",
+ format_fib_walk_queue_stats, wqs,
+ fib_walk_queues.fwqs_queues[prio].fwq_stats[wqs]);
+ }
+ vlib_cli_output(vm, " Occupancy:%d",
+ fib_node_list_get_size(
+ fib_walk_queues.fwqs_queues[prio].fwq_queue));
+
+ more_elts = fib_node_list_get_front(
+ fib_walk_queues.fwqs_queues[prio].fwq_queue,
+ &sibling);
+
+ while (more_elts)
+ {
+ ASSERT(FIB_NODE_INDEX_INVALID != sibling.fnp_index);
+ ASSERT(FIB_NODE_TYPE_WALK == sibling.fnp_type);
+
+ fwi = sibling.fnp_index;
+ fwalk = fib_walk_get(fwi);
+
+ vlib_cli_output(vm, " %U", format_fib_walk, fwi);
+
+ more_elts = fib_node_list_elt_get_next(fwalk->fw_prio_sibling,
+ &sibling);
+ }
+ }
+ return (NULL);
+}
+
+VLIB_CLI_COMMAND (fib_walk_show_command, static) = {
+ .path = "show fib walk",
+ .short_help = "show fib walk",
+ .function = fib_walk_show,
+};