aboutsummaryrefslogtreecommitdiffstats
path: root/src/vlib/threads.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/vlib/threads.c')
-rw-r--r--src/vlib/threads.c313
1 files changed, 303 insertions, 10 deletions
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index 6cd325b3..2d9ce84a 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -35,6 +35,222 @@ vl (void *p)
vlib_worker_thread_t *vlib_worker_threads;
vlib_thread_main_t vlib_thread_main;
+/*
+ * Barrier tracing can be enabled on a normal build to collect information
+ * on barrier use, including timings and call stacks. Deliberately not
+ * keyed off CLIB_DEBUG, because that can add significant overhead which
+ * imapacts observed timings.
+ */
+
+#ifdef BARRIER_TRACING
+ /*
+ * Output of barrier tracing can be to syslog or elog as suits
+ */
+#ifdef BARRIER_TRACING_ELOG
+static u32
+elog_id_for_msg_name (const char *msg_name)
+{
+ uword *p, r;
+ static uword *h;
+ u8 *name_copy;
+
+ if (!h)
+ h = hash_create_string (0, sizeof (uword));
+
+ p = hash_get_mem (h, msg_name);
+ if (p)
+ return p[0];
+ r = elog_string (&vlib_global_main.elog_main, "%s", msg_name);
+
+ name_copy = format (0, "%s%c", msg_name, 0);
+
+ hash_set_mem (h, name_copy, r);
+
+ return r;
+}
+
+ /*
+ * elog Barrier trace functions, which are nulled out if BARRIER_TRACING isn't
+ * defined
+ */
+
+static inline void
+barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed)
+{
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "barrier <%d#%s(O:%dus:%dus)(%dus)",
+ .format_args = "i4T4i4i4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 count, caller, t_entry, t_open, t_closed;
+ } *ed = 0;
+
+ ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+ ed->count = (int) vlib_worker_threads[0].barrier_sync_count;
+ ed->caller = elog_id_for_msg_name (vlib_worker_threads[0].barrier_caller);
+ ed->t_entry = (int) (1000000.0 * t_entry);
+ ed->t_open = (int) (1000000.0 * t_open);
+ ed->t_closed = (int) (1000000.0 * t_closed);
+}
+
+static inline void
+barrier_trace_sync_rec (f64 t_entry)
+{
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "barrier <%d(%dus)%s",
+ .format_args = "i4i4T4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 depth, t_entry, caller;
+ } *ed = 0;
+
+ ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+ ed->depth = (int) vlib_worker_threads[0].recursion_level - 1;
+ ed->t_entry = (int) (1000000.0 * t_entry);
+ ed->caller = elog_id_for_msg_name (vlib_worker_threads[0].barrier_caller);
+}
+
+static inline void
+barrier_trace_release_rec (f64 t_entry)
+{
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "barrier (%dus)%d>",
+ .format_args = "i4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 t_entry, depth;
+ } *ed = 0;
+
+ ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+ ed->t_entry = (int) (1000000.0 * t_entry);
+ ed->depth = (int) vlib_worker_threads[0].recursion_level;
+}
+
+static inline void
+barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main)
+{
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "barrier (%dus){%d}(C:%dus)#%d>",
+ .format_args = "i4i4i4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 t_entry, t_update_main, t_closed_total, count;
+ } *ed = 0;
+
+ ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+ ed->t_entry = (int) (1000000.0 * t_entry);
+ ed->t_update_main = (int) (1000000.0 * t_update_main);
+ ed->t_closed_total = (int) (1000000.0 * t_closed_total);
+ ed->count = (int) vlib_worker_threads[0].barrier_sync_count;
+
+ /* Reset context for next trace */
+ vlib_worker_threads[0].barrier_context = NULL;
+}
+#else
+char barrier_trace[65536];
+char *btp = barrier_trace;
+
+ /*
+ * syslog Barrier trace functions, which are nulled out if BARRIER_TRACING
+ * isn't defined
+ */
+
+
+static inline void
+barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed)
+{
+ btp += sprintf (btp, "<%u#%s",
+ (unsigned int) vlib_worker_threads[0].barrier_sync_count,
+ vlib_worker_threads[0].barrier_caller);
+
+ if (vlib_worker_threads[0].barrier_context)
+ {
+ btp += sprintf (btp, "[%s]", vlib_worker_threads[0].barrier_context);
+
+ }
+
+ btp += sprintf (btp, "(O:%dus:%dus)(%dus):",
+ (int) (1000000.0 * t_entry),
+ (int) (1000000.0 * t_open), (int) (1000000.0 * t_closed));
+
+}
+
+static inline void
+barrier_trace_sync_rec (f64 t_entry)
+{
+ btp += sprintf (btp, "<%u(%dus)%s:",
+ (int) vlib_worker_threads[0].recursion_level - 1,
+ (int) (1000000.0 * t_entry),
+ vlib_worker_threads[0].barrier_caller);
+}
+
+static inline void
+barrier_trace_release_rec (f64 t_entry)
+{
+ btp += sprintf (btp, ":(%dus)%u>", (int) (1000000.0 * t_entry),
+ (int) vlib_worker_threads[0].recursion_level);
+}
+
+static inline void
+barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main)
+{
+
+ btp += sprintf (btp, ":(%dus)", (int) (1000000.0 * t_entry));
+ if (t_update_main > 0)
+ {
+ btp += sprintf (btp, "{%dus}", (int) (1000000.0 * t_update_main));
+ }
+
+ btp += sprintf (btp, "(C:%dus)#%u>",
+ (int) (1000000.0 * t_closed_total),
+ (int) vlib_worker_threads[0].barrier_sync_count);
+
+ /* Dump buffer to syslog, and reset for next trace */
+ fformat (stderr, "BTRC %s\n", barrier_trace);
+ btp = barrier_trace;
+ vlib_worker_threads[0].barrier_context = NULL;
+}
+#endif
+#else
+
+ /* Null functions for default case where barrier tracing isn't used */
+static inline void
+barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed)
+{
+}
+
+static inline void
+barrier_trace_sync_rec (f64 t_entry)
+{
+}
+
+static inline void
+barrier_trace_release_rec (f64 t_entry)
+{
+}
+
+static inline void
+barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main)
+{
+}
+#endif
+
uword
os_get_nthreads (void)
{
@@ -558,6 +774,10 @@ start_workers (vlib_main_t * vm)
*vlib_worker_threads->node_reforks_required = 0;
vm->need_vlib_worker_thread_node_runtime_update = 0;
+ /* init timing */
+ vm->barrier_epoch = 0;
+ vm->barrier_no_close_before = 0;
+
worker_thread_index = 1;
for (i = 0; i < vec_len (tm->registrations); i++)
@@ -790,6 +1010,7 @@ start_workers (vlib_main_t * vm)
VLIB_MAIN_LOOP_ENTER_FUNCTION (start_workers);
+
static inline void
worker_thread_node_runtime_update_internal (void)
{
@@ -993,7 +1214,6 @@ vlib_worker_thread_node_refork (void)
nm_clone->processes = vec_dup (nm->processes);
}
-
void
vlib_worker_thread_node_runtime_update (void)
{
@@ -1192,10 +1412,29 @@ vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which)
vlib_worker_thread_barrier_release (vm);
}
+ /*
+ * Enforce minimum open time to minimize packet loss due to Rx overflow,
+ * based on a test based heuristic that barrier should be open for at least
+ * 3 time as long as it is closed (with an upper bound of 1ms because by that
+ * point it is probably too late to make a difference)
+ */
+
+#ifndef BARRIER_MINIMUM_OPEN_LIMIT
+#define BARRIER_MINIMUM_OPEN_LIMIT 0.001
+#endif
+
+#ifndef BARRIER_MINIMUM_OPEN_FACTOR
+#define BARRIER_MINIMUM_OPEN_FACTOR 3
+#endif
+
void
-vlib_worker_thread_barrier_sync (vlib_main_t * vm)
+vlib_worker_thread_barrier_sync_int (vlib_main_t * vm)
{
f64 deadline;
+ f64 now;
+ f64 t_entry;
+ f64 t_open;
+ f64 t_closed;
u32 count;
if (vec_len (vlib_mains) < 2)
@@ -1205,29 +1444,55 @@ vlib_worker_thread_barrier_sync (vlib_main_t * vm)
count = vec_len (vlib_mains) - 1;
+ /* Record entry relative to last close */
+ now = vlib_time_now (vm);
+ t_entry = now - vm->barrier_epoch;
+
/* Tolerate recursive calls */
if (++vlib_worker_threads[0].recursion_level > 1)
- return;
+ {
+ barrier_trace_sync_rec (t_entry);
+ return;
+ }
vlib_worker_threads[0].barrier_sync_count++;
- deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
+ /* Enforce minimum barrier open time to minimize packet loss */
+ ASSERT (vm->barrier_no_close_before <= (now + BARRIER_MINIMUM_OPEN_LIMIT));
+ while ((now = vlib_time_now (vm)) < vm->barrier_no_close_before)
+ ;
+
+ /* Record time of closure */
+ t_open = now - vm->barrier_epoch;
+ vm->barrier_epoch = now;
+
+ deadline = now + BARRIER_SYNC_TIMEOUT;
*vlib_worker_threads->wait_at_barrier = 1;
while (*vlib_worker_threads->workers_at_barrier != count)
{
- if (vlib_time_now (vm) > deadline)
+ if ((now = vlib_time_now (vm)) > deadline)
{
fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__);
os_panic ();
}
}
+
+ t_closed = now - vm->barrier_epoch;
+
+ barrier_trace_sync (t_entry, t_open, t_closed);
+
}
void
vlib_worker_thread_barrier_release (vlib_main_t * vm)
{
f64 deadline;
+ f64 now;
+ f64 minimum_open;
+ f64 t_entry;
+ f64 t_closed_total;
+ f64 t_update_main = 0.0;
int refork_needed = 0;
if (vec_len (vlib_mains) < 2)
@@ -1235,8 +1500,15 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
ASSERT (vlib_get_thread_index () == 0);
+
+ now = vlib_time_now (vm);
+ t_entry = now - vm->barrier_epoch;
+
if (--vlib_worker_threads[0].recursion_level > 0)
- return;
+ {
+ barrier_trace_release_rec (t_entry);
+ return;
+ }
/* Update (all) node runtimes before releasing the barrier, if needed */
if (vm->need_vlib_worker_thread_node_runtime_update)
@@ -1249,15 +1521,17 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
refork_needed = 1;
clib_smp_atomic_add (vlib_worker_threads->node_reforks_required,
(vec_len (vlib_mains) - 1));
+ now = vlib_time_now (vm);
+ t_update_main = now - vm->barrier_epoch;
}
- deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
+ deadline = now + BARRIER_SYNC_TIMEOUT;
*vlib_worker_threads->wait_at_barrier = 0;
while (*vlib_worker_threads->workers_at_barrier > 0)
{
- if (vlib_time_now (vm) > deadline)
+ if ((now = vlib_time_now (vm)) > deadline)
{
fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__);
os_panic ();
@@ -1267,11 +1541,13 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
/* Wait for reforks before continuing */
if (refork_needed)
{
- deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
+ now = vlib_time_now (vm);
+
+ deadline = now + BARRIER_SYNC_TIMEOUT;
while (*vlib_worker_threads->node_reforks_required > 0)
{
- if (vlib_time_now (vm) > deadline)
+ if ((now = vlib_time_now (vm)) > deadline)
{
fformat (stderr, "%s: worker thread refork deadlock\n",
__FUNCTION__);
@@ -1279,6 +1555,23 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
}
}
}
+
+ t_closed_total = now - vm->barrier_epoch;
+
+ minimum_open = t_closed_total * BARRIER_MINIMUM_OPEN_FACTOR;
+
+ if (minimum_open > BARRIER_MINIMUM_OPEN_LIMIT)
+ {
+ minimum_open = BARRIER_MINIMUM_OPEN_LIMIT;
+ }
+
+ vm->barrier_no_close_before = now + minimum_open;
+
+ /* Record barrier epoch (used to enforce minimum open time) */
+ vm->barrier_epoch = now;
+
+ barrier_trace_release (t_entry, t_closed_total, t_update_main);
+
}
/*