aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/vlib/main.h9
-rw-r--r--src/vlib/threads.c313
-rw-r--r--src/vlib/threads.h29
-rw-r--r--src/vlibapi/api_common.h6
-rw-r--r--src/vlibapi/api_shared.c10
-rw-r--r--src/vlibmemory/memory_vlib.c5
-rw-r--r--src/vpp/vnet/main.c8
7 files changed, 367 insertions, 13 deletions
diff --git a/src/vlib/main.h b/src/vlib/main.h
index 4c0cde3ff45..fb67334e4e2 100644
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -199,6 +199,15 @@ typedef struct vlib_main_t
*/
int need_vlib_worker_thread_node_runtime_update;
+ /*
+ * Barrier epoch - Set to current time, each time barrier_sync or
+ * barrier_release is called with zero recursion.
+ */
+ f64 barrier_epoch;
+
+ /* Earliest barrier can be closed again */
+ f64 barrier_no_close_before;
+
} vlib_main_t;
/* Global main structure. */
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index 6cd325b3e2c..2d9ce84a7cb 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -35,6 +35,222 @@ vl (void *p)
vlib_worker_thread_t *vlib_worker_threads;
vlib_thread_main_t vlib_thread_main;
+/*
+ * Barrier tracing can be enabled on a normal build to collect information
+ * on barrier use, including timings and call stacks. Deliberately not
+ * keyed off CLIB_DEBUG, because that can add significant overhead which
+ * imapacts observed timings.
+ */
+
+#ifdef BARRIER_TRACING
+ /*
+ * Output of barrier tracing can be to syslog or elog as suits
+ */
+#ifdef BARRIER_TRACING_ELOG
+static u32
+elog_id_for_msg_name (const char *msg_name)
+{
+ uword *p, r;
+ static uword *h;
+ u8 *name_copy;
+
+ if (!h)
+ h = hash_create_string (0, sizeof (uword));
+
+ p = hash_get_mem (h, msg_name);
+ if (p)
+ return p[0];
+ r = elog_string (&vlib_global_main.elog_main, "%s", msg_name);
+
+ name_copy = format (0, "%s%c", msg_name, 0);
+
+ hash_set_mem (h, name_copy, r);
+
+ return r;
+}
+
+ /*
+ * elog Barrier trace functions, which are nulled out if BARRIER_TRACING isn't
+ * defined
+ */
+
+static inline void
+barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed)
+{
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "barrier <%d#%s(O:%dus:%dus)(%dus)",
+ .format_args = "i4T4i4i4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 count, caller, t_entry, t_open, t_closed;
+ } *ed = 0;
+
+ ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+ ed->count = (int) vlib_worker_threads[0].barrier_sync_count;
+ ed->caller = elog_id_for_msg_name (vlib_worker_threads[0].barrier_caller);
+ ed->t_entry = (int) (1000000.0 * t_entry);
+ ed->t_open = (int) (1000000.0 * t_open);
+ ed->t_closed = (int) (1000000.0 * t_closed);
+}
+
+static inline void
+barrier_trace_sync_rec (f64 t_entry)
+{
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "barrier <%d(%dus)%s",
+ .format_args = "i4i4T4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 depth, t_entry, caller;
+ } *ed = 0;
+
+ ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+ ed->depth = (int) vlib_worker_threads[0].recursion_level - 1;
+ ed->t_entry = (int) (1000000.0 * t_entry);
+ ed->caller = elog_id_for_msg_name (vlib_worker_threads[0].barrier_caller);
+}
+
+static inline void
+barrier_trace_release_rec (f64 t_entry)
+{
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "barrier (%dus)%d>",
+ .format_args = "i4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 t_entry, depth;
+ } *ed = 0;
+
+ ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+ ed->t_entry = (int) (1000000.0 * t_entry);
+ ed->depth = (int) vlib_worker_threads[0].recursion_level;
+}
+
+static inline void
+barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main)
+{
+ /* *INDENT-OFF* */
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "barrier (%dus){%d}(C:%dus)#%d>",
+ .format_args = "i4i4i4i4",
+ };
+ /* *INDENT-ON* */
+ struct
+ {
+ u32 t_entry, t_update_main, t_closed_total, count;
+ } *ed = 0;
+
+ ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+ ed->t_entry = (int) (1000000.0 * t_entry);
+ ed->t_update_main = (int) (1000000.0 * t_update_main);
+ ed->t_closed_total = (int) (1000000.0 * t_closed_total);
+ ed->count = (int) vlib_worker_threads[0].barrier_sync_count;
+
+ /* Reset context for next trace */
+ vlib_worker_threads[0].barrier_context = NULL;
+}
+#else
+char barrier_trace[65536];
+char *btp = barrier_trace;
+
+ /*
+ * syslog Barrier trace functions, which are nulled out if BARRIER_TRACING
+ * isn't defined
+ */
+
+
+static inline void
+barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed)
+{
+ btp += sprintf (btp, "<%u#%s",
+ (unsigned int) vlib_worker_threads[0].barrier_sync_count,
+ vlib_worker_threads[0].barrier_caller);
+
+ if (vlib_worker_threads[0].barrier_context)
+ {
+ btp += sprintf (btp, "[%s]", vlib_worker_threads[0].barrier_context);
+
+ }
+
+ btp += sprintf (btp, "(O:%dus:%dus)(%dus):",
+ (int) (1000000.0 * t_entry),
+ (int) (1000000.0 * t_open), (int) (1000000.0 * t_closed));
+
+}
+
+static inline void
+barrier_trace_sync_rec (f64 t_entry)
+{
+ btp += sprintf (btp, "<%u(%dus)%s:",
+ (int) vlib_worker_threads[0].recursion_level - 1,
+ (int) (1000000.0 * t_entry),
+ vlib_worker_threads[0].barrier_caller);
+}
+
+static inline void
+barrier_trace_release_rec (f64 t_entry)
+{
+ btp += sprintf (btp, ":(%dus)%u>", (int) (1000000.0 * t_entry),
+ (int) vlib_worker_threads[0].recursion_level);
+}
+
+static inline void
+barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main)
+{
+
+ btp += sprintf (btp, ":(%dus)", (int) (1000000.0 * t_entry));
+ if (t_update_main > 0)
+ {
+ btp += sprintf (btp, "{%dus}", (int) (1000000.0 * t_update_main));
+ }
+
+ btp += sprintf (btp, "(C:%dus)#%u>",
+ (int) (1000000.0 * t_closed_total),
+ (int) vlib_worker_threads[0].barrier_sync_count);
+
+ /* Dump buffer to syslog, and reset for next trace */
+ fformat (stderr, "BTRC %s\n", barrier_trace);
+ btp = barrier_trace;
+ vlib_worker_threads[0].barrier_context = NULL;
+}
+#endif
+#else
+
+ /* Null functions for default case where barrier tracing isn't used */
+static inline void
+barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed)
+{
+}
+
+static inline void
+barrier_trace_sync_rec (f64 t_entry)
+{
+}
+
+static inline void
+barrier_trace_release_rec (f64 t_entry)
+{
+}
+
+static inline void
+barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main)
+{
+}
+#endif
+
uword
os_get_nthreads (void)
{
@@ -558,6 +774,10 @@ start_workers (vlib_main_t * vm)
*vlib_worker_threads->node_reforks_required = 0;
vm->need_vlib_worker_thread_node_runtime_update = 0;
+ /* init timing */
+ vm->barrier_epoch = 0;
+ vm->barrier_no_close_before = 0;
+
worker_thread_index = 1;
for (i = 0; i < vec_len (tm->registrations); i++)
@@ -790,6 +1010,7 @@ start_workers (vlib_main_t * vm)
VLIB_MAIN_LOOP_ENTER_FUNCTION (start_workers);
+
static inline void
worker_thread_node_runtime_update_internal (void)
{
@@ -993,7 +1214,6 @@ vlib_worker_thread_node_refork (void)
nm_clone->processes = vec_dup (nm->processes);
}
-
void
vlib_worker_thread_node_runtime_update (void)
{
@@ -1192,10 +1412,29 @@ vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which)
vlib_worker_thread_barrier_release (vm);
}
+ /*
+ * Enforce minimum open time to minimize packet loss due to Rx overflow,
+ * based on a test based heuristic that barrier should be open for at least
+ * 3 time as long as it is closed (with an upper bound of 1ms because by that
+ * point it is probably too late to make a difference)
+ */
+
+#ifndef BARRIER_MINIMUM_OPEN_LIMIT
+#define BARRIER_MINIMUM_OPEN_LIMIT 0.001
+#endif
+
+#ifndef BARRIER_MINIMUM_OPEN_FACTOR
+#define BARRIER_MINIMUM_OPEN_FACTOR 3
+#endif
+
void
-vlib_worker_thread_barrier_sync (vlib_main_t * vm)
+vlib_worker_thread_barrier_sync_int (vlib_main_t * vm)
{
f64 deadline;
+ f64 now;
+ f64 t_entry;
+ f64 t_open;
+ f64 t_closed;
u32 count;
if (vec_len (vlib_mains) < 2)
@@ -1205,29 +1444,55 @@ vlib_worker_thread_barrier_sync (vlib_main_t * vm)
count = vec_len (vlib_mains) - 1;
+ /* Record entry relative to last close */
+ now = vlib_time_now (vm);
+ t_entry = now - vm->barrier_epoch;
+
/* Tolerate recursive calls */
if (++vlib_worker_threads[0].recursion_level > 1)
- return;
+ {
+ barrier_trace_sync_rec (t_entry);
+ return;
+ }
vlib_worker_threads[0].barrier_sync_count++;
- deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
+ /* Enforce minimum barrier open time to minimize packet loss */
+ ASSERT (vm->barrier_no_close_before <= (now + BARRIER_MINIMUM_OPEN_LIMIT));
+ while ((now = vlib_time_now (vm)) < vm->barrier_no_close_before)
+ ;
+
+ /* Record time of closure */
+ t_open = now - vm->barrier_epoch;
+ vm->barrier_epoch = now;
+
+ deadline = now + BARRIER_SYNC_TIMEOUT;
*vlib_worker_threads->wait_at_barrier = 1;
while (*vlib_worker_threads->workers_at_barrier != count)
{
- if (vlib_time_now (vm) > deadline)
+ if ((now = vlib_time_now (vm)) > deadline)
{
fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__);
os_panic ();
}
}
+
+ t_closed = now - vm->barrier_epoch;
+
+ barrier_trace_sync (t_entry, t_open, t_closed);
+
}
void
vlib_worker_thread_barrier_release (vlib_main_t * vm)
{
f64 deadline;
+ f64 now;
+ f64 minimum_open;
+ f64 t_entry;
+ f64 t_closed_total;
+ f64 t_update_main = 0.0;
int refork_needed = 0;
if (vec_len (vlib_mains) < 2)
@@ -1235,8 +1500,15 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
ASSERT (vlib_get_thread_index () == 0);
+
+ now = vlib_time_now (vm);
+ t_entry = now - vm->barrier_epoch;
+
if (--vlib_worker_threads[0].recursion_level > 0)
- return;
+ {
+ barrier_trace_release_rec (t_entry);
+ return;
+ }
/* Update (all) node runtimes before releasing the barrier, if needed */
if (vm->need_vlib_worker_thread_node_runtime_update)
@@ -1249,15 +1521,17 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
refork_needed = 1;
clib_smp_atomic_add (vlib_worker_threads->node_reforks_required,
(vec_len (vlib_mains) - 1));
+ now = vlib_time_now (vm);
+ t_update_main = now - vm->barrier_epoch;
}
- deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
+ deadline = now + BARRIER_SYNC_TIMEOUT;
*vlib_worker_threads->wait_at_barrier = 0;
while (*vlib_worker_threads->workers_at_barrier > 0)
{
- if (vlib_time_now (vm) > deadline)
+ if ((now = vlib_time_now (vm)) > deadline)
{
fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__);
os_panic ();
@@ -1267,11 +1541,13 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
/* Wait for reforks before continuing */
if (refork_needed)
{
- deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
+ now = vlib_time_now (vm);
+
+ deadline = now + BARRIER_SYNC_TIMEOUT;
while (*vlib_worker_threads->node_reforks_required > 0)
{
- if (vlib_time_now (vm) > deadline)
+ if ((now = vlib_time_now (vm)) > deadline)
{
fformat (stderr, "%s: worker thread refork deadlock\n",
__FUNCTION__);
@@ -1279,6 +1555,23 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
}
}
}
+
+ t_closed_total = now - vm->barrier_epoch;
+
+ minimum_open = t_closed_total * BARRIER_MINIMUM_OPEN_FACTOR;
+
+ if (minimum_open > BARRIER_MINIMUM_OPEN_LIMIT)
+ {
+ minimum_open = BARRIER_MINIMUM_OPEN_LIMIT;
+ }
+
+ vm->barrier_no_close_before = now + minimum_open;
+
+ /* Record barrier epoch (used to enforce minimum open time) */
+ vm->barrier_epoch = now;
+
+ barrier_trace_release (t_entry, t_closed_total, t_update_main);
+
}
/*
diff --git a/src/vlib/threads.h b/src/vlib/threads.h
index c3f1cadee16..72340ee1a37 100644
--- a/src/vlib/threads.h
+++ b/src/vlib/threads.h
@@ -18,6 +18,22 @@
#include <vlib/main.h>
#include <linux/sched.h>
+/*
+ * To enable detailed tracing of barrier usage, including call stacks and
+ * timings, define BARRIER_TRACING here or in relevant TAGS. If also used
+ * with CLIB_DEBUG, timing will _not_ be representative of normal code
+ * execution.
+ *
+ */
+
+// #define BARRIER_TRACING 1
+
+/*
+ * Two options for barrier tracing output: syslog & elog.
+ */
+
+// #define BARRIER_TRACING_ELOG 1
+
extern vlib_main_t **vlib_mains;
void vlib_set_thread_name (char *name);
@@ -102,6 +118,10 @@ typedef struct
vlib_thread_registration_t *registration;
u8 *name;
u64 barrier_sync_count;
+#ifdef BARRIER_TRACING
+ const char *barrier_caller;
+ const char *barrier_context;
+#endif
volatile u32 *node_reforks_required;
long lwp;
@@ -179,7 +199,14 @@ u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts);
#define BARRIER_SYNC_TIMEOUT (1.0)
#endif
-void vlib_worker_thread_barrier_sync (vlib_main_t * vm);
+#ifdef BARRIER_TRACING
+#define vlib_worker_thread_barrier_sync(X) {vlib_worker_threads[0].barrier_caller=__FUNCTION__;vlib_worker_thread_barrier_sync_int(X);}
+#else
+#define vlib_worker_thread_barrier_sync(X) vlib_worker_thread_barrier_sync_int(X)
+#endif
+
+
+void vlib_worker_thread_barrier_sync_int (vlib_main_t * vm);
void vlib_worker_thread_barrier_release (vlib_main_t * vm);
void vlib_worker_thread_node_refork (void);
diff --git a/src/vlibapi/api_common.h b/src/vlibapi/api_common.h
index 651566aea29..bbeccfc2540 100644
--- a/src/vlibapi/api_common.h
+++ b/src/vlibapi/api_common.h
@@ -144,6 +144,12 @@ void vl_msg_api_queue_handler (unix_shared_memory_queue_t * q);
void vl_msg_api_barrier_sync (void) __attribute__ ((weak));
void vl_msg_api_barrier_release (void) __attribute__ ((weak));
+#ifdef BARRIER_TRACING
+void vl_msg_api_barrier_trace_context (const char *context)
+ __attribute__ ((weak));
+#else
+#define vl_msg_api_barrier_trace_context(X)
+#endif
void vl_msg_api_free (void *);
void vl_noop_handler (void *mp);
void vl_msg_api_increment_missing_client_counter (void);
diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c
index 5c1a9940dc7..59dc2375cf6 100644
--- a/src/vlibapi/api_shared.c
+++ b/src/vlibapi/api_shared.c
@@ -418,7 +418,10 @@ msg_handler_internal (api_main_t * am,
if (do_it)
{
if (!am->is_mp_safe[id])
- vl_msg_api_barrier_sync ();
+ {
+ vl_msg_api_barrier_trace_context (am->msg_names[id]);
+ vl_msg_api_barrier_sync ();
+ }
(*am->msg_handlers[id]) (the_msg);
if (!am->is_mp_safe[id])
vl_msg_api_barrier_release ();
@@ -498,7 +501,10 @@ vl_msg_api_handler_with_vm_node (api_main_t * am,
vl_msg_api_trace (am, am->rx_trace, the_msg);
if (!am->is_mp_safe[id])
- vl_msg_api_barrier_sync ();
+ {
+ vl_msg_api_barrier_trace_context (am->msg_names[id]);
+ vl_msg_api_barrier_sync ();
+ }
(*handler) (the_msg, vm, node);
if (!am->is_mp_safe[id])
vl_msg_api_barrier_release ();
diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c
index 688ce6044e0..55a90d641bc 100644
--- a/src/vlibmemory/memory_vlib.c
+++ b/src/vlibmemory/memory_vlib.c
@@ -1462,6 +1462,7 @@ _(TRACE_PLUGIN_MSG_IDS,trace_plugin_msg_ids)
static clib_error_t *
rpc_api_hookup (vlib_main_t * vm)
{
+ api_main_t *am = &api_main;
#define _(N,n) \
vl_msg_api_set_handlers(VL_API_##N, #n, \
vl_api_##n##_t_handler, \
@@ -1481,6 +1482,10 @@ rpc_api_hookup (vlib_main_t * vm)
sizeof(vl_api_##n##_t), 1 /* do trace */);
foreach_plugin_trace_msg;
#undef _
+
+ /* No reason to halt the parade to create a trace record... */
+ am->is_mp_safe[VL_API_TRACE_PLUGIN_MSG_IDS] = 1;
+
return 0;
}
diff --git a/src/vpp/vnet/main.c b/src/vpp/vnet/main.c
index 76371dbe996..b330f60fafe 100644
--- a/src/vpp/vnet/main.c
+++ b/src/vpp/vnet/main.c
@@ -294,6 +294,14 @@ os_exit (int code)
exit (code);
}
+#ifdef BARRIER_TRACING
+void
+vl_msg_api_barrier_trace_context (const char *context)
+{
+ vlib_worker_threads[0].barrier_context = context;
+}
+#endif
+
void
vl_msg_api_barrier_sync (void)
{