about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorDave Barach <dave@barachs.net>2019-01-26 09:50:26 -0500
committerFlorin Coras <florin.coras@gmail.com>2019-01-27 03:01:59 +0000
commit53fe4a79269671fd37bf8a1fbb147bcc99b04fab (patch)
treebc9f614004a9f51081f9a652a45b6fe51992ebf0
parentf23a885b892e432c7eac0f00c9e010fc21b6b211 (diff)
perfmon: collect data on selected thread(s)
Add missing pre-input node runtime fork and refork code. unix-epoll-input runs on all threads; each instance needs its own runtime stats.

Change-Id: I16b02e42d0c95f863161176c4bb9f9917bef809d
Signed-off-by: Dave Barach <dave@barachs.net>
-rw-r--r--src/plugins/perfmon/perfmon.c28
-rw-r--r--src/plugins/perfmon/perfmon.h3
-rw-r--r--src/plugins/perfmon/perfmon_periodic.c73
-rw-r--r--src/vlib/main.c8
-rw-r--r--src/vlib/threads.c44
5 files changed, 129 insertions, 27 deletions
diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c
index 359555705aa..7f621a1e671 100644
--- a/src/plugins/perfmon/perfmon.c
+++ b/src/plugins/perfmon/perfmon.c
@@ -301,11 +301,15 @@ set_pmc_command_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
perfmon_main_t *pm = &perfmon_main;
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ int num_threads = 1 + vtm->n_threads;
unformat_input_t _line_input, *line_input = &_line_input;
perfmon_event_config_t ec;
f64 delay;
u32 timeout_seconds;
u32 deadman;
+ int last_set;
+ clib_error_t *error;
vec_reset_length (pm->single_events_to_collect);
vec_reset_length (pm->paired_events_to_collect);
@@ -315,6 +319,8 @@ set_pmc_command_fn (vlib_main_t * vm,
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, "counter names required...");
+ clib_bitmap_zero (pm->thread_bitmap);
+
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (line_input, "timeout %u", &timeout_seconds))
@@ -343,6 +349,12 @@ set_pmc_command_fn (vlib_main_t * vm,
ec.pe_config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
vec_add1 (pm->paired_events_to_collect, ec);
}
+ else if (unformat (line_input, "threads %U",
+ unformat_bitmap_list, &pm->thread_bitmap))
+ ;
+ else if (unformat (line_input, "thread %U",
+ unformat_bitmap_list, &pm->thread_bitmap))
+ ;
else if (unformat (line_input, "%U", unformat_processor_event, pm, &ec))
{
vec_add1 (pm->single_events_to_collect, ec);
@@ -358,10 +370,20 @@ set_pmc_command_fn (vlib_main_t * vm,
foreach_perfmon_event
#undef _
else
- return clib_error_return (0, "unknown input '%U'",
- format_unformat_error, line_input);
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ unformat_free (line_input);
+ return error;
+ }
}
+ unformat_free (line_input);
+
+ last_set = clib_bitmap_last_set (pm->thread_bitmap);
+ if (last_set != ~0 && last_set >= num_threads)
+ return clib_error_return (0, "thread %d does not exist", last_set);
+
/* Stick paired events at the front of the (unified) list */
if (vec_len (pm->paired_events_to_collect) > 0)
{
@@ -410,7 +432,7 @@ set_pmc_command_fn (vlib_main_t * vm,
VLIB_CLI_COMMAND (set_pmc_command, static) =
{
.path = "set pmc",
- .short_help = "set pmc c1 [..., use \"show pmc events\"]",
+ .short_help = "set pmc [threads n,n1-n2] c1... [see \"show pmc events\"]",
.function = set_pmc_command_fn,
.is_mp_safe = 1,
};
diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h
index 9663dae36d1..9c4c34e36c1 100644
--- a/src/plugins/perfmon/perfmon.h
+++ b/src/plugins/perfmon/perfmon.h
@@ -121,6 +121,9 @@ typedef struct
/* Current perf_event file descriptors, per thread */
int **pm_fds;
+ /* thread bitmap */
+ uword *thread_bitmap;
+
/* Logging */
vlib_log_class_t log_class;
diff --git a/src/plugins/perfmon/perfmon_periodic.c b/src/plugins/perfmon/perfmon_periodic.c
index ccf3e9eed81..0811439d7eb 100644
--- a/src/plugins/perfmon/perfmon_periodic.c
+++ b/src/plugins/perfmon/perfmon_periodic.c
@@ -21,6 +21,7 @@
#include <asm/unistd.h>
#include <sys/ioctl.h>
+/* "not in glibc" */
static long
perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu,
int group_fd, unsigned long flags)
@@ -114,6 +115,7 @@ enable_current_events (perfmon_main_t * pm)
u32 my_thread_index = vm->thread_index;
u32 index;
int i, limit = 1;
+ int cpu;
if ((pm->current_event + 1) < vec_len (pm->single_events_to_collect))
limit = 2;
@@ -140,7 +142,9 @@ enable_current_events (perfmon_main_t * pm)
pe.exclude_hv = 1;
}
- fd = perf_event_open (&pe, 0, -1, -1, 0);
+ cpu = vm->cpu_index;
+
+ fd = perf_event_open (&pe, 0, cpu, -1, 0);
if (fd == -1)
{
clib_unix_warning ("event open: type %d config %d", c->pe_type,
@@ -237,25 +241,38 @@ static void
start_event (perfmon_main_t * pm, f64 now, uword event_data)
{
int i;
+ int last_set;
+ int all = 0;
pm->current_event = 0;
+
if (vec_len (pm->single_events_to_collect) == 0)
{
pm->state = PERFMON_STATE_OFF;
return;
}
+
+ last_set = clib_bitmap_last_set (pm->thread_bitmap);
+ all = (last_set == ~0);
+
pm->state = PERFMON_STATE_RUNNING;
clear_counters (pm);
- /* Start collection on this thread */
- enable_current_events (pm);
+ /* Start collection on thread 0? */
+ if (all || clib_bitmap_get (pm->thread_bitmap, 0))
+ {
+ /* Start collection on this thread */
+ enable_current_events (pm);
+ }
/* And also on worker threads */
for (i = 1; i < vec_len (vlib_mains); i++)
{
if (vlib_mains[i] == 0)
continue;
- vlib_mains[i]->worker_thread_main_loop_callback = (void *)
- worker_thread_start_event;
+
+ if (all || clib_bitmap_get (pm->thread_bitmap, i))
+ vlib_mains[i]->worker_thread_main_loop_callback = (void *)
+ worker_thread_start_event;
}
}
@@ -397,23 +414,46 @@ scrape_and_clear_counters (perfmon_main_t * pm)
}
static void
-handle_timeout (perfmon_main_t * pm, f64 now)
+handle_timeout (vlib_main_t * vm, perfmon_main_t * pm, f64 now)
{
int i;
- disable_events (pm);
+ int last_set, all;
+
+ last_set = clib_bitmap_last_set (pm->thread_bitmap);
+ all = (last_set == ~0);
+
+ if (all || clib_bitmap_get (pm->thread_bitmap, 0))
+ disable_events (pm);
/* And also on worker threads */
for (i = 1; i < vec_len (vlib_mains); i++)
{
if (vlib_mains[i] == 0)
continue;
- vlib_mains[i]->worker_thread_main_loop_callback = (void *)
- worker_thread_stop_event;
+ if (all || clib_bitmap_get (pm->thread_bitmap, i))
+ vlib_mains[i]->worker_thread_main_loop_callback = (void *)
+ worker_thread_stop_event;
}
- /* Short delay to make sure workers have stopped collection */
+ /* Make sure workers have stopped collection */
if (i > 1)
- vlib_process_suspend (pm->vlib_main, 1e-3);
+ {
+ f64 deadman = vlib_time_now (vm) + 1.0;
+
+ for (i = 1; i < vec_len (vlib_mains); i++)
+ {
+ /* Has the worker actually stopped collecting data? */
+ while (vlib_mains[i]->worker_thread_main_loop_callback)
+ {
+ if (vlib_time_now (vm) > deadman)
+ {
+ clib_warning ("Thread %d deadman timeout!", i);
+ break;
+ }
+ vlib_process_suspend (pm->vlib_main, 1e-3);
+ }
+ }
+ }
scrape_and_clear_counters (pm);
pm->current_event += pm->n_active;
if (pm->current_event >= vec_len (pm->single_events_to_collect))
@@ -422,15 +462,18 @@ handle_timeout (perfmon_main_t * pm, f64 now)
pm->state = PERFMON_STATE_OFF;
return;
}
- enable_current_events (pm);
+
+ if (all || clib_bitmap_get (pm->thread_bitmap, 0))
+ enable_current_events (pm);
/* And also on worker threads */
for (i = 1; i < vec_len (vlib_mains); i++)
{
if (vlib_mains[i] == 0)
continue;
- vlib_mains[i]->worker_thread_main_loop_callback = (void *)
- worker_thread_start_event;
+ if (all || clib_bitmap_get (pm->thread_bitmap, i))
+ vlib_mains[i]->worker_thread_main_loop_callback = (void *)
+ worker_thread_start_event;
}
}
@@ -464,7 +507,7 @@ perfmon_periodic_process (vlib_main_t * vm,
/* Handle timeout */
case ~0:
- handle_timeout (pm, now);
+ handle_timeout (vm, pm, now);
break;
default:
diff --git a/src/vlib/main.c b/src/vlib/main.c
index 0e480fabe2a..3048a0d72de 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -1105,8 +1105,6 @@ dispatch_pcap_trace (vlib_main_t * vm,
}
}
-u64 oingo0, oingo1;
-
static_always_inline u64
dispatch_node (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1207,12 +1205,6 @@ dispatch_node (vlib_main_t * vm,
vm->main_loop_vectors_processed += n;
vm->main_loop_nodes_processed += n > 0;
- if (pmc_delta[0] || pmc_delta[1])
- {
- oingo0 += pmc_delta[0];
- oingo1 += pmc_delta[1];
- }
-
v = vlib_node_runtime_update_stats (vm, node,
/* n_calls */ 1,
/* n_vectors */ n,
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index 45e4d89b7ab..e6ac6db543a 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -801,7 +801,7 @@ start_workers (vlib_main_t * vm)
/* fork the frame dispatch queue */
nm_clone->pending_frames = 0;
- vec_validate (nm_clone->pending_frames, 10); /* $$$$$?????? */
+ vec_validate (nm_clone->pending_frames, 10);
_vec_len (nm_clone->pending_frames) = 0;
/* fork nodes */
@@ -850,6 +850,21 @@ start_workers (vlib_main_t * vm)
n->runtime_data_bytes));
}
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT] =
+ vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT],
+ CLIB_CACHE_LINE_BYTES);
+ vec_foreach (rt,
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
+ {
+ vlib_node_t *n = vlib_get_node (vm, rt->node_index);
+ rt->thread_index = vm_clone->thread_index;
+ /* copy initial runtime_data from node */
+ if (n->runtime_data && n->runtime_data_bytes > 0)
+ clib_memcpy (rt->runtime_data, n->runtime_data,
+ clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
+ n->runtime_data_bytes));
+ }
+
nm_clone->processes = vec_dup_aligned (nm->processes,
CLIB_CACHE_LINE_BYTES);
@@ -1173,6 +1188,33 @@ vlib_worker_thread_node_refork (void)
vec_free (old_rt);
+ /* re-clone pre-input nodes */
+ old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT];
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT] =
+ vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT],
+ CLIB_CACHE_LINE_BYTES);
+
+ vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
+ {
+ vlib_node_t *n = vlib_get_node (vm, rt->node_index);
+ rt->thread_index = vm_clone->thread_index;
+ /* copy runtime_data, will be overwritten later for existing rt */
+ if (n->runtime_data && n->runtime_data_bytes > 0)
+ clib_memcpy_fast (rt->runtime_data, n->runtime_data,
+ clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
+ n->runtime_data_bytes));
+ }
+
+ for (j = 0; j < vec_len (old_rt); j++)
+ {
+ rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
+ rt->state = old_rt[j].state;
+ clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data,
+ VLIB_NODE_RUNTIME_DATA_SIZE);
+ }
+
+ vec_free (old_rt);
+
nm_clone->processes = vec_dup_aligned (nm->processes,
CLIB_CACHE_LINE_BYTES);
}