summaryrefslogtreecommitdiffstats
path: root/src/plugins/perfmon/perfmon.c
diff options
context:
space:
mode:
authorDave Barach <dave@barachs.net>2019-01-24 10:34:24 -0500
committerDamjan Marion <dmarion@me.com>2019-01-24 16:19:04 +0000
commitec595ef02639005b34334097af76b41ceef3dca5 (patch)
treeb8b752d9c9371b9ea75a9a28bfa1a3a6e7494b18 /src/plugins/perfmon/perfmon.c
parent22f23ae802f6dc654dbef27340c67773eb8be8c3 (diff)
perfmon plugin: 2-way parallel stat collection
As a FUD reduction measure, this patch implements 2-way parallel counter collection. Synthetic stat component counter pairs run at the same time. Running two counters (of any kind) at the same time naturally reduces the aggregate time required by approximately a factor of 2, depending on whether an even or odd number of stats have been requested. I don't completely buy the argument that computing synthetic stats such as instructions-per-clock will be inaccurate if component counter values are collected sequentially. Given a uniform traffic pattern, it must make no difference. As the collection interval increases, the difference between serial and parallel component counter collection will approach zero; see also the central limit theorem. Change-Id: I36ebdcf125e8882cca8a1929ec58f17fba1ad8f1 Signed-off-by: Dave Barach <dave@barachs.net>
Diffstat (limited to 'src/plugins/perfmon/perfmon.c')
-rw-r--r--src/plugins/perfmon/perfmon.c62
1 files changed, 41 insertions, 21 deletions
diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c
index c6a80224e0e..359555705aa 100644
--- a/src/plugins/perfmon/perfmon.c
+++ b/src/plugins/perfmon/perfmon.c
@@ -157,10 +157,16 @@ perfmon_init (vlib_main_t * vm)
pm->log_class = vlib_log_register_class ("perfmon", 0);
/* Default data collection interval */
- pm->timeout_interval = 3.0;
- vec_validate (pm->pm_fds, vec_len (vlib_mains) - 1);
- vec_validate (pm->perf_event_pages, vec_len (vlib_mains) - 1);
- vec_validate (pm->rdpmc_indices, vec_len (vlib_mains) - 1);
+ pm->timeout_interval = 2.0; /* seconds */
+ vec_validate (pm->pm_fds, 1);
+ vec_validate (pm->pm_fds[0], vec_len (vlib_mains) - 1);
+ vec_validate (pm->pm_fds[1], vec_len (vlib_mains) - 1);
+ vec_validate (pm->perf_event_pages, 1);
+ vec_validate (pm->perf_event_pages[0], vec_len (vlib_mains) - 1);
+ vec_validate (pm->perf_event_pages[1], vec_len (vlib_mains) - 1);
+ vec_validate (pm->rdpmc_indices, 1);
+ vec_validate (pm->rdpmc_indices[0], vec_len (vlib_mains) - 1);
+ vec_validate (pm->rdpmc_indices[1], vec_len (vlib_mains) - 1);
pm->page_size = getpagesize ();
ht = pm->perfmon_table = 0;
@@ -297,10 +303,12 @@ set_pmc_command_fn (vlib_main_t * vm,
perfmon_main_t *pm = &perfmon_main;
unformat_input_t _line_input, *line_input = &_line_input;
perfmon_event_config_t ec;
+ f64 delay;
u32 timeout_seconds;
u32 deadman;
- vec_reset_length (pm->events_to_collect);
+ vec_reset_length (pm->single_events_to_collect);
+ vec_reset_length (pm->paired_events_to_collect);
pm->ipc_event_index = ~0;
pm->mispredict_event_index = ~0;
@@ -316,28 +324,28 @@ set_pmc_command_fn (vlib_main_t * vm,
ec.name = "instructions";
ec.pe_type = PERF_TYPE_HARDWARE;
ec.pe_config = PERF_COUNT_HW_INSTRUCTIONS;
- pm->ipc_event_index = vec_len (pm->events_to_collect);
- vec_add1 (pm->events_to_collect, ec);
+ pm->ipc_event_index = vec_len (pm->paired_events_to_collect);
+ vec_add1 (pm->paired_events_to_collect, ec);
ec.name = "cpu-cycles";
ec.pe_type = PERF_TYPE_HARDWARE;
ec.pe_config = PERF_COUNT_HW_CPU_CYCLES;
- vec_add1 (pm->events_to_collect, ec);
+ vec_add1 (pm->paired_events_to_collect, ec);
}
else if (unformat (line_input, "branch-mispredict-rate"))
{
ec.name = "branch-misses";
ec.pe_type = PERF_TYPE_HARDWARE;
ec.pe_config = PERF_COUNT_HW_BRANCH_MISSES;
- pm->mispredict_event_index = vec_len (pm->events_to_collect);
- vec_add1 (pm->events_to_collect, ec);
+ pm->mispredict_event_index = vec_len (pm->paired_events_to_collect);
+ vec_add1 (pm->paired_events_to_collect, ec);
ec.name = "branches";
ec.pe_type = PERF_TYPE_HARDWARE;
ec.pe_config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
- vec_add1 (pm->events_to_collect, ec);
+ vec_add1 (pm->paired_events_to_collect, ec);
}
else if (unformat (line_input, "%U", unformat_processor_event, pm, &ec))
{
- vec_add1 (pm->events_to_collect, ec);
+ vec_add1 (pm->single_events_to_collect, ec);
}
#define _(type,event,str) \
else if (unformat (line_input, str)) \
@@ -345,7 +353,7 @@ set_pmc_command_fn (vlib_main_t * vm,
ec.name = str; \
ec.pe_type = type; \
ec.pe_config = event; \
- vec_add1 (pm->events_to_collect, ec); \
+ vec_add1 (pm->single_events_to_collect, ec); \
}
foreach_perfmon_event
#undef _
@@ -354,21 +362,33 @@ set_pmc_command_fn (vlib_main_t * vm,
format_unformat_error, line_input);
}
- if (vec_len (pm->events_to_collect) == 0)
+ /* Stick paired events at the front of the (unified) list */
+ if (vec_len (pm->paired_events_to_collect) > 0)
+ {
+ perfmon_event_config_t *tmp;
+ /* first 2n events are pairs... */
+ vec_append (pm->paired_events_to_collect, pm->single_events_to_collect);
+ tmp = pm->single_events_to_collect;
+ pm->single_events_to_collect = pm->paired_events_to_collect;
+ pm->paired_events_to_collect = tmp;
+ }
+
+ if (vec_len (pm->single_events_to_collect) == 0)
return clib_error_return (0, "no events specified...");
+ /* Figure out how long data collection will take */
+ delay =
+ ((f64) vec_len (pm->single_events_to_collect)) * pm->timeout_interval;
+ delay /= 2.0; /* collect 2 stats at once */
+
vlib_cli_output (vm, "Start collection for %d events, wait %.2f seconds",
- vec_len (pm->events_to_collect),
- (f64) (vec_len (pm->events_to_collect))
- * pm->timeout_interval);
+ vec_len (pm->single_events_to_collect), delay);
vlib_process_signal_event (pm->vlib_main, perfmon_periodic_node.index,
PERFMON_START, 0);
/* Coarse-grained wait */
- vlib_process_suspend (vm,
- ((f64) (vec_len (pm->events_to_collect)
- * pm->timeout_interval)));
+ vlib_process_suspend (vm, delay);
deadman = 0;
/* Reasonable to guess that collection may not be quite done... */
@@ -438,7 +458,7 @@ format_capture (u8 * s, va_list * args)
if (i == pm->ipc_event_index)
{
f64 ipc_rate;
- ASSERT (i + 1 < vec_len (c->counter_names));
+ ASSERT ((i + 1) < vec_len (c->counter_names));
if (c->counter_values[i + 1] > 0)
ipc_rate = (f64) c->counter_values[i]