| author | Dave Barach <dave@barachs.net> | 2019-01-24 10:34:24 -0500 |
|---|---|---|
| committer | Damjan Marion <dmarion@me.com> | 2019-01-24 16:19:04 +0000 |
| commit | ec595ef02639005b34334097af76b41ceef3dca5 (patch) | |
| tree | b8b752d9c9371b9ea75a9a28bfa1a3a6e7494b18 /src/plugins/perfmon/perfmon.c | |
| parent | 22f23ae802f6dc654dbef27340c67773eb8be8c3 (diff) | |
perfmon plugin: 2-way parallel stat collection
As a FUD reduction measure, this patch implements 2-way parallel
counter collection. The component counters of each synthetic stat run
at the same time. Running two counters (of any kind) at the same time
cuts the aggregate collection time roughly in half: exactly in half
when an even number of stats has been requested, slightly less for an
odd number.
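
To make the arithmetic concrete, here is a minimal, hypothetical C sketch; nothing below appears in the patch, and `collection_seconds` is a made-up helper. With n requested stats dosed out two at a time over a fixed per-event interval, the wall-clock cost drops from n intervals to ceil(n/2):

```c
#include <stdio.h>

/* Wall-clock cost of collecting n_stats counters two at a time;
   the serial cost is simply n_stats * interval. */
static double
collection_seconds (unsigned n_stats, double interval)
{
  unsigned n_rounds = (n_stats + 1) / 2;	/* ceil (n / 2) */
  return (double) n_rounds * interval;
}

int
main (void)
{
  /* 2.0 matches the patch's new default timeout_interval */
  printf ("6 stats: %.1fs serial, %.1fs paired\n",
	  6 * 2.0, collection_seconds (6, 2.0));	/* 12.0 vs 6.0 */
  printf ("7 stats: %.1fs serial, %.1fs paired\n",
	  7 * 2.0, collection_seconds (7, 2.0));	/* 14.0 vs 8.0 */
  return 0;
}
```

Note that the patch itself estimates the wait as n * timeout_interval / 2.0 without the ceiling, then compensates with a deadman retry loop after the coarse-grained suspend.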
I don't completely buy the argument that computing synthetic stats
such as instructions-per-clock will be inaccurate if the component
counter values are collected sequentially. Given a uniform traffic
pattern, it should make no difference.
As the collection interval increases, the difference between serial
and parallel component counter collection will approach zero; see
also the Central Limit Theorem.
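
To illustrate that statistical argument, here is a hedged Monte-Carlo sketch; it is editorial illustration only, not code from the patch, and the workload model is invented. It estimates IPC from counters sampled in the same window ("parallel") versus consecutive windows ("serial") under a stationary workload; the discrepancy shrinks as the window grows:

```c
#include <stdio.h>
#include <stdlib.h>

static double
uniform_noise (void)
{
  /* uniform pseudo-random noise in [-0.1, 0.1] */
  return 0.2 * ((double) rand () / RAND_MAX) - 0.1;
}

int
main (void)
{
  const double true_ipc = 2.5;		/* model "ground truth" */
  const double cycles_per_tick = 1e6;
  unsigned window;

  srand (42);
  for (window = 10; window <= 100000; window *= 10)
    {
      double c1 = 0, i1 = 0, c2 = 0;
      unsigned t;
      for (t = 0; t < window; t++)
	{
	  /* window 1: cycles and instructions sampled together */
	  double c = cycles_per_tick * (1.0 + uniform_noise ());
	  c1 += c;
	  i1 += true_ipc * c * (1.0 + 0.1 * uniform_noise ());
	  /* window 2: cycles re-sampled later, as serial collection would */
	  c2 += cycles_per_tick * (1.0 + uniform_noise ());
	}
      printf ("window %6u ticks: parallel IPC %.4f, serial IPC %.4f\n",
	      window, i1 / c1, i1 / c2);
    }
  return 0;
}
```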
Change-Id: I36ebdcf125e8882cca8a1929ec58f17fba1ad8f1
Signed-off-by: Dave Barach <dave@barachs.net>
Diffstat (limited to 'src/plugins/perfmon/perfmon.c')
-rw-r--r-- | src/plugins/perfmon/perfmon.c | 62 |
1 file changed, 41 insertions, 21 deletions
```diff
diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c
index c6a80224e0e..359555705aa 100644
--- a/src/plugins/perfmon/perfmon.c
+++ b/src/plugins/perfmon/perfmon.c
@@ -157,10 +157,16 @@ perfmon_init (vlib_main_t * vm)
   pm->log_class = vlib_log_register_class ("perfmon", 0);
 
   /* Default data collection interval */
-  pm->timeout_interval = 3.0;
-  vec_validate (pm->pm_fds, vec_len (vlib_mains) - 1);
-  vec_validate (pm->perf_event_pages, vec_len (vlib_mains) - 1);
-  vec_validate (pm->rdpmc_indices, vec_len (vlib_mains) - 1);
+  pm->timeout_interval = 2.0;	/* seconds */
+  vec_validate (pm->pm_fds, 1);
+  vec_validate (pm->pm_fds[0], vec_len (vlib_mains) - 1);
+  vec_validate (pm->pm_fds[1], vec_len (vlib_mains) - 1);
+  vec_validate (pm->perf_event_pages, 1);
+  vec_validate (pm->perf_event_pages[0], vec_len (vlib_mains) - 1);
+  vec_validate (pm->perf_event_pages[1], vec_len (vlib_mains) - 1);
+  vec_validate (pm->rdpmc_indices, 1);
+  vec_validate (pm->rdpmc_indices[0], vec_len (vlib_mains) - 1);
+  vec_validate (pm->rdpmc_indices[1], vec_len (vlib_mains) - 1);
   pm->page_size = getpagesize ();
 
   ht = pm->perfmon_table = 0;
@@ -297,10 +303,12 @@ set_pmc_command_fn (vlib_main_t * vm,
   perfmon_main_t *pm = &perfmon_main;
   unformat_input_t _line_input, *line_input = &_line_input;
   perfmon_event_config_t ec;
+  f64 delay;
   u32 timeout_seconds;
   u32 deadman;
 
-  vec_reset_length (pm->events_to_collect);
+  vec_reset_length (pm->single_events_to_collect);
+  vec_reset_length (pm->paired_events_to_collect);
   pm->ipc_event_index = ~0;
   pm->mispredict_event_index = ~0;
 
@@ -316,28 +324,28 @@ set_pmc_command_fn (vlib_main_t * vm,
 	  ec.name = "instructions";
 	  ec.pe_type = PERF_TYPE_HARDWARE;
 	  ec.pe_config = PERF_COUNT_HW_INSTRUCTIONS;
-	  pm->ipc_event_index = vec_len (pm->events_to_collect);
-	  vec_add1 (pm->events_to_collect, ec);
+	  pm->ipc_event_index = vec_len (pm->paired_events_to_collect);
+	  vec_add1 (pm->paired_events_to_collect, ec);
 	  ec.name = "cpu-cycles";
 	  ec.pe_type = PERF_TYPE_HARDWARE;
 	  ec.pe_config = PERF_COUNT_HW_CPU_CYCLES;
-	  vec_add1 (pm->events_to_collect, ec);
+	  vec_add1 (pm->paired_events_to_collect, ec);
 	}
       else if (unformat (line_input, "branch-mispredict-rate"))
 	{
 	  ec.name = "branch-misses";
 	  ec.pe_type = PERF_TYPE_HARDWARE;
 	  ec.pe_config = PERF_COUNT_HW_BRANCH_MISSES;
-	  pm->mispredict_event_index = vec_len (pm->events_to_collect);
-	  vec_add1 (pm->events_to_collect, ec);
+	  pm->mispredict_event_index = vec_len (pm->paired_events_to_collect);
+	  vec_add1 (pm->paired_events_to_collect, ec);
 	  ec.name = "branches";
 	  ec.pe_type = PERF_TYPE_HARDWARE;
 	  ec.pe_config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
-	  vec_add1 (pm->events_to_collect, ec);
+	  vec_add1 (pm->paired_events_to_collect, ec);
 	}
       else if (unformat (line_input, "%U", unformat_processor_event, pm, &ec))
 	{
-	  vec_add1 (pm->events_to_collect, ec);
+	  vec_add1 (pm->single_events_to_collect, ec);
 	}
 #define _(type,event,str)                       \
       else if (unformat (line_input, str))      \
@@ -345,7 +353,7 @@ set_pmc_command_fn (vlib_main_t * vm,
 	  ec.name = str;                        \
 	  ec.pe_type = type;                    \
 	  ec.pe_config = event;                 \
-	  vec_add1 (pm->events_to_collect, ec); \
+	  vec_add1 (pm->single_events_to_collect, ec); \
 	}
       foreach_perfmon_event
 #undef _
@@ -354,21 +362,33 @@ set_pmc_command_fn (vlib_main_t * vm,
 			      format_unformat_error, line_input);
     }
 
-  if (vec_len (pm->events_to_collect) == 0)
+  /* Stick paired events at the front of the (unified) list */
+  if (vec_len (pm->paired_events_to_collect) > 0)
+    {
+      perfmon_event_config_t *tmp;
+      /* first 2n events are pairs... */
+      vec_append (pm->paired_events_to_collect, pm->single_events_to_collect);
+      tmp = pm->single_events_to_collect;
+      pm->single_events_to_collect = pm->paired_events_to_collect;
+      pm->paired_events_to_collect = tmp;
+    }
+
+  if (vec_len (pm->single_events_to_collect) == 0)
     return clib_error_return (0, "no events specified...");
 
+  /* Figure out how long data collection will take */
+  delay =
+    ((f64) vec_len (pm->single_events_to_collect)) * pm->timeout_interval;
+  delay /= 2.0;			/* collect 2 stats at once */
+
   vlib_cli_output (vm, "Start collection for %d events, wait %.2f seconds",
-		   vec_len (pm->events_to_collect),
-		   (f64) (vec_len (pm->events_to_collect))
-		   * pm->timeout_interval);
+		   vec_len (pm->single_events_to_collect), delay);
 
   vlib_process_signal_event (pm->vlib_main, perfmon_periodic_node.index,
 			     PERFMON_START, 0);
 
   /* Coarse-grained wait */
-  vlib_process_suspend (vm,
-			((f64) (vec_len (pm->events_to_collect)
-				* pm->timeout_interval)));
+  vlib_process_suspend (vm, delay);
 
   deadman = 0;
   /* Reasonable to guess that collection may not be quite done... */
@@ -438,7 +458,7 @@ format_capture (u8 * s, va_list * args)
       if (i == pm->ipc_event_index)
 	{
 	  f64 ipc_rate;
-	  ASSERT (i + 1 < vec_len (c->counter_names));
+	  ASSERT ((i + 1) < vec_len (c->counter_names));
 
 	  if (c->counter_values[i + 1] > 0)
 	    ipc_rate = (f64) c->counter_values[i]
```
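
The consumer of the unified list lives in perfmon_periodic.c, which this diff does not touch. Purely as an illustration of the layout contract the patch establishes (paired events first, always an even number of them), a hypothetical walk over the list might look like the sketch below; `event_t`, `run_collection`, and the sample event names are stand-ins, not the plugin's real types:

```c
#include <stdio.h>

/* Stand-in for perfmon_event_config_t; only the name matters here. */
typedef struct { const char *name; } event_t;

/* Walk a unified event list laid out as the patch arranges it: the
   first n_paired entries are synthetic-stat component pairs (always
   an even count), the remainder are singletons. Everything is dosed
   out two at a time, which is where the ~2x speedup comes from. */
static void
run_collection (const event_t *events, unsigned n_events, unsigned n_paired)
{
  unsigned i;
  for (i = 0; i + 1 < n_events; i += 2)
    printf ("round: %s + %s%s\n", events[i].name, events[i + 1].name,
	    i < n_paired ? " (synthetic pair)" : "");
  if (i < n_events)		/* odd total: last round runs one counter */
    printf ("round: %s alone\n", events[i].name);
}

int
main (void)
{
  event_t events[] = {
    { "instructions" }, { "cpu-cycles" },	/* ipc pair */
    { "branch-misses" }, { "branches" },	/* mispredict pair */
    { "context-switches" },			/* singleton */
  };
  run_collection (events, 5, 4);
  return 0;
}
```

Because the paired prefix has even length, the two-at-a-time walk can never split a synthetic pair across collection rounds; that is precisely why the patch sticks paired events at the front of the unified list.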