author     Dave Barach <dave@barachs.net>            2018-09-10 12:31:15 -0400
committer  Damjan Marion <dmarion@me.com>            2018-10-22 12:02:04 +0000
commit     4d1a866aff6ceb03025990b6e60b42faf09ef486 (patch)
tree       bec495932876d9649f26179b4c24b6938be43f38 /src/plugins/perfmon
parent     115a3ac59a16f9dcfee92eaecc79cd1fa3320e29 (diff)
X86_64 perf counter plugin
Change-Id: Ie5a00c15ee9536cc61afab57f6cadc1aa1972f3c
Signed-off-by: Dave Barach <dave@barachs.net>
Diffstat (limited to 'src/plugins/perfmon')
-rw-r--r--  src/plugins/perfmon/CMakeLists.txt       |  38
-rw-r--r--  src/plugins/perfmon/parse_util.c         | 235
-rw-r--r--  src/plugins/perfmon/perfmon.c            | 615
-rw-r--r--  src/plugins/perfmon/perfmon.h            | 145
-rw-r--r--  src/plugins/perfmon/perfmon_periodic.c   | 433
5 files changed, 1466 insertions, 0 deletions
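For orientation before the diff itself: the patch adds a perfmon plugin that programs one Linux perf_event counter at a time per thread, attributes the resulting counts to individual graph nodes, and exposes collection and reporting through three debug CLI commands defined in perfmon.c: `set pmc`, `show pmc`, and `clear pmc`. The sketch below is a hypothetical CLI session assembled from those command definitions; the event names come from the generic event list in perfmon.h plus the two synthetic events handled in perfmon.c, and the counts reported by `show pmc` would of course depend on the traffic running during the collection window.

```
vpp# show pmc events
Generic Events
   cpu-cycles
   instructions
   cache-references
   cache-misses
   branches
   branch-misses
   ...
Synthetic Events
   instructions-per-clock
   branch-mispredict-rate
vpp# set pmc instructions-per-clock cache-misses
Start collection for 3 events, wait 9.00 seconds
Data collection complete...
vpp# show pmc
          Name              Counter            Count            Pkts      Counts/Pkt
  ... one row per thread/node combination active during collection ...
vpp# clear pmc
```

The "3 events, 9.00 seconds" line follows from the code: `instructions-per-clock` expands to two hardware events, `cache-misses` adds a third, and each event is collected for the default 3.0-second timeout in turn.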
diff --git a/src/plugins/perfmon/CMakeLists.txt b/src/plugins/perfmon/CMakeLists.txt new file mode 100644 index 00000000000..30e1f2caf3e --- /dev/null +++ b/src/plugins/perfmon/CMakeLists.txt @@ -0,0 +1,38 @@ +# Copyright (c) 2018 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_vpp_plugin(perfmon + SOURCES + perfmon.c + perfmon_periodic.c + parse_util.c +) + +# Reenable / extend when .json file license issue fixed +# +# set (PERFMON_JSON_FILES +# haswell_core_v28.json +# haswellx_core_v20.json +# ivybridge_core_v21.json +# ivytown_core_v20.json +# jaketown_core_v20.json +# sandybridge_core_v16.json +# skylake_core_v42.json +# skylakex_core_v1.12.json +# ) + +# install( +# FILES ${PERFMON_JSON_FILES} +# DESTINATION share/vpp/plugins/perfmon +# COMPONENT vpp-dev +# ) diff --git a/src/plugins/perfmon/parse_util.c b/src/plugins/perfmon/parse_util.c new file mode 100644 index 00000000000..436acaa6de0 --- /dev/null +++ b/src/plugins/perfmon/parse_util.c @@ -0,0 +1,235 @@ +/* + * parse_util.c - halfhearted json parser + * + * Copyright (c) 2018 Cisco Systems and/or its affiliates + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <perfmon/perfmon.h> +#include <vppinfra/unix.h> + +typedef enum +{ + STATE_START, + STATE_READ_NAME, + STATE_READ_VALUE, +} parse_state_t; + +static u8 * +downcase (u8 * s) +{ + u8 *rv = 0; + u8 c; + int i; + + for (i = 0; i < vec_len (s); i++) + { + c = s[i]; + if (c >= 'A' && c <= 'Z') + c = c + ('a' - 'A'); + vec_add1 (rv, c); + } + return (rv); +} + +uword * +perfmon_parse_table (perfmon_main_t * pm, char *path, char *table_name) +{ + u8 *cp; + u8 *event_name; + int state = STATE_START; + uword *ht; + name_value_pair_t *nvp = 0; + name_value_pair_t **nvps = 0; + u8 *v; + int i; + u8 *json_filename; + clib_error_t *error; + + /* Create the name/value hash table in any case... 
*/ + ht = hash_create_string (0, sizeof (uword)); + + json_filename = format (0, "%s/%s%c", path, table_name, 0); + + vlib_log_debug (pm->log_class, "Try to read perfmon events from %s", + json_filename); + + error = unix_proc_file_contents ((char *) json_filename, &cp); + + if (error) + { + vlib_log_err (pm->log_class, + "Failed to read CPU-specific counter table"); + vlib_log_err (pm->log_class, + "Download from https://download.01.org/perfmon, " + "and install as %s", json_filename); + vec_free (json_filename); + clib_error_report (error); + return ht; + } + vlib_log_debug (pm->log_class, "Read OK, parse the event table..."); + vec_free (json_filename); + +again: + while (*cp) + { + switch (state) + { + case STATE_START: + while (*cp && *cp != '{' && *cp != '}' && *cp != ',') + cp++; + if (*cp == 0) + goto done; + + /* Look for a new event */ + if (*cp == '{') + { + if (*cp == 0) + { + error: + clib_warning ("parse fail"); + hash_free (ht); + return 0; + } + cp++; + state = STATE_READ_NAME; + goto again; + } + else if (*cp == '}') /* end of event */ + { + /* Look for the "EventName" nvp */ + for (i = 0; i < vec_len (nvps); i++) + { + nvp = nvps[i]; + if (!strncmp ((char *) nvp->name, "EventName", 9)) + { + event_name = nvp->value; + goto found; + } + } + /* no name? */ + for (i = 0; i < vec_len (nvps); i++) + { + vec_free (nvps[i]->name); + vec_free (nvps[i]->value); + } + vec_free (nvps); + cp++; + goto again; + + found: + event_name = downcase (event_name); + hash_set_mem (ht, event_name, nvps); + nvp = 0; + nvps = 0; + cp++; + goto again; + } + else if (*cp == ',') /* punctuation */ + { + cp++; + goto again; + } + + case STATE_READ_NAME: + vec_validate (nvp, 0); + v = 0; + while (*cp && *cp != '"') + cp++; + + if (*cp == 0) + { + vec_free (nvp); + goto error; + } + + cp++; + while (*cp && *cp != '"') + { + vec_add1 (v, *cp); + cp++; + } + if (*cp == 0) + { + vec_free (v); + goto error; + } + cp++; + vec_add1 (v, 0); + nvp->name = v; + state = STATE_READ_VALUE; + goto again; + + case STATE_READ_VALUE: + while (*cp && *cp != ':') + cp++; + if (*cp == 0) + { + vec_free (nvp->name); + goto error; + } + while (*cp && *cp != '"') + cp++; + if (*cp == 0) + { + vec_free (nvp->name); + goto error; + } + else + cp++; + v = 0; + while (*cp && *cp != '"') + { + vec_add1 (v, *cp); + cp++; + } + if (*cp == 0) + { + vec_free (nvp->name); + vec_free (v); + goto error; + } + vec_add1 (v, 0); + nvp->value = v; + vec_add1 (nvps, nvp); + while (*cp && *cp != ',' && *cp != '}') + cp++; + if (*cp == 0) + { + vec_free (nvp->name); + vec_free (nvp->value); + goto error; + } + else if (*cp == '}') + state = STATE_START; + else + { + cp++; + state = STATE_READ_NAME; + } + nvp = 0; + goto again; + } + } + +done: + return (ht); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c new file mode 100644 index 00000000000..61dbe5cd918 --- /dev/null +++ b/src/plugins/perfmon/perfmon.c @@ -0,0 +1,615 @@ +/* + * perfmon.c - skeleton vpp engine plug-in + * + * Copyright (c) <current-year> <your-organization> + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/plugin/plugin.h> +#include <perfmon/perfmon.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vpp/app/version.h> +#include <linux/limits.h> + +perfmon_main_t perfmon_main; + +static char *perfmon_json_path = "/usr/share/vpp/plugins/perfmon"; + +static void +set_perfmon_json_path () +{ + char *p, path[PATH_MAX]; + int rv; + u8 *s; + + /* find executable path */ + if ((rv = readlink ("/proc/self/exe", path, PATH_MAX - 1)) == -1) + return; + + /* readlink doesn't provide null termination */ + path[rv] = 0; + + /* strip filename */ + if ((p = strrchr (path, '/')) == 0) + return; + *p = 0; + + /* strip bin/ */ + if ((p = strrchr (path, '/')) == 0) + return; + *p = 0; + + /* cons up the .json file path */ + s = format (0, "%s/share/vpp/plugins/perfmon", path); + vec_add1 (s, 0); + perfmon_json_path = (char *) s; +} + +#define foreach_cpuid_table \ +_(0x0306C3, haswell_core_v28.json) \ +_(0x0306F2, haswell_core_v28.json) \ +_(0x0406E3, skylake_core_v42.json) \ +_(0x0506E3, skylake_core_v42.json) + +static inline u32 +get_cpuid (void) +{ +#if defined(__x86_64__) + u32 cpuid; + asm volatile ("mov $1, %%eax; cpuid; mov %%eax, %0":"=r" (cpuid)::"%eax", + "%edx", "%ecx", "%rbx"); + return cpuid; +#else + return 0; +#endif +} + +static clib_error_t * +perfmon_init (vlib_main_t * vm) +{ + perfmon_main_t *pm = &perfmon_main; + clib_error_t *error = 0; + u32 cpuid; + uword *ht; + int found_a_table = 0; + + pm->vlib_main = vm; + pm->vnet_main = vnet_get_main (); + + pm->capture_by_thread_and_node_name = + hash_create_string (0, sizeof (uword)); + + pm->log_class = vlib_log_register_class ("perfmon", 0); + + /* Default data collection interval */ + pm->timeout_interval = 3.0; + vec_validate (pm->pm_fds, vec_len (vlib_mains) - 1); + vec_validate (pm->perf_event_pages, vec_len (vlib_mains) - 1); + vec_validate (pm->rdpmc_indices, vec_len (vlib_mains) - 1); + pm->page_size = getpagesize (); + + ht = pm->perfmon_table = 0; + + set_perfmon_json_path (); + + cpuid = get_cpuid (); + + if (0) + { + } +#define _(id,table) \ + else if (cpuid == id) \ + { \ + vlib_log_debug (pm->log_class, "Found table %s", #table); \ + ht = perfmon_parse_table (pm, perfmon_json_path, #table); \ + found_a_table = 1; \ + } + foreach_cpuid_table; +#undef _ + + pm->perfmon_table = ht; + + if (found_a_table == 0) + vlib_log_err (pm->log_class, "No table for cpuid %x", cpuid); + + return error; +} + +VLIB_INIT_FUNCTION (perfmon_init); + +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = +{ + .version = VPP_BUILD_VER, + .description = "Performance monitor plugin", +#if !defined(__x86_64__) + .default_disabled = 1, +#endif +}; +/* *INDENT-ON* */ + +static uword +atox (u8 * s) +{ + uword rv = 0; + + while (*s) + { + if (*s >= '0' && *s <= '9') + rv = (rv << 4) | (*s - '0'); + else if (*s >= 'a' && *s <= 'f') + rv = (rv << 4) | (*s - 'a' + 10); + else if (*s >= 'A' && *s <= 'A') + rv = (rv << 4) | (*s - 'A' + 10); + else if (*s == 'x') + ; + else + break; + s++; + } + return rv; +} + +static uword +unformat_processor_event (unformat_input_t * input, va_list * 
args) +{ + perfmon_main_t *pm = va_arg (*args, perfmon_main_t *); + perfmon_event_config_t *ep = va_arg (*args, perfmon_event_config_t *); + u8 *s = 0; + name_value_pair_t **nvps, *nvp; + hash_pair_t *hp; + int i; + int set_values = 0; + u32 pe_config = 0; + + if (pm->perfmon_table == 0) + return 0; + + if (!unformat (input, "%s", &s)) + return 0; + + hp = hash_get_pair_mem (pm->perfmon_table, s); + + vec_free (s); + + if (hp == 0) + return 0; + + nvps = (name_value_pair_t **) (hp->value[0]); + + for (i = 0; i < vec_len (nvps); i++) + { + nvp = nvps[i]; + if (!strncmp ((char *) nvp->name, "EventCode", 9)) + { + pe_config |= atox (nvp->value); + set_values++; + } + else if (!strncmp ((char *) nvp->name, "UMask", 5)) + { + pe_config |= (atox (nvp->value) << 8); + set_values++; + } + if (set_values == 2) + break; + } + + if (set_values != 2) + { + clib_warning ("BUG: only found %d values", set_values); + return 0; + } + + ep->name = (char *) hp->key; + ep->pe_type = PERF_TYPE_RAW; + ep->pe_config = pe_config; + return 1; +} + +static clib_error_t * +set_pmc_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + perfmon_main_t *pm = &perfmon_main; + unformat_input_t _line_input, *line_input = &_line_input; + perfmon_event_config_t ec; + u32 timeout_seconds; + u32 deadman; + + vec_reset_length (pm->events_to_collect); + pm->ipc_event_index = ~0; + pm->mispredict_event_index = ~0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "counter names required..."); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "timeout %u", &timeout_seconds)) + pm->timeout_interval = (f64) timeout_seconds; + else if (unformat (line_input, "instructions-per-clock")) + { + ec.name = "instructions"; + ec.pe_type = PERF_TYPE_HARDWARE; + ec.pe_config = PERF_COUNT_HW_INSTRUCTIONS; + pm->ipc_event_index = vec_len (pm->events_to_collect); + vec_add1 (pm->events_to_collect, ec); + ec.name = "cpu-cycles"; + ec.pe_type = PERF_TYPE_HARDWARE; + ec.pe_config = PERF_COUNT_HW_CPU_CYCLES; + vec_add1 (pm->events_to_collect, ec); + } + else if (unformat (line_input, "branch-mispredict-rate")) + { + ec.name = "branch-misses"; + ec.pe_type = PERF_TYPE_HARDWARE; + ec.pe_config = PERF_COUNT_HW_BRANCH_MISSES; + pm->mispredict_event_index = vec_len (pm->events_to_collect); + vec_add1 (pm->events_to_collect, ec); + ec.name = "branches"; + ec.pe_type = PERF_TYPE_HARDWARE; + ec.pe_config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; + vec_add1 (pm->events_to_collect, ec); + } + else if (unformat (line_input, "%U", unformat_processor_event, pm, &ec)) + { + vec_add1 (pm->events_to_collect, ec); + } +#define _(type,event,str) \ + else if (unformat (line_input, str)) \ + { \ + ec.name = str; \ + ec.pe_type = type; \ + ec.pe_config = event; \ + vec_add1 (pm->events_to_collect, ec); \ + } + foreach_perfmon_event +#undef _ + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + } + + if (vec_len (pm->events_to_collect) == 0) + return clib_error_return (0, "no events specified..."); + + vlib_cli_output (vm, "Start collection for %d events, wait %.2f seconds", + vec_len (pm->events_to_collect), + (f64) (vec_len (pm->events_to_collect)) + * pm->timeout_interval); + + vlib_process_signal_event (pm->vlib_main, perfmon_periodic_node.index, + PERFMON_START, 0); + + /* Coarse-grained wait */ + vlib_process_suspend (vm, + ((f64) (vec_len (pm->events_to_collect) + * pm->timeout_interval))); + 
+ deadman = 0; + /* Reasonable to guess that collection may not be quite done... */ + while (pm->state == PERFMON_STATE_RUNNING) + { + vlib_process_suspend (vm, 10e-3); + if (deadman++ > 200) + { + vlib_cli_output (vm, "DEADMAN: collection still running..."); + break; + } + } + + vlib_cli_output (vm, "Data collection complete..."); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_pmc_command, static) = +{ + .path = "set pmc", + .short_help = "set pmc c1 [..., use \"show pmc events\"]", + .function = set_pmc_command_fn, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +static int +capture_name_sort (void *a1, void *a2) +{ + perfmon_capture_t *c1 = a1; + perfmon_capture_t *c2 = a2; + + return strcmp ((char *) c1->thread_and_node_name, + (char *) c2->thread_and_node_name); +} + +static u8 * +format_capture (u8 * s, va_list * args) +{ + perfmon_main_t *pm = va_arg (*args, perfmon_main_t *); + perfmon_capture_t *c = va_arg (*args, perfmon_capture_t *); + int verbose __attribute__ ((unused)) = va_arg (*args, int); + f64 ticks_per_pkt; + int i; + + if (c == 0) + { + s = format (s, "%=40s%=20s%=16s%=16s%=16s", + "Name", "Counter", "Count", "Pkts", "Counts/Pkt"); + return s; + } + + for (i = 0; i < vec_len (c->counter_names); i++) + { + u8 *name; + + if (i == 0) + name = c->thread_and_node_name; + else + { + vec_add1 (s, '\n'); + name = (u8 *) ""; + } + + /* Deal with synthetic events right here */ + if (i == pm->ipc_event_index) + { + f64 ipc_rate; + ASSERT (i + 1 < vec_len (c->counter_names)); + + if (c->counter_values[i + 1] > 0) + ipc_rate = (f64) c->counter_values[i] + / (f64) c->counter_values[i + 1]; + else + ipc_rate = 0.0; + + s = format (s, "%-40s%+20s%+16llu%+16llu%+16.2e\n", + name, "instructions-per-clock", + c->counter_values[i], + c->counter_values[i + 1], ipc_rate); + name = (u8 *) ""; + } + + if (i == pm->mispredict_event_index) + { + f64 mispredict_rate; + ASSERT (i + 1 < vec_len (c->counter_names)); + + if (c->counter_values[i + 1] > 0) + mispredict_rate = (f64) c->counter_values[i] + / (f64) c->counter_values[i + 1]; + else + mispredict_rate = 0.0; + + s = format (s, "%-40s%+20s%+16llu%+16llu%+16.2e\n", + name, "branch-mispredict-rate", + c->counter_values[i], + c->counter_values[i + 1], mispredict_rate); + name = (u8 *) ""; + } + + if (c->vectors_this_counter[i]) + ticks_per_pkt = + ((f64) c->counter_values[i]) / ((f64) c->vectors_this_counter[i]); + else + ticks_per_pkt = 0.0; + + s = format (s, "%-40s%+20s%+16llu%+16llu%+16.2e", + name, c->counter_names[i], + c->counter_values[i], + c->vectors_this_counter[i], ticks_per_pkt); + } + return s; +} + +static u8 * +format_generic_events (u8 * s, va_list * args) +{ + int verbose = va_arg (*args, int); + +#define _(type,config,name) \ + if (verbose == 0) \ + s = format (s, "\n %s", name); \ + else \ + s = format (s, "\n %s (%d, %d)", name, type, config); + foreach_perfmon_event; +#undef _ + return s; +} + +typedef struct +{ + u8 *name; + name_value_pair_t **nvps; +} sort_nvp_t; + +static int +sort_nvps_by_name (void *a1, void *a2) +{ + sort_nvp_t *nvp1 = a1; + sort_nvp_t *nvp2 = a2; + + return strcmp ((char *) nvp1->name, (char *) nvp2->name); +} + +static u8 * +format_processor_events (u8 * s, va_list * args) +{ + perfmon_main_t *pm = va_arg (*args, perfmon_main_t *); + int verbose = va_arg (*args, int); + int i, j; + sort_nvp_t *sort_nvps = 0; + sort_nvp_t *sn; + u8 *key; + name_value_pair_t **value; + + /* *INDENT-OFF* */ + hash_foreach_mem (key, value, pm->perfmon_table, + ({ + vec_add2 (sort_nvps, sn, 1); + sn->name = 
key; + sn->nvps = value; + })); + + vec_sort_with_function (sort_nvps, sort_nvps_by_name); + + if (verbose == 0) + { + for (i = 0; i < vec_len (sort_nvps); i++) + s = format (s, "\n %s ", sort_nvps[i].name); + } + else + { + for (i = 0; i < vec_len (sort_nvps); i++) + { + name_value_pair_t **nvps; + s = format (s, "\n %s:", sort_nvps[i].name); + + nvps = sort_nvps[i].nvps; + + for (j = 0; j < vec_len (nvps); j++) + s = format (s, "\n %s = %s", nvps[j]->name, nvps[j]->value); + } + } + vec_free (sort_nvps); + return s; +} + + +static clib_error_t * +show_pmc_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + perfmon_main_t *pm = &perfmon_main; + int verbose = 0; + int events = 0; + int i; + perfmon_capture_t *c; + perfmon_capture_t *captures = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "events")) + events = 1; + else if (unformat (input, "verbose")) + verbose = 1; + else + break; + } + + if (events) + { + vlib_cli_output (vm, "Generic Events %U", + format_generic_events, verbose); + vlib_cli_output (vm, "Synthetic Events"); + vlib_cli_output (vm, " instructions-per-clock"); + vlib_cli_output (vm, " branch-mispredict-rate"); + if (pm->perfmon_table) + vlib_cli_output (vm, "Processor Events %U", + format_processor_events, pm, verbose); + return 0; + } + + if (pm->state == PERFMON_STATE_RUNNING) + { + vlib_cli_output (vm, "Data collection in progress..."); + return 0; + } + + if (pool_elts (pm->capture_pool) == 0) + { + vlib_cli_output (vm, "No data..."); + return 0; + } + + /* *INDENT-OFF* */ + pool_foreach (c, pm->capture_pool, + ({ + vec_add1 (captures, *c); + })); + /* *INDENT-ON* */ + + vec_sort_with_function (captures, capture_name_sort); + + vlib_cli_output (vm, "%U", format_capture, pm, 0 /* header */ , + 0 /* verbose */ ); + + for (i = 0; i < vec_len (captures); i++) + { + c = captures + i; + + vlib_cli_output (vm, "%U", format_capture, pm, c, verbose); + } + + vec_free (captures); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_pmc_command, static) = +{ + .path = "show pmc", + .short_help = "show pmc [verbose]", + .function = show_pmc_command_fn, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +static clib_error_t * +clear_pmc_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + perfmon_main_t *pm = &perfmon_main; + u8 *key; + u32 *value; + + if (pm->state == PERFMON_STATE_RUNNING) + { + vlib_cli_output (vm, "Performance monitor is still running..."); + return 0; + } + + pool_free (pm->capture_pool); + + /* *INDENT-OFF* */ + hash_foreach_mem (key, value, pm->capture_by_thread_and_node_name, + ({ + vec_free (key); + })); + /* *INDENT-ON* */ + hash_free (pm->capture_by_thread_and_node_name); + pm->capture_by_thread_and_node_name = + hash_create_string (0, sizeof (uword)); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (clear_pmc_command, static) = +{ + .path = "clear pmc", + .short_help = "clear the performance monitor counters", + .function = clear_pmc_command_fn, +}; +/* *INDENT-ON* */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h new file mode 100644 index 00000000000..47ee471d5fc --- /dev/null +++ b/src/plugins/perfmon/perfmon.h @@ -0,0 +1,145 @@ +/* + * perfmon.h - performance monitor + * + * Copyright (c) 2018 Cisco Systems and/or its affiliates + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_perfmon_h__ +#define __included_perfmon_h__ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vlib/log.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> + +#include <linux/perf_event.h> + +#define foreach_perfmon_event \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, "cpu-cycles") \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, "instructions") \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, \ + "cache-references") \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, "cache-misses") \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branches") \ + _(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses") \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES, "bus-cycles") \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, \ + "stall-frontend") \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, \ + "stall-backend") \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, "ref-cpu-cycles") \ +_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, "page-faults") \ +_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, "context-switches") \ +_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, "cpu-migrations") \ +_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, "minor-pagefaults") \ +_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, "major-pagefaults") \ +_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, "emulation-faults") + +typedef struct +{ + char *name; + int pe_type; + int pe_config; +} perfmon_event_config_t; + +typedef enum +{ + PERFMON_STATE_OFF = 0, + PERFMON_STATE_RUNNING, +} perfmon_state_t; + +typedef struct +{ + u8 *thread_and_node_name; + u8 **counter_names; + u64 *counter_values; + u64 *vectors_this_counter; +} perfmon_capture_t; + +typedef struct +{ + u32 cpuid; + const char **table; +} perfmon_cpuid_and_table_t; + +typedef struct +{ + u8 *name; + u8 *value; +} name_value_pair_t; + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + + /* on/off switch for the periodic function */ + volatile u8 state; + + /* capture pool, hash table */ + perfmon_capture_t *capture_pool; + uword *capture_by_thread_and_node_name; + + /* CPU-specific event tables, hash table of selected table (if any) */ + perfmon_cpuid_and_table_t *perfmon_tables; + uword *perfmon_table; + + /* vector of events to collect */ + perfmon_event_config_t *events_to_collect; + + /* Base indices of synthetic event tuples */ + u32 ipc_event_index; + u32 mispredict_event_index; + + /* Length of time to capture a single event */ + f64 timeout_interval; + + /* Current event (index) being collected */ + u32 current_event; + u32 *rdpmc_indices; + /* mmap base / size of (mapped) struct perf_event_mmap_page */ + u8 **perf_event_pages; + u32 page_size; + + /* Current perf_event file descriptors, per thread */ + int *pm_fds; + + /* Logging */ + vlib_log_class_t log_class; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + 
ethernet_main_t *ethernet_main; +} perfmon_main_t; + +extern perfmon_main_t perfmon_main; + +extern vlib_node_registration_t perfmon_periodic_node; +uword *perfmon_parse_table (perfmon_main_t * pm, char *path, char *filename); + +/* Periodic function events */ +#define PERFMON_START 1 + +#endif /* __included_perfmon_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/perfmon/perfmon_periodic.c b/src/plugins/perfmon/perfmon_periodic.c new file mode 100644 index 00000000000..329a75d7305 --- /dev/null +++ b/src/plugins/perfmon/perfmon_periodic.c @@ -0,0 +1,433 @@ +/* + * perfmon_periodic.c - skeleton plug-in periodic function + * + * Copyright (c) <current-year> <your-organization> + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vppinfra/error.h> +#include <perfmon/perfmon.h> +#include <asm/unistd.h> +#include <sys/ioctl.h> + +static long +perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu, + int group_fd, unsigned long flags) +{ + int ret; + + ret = syscall (__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); + return ret; +} + +static u64 +read_current_perf_counter (vlib_main_t * vm) +{ + if (vm->perf_counter_id) + return clib_rdpmc (vm->perf_counter_id); + else + { + u64 sw_value; + if (read (vm->perf_counter_fd, &sw_value, sizeof (sw_value)) != + sizeof (sw_value)) + { + clib_unix_warning ("counter read failed, disable collection..."); + vm->vlib_node_runtime_perf_counter_cb = 0; + return 0ULL; + } + return sw_value; + } +} + +static void +clear_counters (perfmon_main_t * pm) +{ + int i, j; + vlib_main_t *vm = pm->vlib_main; + vlib_main_t *stat_vm; + vlib_node_main_t *nm; + vlib_node_t *n; + + vlib_worker_thread_barrier_sync (vm); + + for (j = 0; j < vec_len (vlib_mains); j++) + { + stat_vm = vlib_mains[j]; + if (stat_vm == 0) + continue; + + nm = &stat_vm->node_main; + + /* Clear the node runtime perfmon counters */ + for (i = 0; i < vec_len (nm->nodes); i++) + { + n = nm->nodes[i]; + vlib_node_sync_stats (stat_vm, n); + } + + /* And clear the node perfmon counters */ + for (i = 0; i < vec_len (nm->nodes); i++) + { + n = nm->nodes[i]; + n->stats_total.perf_counter_ticks = 0; + n->stats_total.perf_counter_vectors = 0; + n->stats_last_clear.perf_counter_ticks = 0; + n->stats_last_clear.perf_counter_vectors = 0; + } + } + vlib_worker_thread_barrier_release (vm); +} + +static void +enable_current_event (perfmon_main_t * pm) +{ + struct perf_event_attr pe; + int fd; + struct perf_event_mmap_page *p = 0; + perfmon_event_config_t *c; + vlib_main_t *vm = vlib_get_main (); + u32 my_thread_index = vm->thread_index; + + c = vec_elt_at_index (pm->events_to_collect, pm->current_event); + + memset (&pe, 0, sizeof (struct perf_event_attr)); + pe.type = c->pe_type; + pe.size = sizeof (struct perf_event_attr); + pe.config = c->pe_config; + pe.disabled = 1; + pe.pinned = 1; + /* + * Note: excluding the kernel makes the + * (software) 
context-switch counter read 0... + */ + if (pe.type != PERF_TYPE_SOFTWARE) + { + /* Exclude kernel and hypervisor */ + pe.exclude_kernel = 1; + pe.exclude_hv = 1; + } + + fd = perf_event_open (&pe, 0, -1, -1, 0); + if (fd == -1) + { + clib_unix_warning ("event open: type %d config %d", c->pe_type, + c->pe_config); + return; + } + + if (pe.type != PERF_TYPE_SOFTWARE) + { + p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0); + if (p == MAP_FAILED) + { + clib_unix_warning ("mmap"); + close (fd); + return; + } + } + + if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0) + clib_unix_warning ("reset ioctl"); + + if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0) + clib_unix_warning ("enable ioctl"); + + /* + * Software event counters - and others not capable of being + * read via the "rdpmc" instruction - will be read + * by system calls. + */ + if (pe.type == PERF_TYPE_SOFTWARE || p->cap_user_rdpmc == 0) + pm->rdpmc_indices[my_thread_index] = 0; + else /* use rdpmc instrs */ + pm->rdpmc_indices[my_thread_index] = p->index - 1; + pm->perf_event_pages[my_thread_index] = (void *) p; + + pm->pm_fds[my_thread_index] = fd; + + /* Enable the main loop counter snapshot mechanism */ + vm->perf_counter_id = pm->rdpmc_indices[my_thread_index]; + vm->perf_counter_fd = fd; + vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counter; +} + +static void +disable_event (perfmon_main_t * pm) +{ + vlib_main_t *vm = vlib_get_main (); + u32 my_thread_index = vm->thread_index; + + if (pm->pm_fds[my_thread_index] == 0) + return; + + /* Stop main loop collection */ + vm->vlib_node_runtime_perf_counter_cb = 0; + + if (ioctl (pm->pm_fds[my_thread_index], PERF_EVENT_IOC_DISABLE, 0) < 0) + clib_unix_warning ("disable ioctl"); + + if (pm->perf_event_pages[my_thread_index]) + if (munmap (pm->perf_event_pages[my_thread_index], pm->page_size) < 0) + clib_unix_warning ("munmap"); + + (void) close (pm->pm_fds[my_thread_index]); + pm->pm_fds[my_thread_index] = 0; +} + +static void +worker_thread_start_event (vlib_main_t * vm) +{ + perfmon_main_t *pm = &perfmon_main; + + enable_current_event (pm); + vm->worker_thread_main_loop_callback = 0; +} + +static void +worker_thread_stop_event (vlib_main_t * vm) +{ + perfmon_main_t *pm = &perfmon_main; + disable_event (pm); + vm->worker_thread_main_loop_callback = 0; +} + +static void +start_event (perfmon_main_t * pm, f64 now, uword event_data) +{ + int i; + pm->current_event = 0; + if (vec_len (pm->events_to_collect) == 0) + { + pm->state = PERFMON_STATE_OFF; + return; + } + pm->state = PERFMON_STATE_RUNNING; + clear_counters (pm); + + /* Start collection on this thread */ + enable_current_event (pm); + + /* And also on worker threads */ + for (i = 1; i < vec_len (vlib_mains); i++) + { + if (vlib_mains[i] == 0) + continue; + vlib_mains[i]->worker_thread_main_loop_callback = (void *) + worker_thread_start_event; + } +} + +void +scrape_and_clear_counters (perfmon_main_t * pm) +{ + int i, j; + vlib_main_t *vm = pm->vlib_main; + vlib_main_t *stat_vm; + vlib_node_main_t *nm; + vlib_node_t ***node_dups = 0; + vlib_node_t **nodes; + vlib_node_t *n; + perfmon_capture_t *c; + perfmon_event_config_t *current_event; + uword *p; + u8 *counter_name; + u64 counter_value; + u64 vectors_this_counter; + + /* snapshoot the nodes, including pm counters */ + vlib_worker_thread_barrier_sync (vm); + + for (j = 0; j < vec_len (vlib_mains); j++) + { + stat_vm = vlib_mains[j]; + if (stat_vm == 0) + continue; + + nm = &stat_vm->node_main; + + for (i = 0; i < vec_len (nm->nodes); i++) + { + n = nm->nodes[i]; + 
vlib_node_sync_stats (stat_vm, n); + } + + nodes = 0; + vec_validate (nodes, vec_len (nm->nodes) - 1); + vec_add1 (node_dups, nodes); + + /* Snapshoot and clear the per-node perfmon counters */ + for (i = 0; i < vec_len (nm->nodes); i++) + { + n = nm->nodes[i]; + nodes[i] = clib_mem_alloc (sizeof (*n)); + clib_memcpy (nodes[i], n, sizeof (*n)); + n->stats_total.perf_counter_ticks = 0; + n->stats_total.perf_counter_vectors = 0; + n->stats_last_clear.perf_counter_ticks = 0; + n->stats_last_clear.perf_counter_vectors = 0; + } + } + + vlib_worker_thread_barrier_release (vm); + + current_event = pm->events_to_collect + pm->current_event; + + for (j = 0; j < vec_len (vlib_mains); j++) + { + stat_vm = vlib_mains[j]; + if (stat_vm == 0) + continue; + + nodes = node_dups[j]; + + for (i = 0; i < vec_len (nodes); i++) + { + u8 *capture_name; + + n = nodes[i]; + if (n->stats_total.perf_counter_ticks == 0) + { + clib_mem_free (n); + continue; + } + + capture_name = format (0, "t%d-%v%c", j, n->name, 0); + + p = hash_get_mem (pm->capture_by_thread_and_node_name, + capture_name); + + if (p == 0) + { + pool_get (pm->capture_pool, c); + memset (c, 0, sizeof (*c)); + c->thread_and_node_name = capture_name; + hash_set_mem (pm->capture_by_thread_and_node_name, + capture_name, c - pm->capture_pool); + } + else + c = pool_elt_at_index (pm->capture_pool, p[0]); + + /* Snapshoot counters, etc. into the capture */ + counter_name = (u8 *) current_event->name; + counter_value = n->stats_total.perf_counter_ticks - + n->stats_last_clear.perf_counter_ticks; + vectors_this_counter = n->stats_total.perf_counter_vectors - + n->stats_last_clear.perf_counter_vectors; + + vec_add1 (c->counter_names, counter_name); + vec_add1 (c->counter_values, counter_value); + vec_add1 (c->vectors_this_counter, vectors_this_counter); + clib_mem_free (n); + } + vec_free (nodes); + } + vec_free (node_dups); +} + +static void +handle_timeout (perfmon_main_t * pm, f64 now) +{ + int i; + disable_event (pm); + + /* And also on worker threads */ + for (i = 1; i < vec_len (vlib_mains); i++) + { + if (vlib_mains[i] == 0) + continue; + vlib_mains[i]->worker_thread_main_loop_callback = (void *) + worker_thread_stop_event; + } + + /* Short delay to make sure workers have stopped collection */ + if (i > 1) + vlib_process_suspend (pm->vlib_main, 1e-3); + scrape_and_clear_counters (pm); + pm->current_event++; + if (pm->current_event >= vec_len (pm->events_to_collect)) + { + pm->current_event = 0; + pm->state = PERFMON_STATE_OFF; + return; + } + enable_current_event (pm); + + /* And also on worker threads */ + for (i = 1; i < vec_len (vlib_mains); i++) + { + if (vlib_mains[i] == 0) + continue; + vlib_mains[i]->worker_thread_main_loop_callback = (void *) + worker_thread_start_event; + } +} + +static uword +perfmon_periodic_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + perfmon_main_t *pm = &perfmon_main; + f64 now; + uword *event_data = 0; + uword event_type; + int i; + + while (1) + { + if (pm->state == PERFMON_STATE_RUNNING) + vlib_process_wait_for_event_or_clock (vm, pm->timeout_interval); + else + vlib_process_wait_for_event (vm); + + now = vlib_time_now (vm); + + event_type = vlib_process_get_events (vm, (uword **) & event_data); + + switch (event_type) + { + case PERFMON_START: + for (i = 0; i < vec_len (event_data); i++) + start_event (pm, now, event_data[i]); + break; + + /* Handle timeout */ + case ~0: + handle_timeout (pm, now); + break; + + default: + clib_warning ("Unexpected event %d", event_type); + break; + } + 
vec_reset_length (event_data); + } + return 0; /* or not */ +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (perfmon_periodic_node) = +{ + .function = perfmon_periodic_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "perfmon-periodic-process", +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |
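For readers unfamiliar with the kernel interface used above: perfmon_periodic.c drives everything through perf_event_open(2), an mmap of the event's struct perf_event_mmap_page (to learn whether the counter can be read with rdpmc and at which index), and the PERF_EVENT_IOC_RESET/ENABLE/DISABLE ioctls. The following is a minimal standalone sketch of that pattern, not code from the patch; the choice of event (retired instructions) and the minimal error handling are illustrative only.

```c
/* Standalone sketch of the perf_event_open / mmap / ioctl pattern
 * that perfmon_periodic.c builds on.  Not part of the patch. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static long
perf_event_open (struct perf_event_attr *attr, pid_t pid, int cpu,
                 int group_fd, unsigned long flags)
{
  /* No glibc wrapper exists; call the raw syscall, as the plugin does. */
  return syscall (__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int
main (void)
{
  struct perf_event_attr pe;
  struct perf_event_mmap_page *p;
  uint64_t count;
  int fd;

  memset (&pe, 0, sizeof (pe));
  pe.type = PERF_TYPE_HARDWARE;
  pe.size = sizeof (pe);
  pe.config = PERF_COUNT_HW_INSTRUCTIONS;
  pe.disabled = 1;
  /* Count user space only, as the plugin does for hardware events */
  pe.exclude_kernel = 1;
  pe.exclude_hv = 1;

  /* pid = 0, cpu = -1: count this process on any CPU */
  fd = perf_event_open (&pe, 0, -1, -1, 0);
  if (fd < 0)
    {
      perror ("perf_event_open");
      return 1;
    }

  /* One page is enough to read the metadata (rdpmc capability, index) */
  p = mmap (0, sysconf (_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0);
  if (p == MAP_FAILED)
    {
      perror ("mmap");
      return 1;
    }

  ioctl (fd, PERF_EVENT_IOC_RESET, 0);
  ioctl (fd, PERF_EVENT_IOC_ENABLE, 0);

  /* ... run the code being measured here ... */

  if (read (fd, &count, sizeof (count)) == sizeof (count))
    printf ("instructions: %llu (rdpmc capable: %u, index: %u)\n",
            (unsigned long long) count,
            (unsigned) p->cap_user_rdpmc, (unsigned) p->index);

  munmap (p, sysconf (_SC_PAGESIZE));
  close (fd);
  return 0;
}
```

Within the plugin, the same file descriptor feeds the per-node dispatch counters: the main-loop callback reads the counter with rdpmc when cap_user_rdpmc is set, and falls back to read(2) on the fd for software events, exactly as read_current_perf_counter and enable_current_event show above.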