summary refs log tree commit diff stats
path: root/src/plugins/perfmon
diff options
context:
space:
mode:
author Dave Barach <dave@barachs.net> 2018-09-10 12:31:15 -0400
committer Damjan Marion <dmarion@me.com> 2018-10-22 12:02:04 +0000
commit 4d1a866aff6ceb03025990b6e60b42faf09ef486 (patch)
tree bec495932876d9649f26179b4c24b6938be43f38 /src/plugins/perfmon
parent 115a3ac59a16f9dcfee92eaecc79cd1fa3320e29 (diff)
X86_64 perf counter plugin
Change-Id: Ie5a00c15ee9536cc61afab57f6cadc1aa1972f3c Signed-off-by: Dave Barach <dave@barachs.net>
Diffstat (limited to 'src/plugins/perfmon')
-rw-r--r-- src/plugins/perfmon/CMakeLists.txt 38
-rw-r--r-- src/plugins/perfmon/parse_util.c 235
-rw-r--r-- src/plugins/perfmon/perfmon.c 615
-rw-r--r-- src/plugins/perfmon/perfmon.h 145
-rw-r--r-- src/plugins/perfmon/perfmon_periodic.c 433
5 files changed, 1466 insertions, 0 deletions
diff --git a/src/plugins/perfmon/CMakeLists.txt b/src/plugins/perfmon/CMakeLists.txt
new file mode 100644
index 00000000000..30e1f2caf3e
--- /dev/null
+++ b/src/plugins/perfmon/CMakeLists.txt
@@ -0,0 +1,38 @@
+# Copyright (c) 2018 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Build the perfmon plugin from its three C source files.
+add_vpp_plugin(perfmon
+ SOURCES
+ perfmon.c
+ perfmon_periodic.c
+ parse_util.c
+)
+
+# Reenable / extend when .json file license issue fixed
+#
+# set (PERFMON_JSON_FILES
+# haswell_core_v28.json
+# haswellx_core_v20.json
+# ivybridge_core_v21.json
+# ivytown_core_v20.json
+# jaketown_core_v20.json
+# sandybridge_core_v16.json
+# skylake_core_v42.json
+# skylakex_core_v1.12.json
+# )
+
+# install(
+# FILES ${PERFMON_JSON_FILES}
+# DESTINATION share/vpp/plugins/perfmon
+# COMPONENT vpp-dev
+# )
diff --git a/src/plugins/perfmon/parse_util.c b/src/plugins/perfmon/parse_util.c
new file mode 100644
index 00000000000..436acaa6de0
--- /dev/null
+++ b/src/plugins/perfmon/parse_util.c
@@ -0,0 +1,235 @@
+/*
+ * parse_util.c - halfhearted json parser
+ *
+ * Copyright (c) 2018 Cisco Systems and/or its affiliates
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <vppinfra/unix.h>
+
+/* Scanner states used by perfmon_parse_table() */
+typedef enum
+{
+ /* Between name/value pairs: looking for '{', '}' or ',' */
+ STATE_START,
+ /* Reading the double-quoted name of a name/value pair */
+ STATE_READ_NAME,
+ /* Reading the ':' and the double-quoted value that follow a name */
+ STATE_READ_VALUE,
+} parse_state_t;
+
+/*
+ * Build a new vector containing the bytes of vector s with every
+ * ASCII 'A'..'Z' mapped to 'a'..'z'. All other bytes are copied as-is
+ * and s itself is left untouched. Returns 0 when s has no elements.
+ */
+static u8 *
+downcase (u8 * s)
+{
+  u8 *lowered = 0;
+  int pos = 0;
+
+  while (pos < vec_len (s))
+    {
+      u8 ch = s[pos++];
+
+      if (ch >= 'A' && ch <= 'Z')
+        ch += 'a' - 'A';
+      vec_add1 (lowered, ch);
+    }
+  return lowered;
+}
+
+/* Free a vector of name/value pair records along with the strings they own */
+static void
+perfmon_free_nvps (name_value_pair_t ** nvps)
+{
+  int i;
+
+  for (i = 0; i < vec_len (nvps); i++)
+    {
+      vec_free (nvps[i]->name);
+      vec_free (nvps[i]->value);
+      vec_free (nvps[i]);
+    }
+  vec_free (nvps);
+}
+
+/*
+ * Read the event table file "<path>/<table_name>" and turn it into a
+ * string-keyed hash table.
+ *
+ * The scanner is deliberately simple: inside each '{' ... '}' group it
+ * collects "name" : "value" pairs, where both name and value must be
+ * double-quoted strings. When a group closes, the value of the pair whose
+ * name starts with "EventName" is downcased and used as the hash key; the
+ * hash value is the vector of name_value_pair_t pointers for that group.
+ * Groups without an "EventName" pair are discarded.
+ *
+ * Returns:
+ *  - the (possibly empty) hash table on success, or when the file cannot
+ *    be read (in which case the error is logged and reported);
+ *  - 0 when the input ends in the middle of a name/value pair.
+ */
+uword *
+perfmon_parse_table (perfmon_main_t * pm, char *path, char *table_name)
+{
+  u8 *file_data = 0;            /* start of the file buffer, kept for vec_free */
+  u8 *cp;                       /* scan position inside file_data */
+  u8 *event_name;
+  int state = STATE_START;
+  uword *ht;
+  name_value_pair_t *nvp = 0;   /* pair being built, not yet in nvps */
+  name_value_pair_t **nvps = 0; /* pairs of the event being built */
+  u8 *v;
+  int i;
+  u8 *json_filename;
+  clib_error_t *error;
+  u8 *key;
+  name_value_pair_t **value;
+
+  /* Create the name/value hash table in any case... */
+  ht = hash_create_string (0, sizeof (uword));
+
+  json_filename = format (0, "%s/%s%c", path, table_name, 0);
+
+  vlib_log_debug (pm->log_class, "Try to read perfmon events from %s",
+                  json_filename);
+
+  error = unix_proc_file_contents ((char *) json_filename, &file_data);
+
+  if (error)
+    {
+      vlib_log_err (pm->log_class,
+                    "Failed to read CPU-specific counter table");
+      vlib_log_err (pm->log_class,
+                    "Download from https://download.01.org/perfmon, "
+                    "and install as %s", json_filename);
+      vec_free (json_filename);
+      clib_error_report (error);
+      return ht;
+    }
+  vlib_log_debug (pm->log_class, "Read OK, parse the event table...");
+  vec_free (json_filename);
+
+  /*
+   * The scanner below stops on a NUL byte. unix_proc_file_contents() is
+   * not visible here, so terminate the buffer explicitly rather than
+   * depend on how it sizes the vector.
+   */
+  vec_add1 (file_data, 0);
+  cp = file_data;
+
+  while (*cp)
+    {
+      switch (state)
+        {
+        case STATE_START:
+          while (*cp && *cp != '{' && *cp != '}' && *cp != ',')
+            cp++;
+          if (*cp == 0)
+            goto done;
+
+          if (*cp == '{')       /* start of a new event */
+            {
+              cp++;
+              state = STATE_READ_NAME;
+            }
+          else if (*cp == '}')  /* end of event */
+            {
+              /* Look for the "EventName" nvp */
+              event_name = 0;
+              for (i = 0; i < vec_len (nvps); i++)
+                {
+                  if (!strncmp ((char *) nvps[i]->name, "EventName", 9))
+                    {
+                      event_name = nvps[i]->value;
+                      break;
+                    }
+                }
+
+              if (event_name)
+                {
+                  /* The table takes ownership of the key and of nvps */
+                  event_name = downcase (event_name);
+                  hash_set_mem (ht, event_name, nvps);
+                }
+              else
+                {
+                  /* no name? drop the whole event */
+                  perfmon_free_nvps (nvps);
+                }
+              nvps = 0;
+              cp++;
+            }
+          else                  /* ',' punctuation */
+            {
+              cp++;
+            }
+          break;
+
+        case STATE_READ_NAME:
+          /* Skip to the opening quote of the name */
+          while (*cp && *cp != '"')
+            cp++;
+          if (*cp == 0)
+            goto parse_error;
+          cp++;
+
+          /* Copy up to the closing quote */
+          v = 0;
+          while (*cp && *cp != '"')
+            {
+              vec_add1 (v, *cp);
+              cp++;
+            }
+          if (*cp == 0)
+            {
+              vec_free (v);
+              goto parse_error;
+            }
+          cp++;
+          vec_add1 (v, 0);
+
+          vec_validate (nvp, 0);
+          nvp->name = v;
+          state = STATE_READ_VALUE;
+          break;
+
+        case STATE_READ_VALUE:
+          while (*cp && *cp != ':')
+            cp++;
+          if (*cp == 0)
+            goto parse_error;
+
+          /* Skip to the opening quote of the value */
+          while (*cp && *cp != '"')
+            cp++;
+          if (*cp == 0)
+            goto parse_error;
+          cp++;
+
+          v = 0;
+          while (*cp && *cp != '"')
+            {
+              vec_add1 (v, *cp);
+              cp++;
+            }
+          if (*cp == 0)
+            {
+              vec_free (v);
+              goto parse_error;
+            }
+          vec_add1 (v, 0);
+          nvp->value = v;
+
+          /* The pair is complete: hand it over to nvps */
+          vec_add1 (nvps, nvp);
+          nvp = 0;
+
+          /* cp still points at the closing quote; find what follows */
+          while (*cp && *cp != ',' && *cp != '}')
+            cp++;
+          if (*cp == 0)
+            goto parse_error;
+          else if (*cp == '}')
+            state = STATE_START;        /* STATE_START consumes the '}' */
+          else
+            {
+              cp++;
+              state = STATE_READ_NAME;
+            }
+          break;
+        }
+    }
+
+done:
+  /* Input ended: release whatever was still being assembled */
+  if (nvp)
+    {
+      vec_free (nvp->name);
+      vec_free (nvp->value);
+      vec_free (nvp);
+    }
+  perfmon_free_nvps (nvps);
+  vec_free (file_data);
+  return (ht);
+
+parse_error:
+  clib_warning ("parse fail");
+  if (nvp)
+    {
+      vec_free (nvp->name);
+      vec_free (nvp->value);
+      vec_free (nvp);
+    }
+  perfmon_free_nvps (nvps);
+
+  /* Release the events that had already been stored in the table */
+  /* *INDENT-OFF* */
+  hash_foreach_mem (key, value, ht,
+  ({
+    vec_free (key);
+    perfmon_free_nvps (value);
+  }));
+  /* *INDENT-ON* */
+  hash_free (ht);
+  vec_free (file_data);
+  return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c
new file mode 100644
index 00000000000..61dbe5cd918
--- /dev/null
+++ b/src/plugins/perfmon/perfmon.c
@@ -0,0 +1,615 @@
+/*
+ * perfmon.c - performance monitor plug-in
+ *
+ * Copyright (c) 2018 Cisco Systems and/or its affiliates
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <perfmon/perfmon.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vpp/app/version.h>
+#include <linux/limits.h>
+
+/* The single instance of the plugin's state */
+perfmon_main_t perfmon_main;
+
+/*
+ * Directory searched for the CPU-specific event tables. This default is
+ * replaced by set_perfmon_json_path() when the executable's own path
+ * can be determined.
+ */
+static char *perfmon_json_path = "/usr/share/vpp/plugins/perfmon";
+
+/*
+ * Point perfmon_json_path at "<prefix>/share/vpp/plugins/perfmon", where
+ * <prefix> is the path of the running executable with its last two
+ * components removed. If the executable path cannot be read, or has fewer
+ * than two '/' separators, perfmon_json_path is left unchanged.
+ */
+static void
+set_perfmon_json_path ()
+{
+  char exe_path[PATH_MAX];
+  char *slash;
+  u8 *json_dir;
+  int n_bytes;
+  int n_stripped;
+
+  /* find executable path */
+  n_bytes = readlink ("/proc/self/exe", exe_path, PATH_MAX - 1);
+  if (n_bytes == -1)
+    return;
+
+  /* readlink doesn't provide null termination */
+  exe_path[n_bytes] = 0;
+
+  /* strip two trailing components: the filename, then bin/ */
+  for (n_stripped = 0; n_stripped < 2; n_stripped++)
+    {
+      slash = strrchr (exe_path, '/');
+      if (slash == 0)
+        return;
+      *slash = 0;
+    }
+
+  /* cons up the .json file path, as a NUL-terminated C string */
+  json_dir = format (0, "%s/share/vpp/plugins/perfmon", exe_path);
+  vec_add1 (json_dir, 0);
+  perfmon_json_path = (char *) json_dir;
+}
+
+/*
+ * _(cpuid value, event table file name)
+ * perfmon_init() compares the value returned by get_cpuid() against each
+ * entry and parses the table file of the first match.
+ */
+#define foreach_cpuid_table \
+_(0x0306C3, haswell_core_v28.json) \
+_(0x0306F2, haswell_core_v28.json) \
+_(0x0406E3, skylake_core_v42.json) \
+_(0x0506E3, skylake_core_v42.json)
+
+/*
+ * On x86_64, execute the cpuid instruction with eax = 1 and return the
+ * resulting eax value. On any other architecture, return 0.
+ */
+static inline u32
+get_cpuid (void)
+{
+#if defined(__x86_64__)
+ u32 cpuid;
+ /* eax, edx, ecx and rbx are listed as clobbered by the sequence */
+ asm volatile ("mov $1, %%eax; cpuid; mov %%eax, %0":"=r" (cpuid)::"%eax",
+ "%edx", "%ecx", "%rbx");
+ return cpuid;
+#else
+ return 0;
+#endif
+}
+
+/*
+ * Plugin init function: set up perfmon_main, size the per-thread vectors
+ * from vlib_mains, and load the event table matching this CPU's cpuid
+ * value, if foreach_cpuid_table lists one. Always returns 0 (no error);
+ * a missing table is only logged.
+ */
+static clib_error_t *
+perfmon_init (vlib_main_t * vm)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  uword *event_table = 0;
+  int have_table = 0;
+  u32 cpuid;
+
+  pm->vlib_main = vm;
+  pm->vnet_main = vnet_get_main ();
+  pm->capture_by_thread_and_node_name =
+    hash_create_string (0, sizeof (uword));
+  pm->log_class = vlib_log_register_class ("perfmon", 0);
+
+  /* Default data collection interval */
+  pm->timeout_interval = 3.0;
+
+  /* One slot per vlib_mains entry */
+  vec_validate (pm->pm_fds, vec_len (vlib_mains) - 1);
+  vec_validate (pm->perf_event_pages, vec_len (vlib_mains) - 1);
+  vec_validate (pm->rdpmc_indices, vec_len (vlib_mains) - 1);
+  pm->page_size = getpagesize ();
+
+  pm->perfmon_table = 0;
+
+  set_perfmon_json_path ();
+
+  cpuid = get_cpuid ();
+
+  /* Empty leading "if" so that every table entry can expand to "else if" */
+  if (0)
+    {
+    }
+#define _(id,json_file)                                              \
+  else if (cpuid == id)                                              \
+    {                                                                \
+      vlib_log_debug (pm->log_class, "Found table %s", #json_file); \
+      event_table =                                                  \
+        perfmon_parse_table (pm, perfmon_json_path, #json_file);    \
+      have_table = 1;                                                \
+    }
+  foreach_cpuid_table;
+#undef _
+
+  pm->perfmon_table = event_table;
+
+  if (!have_table)
+    vlib_log_err (pm->log_class, "No table for cpuid %x", cpuid);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (perfmon_init);
+
+/* *INDENT-OFF* */
+/* Plugin registration; the plugin is disabled by default unless built for x86_64 */
+VLIB_PLUGIN_REGISTER () =
+{
+ .version = VPP_BUILD_VER,
+ .description = "Performance monitor plugin",
+#if !defined(__x86_64__)
+ .default_disabled = 1,
+#endif
+};
+/* *INDENT-ON* */
+
+/*
+ * Convert a NUL-terminated hexadecimal string to an integer.
+ *
+ * Digits '0'-'9', 'a'-'f' and 'A'-'F' are accumulated four bits at a
+ * time. The letters 'x' and 'X' are skipped wherever they appear, which
+ * is what lets a "0x" prefix through. Conversion stops at the first
+ * other character, and whatever has been accumulated so far is returned
+ * (0 if nothing was).
+ */
+static uword
+atox (u8 * s)
+{
+  uword rv = 0;
+
+  while (*s)
+    {
+      if (*s >= '0' && *s <= '9')
+        rv = (rv << 4) | (*s - '0');
+      else if (*s >= 'a' && *s <= 'f')
+        rv = (rv << 4) | (*s - 'a' + 10);
+      else if (*s >= 'A' && *s <= 'F')
+        rv = (rv << 4) | (*s - 'A' + 10);
+      else if (*s == 'x' || *s == 'X')
+        ;
+      else
+        break;
+      s++;
+    }
+  return rv;
+}
+
+/*
+ * unformat helper: parse one event name and look it up in the CPU-specific
+ * event table.
+ *
+ * Variable arguments: perfmon_main_t *, then the perfmon_event_config_t *
+ * to fill in. On success the event is set up as a PERF_TYPE_RAW event
+ * whose config is EventCode | (UMask << 8), its name points at the table's
+ * key, and 1 is returned. Returns 0 when there is no table, no token, no
+ * matching table entry, or the entry lacks one of the two values.
+ */
+static uword
+unformat_processor_event (unformat_input_t * input, va_list * args)
+{
+  perfmon_main_t *pm = va_arg (*args, perfmon_main_t *);
+  perfmon_event_config_t *ep = va_arg (*args, perfmon_event_config_t *);
+  u8 *event_name = 0;
+  name_value_pair_t **pairs;
+  hash_pair_t *entry;
+  u32 raw_config = 0;
+  int n_found = 0;
+  int idx;
+
+  if (pm->perfmon_table == 0)
+    return 0;
+
+  if (!unformat (input, "%s", &event_name))
+    return 0;
+
+  entry = hash_get_pair_mem (pm->perfmon_table, event_name);
+  vec_free (event_name);
+
+  if (entry == 0)
+    return 0;
+
+  pairs = (name_value_pair_t **) (entry->value[0]);
+
+  /* Stop scanning as soon as both the event code and the umask are known */
+  for (idx = 0; idx < vec_len (pairs) && n_found != 2; idx++)
+    {
+      name_value_pair_t *pair = pairs[idx];
+
+      if (strncmp ((char *) pair->name, "EventCode", 9) == 0)
+        {
+          raw_config |= atox (pair->value);
+          n_found++;
+        }
+      else if (strncmp ((char *) pair->name, "UMask", 5) == 0)
+        {
+          raw_config |= (atox (pair->value) << 8);
+          n_found++;
+        }
+    }
+
+  if (n_found != 2)
+    {
+      clib_warning ("BUG: only found %d values", n_found);
+      return 0;
+    }
+
+  ep->name = (char *) entry->key;
+  ep->pe_type = PERF_TYPE_RAW;
+  ep->pe_config = raw_config;
+  return 1;
+}
+
+/*
+ * "set pmc" CLI handler: build the list of events to collect, start the
+ * collection and wait for it to finish.
+ *
+ * Accepted tokens, in any order and number:
+ *   timeout <seconds>        per-event collection time
+ *   instructions-per-clock   synthetic: instructions, then cpu-cycles
+ *   branch-mispredict-rate   synthetic: branch-misses, then branches
+ *   <processor event name>   looked up in the CPU-specific table
+ *   <generic event name>     any name in foreach_perfmon_event
+ *
+ * Returns an error when a collection is already running, when no input
+ * or no event was given, or when a token is not recognized; 0 otherwise.
+ */
+static clib_error_t *
+set_pmc_command_fn (vlib_main_t * vm,
+                    unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  perfmon_event_config_t ec;
+  clib_error_t *error = 0;
+  u32 timeout_seconds = 0;
+  u32 deadman;
+  f64 total_wait;
+
+  /*
+   * The periodic process indexes events_to_collect while a collection is
+   * running, so the vector must not be reset underneath it.
+   */
+  if (pm->state == PERFMON_STATE_RUNNING)
+    return clib_error_return (0, "data collection in progress...");
+
+  vec_reset_length (pm->events_to_collect);
+  pm->ipc_event_index = ~0;
+  pm->mispredict_event_index = ~0;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return clib_error_return (0, "counter names required...");
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "timeout %u", &timeout_seconds))
+        pm->timeout_interval = (f64) timeout_seconds;
+      else if (unformat (line_input, "instructions-per-clock"))
+        {
+          /* Two adjacent events; format_capture divides the 1st by the 2nd */
+          ec.name = "instructions";
+          ec.pe_type = PERF_TYPE_HARDWARE;
+          ec.pe_config = PERF_COUNT_HW_INSTRUCTIONS;
+          pm->ipc_event_index = vec_len (pm->events_to_collect);
+          vec_add1 (pm->events_to_collect, ec);
+          ec.name = "cpu-cycles";
+          ec.pe_type = PERF_TYPE_HARDWARE;
+          ec.pe_config = PERF_COUNT_HW_CPU_CYCLES;
+          vec_add1 (pm->events_to_collect, ec);
+        }
+      else if (unformat (line_input, "branch-mispredict-rate"))
+        {
+          ec.name = "branch-misses";
+          ec.pe_type = PERF_TYPE_HARDWARE;
+          ec.pe_config = PERF_COUNT_HW_BRANCH_MISSES;
+          pm->mispredict_event_index = vec_len (pm->events_to_collect);
+          vec_add1 (pm->events_to_collect, ec);
+          ec.name = "branches";
+          ec.pe_type = PERF_TYPE_HARDWARE;
+          ec.pe_config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+          vec_add1 (pm->events_to_collect, ec);
+        }
+      else if (unformat (line_input, "%U", unformat_processor_event, pm, &ec))
+        {
+          vec_add1 (pm->events_to_collect, ec);
+        }
+#define _(type,event,str)                       \
+      else if (unformat (line_input, str))      \
+        {                                       \
+          ec.name = str;                        \
+          ec.pe_type = type;                    \
+          ec.pe_config = event;                 \
+          vec_add1 (pm->events_to_collect, ec); \
+        }
+      foreach_perfmon_event
+#undef _
+      else
+        {
+          error = clib_error_return (0, "unknown input '%U'",
+                                     format_unformat_error, line_input);
+          unformat_free (line_input);
+          return error;
+        }
+    }
+
+  /* Done with the line buffer obtained from unformat_line_input */
+  unformat_free (line_input);
+
+  if (vec_len (pm->events_to_collect) == 0)
+    return clib_error_return (0, "no events specified...");
+
+  /* Events are collected one after another, timeout_interval each */
+  total_wait = (f64) (vec_len (pm->events_to_collect)) * pm->timeout_interval;
+
+  vlib_cli_output (vm, "Start collection for %d events, wait %.2f seconds",
+                   vec_len (pm->events_to_collect), total_wait);
+
+  vlib_process_signal_event (pm->vlib_main, perfmon_periodic_node.index,
+                             PERFMON_START, 0);
+
+  /* Coarse-grained wait */
+  vlib_process_suspend (vm, total_wait);
+
+  deadman = 0;
+  /* Reasonable to guess that collection may not be quite done... */
+  while (pm->state == PERFMON_STATE_RUNNING)
+    {
+      vlib_process_suspend (vm, 10e-3);
+      if (deadman++ > 200)
+        {
+          /* Give up waiting; do not claim the collection completed */
+          vlib_cli_output (vm, "DEADMAN: collection still running...");
+          return 0;
+        }
+    }
+
+  vlib_cli_output (vm, "Data collection complete...");
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/*
+ * CLI registration for "set pmc". Besides counter names the handler also
+ * accepts "timeout <seconds>", "instructions-per-clock" and
+ * "branch-mispredict-rate".
+ */
+VLIB_CLI_COMMAND (set_pmc_command, static) =
+{
+ .path = "set pmc",
+ .short_help = "set pmc c1 [..., use \"show pmc events\"]",
+ .function = set_pmc_command_fn,
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/* Sort comparator: order captures by their thread_and_node_name string */
+static int
+capture_name_sort (void *a1, void *a2)
+{
+  char *left = (char *) ((perfmon_capture_t *) a1)->thread_and_node_name;
+  char *right = (char *) ((perfmon_capture_t *) a2)->thread_and_node_name;
+
+  return strcmp (left, right);
+}
+
+/*
+ * Append one synthetic row: label, numerator, denominator and their
+ * quotient (0.0 when the denominator is zero), followed by a newline.
+ */
+static u8 *
+format_synthetic_rate (u8 * s, u8 * row_name, char *label,
+                       u64 numerator, u64 denominator)
+{
+  f64 rate = 0.0;
+
+  if (denominator > 0)
+    rate = (f64) numerator / (f64) denominator;
+
+  return format (s, "%-40s%+20s%+16llu%+16llu%+16.2e\n",
+                 row_name, label, numerator, denominator, rate);
+}
+
+/*
+ * Format function for one capture.
+ *
+ * Variable arguments: perfmon_main_t *, perfmon_capture_t *, int verbose
+ * (read but unused). A null capture produces the column header. Otherwise
+ * one row is produced per counter; the capture's name appears on the first
+ * row only. A counter sitting at one of the synthetic base indices is
+ * preceded by an extra row relating it to the counter that follows it.
+ */
+static u8 *
+format_capture (u8 * s, va_list * args)
+{
+  perfmon_main_t *pm = va_arg (*args, perfmon_main_t *);
+  perfmon_capture_t *c = va_arg (*args, perfmon_capture_t *);
+  int verbose __attribute__ ((unused)) = va_arg (*args, int);
+  f64 counts_per_pkt;
+  int i;
+
+  if (c == 0)
+    return format (s, "%=40s%=20s%=16s%=16s%=16s",
+                   "Name", "Counter", "Count", "Pkts", "Counts/Pkt");
+
+  for (i = 0; i < vec_len (c->counter_names); i++)
+    {
+      u8 *row_name = (u8 *) "";
+
+      if (i == 0)
+        row_name = c->thread_and_node_name;
+      else
+        vec_add1 (s, '\n');
+
+      /* Deal with synthetic events right here */
+      if (i == pm->ipc_event_index)
+        {
+          ASSERT (i + 1 < vec_len (c->counter_names));
+          s = format_synthetic_rate (s, row_name, "instructions-per-clock",
+                                     c->counter_values[i],
+                                     c->counter_values[i + 1]);
+          row_name = (u8 *) "";
+        }
+
+      if (i == pm->mispredict_event_index)
+        {
+          ASSERT (i + 1 < vec_len (c->counter_names));
+          s = format_synthetic_rate (s, row_name, "branch-mispredict-rate",
+                                     c->counter_values[i],
+                                     c->counter_values[i + 1]);
+          row_name = (u8 *) "";
+        }
+
+      /* The regular row: counter value per vector counted for it */
+      counts_per_pkt = 0.0;
+      if (c->vectors_this_counter[i])
+        counts_per_pkt =
+          ((f64) c->counter_values[i]) / ((f64) c->vectors_this_counter[i]);
+
+      s = format (s, "%-40s%+20s%+16llu%+16llu%+16.2e",
+                  row_name, c->counter_names[i],
+                  c->counter_values[i],
+                  c->vectors_this_counter[i], counts_per_pkt);
+    }
+  return s;
+}
+
+/*
+ * Format function listing every entry of foreach_perfmon_event, one per
+ * line. Variable argument: int verbose; when non-zero the event's type
+ * and config numbers are appended to its name.
+ */
+static u8 *
+format_generic_events (u8 * s, va_list * args)
+{
+  int verbose = va_arg (*args, int);
+
+#define _(type,config,name)                                     \
+  if (verbose)                                                  \
+    s = format (s, "\n %s (%d, %d)", name, type, config);       \
+  else                                                          \
+    s = format (s, "\n %s", name);
+  foreach_perfmon_event;
+#undef _
+  return s;
+}
+
+/* One event-table entry, copied out of the hash table so it can be sorted */
+typedef struct
+{
+ /* hash key of the entry (the event name) */
+ u8 *name;
+ /* hash value of the entry: the event's name/value pairs */
+ name_value_pair_t **nvps;
+} sort_nvp_t;
+
+/* Sort comparator: order sort_nvp_t records by their name string */
+static int
+sort_nvps_by_name (void *a1, void *a2)
+{
+  char *left = (char *) ((sort_nvp_t *) a1)->name;
+  char *right = (char *) ((sort_nvp_t *) a2)->name;
+
+  return strcmp (left, right);
+}
+
+/*
+ * Format function listing the CPU-specific event table sorted by event
+ * name. Variable arguments: perfmon_main_t *, int verbose. When verbose
+ * is non-zero, every name/value pair of each event is listed under it.
+ */
+static u8 *
+format_processor_events (u8 * s, va_list * args)
+{
+  perfmon_main_t *pm = va_arg (*args, perfmon_main_t *);
+  int verbose = va_arg (*args, int);
+  sort_nvp_t *entries = 0;
+  sort_nvp_t *entry;
+  u8 *key;
+  name_value_pair_t **value;
+  int i, j;
+
+  /* Copy (key, value) of every table entry into a sortable vector */
+  /* *INDENT-OFF* */
+  hash_foreach_mem (key, value, pm->perfmon_table,
+  ({
+    vec_add2 (entries, entry, 1);
+    entry->name = key;
+    entry->nvps = value;
+  }));
+  /* *INDENT-ON* */
+
+  vec_sort_with_function (entries, sort_nvps_by_name);
+
+  for (i = 0; i < vec_len (entries); i++)
+    {
+      entry = entries + i;
+
+      if (verbose == 0)
+        {
+          s = format (s, "\n %s ", entry->name);
+          continue;
+        }
+
+      s = format (s, "\n %s:", entry->name);
+      for (j = 0; j < vec_len (entry->nvps); j++)
+        s = format (s, "\n %s = %s", entry->nvps[j]->name,
+                    entry->nvps[j]->value);
+    }
+
+  vec_free (entries);
+  return s;
+}
+
+
+/*
+ * "show pmc" CLI handler.
+ *
+ * With "events": list the generic, synthetic and (when a table was
+ * loaded) processor events. Otherwise: print the collected captures
+ * sorted by name, unless a collection is running or there is no data.
+ * "verbose" is passed on to the format functions. Parsing stops at the
+ * first unrecognized token. Always returns 0.
+ */
+static clib_error_t *
+show_pmc_command_fn (vlib_main_t * vm,
+                     unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  perfmon_capture_t *sorted = 0;
+  perfmon_capture_t *c;
+  int show_events = 0;
+  int verbose = 0;
+  int idx;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "events"))
+        show_events = 1;
+      else if (unformat (input, "verbose"))
+        verbose = 1;
+      else
+        break;
+    }
+
+  if (show_events)
+    {
+      vlib_cli_output (vm, "Generic Events %U",
+                       format_generic_events, verbose);
+      vlib_cli_output (vm, "Synthetic Events");
+      vlib_cli_output (vm, " instructions-per-clock");
+      vlib_cli_output (vm, " branch-mispredict-rate");
+      if (pm->perfmon_table)
+        vlib_cli_output (vm, "Processor Events %U",
+                         format_processor_events, pm, verbose);
+      return 0;
+    }
+
+  if (pm->state == PERFMON_STATE_RUNNING)
+    {
+      vlib_cli_output (vm, "Data collection in progress...");
+      return 0;
+    }
+
+  if (pool_elts (pm->capture_pool) == 0)
+    {
+      vlib_cli_output (vm, "No data...");
+      return 0;
+    }
+
+  /* Sort shallow copies so that the pool itself keeps its order */
+  /* *INDENT-OFF* */
+  pool_foreach (c, pm->capture_pool,
+  ({
+    vec_add1 (sorted, *c);
+  }));
+  /* *INDENT-ON* */
+
+  vec_sort_with_function (sorted, capture_name_sort);
+
+  vlib_cli_output (vm, "%U", format_capture, pm, 0 /* header */ ,
+                   0 /* verbose */ );
+
+  for (idx = 0; idx < vec_len (sorted); idx++)
+    vlib_cli_output (vm, "%U", format_capture, pm, &sorted[idx], verbose);
+
+  vec_free (sorted);
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/*
+ * CLI registration for "show pmc". The help text lists both keywords the
+ * handler parses: "events" (list the available events) and "verbose".
+ */
+VLIB_CLI_COMMAND (show_pmc_command, static) =
+{
+  .path = "show pmc",
+  .short_help = "show pmc [events] [verbose]",
+  .function = show_pmc_command_fn,
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/*
+ * "clear pmc" CLI handler: discard all collected captures.
+ *
+ * Does nothing except print a message while a collection is running.
+ * Otherwise frees every capture's vectors, the capture pool, the hash
+ * keys and the hash table, and creates a fresh empty hash table.
+ * Always returns 0.
+ */
+static clib_error_t *
+clear_pmc_command_fn (vlib_main_t * vm,
+                      unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  perfmon_capture_t *c;
+  u8 *key;
+  u32 *value;
+
+  if (pm->state == PERFMON_STATE_RUNNING)
+    {
+      vlib_cli_output (vm, "Performance monitor is still running...");
+      return 0;
+    }
+
+  /*
+   * Free the per-capture vectors before the pool goes away. The entries
+   * of counter_names point at event names owned elsewhere, so only the
+   * vector itself is freed. thread_and_node_name is the hash key and is
+   * freed with the hash table below.
+   */
+  /* *INDENT-OFF* */
+  pool_foreach (c, pm->capture_pool,
+  ({
+    vec_free (c->counter_names);
+    vec_free (c->counter_values);
+    vec_free (c->vectors_this_counter);
+  }));
+  /* *INDENT-ON* */
+
+  pool_free (pm->capture_pool);
+
+  /* *INDENT-OFF* */
+  hash_foreach_mem (key, value, pm->capture_by_thread_and_node_name,
+  ({
+    vec_free (key);
+  }));
+  /* *INDENT-ON* */
+  hash_free (pm->capture_by_thread_and_node_name);
+  pm->capture_by_thread_and_node_name =
+    hash_create_string (0, sizeof (uword));
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/* CLI registration for "clear pmc", handled by clear_pmc_command_fn() */
+VLIB_CLI_COMMAND (clear_pmc_command, static) =
+{
+ .path = "clear pmc",
+ .short_help = "clear the performance monitor counters",
+ .function = clear_pmc_command_fn,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h
new file mode 100644
index 00000000000..47ee471d5fc
--- /dev/null
+++ b/src/plugins/perfmon/perfmon.h
@@ -0,0 +1,145 @@
+/*
+ * perfmon.h - performance monitor
+ *
+ * Copyright (c) 2018 Cisco Systems and/or its affiliates
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_perfmon_h__
+#define __included_perfmon_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/log.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+
+#include <linux/perf_event.h>
+
+/*
+ * Generic events: _(perf event type, perf event config, CLI name).
+ * Expanded by the "set pmc" parser and by the "show pmc events" listing.
+ */
+#define foreach_perfmon_event \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, "cpu-cycles") \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, "instructions") \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, \
+ "cache-references") \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, "cache-misses") \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branches") \
+ _(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses") \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES, "bus-cycles") \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, \
+ "stall-frontend") \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, \
+ "stall-backend") \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, "ref-cpu-cycles") \
+_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, "page-faults") \
+_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, "context-switches") \
+_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, "cpu-migrations") \
+_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, "minor-pagefaults") \
+_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, "major-pagefaults") \
+_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, "emulation-faults")
+
+/* One event to collect */
+typedef struct
+{
+ /* event name; stored by pointer, not copied */
+ char *name;
+ /* copied into perf_event_attr.type when the event is opened */
+ int pe_type;
+ /* copied into perf_event_attr.config when the event is opened */
+ int pe_config;
+} perfmon_event_config_t;
+
+/* Collection state, stored in perfmon_main_t.state */
+typedef enum
+{
+ /* no collection in progress */
+ PERFMON_STATE_OFF = 0,
+ /* a collection is in progress */
+ PERFMON_STATE_RUNNING,
+} perfmon_state_t;
+
+/*
+ * Collected data for one (thread, node) pair. The three vectors below are
+ * appended to together, one element per recorded event.
+ */
+typedef struct
+{
+ /* "t<thread index>-<node name>", NUL-terminated; also the hash key */
+ u8 *thread_and_node_name;
+ /* event names; the strings are not owned by the capture */
+ u8 **counter_names;
+ /* counter value recorded for each event */
+ u64 *counter_values;
+ /* vector count recorded alongside each counter value */
+ u64 *vectors_this_counter;
+} perfmon_capture_t;
+
+/*
+ * Pairing of a cpuid value with an event table.
+ * NOTE(review): only referenced by the perfmon_tables member of
+ * perfmon_main_t in the code shown here - confirm whether it is used.
+ */
+typedef struct
+{
+ u32 cpuid;
+ const char **table;
+} perfmon_cpuid_and_table_t;
+
+/*
+ * One name/value pair read from an event table file. Both members are
+ * NUL-terminated u8 vectors built by perfmon_parse_table().
+ */
+typedef struct
+{
+ u8 *name;
+ u8 *value;
+} name_value_pair_t;
+
+/* Plugin-wide state; the single instance is perfmon_main */
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+
+ /* on/off switch for the periodic function (a perfmon_state_t value) */
+ volatile u8 state;
+
+ /* capture pool, hash table */
+ /* the hash maps a capture's thread_and_node_name to its pool index */
+ perfmon_capture_t *capture_pool;
+ uword *capture_by_thread_and_node_name;
+
+ /* CPU-specific event tables, hash table of selected table (if any) */
+ /* perfmon_table maps a downcased event name to its name/value pairs */
+ perfmon_cpuid_and_table_t *perfmon_tables;
+ uword *perfmon_table;
+
+ /* vector of events to collect */
+ perfmon_event_config_t *events_to_collect;
+
+ /* Base indices of synthetic event tuples */
+ /* index into events_to_collect of each pair's first event, ~0 if unset */
+ u32 ipc_event_index;
+ u32 mispredict_event_index;
+
+ /* Length of time to capture a single event */
+ f64 timeout_interval;
+
+ /* Current event (index) being collected */
+ u32 current_event;
+ /* per thread: counter id handed to the main loop, 0 selects read() */
+ u32 *rdpmc_indices;
+ /* mmap base / size of (mapped) struct perf_event_mmap_page */
+ u8 **perf_event_pages;
+ u32 page_size;
+
+ /* Current perf_event file descriptors, per thread */
+ /* 0 is treated as "no event open" by disable_event() */
+ int *pm_fds;
+
+ /* Logging */
+ vlib_log_class_t log_class;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+ ethernet_main_t *ethernet_main;
+} perfmon_main_t;
+
+/* Defined in perfmon.c */
+extern perfmon_main_t perfmon_main;
+
+/* The process node registered in perfmon_periodic.c */
+extern vlib_node_registration_t perfmon_periodic_node;
+/*
+ * Parse the event table file <path>/<filename> into a hash table keyed by
+ * downcased event name; defined in parse_util.c.
+ */
+uword *perfmon_parse_table (perfmon_main_t * pm, char *path, char *filename);
+
+/* Periodic function events */
+/* signalled to perfmon_periodic_node by "set pmc" to start a collection */
+#define PERFMON_START 1
+
+#endif /* __included_perfmon_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/perfmon/perfmon_periodic.c b/src/plugins/perfmon/perfmon_periodic.c
new file mode 100644
index 00000000000..329a75d7305
--- /dev/null
+++ b/src/plugins/perfmon/perfmon_periodic.c
@@ -0,0 +1,433 @@
+/*
+ * perfmon_periodic.c - performance monitor periodic collection process
+ *
+ * Copyright (c) 2018 Cisco Systems and/or its affiliates
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/error.h>
+#include <perfmon/perfmon.h>
+#include <asm/unistd.h>
+#include <sys/ioctl.h>
+
+/*
+ * Thin wrapper around the perf_event_open system call; the arguments are
+ * passed through unchanged. The result goes through an int before being
+ * returned as a long.
+ */
+static long
+perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu,
+                 int group_fd, unsigned long flags)
+{
+  int rc = syscall (__NR_perf_event_open, hw_event, pid, cpu, group_fd,
+                    flags);
+
+  return rc;
+}
+
+/*
+ * Counter read callback installed by enable_current_event().
+ *
+ * A non-zero vm->perf_counter_id is read with clib_rdpmc(). Otherwise the
+ * value is read from vm->perf_counter_fd; if that read does not return a
+ * full u64, a warning is issued, the callback is uninstalled and 0 is
+ * returned.
+ */
+static u64
+read_current_perf_counter (vlib_main_t * vm)
+{
+  u64 sw_value;
+
+  if (vm->perf_counter_id)
+    return clib_rdpmc (vm->perf_counter_id);
+
+  if (read (vm->perf_counter_fd, &sw_value, sizeof (sw_value)) ==
+      sizeof (sw_value))
+    return sw_value;
+
+  clib_unix_warning ("counter read failed, disable collection...");
+  vm->vlib_node_runtime_perf_counter_cb = 0;
+  return 0ULL;
+}
+
+/*
+ * Zero the perfmon tick and vector counters of every node on every
+ * thread. The work is done between a worker thread barrier sync and
+ * release.
+ */
+static void
+clear_counters (perfmon_main_t * pm)
+{
+  vlib_main_t *vm = pm->vlib_main;
+  int thread, idx;
+
+  vlib_worker_thread_barrier_sync (vm);
+
+  for (thread = 0; thread < vec_len (vlib_mains); thread++)
+    {
+      vlib_main_t *this_vm = vlib_mains[thread];
+      vlib_node_main_t *node_main;
+
+      if (this_vm == 0)
+        continue;
+
+      node_main = &this_vm->node_main;
+
+      /* First sync the stats of all of this thread's nodes... */
+      for (idx = 0; idx < vec_len (node_main->nodes); idx++)
+        vlib_node_sync_stats (this_vm, node_main->nodes[idx]);
+
+      /* ...then clear the node perfmon counters */
+      for (idx = 0; idx < vec_len (node_main->nodes); idx++)
+        {
+          vlib_node_t *node = node_main->nodes[idx];
+
+          node->stats_total.perf_counter_ticks = 0;
+          node->stats_total.perf_counter_vectors = 0;
+          node->stats_last_clear.perf_counter_ticks = 0;
+          node->stats_last_clear.perf_counter_vectors = 0;
+        }
+    }
+
+  vlib_worker_thread_barrier_release (vm);
+}
+
+/*
+ * Open and enable the event pm->events_to_collect[pm->current_event] for
+ * the calling thread, and install read_current_perf_counter() as that
+ * thread's counter read callback.
+ *
+ * Non-software events get their perf_event_mmap_page mapped, to find out
+ * whether the counter can be read with rdpmc. If the event cannot be
+ * opened or mapped, a warning is issued and nothing is installed.
+ */
+static void
+enable_current_event (perfmon_main_t * pm)
+{
+  struct perf_event_attr pe;
+  int fd;
+  struct perf_event_mmap_page *p = 0;
+  perfmon_event_config_t *c;
+  vlib_main_t *vm = vlib_get_main ();
+  u32 my_thread_index = vm->thread_index;
+
+  c = vec_elt_at_index (pm->events_to_collect, pm->current_event);
+
+  memset (&pe, 0, sizeof (struct perf_event_attr));
+  pe.type = c->pe_type;
+  pe.size = sizeof (struct perf_event_attr);
+  pe.config = c->pe_config;
+  pe.disabled = 1;
+  pe.pinned = 1;
+  /*
+   * Note: excluding the kernel makes the
+   * (software) context-switch counter read 0...
+   */
+  if (pe.type != PERF_TYPE_SOFTWARE)
+    {
+      /* Exclude kernel and hypervisor */
+      pe.exclude_kernel = 1;
+      pe.exclude_hv = 1;
+    }
+
+  /* pid 0, cpu -1, no group, no flags */
+  fd = perf_event_open (&pe, 0, -1, -1, 0);
+  if (fd == -1)
+    {
+      clib_unix_warning ("event open: type %d config %d", c->pe_type,
+                         c->pe_config);
+      return;
+    }
+
+  if (pe.type != PERF_TYPE_SOFTWARE)
+    {
+      p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0);
+      if (p == MAP_FAILED)
+        {
+          clib_unix_warning ("mmap");
+          close (fd);
+          return;
+        }
+    }
+
+  if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0)
+    clib_unix_warning ("reset ioctl");
+
+  if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
+    clib_unix_warning ("enable ioctl");
+
+  /*
+   * Software event counters - and others not capable of being
+   * read via the "rdpmc" instruction - will be read
+   * by system calls.
+   *
+   * An index of 0 in the mmap page means that no hardware counter is
+   * currently assigned, so "index - 1" must not be used as an rdpmc
+   * counter number in that case either.
+   */
+  if (pe.type == PERF_TYPE_SOFTWARE || p->cap_user_rdpmc == 0
+      || p->index == 0)
+    pm->rdpmc_indices[my_thread_index] = 0;
+  else                          /* use rdpmc instrs */
+    pm->rdpmc_indices[my_thread_index] = p->index - 1;
+  pm->perf_event_pages[my_thread_index] = (void *) p;
+
+  pm->pm_fds[my_thread_index] = fd;
+
+  /* Enable the main loop counter snapshot mechanism */
+  vm->perf_counter_id = pm->rdpmc_indices[my_thread_index];
+  vm->perf_counter_fd = fd;
+  vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counter;
+}
+
+/*
+ * Stop collecting on the calling thread: uninstall the counter read
+ * callback, disable the event, unmap its page (if any) and close its
+ * file descriptor. Does nothing when the thread's fd slot is 0.
+ */
+static void
+disable_event (perfmon_main_t * pm)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  u32 thread = vm->thread_index;
+  int fd = pm->pm_fds[thread];
+
+  if (fd == 0)
+    return;
+
+  /* Stop main loop collection */
+  vm->vlib_node_runtime_perf_counter_cb = 0;
+
+  if (ioctl (fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
+    clib_unix_warning ("disable ioctl");
+
+  if (pm->perf_event_pages[thread]
+      && munmap (pm->perf_event_pages[thread], pm->page_size) < 0)
+    clib_unix_warning ("munmap");
+
+  (void) close (fd);
+  pm->pm_fds[thread] = 0;
+}
+
+/*
+ * Worker main loop callback: enable the current event on the calling
+ * thread, then clear the callback slot.
+ */
+static void
+worker_thread_start_event (vlib_main_t * vm)
+{
+  enable_current_event (&perfmon_main);
+  vm->worker_thread_main_loop_callback = 0;
+}
+
+/*
+ * Worker main loop callback: disable the event on the calling thread,
+ * then clear the callback slot.
+ */
+static void
+worker_thread_stop_event (vlib_main_t * vm)
+{
+  disable_event (&perfmon_main);
+  vm->worker_thread_main_loop_callback = 0;
+}
+
+/*
+ * Handle PERFMON_START: begin collecting the first event in
+ * pm->events_to_collect. With an empty event list the state is simply
+ * set to OFF. now and event_data are not used.
+ */
+static void
+start_event (perfmon_main_t * pm, f64 now, uword event_data)
+{
+  int thread;
+
+  pm->current_event = 0;
+
+  if (vec_len (pm->events_to_collect) == 0)
+    {
+      pm->state = PERFMON_STATE_OFF;
+      return;
+    }
+
+  pm->state = PERFMON_STATE_RUNNING;
+  clear_counters (pm);
+
+  /* Start collection on this thread */
+  enable_current_event (pm);
+
+  /* And also on worker threads, via their main loop callback slot */
+  for (thread = 1; thread < vec_len (vlib_mains); thread++)
+    {
+      if (vlib_mains[thread])
+        vlib_mains[thread]->worker_thread_main_loop_callback = (void *)
+          worker_thread_start_event;
+    }
+}
+
+/*
+ * Move the perfmon counters of every node on every thread into the
+ * capture pool, and zero them.
+ *
+ * Phase 1 (between barrier sync and release): sync each node's stats,
+ * copy each node, and clear the node's perfmon counters.
+ * Phase 2 (after the barrier is released): for each copied node with a
+ * non-zero tick count, find or create the capture named
+ * "t<thread>-<node name>" and append the current event's name, counter
+ * value and vector count to it. All node copies are freed.
+ */
+void
+scrape_and_clear_counters (perfmon_main_t * pm)
+{
+  int i, j;
+  vlib_main_t *vm = pm->vlib_main;
+  vlib_main_t *stat_vm;
+  vlib_node_main_t *nm;
+  vlib_node_t ***node_dups = 0;
+  vlib_node_t **nodes;
+  vlib_node_t *n;
+  perfmon_capture_t *c;
+  perfmon_event_config_t *current_event;
+  uword *p;
+  u8 *capture_name;
+  u8 *counter_name;
+  u64 counter_value;
+  u64 vectors_this_counter;
+
+  /* snapshot the nodes, including pm counters */
+  vlib_worker_thread_barrier_sync (vm);
+
+  for (j = 0; j < vec_len (vlib_mains); j++)
+    {
+      stat_vm = vlib_mains[j];
+      if (stat_vm == 0)
+        continue;
+
+      nm = &stat_vm->node_main;
+
+      for (i = 0; i < vec_len (nm->nodes); i++)
+        {
+          n = nm->nodes[i];
+          vlib_node_sync_stats (stat_vm, n);
+        }
+
+      nodes = 0;
+      vec_validate (nodes, vec_len (nm->nodes) - 1);
+
+      /*
+       * Store the copies at the thread's own index: the second loop
+       * looks them up by j, which differs from an append position as
+       * soon as one vlib_mains slot is empty.
+       */
+      vec_validate (node_dups, j);
+      node_dups[j] = nodes;
+
+      /* Snapshot and clear the per-node perfmon counters */
+      for (i = 0; i < vec_len (nm->nodes); i++)
+        {
+          n = nm->nodes[i];
+          nodes[i] = clib_mem_alloc (sizeof (*n));
+          clib_memcpy (nodes[i], n, sizeof (*n));
+          n->stats_total.perf_counter_ticks = 0;
+          n->stats_total.perf_counter_vectors = 0;
+          n->stats_last_clear.perf_counter_ticks = 0;
+          n->stats_last_clear.perf_counter_vectors = 0;
+        }
+    }
+
+  vlib_worker_thread_barrier_release (vm);
+
+  current_event = pm->events_to_collect + pm->current_event;
+
+  for (j = 0; j < vec_len (vlib_mains); j++)
+    {
+      stat_vm = vlib_mains[j];
+      if (stat_vm == 0)
+        continue;
+
+      nodes = node_dups[j];
+
+      for (i = 0; i < vec_len (nodes); i++)
+        {
+          n = nodes[i];
+
+          /* Nothing was counted for this node */
+          if (n->stats_total.perf_counter_ticks == 0)
+            {
+              clib_mem_free (n);
+              continue;
+            }
+
+          capture_name = format (0, "t%d-%v%c", j, n->name, 0);
+
+          p = hash_get_mem (pm->capture_by_thread_and_node_name,
+                            capture_name);
+
+          if (p == 0)
+            {
+              /* New capture: it and the hash now own capture_name */
+              pool_get (pm->capture_pool, c);
+              memset (c, 0, sizeof (*c));
+              c->thread_and_node_name = capture_name;
+              hash_set_mem (pm->capture_by_thread_and_node_name,
+                            capture_name, c - pm->capture_pool);
+            }
+          else
+            {
+              /* Existing capture: the lookup name is no longer needed */
+              c = pool_elt_at_index (pm->capture_pool, p[0]);
+              vec_free (capture_name);
+            }
+
+          /* Snapshot counters, etc. into the capture */
+          counter_name = (u8 *) current_event->name;
+          counter_value = n->stats_total.perf_counter_ticks -
+            n->stats_last_clear.perf_counter_ticks;
+          vectors_this_counter = n->stats_total.perf_counter_vectors -
+            n->stats_last_clear.perf_counter_vectors;
+
+          vec_add1 (c->counter_names, counter_name);
+          vec_add1 (c->counter_values, counter_value);
+          vec_add1 (c->vectors_this_counter, vectors_this_counter);
+          clib_mem_free (n);
+        }
+      vec_free (nodes);
+    }
+  vec_free (node_dups);
+}
+
+/*
+ * Store callback in the worker_thread_main_loop_callback slot of every
+ * non-null vlib_mains entry except index 0.
+ */
+static void
+set_worker_thread_callbacks (void *callback)
+{
+  int thread;
+
+  for (thread = 1; thread < vec_len (vlib_mains); thread++)
+    {
+      if (vlib_mains[thread])
+        vlib_mains[thread]->worker_thread_main_loop_callback = callback;
+    }
+}
+
+/*
+ * The collection interval of the current event has elapsed: stop the
+ * event on all threads, harvest the counters, then either move on to the
+ * next event or, after the last one, switch the state to OFF.
+ * now is not used.
+ */
+static void
+handle_timeout (perfmon_main_t * pm, f64 now)
+{
+  disable_event (pm);
+
+  /* And also on worker threads */
+  set_worker_thread_callbacks ((void *) worker_thread_stop_event);
+
+  /* Short delay to make sure workers have stopped collection */
+  if (vec_len (vlib_mains) > 1)
+    vlib_process_suspend (pm->vlib_main, 1e-3);
+
+  scrape_and_clear_counters (pm);
+
+  pm->current_event++;
+  if (pm->current_event >= vec_len (pm->events_to_collect))
+    {
+      /* That was the last event */
+      pm->current_event = 0;
+      pm->state = PERFMON_STATE_OFF;
+      return;
+    }
+
+  enable_current_event (pm);
+
+  /* And also on worker threads */
+  set_worker_thread_callbacks ((void *) worker_thread_start_event);
+}
+
+/*
+ * Body of the perfmon process node.
+ *
+ * While no collection is running, wait for an event. While one is
+ * running, wait for an event or for pm->timeout_interval seconds. A
+ * PERFMON_START event starts a collection; a timeout (event type ~0)
+ * advances it to the next event. Never returns.
+ */
+static uword
+perfmon_periodic_process (vlib_main_t * vm,
+                          vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  f64 now;
+  uword *event_data = 0;
+  uword event_type;
+  int i;
+
+  while (1)
+    {
+      if (pm->state == PERFMON_STATE_RUNNING)
+        vlib_process_wait_for_event_or_clock (vm, pm->timeout_interval);
+      else
+        vlib_process_wait_for_event (vm);
+
+      now = vlib_time_now (vm);
+
+      event_type = vlib_process_get_events (vm, (uword **) & event_data);
+
+      switch (event_type)
+        {
+        case PERFMON_START:
+          for (i = 0; i < vec_len (event_data); i++)
+            start_event (pm, now, event_data[i]);
+          break;
+
+          /* Handle timeout */
+        case ~0:
+          handle_timeout (pm, now);
+          break;
+
+        default:
+          /* event_type is a uword; convert it to match the %d conversion */
+          clib_warning ("Unexpected event %d", (int) event_type);
+          break;
+        }
+      vec_reset_length (event_data);
+    }
+  return 0;                     /* or not */
+}
+
+/* *INDENT-OFF* */
+/* Registers perfmon_periodic_process() as a process-type node */
+VLIB_REGISTER_NODE (perfmon_periodic_node) =
+{
+ .function = perfmon_periodic_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "perfmon-periodic-process",
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */