X86_64 perf counter plugin

Change-Id: Ie5a00c15ee9536cc61afab57f6cadc1aa1972f3c Signed-off-by: Dave Barach <dave@barachs.net>
author: Dave Barach <dave@barachs.net> 2018-09-10 12:31:15 -0400
committer: Damjan Marion <dmarion@me.com> 2018-10-22 12:02:04 +0000
commit: 4d1a866aff6ceb03025990b6e60b42faf09ef486 (patch)
tree: bec495932876d9649f26179b4c24b6938be43f38 /src
parent: 115a3ac59a16f9dcfee92eaecc79cd1fa3320e29 (diff)
12 files changed, 1627 insertions, 26 deletions
diff --git a/src/plugins/perfmon/CMakeLists.txt b/src/plugins/perfmon/CMakeLists.txt
new file mode 100644
index 00000000000..30e1f2caf3e
--- /dev/null
+++ b/src/plugins/perfmon/CMakeLists.txt
@@ -0,0 +1,38 @@
+# Copyright (c) 2018 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(perfmon
+  SOURCES
+  perfmon.c
+  perfmon_periodic.c
+  parse_util.c
+)
+
+# Re-enable / extend once the .json file license issue is fixed
+#
+# set (PERFMON_JSON_FILES
+#  haswell_core_v28.json
+#  haswellx_core_v20.json
+#  ivybridge_core_v21.json
+#  ivytown_core_v20.json
+#  jaketown_core_v20.json
+#  sandybridge_core_v16.json
+#  skylake_core_v42.json
+#  skylakex_core_v1.12.json
+# )
+
+# install(
+#  FILES ${PERFMON_JSON_FILES}
+#  DESTINATION share/vpp/plugins/perfmon
+#  COMPONENT vpp-dev
+# )
diff --git a/src/plugins/perfmon/parse_util.c b/src/plugins/perfmon/parse_util.c
new file mode 100644
index 00000000000..436acaa6de0
--- /dev/null
+++ b/src/plugins/perfmon/parse_util.c
@@ -0,0 +1,235 @@
+/*
+ * parse_util.c - halfhearted json parser
+ *
+ * Copyright (c) 2018 Cisco Systems and/or its affiliates
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <vppinfra/unix.h>
+/* States of the perfmon_parse_table() scanner */
+typedef enum
+{
+  STATE_START,			/* between events: scan for '{', '}' or ',' */
+  STATE_READ_NAME,		/* read the next double-quoted name */
+  STATE_READ_VALUE,		/* read the double-quoted value after ':' */
+} parse_state_t;
+/* Return a new vector copy of s with ASCII 'A'-'Z' mapped to lower case */
+static u8 *
+downcase (u8 * s)
+{
+  u8 *lowered = 0;
+  int pos = 0;
+
+  /* Every element of s is copied, including any terminator stored in it */
+  while (pos < vec_len (s))
+    {
+      u8 ch = s[pos++];
+      if (ch >= 'A' && ch <= 'Z')
+	ch += 'a' - 'A';
+      vec_add1 (lowered, ch);
+    }
+  return (lowered);
+}
+/* Parse <path>/<table_name> into a hash: lower-cased event name -> nvps */
+uword *
+perfmon_parse_table (perfmon_main_t * pm, char *path, char *table_name)
+{
+  u8 *cp;			/* cursor into the file text */
+  u8 *event_name;
+  int state = STATE_START;
+  uword *ht;			/* result hash */
+  name_value_pair_t *nvp = 0;	/* pair being built */
+  name_value_pair_t **nvps = 0;	/* pairs of the current event */
+  u8 *v;
+  int i;
+  u8 *json_filename;
+  clib_error_t *error;
+
+  /* Create the name/value hash table in any case... */
+  ht = hash_create_string (0, sizeof (uword));
+
+  json_filename = format (0, "%s/%s%c", path, table_name, 0);
+
+  vlib_log_debug (pm->log_class, "Try to read perfmon events from %s",
+		  json_filename);
+  /* NOTE(review): cp is only advanced below and never freed here; confirm */
+  error = unix_proc_file_contents ((char *) json_filename, &cp);
+
+  if (error)
+    {
+      vlib_log_err (pm->log_class,
+		    "Failed to read CPU-specific counter table");
+      vlib_log_err (pm->log_class,
+		    "Download from https://download.01.org/perfmon, "
+		    "and install as %s", json_filename);
+      vec_free (json_filename);
+      clib_error_report (error);
+      return ht;		/* read failure: the empty table is returned */
+    }
+  vlib_log_debug (pm->log_class, "Read OK, parse the event table...");
+  vec_free (json_filename);
+  /* Assumes the text is NUL-terminated (loops stop on *cp == 0); confirm */
+again:
+  while (*cp)
+    {
+      switch (state)
+	{
+	case STATE_START:
+	  while (*cp && *cp != '{' && *cp != '}' && *cp != ',')
+	    cp++;
+	  if (*cp == 0)
+	    goto done;
+
+	  /* Look for a new event */
+	  if (*cp == '{')
+	    {
+	      if (*cp == 0)	/* cannot be true here; hosts error: */
+		{
+		error:
+		  clib_warning ("parse fail");
+		  hash_free (ht);
+		  return 0;	/* parse failure */
+		}
+	      cp++;
+	      state = STATE_READ_NAME;
+	      goto again;
+	    }
+	  else if (*cp == '}')	/* end of event */
+	    {
+	      /* Look for the "EventName" nvp (9-character prefix match) */
+	      for (i = 0; i < vec_len (nvps); i++)
+		{
+		  nvp = nvps[i];
+		  if (!strncmp ((char *) nvp->name, "EventName", 9))
+		    {
+		      event_name = nvp->value;
+		      goto found;
+		    }
+		}
+	      /* no name? free each name and value, then the nvps vector */
+	      for (i = 0; i < vec_len (nvps); i++)
+		{
+		  vec_free (nvps[i]->name);
+		  vec_free (nvps[i]->value);
+		}
+	      vec_free (nvps);
+	      cp++;
+	      goto again;
+
+	    found:
+	      event_name = downcase (event_name);	/* hash key */
+	      hash_set_mem (ht, event_name, nvps);
+	      nvp = 0;
+	      nvps = 0;
+	      cp++;
+	      goto again;
+	    }
+	  else if (*cp == ',')	/* punctuation */
+	    {
+	      cp++;
+	      goto again;
+	    }
+	  /* Not reached: each STATE_START branch above ends in a goto */
+	case STATE_READ_NAME:
+	  vec_validate (nvp, 0);
+	  v = 0;
+	  while (*cp && *cp != '"')
+	    cp++;
+
+	  if (*cp == 0)
+	    {
+	      vec_free (nvp);
+	      goto error;
+	    }
+
+	  cp++;
+	  while (*cp && *cp != '"')
+	    {
+	      vec_add1 (v, *cp);
+	      cp++;
+	    }
+	  if (*cp == 0)
+	    {
+	      vec_free (v);
+	      goto error;
+	    }
+	  cp++;
+	  vec_add1 (v, 0);
+	  nvp->name = v;
+	  state = STATE_READ_VALUE;
+	  goto again;
+
+	case STATE_READ_VALUE:
+	  while (*cp && *cp != ':')
+	    cp++;
+	  if (*cp == 0)
+	    {
+	      vec_free (nvp->name);
+	      goto error;
+	    }
+	  while (*cp && *cp != '"')	/* values must be quoted strings */
+	    cp++;
+	  if (*cp == 0)
+	    {
+	      vec_free (nvp->name);
+	      goto error;
+	    }
+	  else
+	    cp++;
+	  v = 0;
+	  while (*cp && *cp != '"')
+	    {
+	      vec_add1 (v, *cp);
+	      cp++;
+	    }
+	  if (*cp == 0)
+	    {
+	      vec_free (nvp->name);
+	      vec_free (v);
+	      goto error;
+	    }
+	  vec_add1 (v, 0);
+	  nvp->value = v;
+	  vec_add1 (nvps, nvp);
+	  while (*cp && *cp != ',' && *cp != '}')
+	    cp++;
+	  if (*cp == 0)
+	    {
+	      vec_free (nvp->name);
+	      vec_free (nvp->value);
+	      goto error;
+	    }
+	  else if (*cp == '}')
+	    state = STATE_START;	/* the '}' itself is handled there */
+	  else
+	    {
+	      cp++;
+	      state = STATE_READ_NAME;
+	    }
+	  nvp = 0;
+	  goto again;
+	}
+    }
+
+done:
+  return (ht);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c
new file mode 100644
index 00000000000..61dbe5cd918
--- /dev/null
+++ b/src/plugins/perfmon/perfmon.c
@@ -0,0 +1,615 @@
+/*
+ * perfmon.c - performance monitor plug-in: init, event tables and CLI
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <perfmon/perfmon.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vpp/app/version.h>
+#include <linux/limits.h>
+
+perfmon_main_t perfmon_main;
+
+static char *perfmon_json_path = "/usr/share/vpp/plugins/perfmon";
+/* Point perfmon_json_path at <exe dir>/../share/vpp/plugins/perfmon */
+static void
+set_perfmon_json_path ()
+{
+  char exe_path[PATH_MAX], *slash;
+  u8 *json_dir;
+  int len, strips;
+
+  /* Locate the executable; on failure the built-in default path stays */
+  len = readlink ("/proc/self/exe", exe_path, PATH_MAX - 1);
+  if (len == -1)
+    return;
+
+  /* readlink doesn't provide null termination */
+  exe_path[len] = 0;
+
+  /* Strip two trailing components: the file name, then bin/ */
+  for (strips = 0; strips < 2; strips++)
+    {
+      slash = strrchr (exe_path, '/');
+      if (slash == 0)
+	return;
+      *slash = 0;
+    }
+
+  /* cons up the .json file path */
+  json_dir = format (0, "%s/share/vpp/plugins/perfmon", exe_path);
+  vec_add1 (json_dir, 0);
+  perfmon_json_path = (char *) json_dir;
+}
+
+#define foreach_cpuid_table                     \
+_(0x0306C3, haswell_core_v28.json)              \
+_(0x0306F2, haswell_core_v28.json)              \
+_(0x0406E3, skylake_core_v42.json)              \
+_(0x0506E3, skylake_core_v42.json)
+/* Return EAX from CPUID leaf 1 on x86_64 builds, 0 on anything else */
+static inline u32
+get_cpuid (void)
+{
+#if defined(__x86_64__)
+  u32 cpuid;			/* copy of EAX after the cpuid instruction */
+  asm volatile ("mov $1, %%eax; cpuid; mov %%eax, %0":"=r" (cpuid)::"%eax",
+		"%edx", "%ecx", "%rbx");
+  return cpuid;
+#else
+  return 0;			/* matches no foreach_cpuid_table id */
+#endif
+}
+/* Plugin init: set defaults, then load the event table for this CPU */
+static clib_error_t *
+perfmon_init (vlib_main_t * vm)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  clib_error_t *error = 0;
+  u32 cpuid;
+  uword *ht;
+  int found_a_table = 0;
+
+  pm->vlib_main = vm;
+  pm->vnet_main = vnet_get_main ();
+
+  pm->capture_by_thread_and_node_name =
+    hash_create_string (0, sizeof (uword));
+
+  pm->log_class = vlib_log_register_class ("perfmon", 0);
+
+  /* Default data collection interval */
+  pm->timeout_interval = 3.0;
+  vec_validate (pm->pm_fds, vec_len (vlib_mains) - 1);	/* per vlib_main */
+  vec_validate (pm->perf_event_pages, vec_len (vlib_mains) - 1);
+  vec_validate (pm->rdpmc_indices, vec_len (vlib_mains) - 1);
+  pm->page_size = getpagesize ();
+
+  ht = pm->perfmon_table = 0;
+  /* Prefer a table directory relative to the running executable */
+  set_perfmon_json_path ();
+
+  cpuid = get_cpuid ();
+  /* The empty if (0) anchors the else-if chain the macro expands into */
+  if (0)
+    {
+    }
+#define _(id,table)                                             \
+  else if (cpuid == id)                                         \
+    {                                                           \
+      vlib_log_debug (pm->log_class, "Found table %s", #table); \
+      ht = perfmon_parse_table (pm, perfmon_json_path, #table); \
+      found_a_table = 1;                                        \
+    }
+  foreach_cpuid_table;
+#undef _
+
+  pm->perfmon_table = ht;
+
+  if (found_a_table == 0)
+    vlib_log_err (pm->log_class, "No table for cpuid %x", cpuid);
+  /* A missing table is only logged: error is still 0 at this point */
+  return error;
+}
+
+VLIB_INIT_FUNCTION (perfmon_init);
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () =
+{
+  .version = VPP_BUILD_VER,
+  .description = "Performance monitor plugin",
+#if !defined(__x86_64__)
+  .default_disabled = 1,
+#endif
+};
+/* *INDENT-ON* */
+/* Parse a hex string; 'x' characters are skipped, other non-hex ends it */
+static uword
+atox (u8 * s)
+{
+  uword rv = 0;
+
+  while (*s)
+    {
+      if (*s >= '0' && *s <= '9')
+	rv = (rv << 4) | (*s - '0');
+      else if (*s >= 'a' && *s <= 'f')
+	rv = (rv << 4) | (*s - 'a' + 10);
+      else if (*s >= 'A' && *s <= 'F')	/* full upper-case hex range */
+	rv = (rv << 4) | (*s - 'A' + 10);
+      else if (*s == 'x')
+	;
+      else
+	break;
+      s++;
+    }
+  return rv;
+}
+/* unformat helper: look up a table event name, fill *ep; 1 if matched */
+static uword
+unformat_processor_event (unformat_input_t * input, va_list * args)
+{
+  perfmon_main_t *pm = va_arg (*args, perfmon_main_t *);
+  perfmon_event_config_t *ep = va_arg (*args, perfmon_event_config_t *);
+  u8 *s = 0;
+  name_value_pair_t **nvps, *nvp;
+  hash_pair_t *hp;
+  int i;
+  int set_values = 0;
+  u32 pe_config = 0;
+  /* No CPU-specific table loaded: nothing can match */
+  if (pm->perfmon_table == 0)
+    return 0;
+
+  if (!unformat (input, "%s", &s))
+    return 0;
+
+  hp = hash_get_pair_mem (pm->perfmon_table, s);
+  /* The typed name is only needed for the lookup */
+  vec_free (s);
+
+  if (hp == 0)
+    return 0;
+
+  nvps = (name_value_pair_t **) (hp->value[0]);
+  /* Build the raw config: EventCode | (UMask << 8) */
+  for (i = 0; i < vec_len (nvps); i++)
+    {
+      nvp = nvps[i];
+      if (!strncmp ((char *) nvp->name, "EventCode", 9))
+	{
+	  pe_config |= atox (nvp->value);
+	  set_values++;
+	}
+      else if (!strncmp ((char *) nvp->name, "UMask", 5))
+	{
+	  pe_config |= (atox (nvp->value) << 8);
+	  set_values++;
+	}
+      if (set_values == 2)
+	break;
+    }
+
+  if (set_values != 2)
+    {
+      clib_warning ("BUG: only found %d values", set_values);
+      return 0;
+    }
+  /* name aliases the table's hash key; it is not copied */
+  ep->name = (char *) hp->key;
+  ep->pe_type = PERF_TYPE_RAW;
+  ep->pe_config = pe_config;
+  return 1;
+}
+/* "set pmc": parse the event list, start collection, wait for it to end */
+static clib_error_t *
+set_pmc_command_fn (vlib_main_t * vm,
+		    unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  perfmon_event_config_t ec;
+  clib_error_t *error = 0;
+  u32 timeout_seconds, deadman = 0;
+  f64 wait_time;
+
+  vec_reset_length (pm->events_to_collect);
+  pm->ipc_event_index = pm->mispredict_event_index = ~0;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return clib_error_return (0, "counter names required...");
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "timeout %u", &timeout_seconds))
+	pm->timeout_interval = (f64) timeout_seconds;
+      else if (unformat (line_input, "instructions-per-clock"))
+	{
+	  ec.name = "instructions";
+	  ec.pe_type = PERF_TYPE_HARDWARE;
+	  ec.pe_config = PERF_COUNT_HW_INSTRUCTIONS;
+	  pm->ipc_event_index = vec_len (pm->events_to_collect);
+	  vec_add1 (pm->events_to_collect, ec);
+	  ec.name = "cpu-cycles";
+	  ec.pe_type = PERF_TYPE_HARDWARE;
+	  ec.pe_config = PERF_COUNT_HW_CPU_CYCLES;
+	  vec_add1 (pm->events_to_collect, ec);
+	}
+      else if (unformat (line_input, "branch-mispredict-rate"))
+	{
+	  ec.name = "branch-misses";
+	  ec.pe_type = PERF_TYPE_HARDWARE;
+	  ec.pe_config = PERF_COUNT_HW_BRANCH_MISSES;
+	  pm->mispredict_event_index = vec_len (pm->events_to_collect);
+	  vec_add1 (pm->events_to_collect, ec);
+	  ec.name = "branches";
+	  ec.pe_type = PERF_TYPE_HARDWARE;
+	  ec.pe_config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+	  vec_add1 (pm->events_to_collect, ec);
+	}
+      else if (unformat (line_input, "%U", unformat_processor_event, pm, &ec))
+	{
+	  vec_add1 (pm->events_to_collect, ec);
+	}
+#define _(type,event,str)                       \
+      else if (unformat (line_input, str))      \
+        {                                       \
+          ec.name = str;                        \
+          ec.pe_type = type;                    \
+          ec.pe_config = event;                 \
+          vec_add1 (pm->events_to_collect, ec); \
+        }
+      foreach_perfmon_event
+#undef _
+	else
+	{
+	  error = clib_error_return (0, "unknown input '%U'",
+				     format_unformat_error, line_input);
+	  break;		/* leave the loop so line_input is freed */
+	}
+    }
+  unformat_free (line_input);	/* release the line buffer on every path */
+  if (error)
+    return error;
+  if (vec_len (pm->events_to_collect) == 0)
+    return clib_error_return (0, "no events specified...");
+
+  wait_time = (f64) vec_len (pm->events_to_collect) * pm->timeout_interval;
+  vlib_cli_output (vm, "Start collection for %d events, wait %.2f seconds",
+		   vec_len (pm->events_to_collect), wait_time);
+
+  vlib_process_signal_event (pm->vlib_main, perfmon_periodic_node.index,
+			     PERFMON_START, 0);
+
+  /* Coarse-grained wait */
+  vlib_process_suspend (vm, wait_time);
+  /* Reasonable to guess that collection may not be quite done... */
+  while (pm->state == PERFMON_STATE_RUNNING)
+    {
+      vlib_process_suspend (vm, 10e-3);
+      if (deadman++ > 200)
+	{
+	  vlib_cli_output (vm, "DEADMAN: collection still running...");
+	  break;
+	}
+    }
+
+  vlib_cli_output (vm, "Data collection complete...");
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_pmc_command, static) =
+{
+  .path = "set pmc",
+  .short_help = "set pmc c1 [..., use \"show pmc events\"]",
+  .function = set_pmc_command_fn,
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+/* vec_sort_with_function() comparator for perfmon_capture_t elements */
+static int
+capture_name_sort (void *a1, void *a2)
+{
+  perfmon_capture_t *lhs = a1, *rhs = a2;
+  char *lhs_name = (char *) lhs->thread_and_node_name;
+  char *rhs_name = (char *) rhs->thread_and_node_name;
+  /* Plain strcmp ordering of the two name strings */
+  return strcmp (lhs_name, rhs_name);
+}
+/* Format one capture as a row per counter, or the header when c == 0 */
+static u8 *
+format_capture (u8 * s, va_list * args)
+{
+  perfmon_main_t *pm = va_arg (*args, perfmon_main_t *);
+  perfmon_capture_t *c = va_arg (*args, perfmon_capture_t *);
+  int verbose __attribute__ ((unused)) = va_arg (*args, int);
+  f64 ticks_per_pkt;
+  int i;
+  /* c == 0 requests the column-header line instead of data rows */
+  if (c == 0)
+    {
+      s = format (s, "%=40s%=20s%=16s%=16s%=16s",
+		  "Name", "Counter", "Count", "Pkts", "Counts/Pkt");
+      return s;
+    }
+  /* One row per counter; only the first row carries the capture name */
+  for (i = 0; i < vec_len (c->counter_names); i++)
+    {
+      u8 *name;
+
+      if (i == 0)
+	name = c->thread_and_node_name;
+      else
+	{
+	  vec_add1 (s, '\n');
+	  name = (u8 *) "";
+	}
+
+      /* Deal with synthetic events right here: ratio of counter i to i + 1 */
+      if (i == pm->ipc_event_index)
+	{
+	  f64 ipc_rate;
+	  ASSERT (i + 1 < vec_len (c->counter_names));
+
+	  if (c->counter_values[i + 1] > 0)
+	    ipc_rate = (f64) c->counter_values[i]
+	      / (f64) c->counter_values[i + 1];
+	  else
+	    ipc_rate = 0.0;
+
+	  s = format (s, "%-40s%+20s%+16llu%+16llu%+16.2e\n",
+		      name, "instructions-per-clock",
+		      c->counter_values[i],
+		      c->counter_values[i + 1], ipc_rate);
+	  name = (u8 *) "";
+	}
+
+      if (i == pm->mispredict_event_index)
+	{
+	  f64 mispredict_rate;
+	  ASSERT (i + 1 < vec_len (c->counter_names));
+
+	  if (c->counter_values[i + 1] > 0)
+	    mispredict_rate = (f64) c->counter_values[i]
+	      / (f64) c->counter_values[i + 1];
+	  else
+	    mispredict_rate = 0.0;
+
+	  s = format (s, "%-40s%+20s%+16llu%+16llu%+16.2e\n",
+		      name, "branch-mispredict-rate",
+		      c->counter_values[i],
+		      c->counter_values[i + 1], mispredict_rate);
+	  name = (u8 *) "";
+	}
+      /* Raw row for counter i: value divided by vectors_this_counter[i] */
+      if (c->vectors_this_counter[i])
+	ticks_per_pkt =
+	  ((f64) c->counter_values[i]) / ((f64) c->vectors_this_counter[i]);
+      else
+	ticks_per_pkt = 0.0;
+
+      s = format (s, "%-40s%+20s%+16llu%+16llu%+16.2e",
+		  name, c->counter_names[i],
+		  c->counter_values[i],
+		  c->vectors_this_counter[i], ticks_per_pkt);
+    }
+  return s;
+}
+/* Format the foreach_perfmon_event names, one per line */
+static u8 *
+format_generic_events (u8 * s, va_list * args)
+{
+  int verbose = va_arg (*args, int);
+  /* verbose != 0 also prints each event's (type, config) pair */
+#define _(type,config,name)                             \
+  if (verbose == 0)                                     \
+    s = format (s, "\n  %s", name);                     \
+  else                                                  \
+    s = format (s, "\n  %s (%d, %d)", name, type, config);
+  foreach_perfmon_event;
+#undef _
+  return s;
+}
+/* Sortable snapshot of one perfmon_table entry */
+typedef struct
+{
+  u8 *name;			/* the entry's hash key */
+  name_value_pair_t **nvps;	/* the entry's hash value */
+} sort_nvp_t;
+/* vec_sort_with_function() comparator: strcmp order of sort_nvp_t names */
+static int
+sort_nvps_by_name (void *a1, void *a2)
+{
+  char *lhs = (char *) ((sort_nvp_t *) a1)->name;
+  char *rhs = (char *) ((sort_nvp_t *) a2)->name;
+
+  return strcmp (lhs, rhs);
+}
+/* Format the CPU-specific event names, sorted; verbose adds their pairs */
+static u8 *
+format_processor_events (u8 * s, va_list * args)
+{
+  perfmon_main_t *pm = va_arg (*args, perfmon_main_t *);
+  int verbose = va_arg (*args, int);
+  int i, j;
+  sort_nvp_t *sort_nvps = 0;
+  sort_nvp_t *sn;
+  u8 *key;
+  name_value_pair_t **value;
+  /* Snapshot the hash into a vector so the output can be sorted */
+  /* *INDENT-OFF* */
+  hash_foreach_mem (key, value, pm->perfmon_table,
+  ({
+    vec_add2 (sort_nvps, sn, 1);
+    sn->name = key;
+    sn->nvps = value;
+  }));
+  /* NOTE(review): the INDENT-OFF above has no closing counterpart here */
+  vec_sort_with_function (sort_nvps, sort_nvps_by_name);
+
+  if (verbose == 0)
+    {
+      for (i = 0; i < vec_len (sort_nvps); i++)
+        s = format (s, "\n  %s ", sort_nvps[i].name);
+    }
+  else
+    {
+      for (i = 0; i < vec_len (sort_nvps); i++)
+        {
+          name_value_pair_t **nvps;
+          s = format (s, "\n  %s:", sort_nvps[i].name);
+
+          nvps = sort_nvps[i].nvps;
+
+          for (j = 0; j < vec_len (nvps); j++)
+            s = format (s, "\n    %s = %s", nvps[j]->name, nvps[j]->value);
+        }
+    }
+  vec_free (sort_nvps);
+  return s;
+}
+
+/* "show pmc": list the selectable events, or print the captured data */
+static clib_error_t *
+show_pmc_command_fn (vlib_main_t * vm,
+		     unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  int verbose = 0;
+  int events = 0;
+  int i;
+  perfmon_capture_t *c;
+  perfmon_capture_t *captures = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "events"))
+        events = 1;
+      else if (unformat (input, "verbose"))
+        verbose = 1;
+      else
+	break;
+    }
+  /* "events": list what can be collected instead of showing results */
+  if (events)
+    {
+      vlib_cli_output (vm, "Generic Events %U",
+                       format_generic_events, verbose);
+      vlib_cli_output (vm, "Synthetic Events");
+      vlib_cli_output (vm, "  instructions-per-clock");
+      vlib_cli_output (vm, "  branch-mispredict-rate");
+      if (pm->perfmon_table)
+        vlib_cli_output (vm, "Processor Events %U",
+                         format_processor_events, pm, verbose);
+      return 0;
+    }
+
+  if (pm->state == PERFMON_STATE_RUNNING)
+    {
+      vlib_cli_output (vm, "Data collection in progress...");
+      return 0;
+    }
+
+  if (pool_elts (pm->capture_pool) == 0)
+    {
+      vlib_cli_output (vm, "No data...");
+      return 0;
+    }
+  /* Sort a copy of the capture pool by name, then print each capture */
+  /* *INDENT-OFF* */
+  pool_foreach (c, pm->capture_pool,
+  ({
+    vec_add1 (captures, *c);
+  }));
+  /* *INDENT-ON* */
+
+  vec_sort_with_function (captures, capture_name_sort);
+
+  vlib_cli_output (vm, "%U", format_capture, pm, 0 /* header */ ,
+		   0 /* verbose */ );
+
+  for (i = 0; i < vec_len (captures); i++)
+    {
+      c = captures + i;
+
+      vlib_cli_output (vm, "%U", format_capture, pm, c, verbose);
+    }
+
+  vec_free (captures);
+
+  return 0;
+}
+/* CLI registration; the help text lists both keywords the handler parses */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_pmc_command, static) =
+{
+  .path = "show pmc",
+  .short_help = "show pmc [events] [verbose]",
+  .function = show_pmc_command_fn,
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+/* "clear pmc": drop all captured data unless a collection is running */
+static clib_error_t *
+clear_pmc_command_fn (vlib_main_t * vm,
+		      unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  u8 *key;
+  u32 *value;
+
+  if (pm->state == PERFMON_STATE_RUNNING)
+    {
+      vlib_cli_output (vm, "Performance monitor is still running...");
+      return 0;
+    }
+  /* NOTE(review): per-capture vectors are not freed here; confirm owner */
+  pool_free (pm->capture_pool);
+  /* Free each key string, then replace the hash with an empty one */
+  /* *INDENT-OFF* */
+  hash_foreach_mem (key, value, pm->capture_by_thread_and_node_name,
+  ({
+    vec_free (key);
+  }));
+  /* *INDENT-ON* */
+  hash_free (pm->capture_by_thread_and_node_name);
+  pm->capture_by_thread_and_node_name =
+    hash_create_string (0, sizeof (uword));
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_pmc_command, static) =
+{
+  .path = "clear pmc",
+  .short_help = "clear the performance monitor counters",
+  .function = clear_pmc_command_fn,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h
new file mode 100644
index 00000000000..47ee471d5fc
--- /dev/null
+++ b/src/plugins/perfmon/perfmon.h
@@ -0,0 +1,145 @@
+/*
+ * perfmon.h - performance monitor
+ *
+ * Copyright (c) 2018 Cisco Systems and/or its affiliates
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_perfmon_h__
+#define __included_perfmon_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/log.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+
+#include <linux/perf_event.h>
+
+#define foreach_perfmon_event                                           \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, "cpu-cycles")           \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, "instructions")       \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES,                   \
+  "cache-references")                                                   \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, "cache-misses")       \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branches")    \
+ _(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses")    \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES, "bus-cycles")           \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,            \
+  "stall-frontend")                                                     \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND,             \
+  "stall-backend")                                                      \
+_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, "ref-cpu-cycles")   \
+_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, "page-faults")         \
+_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, "context-switches") \
+_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, "cpu-migrations")   \
+_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, "minor-pagefaults") \
+_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, "major-pagefaults") \
+_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, "emulation-faults")
+/* One event to collect: a name plus a perf_event type / config pair */
+typedef struct
+{
+  char *name;			/* event name */
+  int pe_type;			/* a PERF_TYPE_* value */
+  int pe_config;		/* a PERF_COUNT_* value or a raw config */
+} perfmon_event_config_t;
+/* Collection state values held in perfmon_main_t.state */
+typedef enum
+{
+  PERFMON_STATE_OFF = 0,
+  PERFMON_STATE_RUNNING,
+} perfmon_state_t;
+/* One capture: a name plus three parallel per-counter vectors */
+typedef struct
+{
+  u8 *thread_and_node_name;
+  u8 **counter_names;		/* name of counter i */
+  u64 *counter_values;		/* value of counter i */
+  u64 *vectors_this_counter;	/* divisor used for the per-packet figure */
+} perfmon_capture_t;
+
+typedef struct
+{
+  u32 cpuid;
+  const char **table;
+} perfmon_cpuid_and_table_t;
+/* One "name": "value" string pair read from a JSON event table */
+typedef struct
+{
+  u8 *name;
+  u8 *value;
+} name_value_pair_t;
+/* Plugin-wide state; the single instance is the global perfmon_main */
+typedef struct
+{
+  /* API message ID base */
+  u16 msg_id_base;
+
+  /* on/off switch for the periodic function */
+  volatile u8 state;		/* a perfmon_state_t value */
+
+  /* capture pool, hash table */
+  perfmon_capture_t *capture_pool;
+  uword *capture_by_thread_and_node_name;
+
+  /* CPU-specific event tables, hash table of selected table (if any)  */
+  perfmon_cpuid_and_table_t *perfmon_tables;
+  uword *perfmon_table;		/* 0 when no table was loaded */
+
+  /* vector of events to collect */
+  perfmon_event_config_t *events_to_collect;
+
+  /* Base indices of synthetic event tuples */
+  u32 ipc_event_index;		/* ~0 when not requested */
+  u32 mispredict_event_index;	/* ~0 when not requested */
+
+  /* Length of time to capture a single event */
+  f64 timeout_interval;
+
+  /* Current event (index) being collected */
+  u32 current_event;
+  u32 *rdpmc_indices;
+  /* mmap base / size of (mapped) struct perf_event_mmap_page */
+  u8 **perf_event_pages;
+  u32 page_size;
+
+  /* Current perf_event file descriptors, per thread */
+  int *pm_fds;
+
+  /* Logging */
+  vlib_log_class_t log_class;
+
+  /* convenience */
+  vlib_main_t *vlib_main;
+  vnet_main_t *vnet_main;
+  ethernet_main_t *ethernet_main;
+} perfmon_main_t;
+
+extern perfmon_main_t perfmon_main;
+
+extern vlib_node_registration_t perfmon_periodic_node;
+uword *perfmon_parse_table (perfmon_main_t * pm, char *path, char *filename);
+
+/* Periodic function events */
+#define PERFMON_START 1
+
+#endif /* __included_perfmon_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/perfmon/perfmon_periodic.c b/src/plugins/perfmon/perfmon_periodic.c
new file mode 100644
index 00000000000..329a75d7305
--- /dev/null
+++ b/src/plugins/perfmon/perfmon_periodic.c
@@ -0,0 +1,433 @@
+/*
+ * perfmon_periodic.c - perfmon plug-in periodic process
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/error.h>
+#include <perfmon/perfmon.h>
+#include <asm/unistd.h>
+#include <sys/ioctl.h>
+
+/*
+ * Thin wrapper which issues the perf_event_open system call directly
+ * through syscall(). The result is narrowed to int before it is
+ * returned, exactly as a file descriptor / -1 error value would be.
+ */
+static long
+perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu,
+		 int group_fd, unsigned long flags)
+{
+  int rv = syscall (__NR_perf_event_open, hw_event, pid, cpu, group_fd,
+		    flags);
+
+  return rv;
+}
+
+/*
+ * Main-loop counter callback: return the current value of the calling
+ * thread's performance counter.
+ *
+ * A non-zero vm->perf_counter_id is handed to clib_rdpmc(); otherwise
+ * the value is read from vm->perf_counter_fd. If that read does not
+ * deliver a full u64, a warning is logged, the callback is unhooked so
+ * that collection stops, and 0 is returned.
+ */
+static u64
+read_current_perf_counter (vlib_main_t * vm)
+{
+  u64 value;
+
+  /* Counter readable directly from user space */
+  if (vm->perf_counter_id != 0)
+    return clib_rdpmc (vm->perf_counter_id);
+
+  /* Otherwise ask the kernel */
+  if (read (vm->perf_counter_fd, &value, sizeof (value)) == sizeof (value))
+    return value;
+
+  clib_unix_warning ("counter read failed, disable collection...");
+  vm->vlib_node_runtime_perf_counter_cb = 0;
+  return 0ULL;
+}
+
+/*
+ * Zero the perf counter statistics of every node on every thread.
+ *
+ * Runs with the worker threads held at the barrier. For each thread,
+ * all node runtime statistics are first synced into the node
+ * structures, then the perf_counter_ticks / perf_counter_vectors fields
+ * of both stats_total and stats_last_clear are reset.
+ */
+static void
+clear_counters (perfmon_main_t * pm)
+{
+  vlib_main_t *vm = pm->vlib_main;
+  int thread, ni;
+
+  vlib_worker_thread_barrier_sync (vm);
+
+  for (thread = 0; thread < vec_len (vlib_mains); thread++)
+    {
+      vlib_main_t *this_vm = vlib_mains[thread];
+      vlib_node_main_t *node_main;
+
+      if (this_vm == 0)
+	continue;
+
+      node_main = &this_vm->node_main;
+
+      /* First pass: fold the runtime counters into the nodes */
+      for (ni = 0; ni < vec_len (node_main->nodes); ni++)
+	vlib_node_sync_stats (this_vm, node_main->nodes[ni]);
+
+      /* Second pass: wipe the per-node perfmon counters */
+      for (ni = 0; ni < vec_len (node_main->nodes); ni++)
+	{
+	  vlib_node_t *node = node_main->nodes[ni];
+
+	  node->stats_total.perf_counter_ticks = 0;
+	  node->stats_total.perf_counter_vectors = 0;
+	  node->stats_last_clear.perf_counter_ticks = 0;
+	  node->stats_last_clear.perf_counter_vectors = 0;
+	}
+    }
+
+  vlib_worker_thread_barrier_release (vm);
+}
+
+/*
+ * Open and enable the perf event selected by pm->current_event on the
+ * calling thread, then hook up the main-loop counter callback.
+ *
+ * The event is opened disabled and pinned. Non-software events exclude
+ * kernel and hypervisor and get their perf_event_mmap_page mapped so
+ * that the rdpmc index can be discovered. If the open or the mmap
+ * fails, a warning is logged and the function returns without
+ * installing the callback. Failures of the reset / enable ioctls are
+ * only logged.
+ *
+ * On success the per-thread slots of pm->rdpmc_indices,
+ * pm->perf_event_pages and pm->pm_fds are filled in.
+ */
+static void
+enable_current_event (perfmon_main_t * pm)
+{
+  struct perf_event_attr pe;
+  int fd;
+  struct perf_event_mmap_page *p = 0;
+  perfmon_event_config_t *c;
+  vlib_main_t *vm = vlib_get_main ();
+  u32 my_thread_index = vm->thread_index;
+
+  c = vec_elt_at_index (pm->events_to_collect, pm->current_event);
+
+  memset (&pe, 0, sizeof (struct perf_event_attr));
+  pe.type = c->pe_type;
+  pe.size = sizeof (struct perf_event_attr);
+  pe.config = c->pe_config;
+  /* Created stopped; started explicitly by the enable ioctl below */
+  pe.disabled = 1;
+  pe.pinned = 1;
+  /*
+   * Note: excluding the kernel makes the
+   * (software) context-switch counter read 0...
+   */
+  if (pe.type != PERF_TYPE_SOFTWARE)
+    {
+      /* Exclude kernel and hypervisor */
+      pe.exclude_kernel = 1;
+      pe.exclude_hv = 1;
+    }
+
+  /* pid 0, cpu -1: see perf_event_open(2) for the target selection */
+  fd = perf_event_open (&pe, 0, -1, -1, 0);
+  if (fd == -1)
+    {
+      clib_unix_warning ("event open: type %d config %d", c->pe_type,
+			 c->pe_config);
+      return;
+    }
+
+  /* Only non-software events are mapped; p stays 0 for software events */
+  if (pe.type != PERF_TYPE_SOFTWARE)
+    {
+      p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0);
+      if (p == MAP_FAILED)
+	{
+	  clib_unix_warning ("mmap");
+	  close (fd);
+	  return;
+	}
+    }
+
+  if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0)
+    clib_unix_warning ("reset ioctl");
+
+  if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
+    clib_unix_warning ("enable ioctl");
+
+  /*
+   * Software event counters - and others not capable of being
+   * read via the "rdpmc" instruction - will be read
+   * by system calls.
+   *
+   * The short-circuit below keeps p (0 for software events) from being
+   * dereferenced.
+   *
+   * NOTE(review): a mapped page reporting index 1 yields rdpmc index 0,
+   * which read_current_perf_counter() treats like "no rdpmc" and reads
+   * through the file descriptor instead.
+   */
+  if (pe.type == PERF_TYPE_SOFTWARE || p->cap_user_rdpmc == 0)
+    pm->rdpmc_indices[my_thread_index] = 0;
+  else				/* use rdpmc instrs */
+    pm->rdpmc_indices[my_thread_index] = p->index - 1;
+  pm->perf_event_pages[my_thread_index] = (void *) p;
+
+  pm->pm_fds[my_thread_index] = fd;
+
+  /* Enable the main loop counter snapshot mechanism */
+  vm->perf_counter_id = pm->rdpmc_indices[my_thread_index];
+  vm->perf_counter_fd = fd;
+  /* Installed last, once the id and fd it relies on are in place */
+  vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counter;
+}
+
+/*
+ * Stop and tear down the perf event open on the calling thread.
+ *
+ * Does nothing when the thread has no event open (its pm_fds slot is
+ * 0). Otherwise the main-loop counter callback is unhooked, the event
+ * is disabled, its mmap'ed page (if any) is unmapped, the descriptor is
+ * closed, and the per-thread page and fd slots are cleared. ioctl /
+ * munmap failures are logged but do not stop the teardown.
+ */
+static void
+disable_event (perfmon_main_t * pm)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  u32 my_thread_index = vm->thread_index;
+
+  /* 0 doubles as the "no event open on this thread" marker */
+  if (pm->pm_fds[my_thread_index] == 0)
+    return;
+
+  /* Stop main loop collection */
+  vm->vlib_node_runtime_perf_counter_cb = 0;
+
+  if (ioctl (pm->pm_fds[my_thread_index], PERF_EVENT_IOC_DISABLE, 0) < 0)
+    clib_unix_warning ("disable ioctl");
+
+  if (pm->perf_event_pages[my_thread_index])
+    {
+      if (munmap (pm->perf_event_pages[my_thread_index], pm->page_size) < 0)
+	clib_unix_warning ("munmap");
+      /* Forget the page so no stale pointer is left in the table */
+      pm->perf_event_pages[my_thread_index] = 0;
+    }
+
+  (void) close (pm->pm_fds[my_thread_index]);
+  pm->pm_fds[my_thread_index] = 0;
+}
+
+/*
+ * One-shot worker main-loop callback: enable the current event on the
+ * calling worker thread, then unhook itself.
+ */
+static void
+worker_thread_start_event (vlib_main_t * vm)
+{
+  enable_current_event (&perfmon_main);
+
+  /* Run once only */
+  vm->worker_thread_main_loop_callback = 0;
+}
+
+/*
+ * One-shot worker main-loop callback: disable the event on the calling
+ * worker thread, then unhook itself.
+ */
+static void
+worker_thread_stop_event (vlib_main_t * vm)
+{
+  disable_event (&perfmon_main);
+
+  /* Run once only */
+  vm->worker_thread_main_loop_callback = 0;
+}
+
+/*
+ * Handle a PERFMON_START request: begin collection with the first
+ * configured event.
+ *
+ * With no events configured the state is set to PERFMON_STATE_OFF and
+ * nothing else happens. Otherwise the state becomes
+ * PERFMON_STATE_RUNNING, all perf counters are cleared, the event is
+ * enabled on the calling thread, and every other thread is asked to
+ * enable it through its main-loop callback.
+ * The now and event_data arguments are not used.
+ */
+static void
+start_event (perfmon_main_t * pm, f64 now, uword event_data)
+{
+  int thread_index;
+
+  pm->current_event = 0;
+
+  /* Nothing to collect: stay off */
+  if (vec_len (pm->events_to_collect) == 0)
+    {
+      pm->state = PERFMON_STATE_OFF;
+      return;
+    }
+
+  pm->state = PERFMON_STATE_RUNNING;
+  clear_counters (pm);
+
+  /* This thread starts right away... */
+  enable_current_event (pm);
+
+  /* ...the workers start from the top of their dispatch loops */
+  for (thread_index = 1; thread_index < vec_len (vlib_mains); thread_index++)
+    {
+      if (vlib_mains[thread_index] != 0)
+	vlib_mains[thread_index]->worker_thread_main_loop_callback =
+	  (void *) worker_thread_start_event;
+    }
+}
+
+/*
+ * Harvest the perf counter data gathered for the current event.
+ *
+ * With the worker threads held at the barrier, every node's runtime
+ * statistics are synced, each node is copied, and the live
+ * perf_counter_ticks / perf_counter_vectors fields are zeroed. After the
+ * barrier is released, each copied node with a non-zero tick count is
+ * recorded in the capture pool under the key
+ * "t<thread index>-<node name>": the current event's name, the tick
+ * delta and the vector delta are appended to that capture's vectors.
+ * Every node copy is freed before returning.
+ */
+void
+scrape_and_clear_counters (perfmon_main_t * pm)
+{
+  int i, j;
+  vlib_main_t *vm = pm->vlib_main;
+  vlib_main_t *stat_vm;
+  vlib_node_main_t *nm;
+  vlib_node_t ***node_dups = 0;
+  vlib_node_t **nodes;
+  vlib_node_t *n;
+  perfmon_capture_t *c;
+  perfmon_event_config_t *current_event;
+  uword *p;
+  u8 *counter_name;
+  u64 counter_value;
+  u64 vectors_this_counter;
+
+  /* snapshot the nodes, including pm counters */
+  vlib_worker_thread_barrier_sync (vm);
+
+  for (j = 0; j < vec_len (vlib_mains); j++)
+    {
+      stat_vm = vlib_mains[j];
+      if (stat_vm == 0)
+	{
+	  /*
+	   * Push an empty vector for an unused slot, so that
+	   * node_dups[j] always pairs with vlib_mains[j] below.
+	   */
+	  nodes = 0;
+	  vec_add1 (node_dups, nodes);
+	  continue;
+	}
+
+      nm = &stat_vm->node_main;
+
+      for (i = 0; i < vec_len (nm->nodes); i++)
+	{
+	  n = nm->nodes[i];
+	  vlib_node_sync_stats (stat_vm, n);
+	}
+
+      nodes = 0;
+      vec_validate (nodes, vec_len (nm->nodes) - 1);
+      vec_add1 (node_dups, nodes);
+
+      /* Snapshot and clear the per-node perfmon counters */
+      for (i = 0; i < vec_len (nm->nodes); i++)
+	{
+	  n = nm->nodes[i];
+	  nodes[i] = clib_mem_alloc (sizeof (*n));
+	  clib_memcpy (nodes[i], n, sizeof (*n));
+	  n->stats_total.perf_counter_ticks = 0;
+	  n->stats_total.perf_counter_vectors = 0;
+	  n->stats_last_clear.perf_counter_ticks = 0;
+	  n->stats_last_clear.perf_counter_vectors = 0;
+	}
+    }
+
+  vlib_worker_thread_barrier_release (vm);
+
+  current_event = pm->events_to_collect + pm->current_event;
+
+  for (j = 0; j < vec_len (vlib_mains); j++)
+    {
+      stat_vm = vlib_mains[j];
+      if (stat_vm == 0)
+	continue;
+
+      nodes = node_dups[j];
+
+      for (i = 0; i < vec_len (nodes); i++)
+	{
+	  u8 *capture_name;
+
+	  n = nodes[i];
+	  /* Nodes which saw no ticks are not recorded */
+	  if (n->stats_total.perf_counter_ticks == 0)
+	    {
+	      clib_mem_free (n);
+	      continue;
+	    }
+
+	  capture_name = format (0, "t%d-%v%c", j, n->name, 0);
+
+	  p = hash_get_mem (pm->capture_by_thread_and_node_name,
+			    capture_name);
+
+	  if (p == 0)
+	    {
+	      /* New capture: it takes ownership of capture_name */
+	      pool_get (pm->capture_pool, c);
+	      memset (c, 0, sizeof (*c));
+	      c->thread_and_node_name = capture_name;
+	      hash_set_mem (pm->capture_by_thread_and_node_name,
+			    capture_name, c - pm->capture_pool);
+	    }
+	  else
+	    {
+	      c = pool_elt_at_index (pm->capture_pool, p[0]);
+	      /* Lookup key only: release it, the capture has its own */
+	      vec_free (capture_name);
+	    }
+
+	  /* Snapshot counters, etc. into the capture */
+	  counter_name = (u8 *) current_event->name;
+	  counter_value = n->stats_total.perf_counter_ticks -
+	    n->stats_last_clear.perf_counter_ticks;
+	  vectors_this_counter = n->stats_total.perf_counter_vectors -
+	    n->stats_last_clear.perf_counter_vectors;
+
+	  vec_add1 (c->counter_names, counter_name);
+	  vec_add1 (c->counter_values, counter_value);
+	  vec_add1 (c->vectors_this_counter, vectors_this_counter);
+	  clib_mem_free (n);
+	}
+      vec_free (nodes);
+    }
+  vec_free (node_dups);
+}
+
+/*
+ * The capture interval for the current event has expired.
+ *
+ * Collection is stopped on the calling thread and requested to stop on
+ * all other threads; after a short pause the counters are scraped into
+ * the capture records. The next configured event is then started the
+ * same way, or - when the list is exhausted - the state is set to
+ * PERFMON_STATE_OFF. The now argument is not used.
+ */
+static void
+handle_timeout (perfmon_main_t * pm, f64 now)
+{
+  int ti;
+
+  disable_event (pm);
+
+  /* Ask the workers to stop as well */
+  for (ti = 1; ti < vec_len (vlib_mains); ti++)
+    {
+      if (vlib_mains[ti] != 0)
+	vlib_mains[ti]->worker_thread_main_loop_callback =
+	  (void *) worker_thread_stop_event;
+    }
+
+  /*
+   * ti moved past 1 only if the loop above ran at least once.
+   * Short delay to make sure workers have stopped collection.
+   */
+  if (ti > 1)
+    vlib_process_suspend (pm->vlib_main, 1e-3);
+
+  scrape_and_clear_counters (pm);
+
+  /* Advance to the next event; finished when the list is exhausted */
+  pm->current_event++;
+  if (pm->current_event >= vec_len (pm->events_to_collect))
+    {
+      pm->current_event = 0;
+      pm->state = PERFMON_STATE_OFF;
+      return;
+    }
+
+  enable_current_event (pm);
+
+  /* Ask the workers to start the next event as well */
+  for (ti = 1; ti < vec_len (vlib_mains); ti++)
+    {
+      if (vlib_mains[ti] != 0)
+	vlib_mains[ti]->worker_thread_main_loop_callback =
+	  (void *) worker_thread_start_event;
+    }
+}
+
+/*
+ * Body of the "perfmon-periodic-process" process node.
+ *
+ * While collection is running the process waits for an event or for
+ * pm->timeout_interval to elapse; otherwise it waits for an event only.
+ * PERFMON_START events start collection, a timeout (event type ~0)
+ * moves on to the next event, anything else is logged.
+ * The rt and f arguments are not used.
+ */
+static uword
+perfmon_periodic_process (vlib_main_t * vm,
+			  vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  uword *event_data = 0;
+
+  for (;;)
+    {
+      uword event_type;
+      f64 now;
+      int ei;
+
+      if (pm->state != PERFMON_STATE_RUNNING)
+	vlib_process_wait_for_event (vm);
+      else
+	vlib_process_wait_for_event_or_clock (vm, pm->timeout_interval);
+
+      now = vlib_time_now (vm);
+
+      event_type = vlib_process_get_events (vm, (uword **) & event_data);
+
+      if (event_type == PERFMON_START)
+	{
+	  for (ei = 0; ei < vec_len (event_data); ei++)
+	    start_event (pm, now, event_data[ei]);
+	}
+      else if (event_type == (uword) ~ 0)
+	{
+	  /* Handle timeout */
+	  handle_timeout (pm, now);
+	}
+      else
+	clib_warning ("Unexpected event %d", event_type);
+
+      vec_reset_length (event_data);
+    }
+  return 0;			/* or not */
+}
+
+/* *INDENT-OFF* */
+/*
+ * Register perfmon_periodic_process as a process-type node named
+ * "perfmon-periodic-process".
+ */
+VLIB_REGISTER_NODE (perfmon_periodic_node) =
+{
+  .function = perfmon_periodic_process,
+  .type = VLIB_NODE_TYPE_PROCESS,
+  .name = "perfmon-periodic-process",
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/main.c b/src/vlib/main.c
index 62599437f35..273ae5256a5 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -540,29 +540,38 @@ vlib_put_next_frame (vlib_main_t * vm,
 never_inline void
 vlib_node_runtime_sync_stats (vlib_main_t * vm,
 			      vlib_node_runtime_t * r,
-			      uword n_calls, uword n_vectors, uword n_clocks)
+			      uword n_calls, uword n_vectors, uword n_clocks,
+			      uword n_ticks)
 {
   vlib_node_t *n = vlib_get_node (vm, r->node_index);
 
   n->stats_total.calls += n_calls + r->calls_since_last_overflow;
   n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow;
   n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow;
+  n->stats_total.perf_counter_ticks += n_ticks +
+    r->perf_counter_ticks_since_last_overflow;
+  n->stats_total.perf_counter_vectors += n_vectors +
+    r->perf_counter_vectors_since_last_overflow;
   n->stats_total.max_clock = r->max_clock;
   n->stats_total.max_clock_n = r->max_clock_n;
 
   r->calls_since_last_overflow = 0;
   r->vectors_since_last_overflow = 0;
   r->clocks_since_last_overflow = 0;
+  r->perf_counter_ticks_since_last_overflow = 0ULL;
+  r->perf_counter_vectors_since_last_overflow = 0ULL;
 }
 
 always_inline void __attribute__ ((unused))
 vlib_process_sync_stats (vlib_main_t * vm,
 			 vlib_process_t * p,
-			 uword n_calls, uword n_vectors, uword n_clocks)
+			 uword n_calls, uword n_vectors, uword n_clocks,
+			 uword n_ticks)
 {
   vlib_node_runtime_t *rt = &p->node_runtime;
   vlib_node_t *n = vlib_get_node (vm, rt->node_index);
-  vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks);
+  vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks,
+				n_ticks);
   n->stats_total.suspends += p->n_suspends;
   p->n_suspends = 0;
 }
@@ -588,7 +597,7 @@ vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n)
       vec_elt_at_index (vm->node_main.nodes_by_type[n->type],
 			n->runtime_index);
 
-  vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0);
+  vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0, 0);
 
   /* Sync up runtime next frame vector counters with main node structure. */
   {
@@ -608,45 +617,68 @@ always_inline u32
 vlib_node_runtime_update_stats (vlib_main_t * vm,
 				vlib_node_runtime_t * node,
 				uword n_calls,
-				uword n_vectors, uword n_clocks)
+				uword n_vectors, uword n_clocks,
+				uword n_ticks)
 {
   u32 ca0, ca1, v0, v1, cl0, cl1, r;
+  u32 ptick0, ptick1, pvec0, pvec1;
 
   cl0 = cl1 = node->clocks_since_last_overflow;
   ca0 = ca1 = node->calls_since_last_overflow;
   v0 = v1 = node->vectors_since_last_overflow;
+  ptick0 = ptick1 = node->perf_counter_ticks_since_last_overflow;
+  pvec0 = pvec1 = node->perf_counter_vectors_since_last_overflow;
 
   ca1 = ca0 + n_calls;
   v1 = v0 + n_vectors;
   cl1 = cl0 + n_clocks;
+  ptick1 = ptick0 + n_ticks;
+  pvec1 = pvec0 + n_vectors;
 
   node->calls_since_last_overflow = ca1;
   node->clocks_since_last_overflow = cl1;
   node->vectors_since_last_overflow = v1;
+  node->perf_counter_ticks_since_last_overflow = ptick1;
+  node->perf_counter_vectors_since_last_overflow = pvec1;
+
   node->max_clock_n = node->max_clock > n_clocks ?
     node->max_clock_n : n_vectors;
   node->max_clock = node->max_clock > n_clocks ? node->max_clock : n_clocks;
 
   r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors);
 
-  if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0))
+  if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0) || (ptick1 < ptick0)
+      || (pvec1 < pvec0))
     {
       node->calls_since_last_overflow = ca0;
       node->clocks_since_last_overflow = cl0;
       node->vectors_since_last_overflow = v0;
-      vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks);
+      node->perf_counter_ticks_since_last_overflow = ptick0;
+      node->perf_counter_vectors_since_last_overflow = pvec0;
+
+      vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks,
+				    n_ticks);
     }
 
   return r;
 }
 
+/*
+ * Sample the performance counter for this thread.
+ * Returns what the registered callback reports, or 0 when no callback
+ * is installed.
+ */
+static inline u64
+vlib_node_runtime_perf_counter (vlib_main_t * vm)
+{
+  u64 ticks = 0ULL;
+
+  if (PREDICT_FALSE (vm->vlib_node_runtime_perf_counter_cb != 0))
+    ticks = (*vm->vlib_node_runtime_perf_counter_cb) (vm);
+
+  return ticks;
+}
+
 always_inline void
 vlib_process_update_stats (vlib_main_t * vm,
 			   vlib_process_t * p,
-			   uword n_calls, uword n_vectors, uword n_clocks)
+			   uword n_calls, uword n_vectors, uword n_clocks,
+			   uword n_ticks)
 {
   vlib_node_runtime_update_stats (vm, &p->node_runtime,
-				  n_calls, n_vectors, n_clocks);
+				  n_calls, n_vectors, n_clocks, n_ticks);
 }
 
 static clib_error_t *
@@ -959,9 +991,7 @@ dispatch_node (vlib_main_t * vm,
 
   if (1 /* || vm->thread_index == node->thread_index */ )
     {
-      vlib_main_t *stat_vm;
-
-      stat_vm = /* vlib_mains ? vlib_mains[0] : */ vm;
+      u64 pmc_before, pmc_delta;
 
       vlib_elog_main_loop_event (vm, node->node_index,
 				 last_time_stamp,
@@ -969,6 +999,12 @@ dispatch_node (vlib_main_t * vm,
 				 /* is_after */ 0);
 
       /*
+       * To validate accounting: pmc_before = last_time_stamp
+       * perf ticks should equal clocks/pkt...
+       */
+      pmc_before = vlib_node_runtime_perf_counter (vm);
+
+      /*
        * Turn this on if you run into
        * "bad monkey" contexts, and you want to know exactly
        * which nodes they've visited... See ixge.c...
@@ -990,16 +1026,23 @@ dispatch_node (vlib_main_t * vm,
 
       t = clib_cpu_time_now ();
 
+      /*
+       * To validate accounting: pmc_delta = t - pmc_before;
+       * perf ticks should equal clocks/pkt...
+       */
+      pmc_delta = vlib_node_runtime_perf_counter (vm) - pmc_before;
+
       vlib_elog_main_loop_event (vm, node->node_index, t, n,	/* is_after */
 				 1);
 
       vm->main_loop_vectors_processed += n;
       vm->main_loop_nodes_processed += n > 0;
 
-      v = vlib_node_runtime_update_stats (stat_vm, node,
+      v = vlib_node_runtime_update_stats (vm, node,
 					  /* n_calls */ 1,
 					  /* n_vectors */ n,
-					  /* n_clocks */ t - last_time_stamp);
+					  /* n_clocks */ t - last_time_stamp,
+					  pmc_delta /* PMC ticks */ );
 
       /* When in interrupt mode and vector rate crosses threshold switch to
          polling mode. */
@@ -1338,7 +1381,8 @@ dispatch_process (vlib_main_t * vm,
   vlib_process_update_stats (vm, p,
 			     /* n_calls */ !is_suspend,
 			     /* n_vectors */ n_vectors,
-			     /* n_clocks */ t - last_time_stamp);
+			     /* n_clocks */ t - last_time_stamp,
+			     /* pmc_ticks */ 0ULL);
 
   return t;
 }
@@ -1421,7 +1465,8 @@ dispatch_suspended_process (vlib_main_t * vm,
   vlib_process_update_stats (vm, p,
 			     /* n_calls */ !is_suspend,
 			     /* n_vectors */ n_vectors,
-			     /* n_clocks */ t - last_time_stamp);
+			     /* n_clocks */ t - last_time_stamp,
+			     /* pmc_ticks */ 0ULL);
 
   return t;
 }
@@ -1471,6 +1516,9 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
   if (!nm->interrupt_threshold_vector_length)
     nm->interrupt_threshold_vector_length = 5;
 
+  /* Make sure the performance monitor counter is disabled */
+  vm->perf_counter_id = ~0;
+
   /* Start all processes. */
   if (is_main)
     {
@@ -1493,6 +1541,9 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
 	  vlib_worker_thread_barrier_check ();
 	  vec_foreach (fqm, tm->frame_queue_mains)
 	    vlib_frame_queue_dequeue (vm, fqm);
+	  if (PREDICT_FALSE (vm->worker_thread_main_loop_callback != 0))
+	    ((void (*)(vlib_main_t *)) vm->worker_thread_main_loop_callback)
+	      (vm);
 	}
 
       /* Process pre-input nodes. */
diff --git a/src/vlib/main.h b/src/vlib/main.h
index ce42b6ea442..ddc14df5360 100644
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -46,6 +46,7 @@
 #include <vppinfra/pool.h>
 #include <vppinfra/random_buffer.h>
 #include <vppinfra/time.h>
+#include <vppinfra/pmc.h>
 
 #include <pthread.h>
 
@@ -81,6 +82,11 @@ typedef struct vlib_main_t
   u32 vector_counts_per_main_loop[2];
   u32 node_counts_per_main_loop[2];
 
+  /* Main loop hw / sw performance counters */
+    u64 (*vlib_node_runtime_perf_counter_cb) (struct vlib_main_t *);
+  int perf_counter_id;
+  int perf_counter_fd;
+
   /* Every so often we switch to the next counter. */
 #define VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE 7
 
@@ -192,6 +198,9 @@ typedef struct vlib_main_t
   void (*queue_signal_callback) (struct vlib_main_t *);
   u8 **argv;
 
+  /* Top of (worker) dispatch loop callback */
+  volatile void (*worker_thread_main_loop_callback) (struct vlib_main_t *);
+
   /* debugging */
   volatile int parked_at_barrier;
 
diff --git a/src/vlib/node.h b/src/vlib/node.h
index 6efb6f3e4fe..fd245d59def 100644
--- a/src/vlib/node.h
+++ b/src/vlib/node.h
@@ -244,6 +244,8 @@ typedef struct
   u64 calls, vectors, clocks, suspends;
   u64 max_clock;
   u64 max_clock_n;
+  u64 perf_counter_ticks;
+  u64 perf_counter_vectors;
 } vlib_node_stats_t;
 
 #define foreach_vlib_node_state					\
@@ -488,6 +490,9 @@ typedef struct vlib_node_runtime_t
   u32 vectors_since_last_overflow;	/**< Number of vector elements
 					  processed by this node. */
 
+  u32 perf_counter_ticks_since_last_overflow; /**< Perf counter ticks */
+  u32 perf_counter_vectors_since_last_overflow;	/**< Perf counter vectors */
+
   u32 next_frame_index;			/**< Start of next frames for this
 					  node. */
 
diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c
index 2523b41c404..062854af5bc 100644
--- a/src/vlib/node_cli.c
+++ b/src/vlib/node_cli.c
@@ -148,19 +148,25 @@ format_vlib_node_stats (u8 * s, va_list * va)
   f64 maxc, maxcn;
   u32 maxn;
   u32 indent;
+  u64 pmc_ticks;
+  f64 pmc_ticks_per_packet;
 
   if (!n)
     {
       if (max)
-	return format (s,
-		       "%=30s%=17s%=16s%=16s%=16s%=16s",
-		       "Name", "Max Node Clocks", "Vectors at Max",
-		       "Max Clocks", "Avg Clocks", "Avg Vectors/Call");
+	s = format (s,
+		    "%=30s%=17s%=16s%=16s%=16s%=16s",
+		    "Name", "Max Node Clocks", "Vectors at Max",
+		    "Max Clocks", "Avg Clocks", "Avg Vectors/Call");
       else
-	return format (s,
-		       "%=30s%=12s%=16s%=16s%=16s%=16s%=16s",
-		       "Name", "State", "Calls", "Vectors", "Suspends",
-		       "Clocks", "Vectors/Call");
+	s = format (s,
+		    "%=30s%=12s%=16s%=16s%=16s%=16s%=16s",
+		    "Name", "State", "Calls", "Vectors", "Suspends",
+		    "Clocks", "Vectors/Call");
+      if (vm->perf_counter_id)
+	s = format (s, "%=16s", "Perf Ticks");
+
+      return s;
     }
 
   indent = format_get_indent (s);
@@ -176,6 +182,13 @@ format_vlib_node_stats (u8 * s, va_list * va)
   else
     maxcn = 0.0;
 
+  pmc_ticks = n->stats_total.perf_counter_ticks -
+    n->stats_last_clear.perf_counter_ticks;
+  if (p > 0)
+    pmc_ticks_per_packet = (f64) pmc_ticks / (f64) p;
+  else
+    pmc_ticks_per_packet = 0.0;
+
   /* Clocks per packet, per call or per suspend. */
   x = 0;
   if (p > 0)
@@ -208,6 +221,9 @@ format_vlib_node_stats (u8 * s, va_list * va)
     s = format (s, "%-30v%=12U%16Ld%16Ld%16Ld%16.2e%16.2f", ns,
 		format_vlib_node_state, vm, n, c, p, d, x, v);
 
+  if (pmc_ticks_per_packet > 0.0)
+    s = format (s, "%16.2e", pmc_ticks_per_packet);
+
   if (ns != n->name)
     vec_free (ns);
 
diff --git a/src/vlibapi/node_serialize.c b/src/vlibapi/node_serialize.c
index b50d79e2922..0774eea251e 100644
--- a/src/vlibapi/node_serialize.c
+++ b/src/vlibapi/node_serialize.c
@@ -57,7 +57,7 @@ vlib_node_serialize (vlib_main_t * vm, vlib_node_t *** node_dups, u8 * vector,
   u8 *namep;
   u32 name_bytes;
   uword i, j, k;
-  u64 l, v, c, d;
+  u64 l, v, c, d, pmc;
   state_string_enum_t state_code;
 
   serialize_open_vector (sm, vector);
@@ -77,6 +77,8 @@ vlib_node_serialize (vlib_main_t * vm, vlib_node_t *** node_dups, u8 * vector,
 	  v = n->stats_total.vectors - n->stats_last_clear.vectors;
 	  c = n->stats_total.calls - n->stats_last_clear.calls;
 	  d = n->stats_total.suspends - n->stats_last_clear.suspends;
+	  pmc = n->stats_total.perf_counter_ticks
+	    - n->stats_last_clear.perf_counter_ticks;
 
 	  state_code = STATE_INTERNAL;
 
@@ -149,6 +151,8 @@ vlib_node_serialize (vlib_main_t * vm, vlib_node_t *** node_dups, u8 * vector,
 	      serialize_integer (sm, v, 8);
 	      /* Total suspends */
 	      serialize_integer (sm, d, 8);
+	      /* PMC counter */
+	      serialize_integer (sm, pmc, 8);
 	    }
 	  else			/* no stats */
 	    serialize_likely_small_unsigned_integer (sm, 0);
@@ -167,7 +171,7 @@ vlib_node_unserialize (u8 * vector)
   vlib_node_t **nodes;
   vlib_node_t ***nodes_by_thread = 0;
   int i, j, k;
-  u64 l, v, c, d;
+  u64 l, v, c, d, pmc;
   state_string_enum_t state_code;
   int stats_present;
 
@@ -225,6 +229,9 @@ vlib_node_unserialize (u8 * vector)
 	      /* Total suspends */
 	      unserialize_integer (sm, &d, 8);
 	      node->stats_total.suspends = d;
+	      /* PMC counter */
+	      unserialize_integer (sm, &pmc, 8);
+	      node->stats_total.perf_counter_ticks = pmc;
 	    }
 	}
     }
diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt
index 7103d600780..26368493382 100644
--- a/src/vppinfra/CMakeLists.txt
+++ b/src/vppinfra/CMakeLists.txt
@@ -136,6 +136,7 @@ set(VPPINFRA_HEADERS
   os.h
   pipeline.h
   pool.h
+  pmc.h
   ptclosure.h
   random_buffer.h
   random.h
diff --git a/src/vppinfra/pmc.h b/src/vppinfra/pmc.h
new file mode 100644
index 00000000000..258b925127d
--- /dev/null
+++ b/src/vppinfra/pmc.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_clib_pmc_h
+#define included_clib_pmc_h
+
+#if defined (__x86_64__)
+
+/*
+ * Execute the rdpmc instruction for the given counter.
+ * counter_id is passed in ecx; the result comes back split across
+ * eax (low 32 bits) and edx (high 32 bits) and is returned as one u64.
+ */
+always_inline u64
+clib_rdpmc (int counter_id)
+{
+  u32 lo, hi;
+
+  asm volatile ("rdpmc":"=a" (lo), "=d" (hi):"c" (counter_id));
+
+  return ((u64) hi << (u64) 32) | (u64) lo;
+}
+
+#else
+/* Fallback for targets other than x86_64: always returns 0 */
+always_inline u64
+clib_rdpmc (int counter_id)
+{
+  return 0ULL;
+}
+#endif /* __x86_64__ */
+
+#endif /* included_clib_pmc_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
author	Dave Barach <dave@barachs.net>	2018-09-10 12:31:15 -0400
committer	Damjan Marion <dmarion@me.com>	2018-10-22 12:02:04 +0000
commit	4d1a866aff6ceb03025990b6e60b42faf09ef486 (patch)
tree	bec495932876d9649f26179b4c24b6938be43f38 /src
parent	115a3ac59a16f9dcfee92eaecc79cd1fa3320e29 (diff)