about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/plugins/perfmon/perfmon.c 62
-rw-r--r-- src/plugins/perfmon/perfmon.h 14
-rw-r--r-- src/plugins/perfmon/perfmon_periodic.c 295
-rw-r--r-- src/vlib/main.c 86
-rw-r--r-- src/vlib/main.h 5
-rw-r--r-- src/vlib/node.h 6
-rw-r--r-- src/vlib/node_cli.c 15
-rw-r--r-- src/vlibapi/node_serialize.c 11
8 files changed, 284 insertions, 210 deletions
diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c
index c6a80224e0e..359555705aa 100644
--- a/src/plugins/perfmon/perfmon.c
+++ b/src/plugins/perfmon/perfmon.c
@@ -157,10 +157,16 @@ perfmon_init (vlib_main_t * vm)
pm->log_class = vlib_log_register_class ("perfmon", 0);
/* Default data collection interval */
- pm->timeout_interval = 3.0;
- vec_validate (pm->pm_fds, vec_len (vlib_mains) - 1);
- vec_validate (pm->perf_event_pages, vec_len (vlib_mains) - 1);
- vec_validate (pm->rdpmc_indices, vec_len (vlib_mains) - 1);
+ pm->timeout_interval = 2.0; /* seconds */
+ vec_validate (pm->pm_fds, 1);
+ vec_validate (pm->pm_fds[0], vec_len (vlib_mains) - 1);
+ vec_validate (pm->pm_fds[1], vec_len (vlib_mains) - 1);
+ vec_validate (pm->perf_event_pages, 1);
+ vec_validate (pm->perf_event_pages[0], vec_len (vlib_mains) - 1);
+ vec_validate (pm->perf_event_pages[1], vec_len (vlib_mains) - 1);
+ vec_validate (pm->rdpmc_indices, 1);
+ vec_validate (pm->rdpmc_indices[0], vec_len (vlib_mains) - 1);
+ vec_validate (pm->rdpmc_indices[1], vec_len (vlib_mains) - 1);
pm->page_size = getpagesize ();
ht = pm->perfmon_table = 0;
@@ -297,10 +303,12 @@ set_pmc_command_fn (vlib_main_t * vm,
perfmon_main_t *pm = &perfmon_main;
unformat_input_t _line_input, *line_input = &_line_input;
perfmon_event_config_t ec;
+ f64 delay;
u32 timeout_seconds;
u32 deadman;
- vec_reset_length (pm->events_to_collect);
+ vec_reset_length (pm->single_events_to_collect);
+ vec_reset_length (pm->paired_events_to_collect);
pm->ipc_event_index = ~0;
pm->mispredict_event_index = ~0;
@@ -316,28 +324,28 @@ set_pmc_command_fn (vlib_main_t * vm,
ec.name = "instructions";
ec.pe_type = PERF_TYPE_HARDWARE;
ec.pe_config = PERF_COUNT_HW_INSTRUCTIONS;
- pm->ipc_event_index = vec_len (pm->events_to_collect);
- vec_add1 (pm->events_to_collect, ec);
+ pm->ipc_event_index = vec_len (pm->paired_events_to_collect);
+ vec_add1 (pm->paired_events_to_collect, ec);
ec.name = "cpu-cycles";
ec.pe_type = PERF_TYPE_HARDWARE;
ec.pe_config = PERF_COUNT_HW_CPU_CYCLES;
- vec_add1 (pm->events_to_collect, ec);
+ vec_add1 (pm->paired_events_to_collect, ec);
}
else if (unformat (line_input, "branch-mispredict-rate"))
{
ec.name = "branch-misses";
ec.pe_type = PERF_TYPE_HARDWARE;
ec.pe_config = PERF_COUNT_HW_BRANCH_MISSES;
- pm->mispredict_event_index = vec_len (pm->events_to_collect);
- vec_add1 (pm->events_to_collect, ec);
+ pm->mispredict_event_index = vec_len (pm->paired_events_to_collect);
+ vec_add1 (pm->paired_events_to_collect, ec);
ec.name = "branches";
ec.pe_type = PERF_TYPE_HARDWARE;
ec.pe_config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
- vec_add1 (pm->events_to_collect, ec);
+ vec_add1 (pm->paired_events_to_collect, ec);
}
else if (unformat (line_input, "%U", unformat_processor_event, pm, &ec))
{
- vec_add1 (pm->events_to_collect, ec);
+ vec_add1 (pm->single_events_to_collect, ec);
}
#define _(type,event,str) \
else if (unformat (line_input, str)) \
@@ -345,7 +353,7 @@ set_pmc_command_fn (vlib_main_t * vm,
ec.name = str; \
ec.pe_type = type; \
ec.pe_config = event; \
- vec_add1 (pm->events_to_collect, ec); \
+ vec_add1 (pm->single_events_to_collect, ec); \
}
foreach_perfmon_event
#undef _
@@ -354,21 +362,33 @@ set_pmc_command_fn (vlib_main_t * vm,
format_unformat_error, line_input);
}
- if (vec_len (pm->events_to_collect) == 0)
+ /* Stick paired events at the front of the (unified) list */
+ if (vec_len (pm->paired_events_to_collect) > 0)
+ {
+ perfmon_event_config_t *tmp;
+ /* first 2n events are pairs... */
+ vec_append (pm->paired_events_to_collect, pm->single_events_to_collect);
+ tmp = pm->single_events_to_collect;
+ pm->single_events_to_collect = pm->paired_events_to_collect;
+ pm->paired_events_to_collect = tmp;
+ }
+
+ if (vec_len (pm->single_events_to_collect) == 0)
return clib_error_return (0, "no events specified...");
+ /* Figure out how long data collection will take */
+ delay =
+ ((f64) vec_len (pm->single_events_to_collect)) * pm->timeout_interval;
+ delay /= 2.0; /* collect 2 stats at once */
+
vlib_cli_output (vm, "Start collection for %d events, wait %.2f seconds",
- vec_len (pm->events_to_collect),
- (f64) (vec_len (pm->events_to_collect))
- * pm->timeout_interval);
+ vec_len (pm->single_events_to_collect), delay);
vlib_process_signal_event (pm->vlib_main, perfmon_periodic_node.index,
PERFMON_START, 0);
/* Coarse-grained wait */
- vlib_process_suspend (vm,
- ((f64) (vec_len (pm->events_to_collect)
- * pm->timeout_interval)));
+ vlib_process_suspend (vm, delay);
deadman = 0;
/* Reasonable to guess that collection may not be quite done... */
@@ -438,7 +458,7 @@ format_capture (u8 * s, va_list * args)
if (i == pm->ipc_event_index)
{
f64 ipc_rate;
- ASSERT (i + 1 < vec_len (c->counter_names));
+ ASSERT ((i + 1) < vec_len (c->counter_names));
if (c->counter_values[i + 1] > 0)
ipc_rate = (f64) c->counter_values[i]
diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h
index 47ee471d5fc..9663dae36d1 100644
--- a/src/plugins/perfmon/perfmon.h
+++ b/src/plugins/perfmon/perfmon.h
@@ -97,8 +97,11 @@ typedef struct
perfmon_cpuid_and_table_t *perfmon_tables;
uword *perfmon_table;
- /* vector of events to collect */
- perfmon_event_config_t *events_to_collect;
+ /* vector of single events to collect */
+ perfmon_event_config_t *single_events_to_collect;
+
+ /* vector of paired events to collect */
+ perfmon_event_config_t *paired_events_to_collect;
/* Base indices of synthetic event tuples */
u32 ipc_event_index;
@@ -109,13 +112,14 @@ typedef struct
/* Current event (index) being collected */
u32 current_event;
- u32 *rdpmc_indices;
+ int n_active;
+ u32 **rdpmc_indices;
/* mmap base / size of (mapped) struct perf_event_mmap_page */
- u8 **perf_event_pages;
+ u8 ***perf_event_pages;
u32 page_size;
/* Current perf_event file descriptors, per thread */
- int *pm_fds;
+ int **pm_fds;
/* Logging */
vlib_log_class_t log_class;
diff --git a/src/plugins/perfmon/perfmon_periodic.c b/src/plugins/perfmon/perfmon_periodic.c
index 4e7e2378320..ae20ac4c62f 100644
--- a/src/plugins/perfmon/perfmon_periodic.c
+++ b/src/plugins/perfmon/perfmon_periodic.c
@@ -31,22 +31,34 @@ perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu,
return ret;
}
-static u64
-read_current_perf_counter (vlib_main_t * vm)
+static void
+read_current_perf_counters (vlib_main_t * vm, u64 * c0, u64 * c1)
{
- if (vm->perf_counter_id)
- return clib_rdpmc (vm->perf_counter_id);
- else
+ int i;
+ u64 *cc;
+ perfmon_main_t *pm = &perfmon_main;
+ uword my_thread_index = vm->thread_index;
+
+ *c0 = *c1 = 0;
+
+ for (i = 0; i < pm->n_active; i++)
{
- u64 sw_value;
- if (read (vm->perf_counter_fd, &sw_value, sizeof (sw_value)) !=
- sizeof (sw_value))
+ cc = (i == 0) ? c0 : c1;
+ if (pm->rdpmc_indices[i][my_thread_index] != ~0)
+ *cc = clib_rdpmc ((int) pm->rdpmc_indices[i][my_thread_index]);
+ else
{
- clib_unix_warning ("counter read failed, disable collection...");
- vm->vlib_node_runtime_perf_counter_cb = 0;
- return 0ULL;
+ u64 sw_value;
+ if (read (pm->pm_fds[i][my_thread_index], &sw_value,
+ sizeof (sw_value)) != sizeof (sw_value))
+ {
+ clib_unix_warning
+ ("counter read failed, disable collection...");
+ vm->vlib_node_runtime_perf_counter_cb = 0;
+ return;
+ }
+ *cc = sw_value;
}
- return sw_value;
}
}
@@ -80,9 +92,11 @@ clear_counters (perfmon_main_t * pm)
for (i = 0; i < vec_len (nm->nodes); i++)
{
n = nm->nodes[i];
- n->stats_total.perf_counter_ticks = 0;
+ n->stats_total.perf_counter0_ticks = 0;
+ n->stats_total.perf_counter1_ticks = 0;
n->stats_total.perf_counter_vectors = 0;
- n->stats_last_clear.perf_counter_ticks = 0;
+ n->stats_last_clear.perf_counter0_ticks = 0;
+ n->stats_last_clear.perf_counter1_ticks = 0;
n->stats_last_clear.perf_counter_vectors = 0;
}
}
@@ -90,7 +104,7 @@ clear_counters (perfmon_main_t * pm)
}
static void
-enable_current_event (perfmon_main_t * pm)
+enable_current_events (perfmon_main_t * pm)
{
struct perf_event_attr pe;
int fd;
@@ -98,91 +112,108 @@ enable_current_event (perfmon_main_t * pm)
perfmon_event_config_t *c;
vlib_main_t *vm = vlib_get_main ();
u32 my_thread_index = vm->thread_index;
+ u32 index;
+ int i, limit = 1;
- c = vec_elt_at_index (pm->events_to_collect, pm->current_event);
-
- memset (&pe, 0, sizeof (struct perf_event_attr));
- pe.type = c->pe_type;
- pe.size = sizeof (struct perf_event_attr);
- pe.config = c->pe_config;
- pe.disabled = 1;
- pe.pinned = 1;
- /*
- * Note: excluding the kernel makes the
- * (software) context-switch counter read 0...
- */
- if (pe.type != PERF_TYPE_SOFTWARE)
- {
- /* Exclude kernel and hypervisor */
- pe.exclude_kernel = 1;
- pe.exclude_hv = 1;
- }
+ if ((pm->current_event + 1) < vec_len (pm->single_events_to_collect))
+ limit = 2;
- fd = perf_event_open (&pe, 0, -1, -1, 0);
- if (fd == -1)
+ for (i = 0; i < limit; i++)
{
- clib_unix_warning ("event open: type %d config %d", c->pe_type,
- c->pe_config);
- return;
- }
+ c = vec_elt_at_index (pm->single_events_to_collect,
+ pm->current_event + i);
+
+ memset (&pe, 0, sizeof (struct perf_event_attr));
+ pe.type = c->pe_type;
+ pe.size = sizeof (struct perf_event_attr);
+ pe.config = c->pe_config;
+ pe.disabled = 1;
+ pe.pinned = 1;
+ /*
+ * Note: excluding the kernel makes the
+ * (software) context-switch counter read 0...
+ */
+ if (pe.type != PERF_TYPE_SOFTWARE)
+ {
+ /* Exclude kernel and hypervisor */
+ pe.exclude_kernel = 1;
+ pe.exclude_hv = 1;
+ }
- if (pe.type != PERF_TYPE_SOFTWARE)
- {
- p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0);
- if (p == MAP_FAILED)
+ fd = perf_event_open (&pe, 0, -1, -1, 0);
+ if (fd == -1)
{
- clib_unix_warning ("mmap");
- close (fd);
+ clib_unix_warning ("event open: type %d config %d", c->pe_type,
+ c->pe_config);
return;
}
- }
- if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0)
- clib_unix_warning ("reset ioctl");
+ if (pe.type != PERF_TYPE_SOFTWARE)
+ {
+ p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (p == MAP_FAILED)
+ {
+ clib_unix_warning ("mmap");
+ close (fd);
+ return;
+ }
+ }
+ else
+ p = 0;
+
+ /*
+ * Software event counters - and others not capable of being
+ * read via the "rdpmc" instruction - will be read
+ * by system calls.
+ */
+ if (pe.type == PERF_TYPE_SOFTWARE || p->cap_user_rdpmc == 0)
+ index = ~0;
+ else
+ index = p->index - 1;
- if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
- clib_unix_warning ("enable ioctl");
+ if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0)
+ clib_unix_warning ("reset ioctl");
- /*
- * Software event counters - and others not capable of being
- * read via the "rdpmc" instruction - will be read
- * by system calls.
- */
- if (pe.type == PERF_TYPE_SOFTWARE || p->cap_user_rdpmc == 0)
- pm->rdpmc_indices[my_thread_index] = 0;
- else /* use rdpmc instrs */
- pm->rdpmc_indices[my_thread_index] = p->index - 1;
- pm->perf_event_pages[my_thread_index] = (void *) p;
+ if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
+ clib_unix_warning ("enable ioctl");
- pm->pm_fds[my_thread_index] = fd;
+ pm->rdpmc_indices[i][my_thread_index] = index;
+ pm->perf_event_pages[i][my_thread_index] = (void *) p;
+ pm->pm_fds[i][my_thread_index] = fd;
+ }
+ pm->n_active = i;
/* Enable the main loop counter snapshot mechanism */
- vm->perf_counter_id = pm->rdpmc_indices[my_thread_index];
- vm->perf_counter_fd = fd;
- vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counter;
+ vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counters;
}
static void
-disable_event (perfmon_main_t * pm)
+disable_events (perfmon_main_t * pm)
{
vlib_main_t *vm = vlib_get_main ();
u32 my_thread_index = vm->thread_index;
-
- if (pm->pm_fds[my_thread_index] == 0)
- return;
+ int i;
/* Stop main loop collection */
vm->vlib_node_runtime_perf_counter_cb = 0;
- if (ioctl (pm->pm_fds[my_thread_index], PERF_EVENT_IOC_DISABLE, 0) < 0)
- clib_unix_warning ("disable ioctl");
+ for (i = 0; i < pm->n_active; i++)
+ {
+ if (pm->pm_fds[i][my_thread_index] == 0)
+ continue;
+
+ if (ioctl (pm->pm_fds[i][my_thread_index], PERF_EVENT_IOC_DISABLE, 0) <
+ 0)
+ clib_unix_warning ("disable ioctl");
- if (pm->perf_event_pages[my_thread_index])
- if (munmap (pm->perf_event_pages[my_thread_index], pm->page_size) < 0)
- clib_unix_warning ("munmap");
+ if (pm->perf_event_pages[i][my_thread_index])
+ if (munmap (pm->perf_event_pages[i][my_thread_index],
+ pm->page_size) < 0)
+ clib_unix_warning ("munmap");
- (void) close (pm->pm_fds[my_thread_index]);
- pm->pm_fds[my_thread_index] = 0;
+ (void) close (pm->pm_fds[i][my_thread_index]);
+ pm->pm_fds[i][my_thread_index] = 0;
+ }
}
static void
@@ -190,7 +221,7 @@ worker_thread_start_event (vlib_main_t * vm)
{
perfmon_main_t *pm = &perfmon_main;
- enable_current_event (pm);
+ enable_current_events (pm);
vm->worker_thread_main_loop_callback = 0;
}
@@ -198,7 +229,7 @@ static void
worker_thread_stop_event (vlib_main_t * vm)
{
perfmon_main_t *pm = &perfmon_main;
- disable_event (pm);
+ disable_events (pm);
vm->worker_thread_main_loop_callback = 0;
}
@@ -207,7 +238,7 @@ start_event (perfmon_main_t * pm, f64 now, uword event_data)
{
int i;
pm->current_event = 0;
- if (vec_len (pm->events_to_collect) == 0)
+ if (vec_len (pm->single_events_to_collect) == 0)
{
pm->state = PERFMON_STATE_OFF;
return;
@@ -216,7 +247,7 @@ start_event (perfmon_main_t * pm, f64 now, uword event_data)
clear_counters (pm);
/* Start collection on this thread */
- enable_current_event (pm);
+ enable_current_events (pm);
/* And also on worker threads */
for (i = 1; i < vec_len (vlib_mains); i++)
@@ -231,7 +262,7 @@ start_event (perfmon_main_t * pm, f64 now, uword event_data)
void
scrape_and_clear_counters (perfmon_main_t * pm)
{
- int i, j;
+ int i, j, k;
vlib_main_t *vm = pm->vlib_main;
vlib_main_t *stat_vm;
vlib_node_main_t *nm;
@@ -242,7 +273,6 @@ scrape_and_clear_counters (perfmon_main_t * pm)
perfmon_event_config_t *current_event;
uword *p;
u8 *counter_name;
- u64 counter_value;
u64 vectors_this_counter;
/* snapshoot the nodes, including pm counters */
@@ -272,17 +302,17 @@ scrape_and_clear_counters (perfmon_main_t * pm)
n = nm->nodes[i];
nodes[i] = clib_mem_alloc (sizeof (*n));
clib_memcpy_fast (nodes[i], n, sizeof (*n));
- n->stats_total.perf_counter_ticks = 0;
+ n->stats_total.perf_counter0_ticks = 0;
+ n->stats_total.perf_counter1_ticks = 0;
n->stats_total.perf_counter_vectors = 0;
- n->stats_last_clear.perf_counter_ticks = 0;
+ n->stats_last_clear.perf_counter0_ticks = 0;
+ n->stats_last_clear.perf_counter1_ticks = 0;
n->stats_last_clear.perf_counter_vectors = 0;
}
}
vlib_worker_thread_barrier_release (vm);
- current_event = pm->events_to_collect + pm->current_event;
-
for (j = 0; j < vec_len (vlib_mains); j++)
{
stat_vm = vlib_mains[j];
@@ -296,38 +326,69 @@ scrape_and_clear_counters (perfmon_main_t * pm)
u8 *capture_name;
n = nodes[i];
- if (n->stats_total.perf_counter_ticks == 0)
- {
- clib_mem_free (n);
- continue;
- }
-
- capture_name = format (0, "t%d-%v%c", j, n->name, 0);
- p = hash_get_mem (pm->capture_by_thread_and_node_name,
- capture_name);
+ if (n->stats_total.perf_counter0_ticks == 0 &&
+ n->stats_total.perf_counter1_ticks == 0)
+ goto skip_this_node;
- if (p == 0)
+ for (k = 0; k < 2; k++)
{
- pool_get (pm->capture_pool, c);
- memset (c, 0, sizeof (*c));
- c->thread_and_node_name = capture_name;
- hash_set_mem (pm->capture_by_thread_and_node_name,
- capture_name, c - pm->capture_pool);
+ u64 counter_value, counter_last_clear;
+
+ /*
+ * We collect 2 counters at once, except for the
+ * last counter when the user asks for an odd number of
+ * counters
+ */
+ if ((pm->current_event + k)
+ >= vec_len (pm->single_events_to_collect))
+ break;
+
+ if (k == 0)
+ {
+ counter_value = n->stats_total.perf_counter0_ticks;
+ counter_last_clear =
+ n->stats_last_clear.perf_counter0_ticks;
+ }
+ else
+ {
+ counter_value = n->stats_total.perf_counter1_ticks;
+ counter_last_clear =
+ n->stats_last_clear.perf_counter1_ticks;
+ }
+
+ capture_name = format (0, "t%d-%v%c", j, n->name, 0);
+
+ p = hash_get_mem (pm->capture_by_thread_and_node_name,
+ capture_name);
+
+ if (p == 0)
+ {
+ pool_get (pm->capture_pool, c);
+ memset (c, 0, sizeof (*c));
+ c->thread_and_node_name = capture_name;
+ hash_set_mem (pm->capture_by_thread_and_node_name,
+ capture_name, c - pm->capture_pool);
+ }
+ else
+ {
+ c = pool_elt_at_index (pm->capture_pool, p[0]);
+ vec_free (capture_name);
+ }
+
+ /* Snapshoot counters, etc. into the capture */
+ current_event = pm->single_events_to_collect
+ + pm->current_event + k;
+ counter_name = (u8 *) current_event->name;
+ vectors_this_counter = n->stats_total.perf_counter_vectors -
+ n->stats_last_clear.perf_counter_vectors;
+
+ vec_add1 (c->counter_names, counter_name);
+ vec_add1 (c->counter_values,
+ counter_value - counter_last_clear);
+ vec_add1 (c->vectors_this_counter, vectors_this_counter);
}
- else
- c = pool_elt_at_index (pm->capture_pool, p[0]);
-
- /* Snapshoot counters, etc. into the capture */
- counter_name = (u8 *) current_event->name;
- counter_value = n->stats_total.perf_counter_ticks -
- n->stats_last_clear.perf_counter_ticks;
- vectors_this_counter = n->stats_total.perf_counter_vectors -
- n->stats_last_clear.perf_counter_vectors;
-
- vec_add1 (c->counter_names, counter_name);
- vec_add1 (c->counter_values, counter_value);
- vec_add1 (c->vectors_this_counter, vectors_this_counter);
+ skip_this_node:
clib_mem_free (n);
}
vec_free (nodes);
@@ -339,7 +400,7 @@ static void
handle_timeout (perfmon_main_t * pm, f64 now)
{
int i;
- disable_event (pm);
+ disable_events (pm);
/* And also on worker threads */
for (i = 1; i < vec_len (vlib_mains); i++)
@@ -354,14 +415,14 @@ handle_timeout (perfmon_main_t * pm, f64 now)
if (i > 1)
vlib_process_suspend (pm->vlib_main, 1e-3);
scrape_and_clear_counters (pm);
- pm->current_event++;
- if (pm->current_event >= vec_len (pm->events_to_collect))
+ pm->current_event += pm->n_active;
+ if (pm->current_event >= vec_len (pm->single_events_to_collect))
{
pm->current_event = 0;
pm->state = PERFMON_STATE_OFF;
return;
}
- enable_current_event (pm);
+ enable_current_events (pm);
/* And also on worker threads */
for (i = 1; i < vec_len (vlib_mains); i++)
diff --git a/src/vlib/main.c b/src/vlib/main.c
index 23c4e076e1f..0e480fabe2a 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -543,15 +543,17 @@ never_inline void
vlib_node_runtime_sync_stats (vlib_main_t * vm,
vlib_node_runtime_t * r,
uword n_calls, uword n_vectors, uword n_clocks,
- uword n_ticks)
+ uword n_ticks0, uword n_ticks1)
{
vlib_node_t *n = vlib_get_node (vm, r->node_index);
n->stats_total.calls += n_calls + r->calls_since_last_overflow;
n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow;
n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow;
- n->stats_total.perf_counter_ticks += n_ticks +
- r->perf_counter_ticks_since_last_overflow;
+ n->stats_total.perf_counter0_ticks += n_ticks0 +
+ r->perf_counter0_ticks_since_last_overflow;
+ n->stats_total.perf_counter1_ticks += n_ticks1 +
+ r->perf_counter1_ticks_since_last_overflow;
n->stats_total.perf_counter_vectors += n_vectors +
r->perf_counter_vectors_since_last_overflow;
n->stats_total.max_clock = r->max_clock;
@@ -560,7 +562,8 @@ vlib_node_runtime_sync_stats (vlib_main_t * vm,
r->calls_since_last_overflow = 0;
r->vectors_since_last_overflow = 0;
r->clocks_since_last_overflow = 0;
- r->perf_counter_ticks_since_last_overflow = 0ULL;
+ r->perf_counter0_ticks_since_last_overflow = 0ULL;
+ r->perf_counter1_ticks_since_last_overflow = 0ULL;
r->perf_counter_vectors_since_last_overflow = 0ULL;
}
@@ -568,12 +571,12 @@ always_inline void __attribute__ ((unused))
vlib_process_sync_stats (vlib_main_t * vm,
vlib_process_t * p,
uword n_calls, uword n_vectors, uword n_clocks,
- uword n_ticks)
+ uword n_ticks0, uword n_ticks1)
{
vlib_node_runtime_t *rt = &p->node_runtime;
vlib_node_t *n = vlib_get_node (vm, rt->node_index);
vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks,
- n_ticks);
+ n_ticks0, n_ticks1);
n->stats_total.suspends += p->n_suspends;
p->n_suspends = 0;
}
@@ -599,7 +602,7 @@ vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n)
vec_elt_at_index (vm->node_main.nodes_by_type[n->type],
n->runtime_index);
- vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0, 0);
+ vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0, 0, 0);
/* Sync up runtime next frame vector counters with main node structure. */
{
@@ -620,27 +623,30 @@ vlib_node_runtime_update_stats (vlib_main_t * vm,
vlib_node_runtime_t * node,
uword n_calls,
uword n_vectors, uword n_clocks,
- uword n_ticks)
+ uword n_ticks0, uword n_ticks1)
{
u32 ca0, ca1, v0, v1, cl0, cl1, r;
- u32 ptick0, ptick1, pvec0, pvec1;
+ u32 ptick00, ptick01, ptick10, ptick11, pvec0, pvec1;
cl0 = cl1 = node->clocks_since_last_overflow;
ca0 = ca1 = node->calls_since_last_overflow;
v0 = v1 = node->vectors_since_last_overflow;
- ptick0 = ptick1 = node->perf_counter_ticks_since_last_overflow;
+ ptick00 = ptick01 = node->perf_counter0_ticks_since_last_overflow;
+ ptick10 = ptick11 = node->perf_counter1_ticks_since_last_overflow;
pvec0 = pvec1 = node->perf_counter_vectors_since_last_overflow;
ca1 = ca0 + n_calls;
v1 = v0 + n_vectors;
cl1 = cl0 + n_clocks;
- ptick1 = ptick0 + n_ticks;
+ ptick01 = ptick00 + n_ticks0;
+ ptick11 = ptick10 + n_ticks1;
pvec1 = pvec0 + n_vectors;
node->calls_since_last_overflow = ca1;
node->clocks_since_last_overflow = cl1;
node->vectors_since_last_overflow = v1;
- node->perf_counter_ticks_since_last_overflow = ptick1;
+ node->perf_counter0_ticks_since_last_overflow = ptick01;
+ node->perf_counter1_ticks_since_last_overflow = ptick11;
node->perf_counter_vectors_since_last_overflow = pvec1;
node->max_clock_n = node->max_clock > n_clocks ?
@@ -649,38 +655,39 @@ vlib_node_runtime_update_stats (vlib_main_t * vm,
r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors);
- if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0) || (ptick1 < ptick0)
- || (pvec1 < pvec0))
+ if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0) || (ptick01 < ptick00)
+ || (ptick11 < ptick10) || (pvec1 < pvec0))
{
node->calls_since_last_overflow = ca0;
node->clocks_since_last_overflow = cl0;
node->vectors_since_last_overflow = v0;
- node->perf_counter_ticks_since_last_overflow = ptick0;
+ node->perf_counter0_ticks_since_last_overflow = ptick00;
+ node->perf_counter1_ticks_since_last_overflow = ptick10;
node->perf_counter_vectors_since_last_overflow = pvec0;
vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks,
- n_ticks);
+ n_ticks0, n_ticks1);
}
return r;
}
-static inline u64
-vlib_node_runtime_perf_counter (vlib_main_t * vm)
+static inline void
+vlib_node_runtime_perf_counter (vlib_main_t * vm, u64 * pmc0, u64 * pmc1)
{
+ *pmc0 = 0;
+ *pmc1 = 0;
if (PREDICT_FALSE (vm->vlib_node_runtime_perf_counter_cb != 0))
- return ((*vm->vlib_node_runtime_perf_counter_cb) (vm));
- return 0ULL;
+ (*vm->vlib_node_runtime_perf_counter_cb) (vm, pmc0, pmc1);
}
always_inline void
vlib_process_update_stats (vlib_main_t * vm,
vlib_process_t * p,
- uword n_calls, uword n_vectors, uword n_clocks,
- uword n_ticks)
+ uword n_calls, uword n_vectors, uword n_clocks)
{
vlib_node_runtime_update_stats (vm, &p->node_runtime,
- n_calls, n_vectors, n_clocks, n_ticks);
+ n_calls, n_vectors, n_clocks, 0ULL, 0ULL);
}
static clib_error_t *
@@ -1098,6 +1105,8 @@ dispatch_pcap_trace (vlib_main_t * vm,
}
}
+u64 oingo0, oingo1;
+
static_always_inline u64
dispatch_node (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1146,18 +1155,14 @@ dispatch_node (vlib_main_t * vm,
if (1 /* || vm->thread_index == node->thread_index */ )
{
- u64 pmc_before, pmc_delta;
+ u64 pmc_before[2], pmc_after[2], pmc_delta[2];
vlib_elog_main_loop_event (vm, node->node_index,
last_time_stamp,
frame ? frame->n_vectors : 0,
/* is_after */ 0);
- /*
- * To validate accounting: pmc_before = last_time_stamp
- * perf ticks should equal clocks/pkt...
- */
- pmc_before = vlib_node_runtime_perf_counter (vm);
+ vlib_node_runtime_perf_counter (vm, &pmc_before[0], &pmc_before[1]);
/*
* Turn this on if you run into
@@ -1191,7 +1196,10 @@ dispatch_node (vlib_main_t * vm,
* To validate accounting: pmc_delta = t - pmc_before;
* perf ticks should equal clocks/pkt...
*/
- pmc_delta = vlib_node_runtime_perf_counter (vm) - pmc_before;
+ vlib_node_runtime_perf_counter (vm, &pmc_after[0], &pmc_after[1]);
+
+ pmc_delta[0] = pmc_after[0] - pmc_before[0];
+ pmc_delta[1] = pmc_after[1] - pmc_before[1];
vlib_elog_main_loop_event (vm, node->node_index, t, n, /* is_after */
1);
@@ -1199,11 +1207,18 @@ dispatch_node (vlib_main_t * vm,
vm->main_loop_vectors_processed += n;
vm->main_loop_nodes_processed += n > 0;
+ if (pmc_delta[0] || pmc_delta[1])
+ {
+ oingo0 += pmc_delta[0];
+ oingo1 += pmc_delta[1];
+ }
+
v = vlib_node_runtime_update_stats (vm, node,
/* n_calls */ 1,
/* n_vectors */ n,
/* n_clocks */ t - last_time_stamp,
- pmc_delta /* PMC ticks */ );
+ pmc_delta[0] /* PMC0 */ ,
+ pmc_delta[1] /* PMC1 */ );
/* When in interrupt mode and vector rate crosses threshold switch to
polling mode. */
@@ -1542,8 +1557,7 @@ dispatch_process (vlib_main_t * vm,
vlib_process_update_stats (vm, p,
/* n_calls */ !is_suspend,
/* n_vectors */ n_vectors,
- /* n_clocks */ t - last_time_stamp,
- /* pmc_ticks */ 0ULL);
+ /* n_clocks */ t - last_time_stamp);
return t;
}
@@ -1626,8 +1640,7 @@ dispatch_suspended_process (vlib_main_t * vm,
vlib_process_update_stats (vm, p,
/* n_calls */ !is_suspend,
/* n_vectors */ n_vectors,
- /* n_clocks */ t - last_time_stamp,
- /* pmc_ticks */ 0ULL);
+ /* n_clocks */ t - last_time_stamp);
return t;
}
@@ -1677,9 +1690,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
if (!nm->interrupt_threshold_vector_length)
nm->interrupt_threshold_vector_length = 5;
- /* Make sure the performance monitor counter is disabled */
- vm->perf_counter_id = ~0;
-
/* Start all processes. */
if (is_main)
{
diff --git a/src/vlib/main.h b/src/vlib/main.h
index 91661fdaaad..4c6d0f490dd 100644
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -84,9 +84,8 @@ typedef struct vlib_main_t
u32 node_counts_per_main_loop[2];
/* Main loop hw / sw performance counters */
- u64 (*vlib_node_runtime_perf_counter_cb) (struct vlib_main_t *);
- int perf_counter_id;
- int perf_counter_fd;
+ void (*vlib_node_runtime_perf_counter_cb) (struct vlib_main_t *,
+ u64 *, u64 *);
/* Every so often we switch to the next counter. */
#define VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE 7
diff --git a/src/vlib/node.h b/src/vlib/node.h
index f41eb60aa2c..8bb89f44bc7 100644
--- a/src/vlib/node.h
+++ b/src/vlib/node.h
@@ -258,7 +258,8 @@ typedef struct
u64 calls, vectors, clocks, suspends;
u64 max_clock;
u64 max_clock_n;
- u64 perf_counter_ticks;
+ u64 perf_counter0_ticks;
+ u64 perf_counter1_ticks;
u64 perf_counter_vectors;
} vlib_node_stats_t;
@@ -507,7 +508,8 @@ typedef struct vlib_node_runtime_t
u32 vectors_since_last_overflow; /**< Number of vector elements
processed by this node. */
- u32 perf_counter_ticks_since_last_overflow; /**< Perf counter ticks */
+ u32 perf_counter0_ticks_since_last_overflow; /**< Perf counter 0 ticks */
+ u32 perf_counter1_ticks_since_last_overflow; /**< Perf counter 1 ticks */
u32 perf_counter_vectors_since_last_overflow; /**< Perf counter vectors */
u32 next_frame_index; /**< Start of next frames for this
diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c
index 062854af5bc..ad17c1d6f4d 100644
--- a/src/vlib/node_cli.c
+++ b/src/vlib/node_cli.c
@@ -148,8 +148,6 @@ format_vlib_node_stats (u8 * s, va_list * va)
f64 maxc, maxcn;
u32 maxn;
u32 indent;
- u64 pmc_ticks;
- f64 pmc_ticks_per_packet;
if (!n)
{
@@ -163,9 +161,6 @@ format_vlib_node_stats (u8 * s, va_list * va)
"%=30s%=12s%=16s%=16s%=16s%=16s%=16s",
"Name", "State", "Calls", "Vectors", "Suspends",
"Clocks", "Vectors/Call");
- if (vm->perf_counter_id)
- s = format (s, "%=16s", "Perf Ticks");
-
return s;
}
@@ -182,13 +177,6 @@ format_vlib_node_stats (u8 * s, va_list * va)
else
maxcn = 0.0;
- pmc_ticks = n->stats_total.perf_counter_ticks -
- n->stats_last_clear.perf_counter_ticks;
- if (p > 0)
- pmc_ticks_per_packet = (f64) pmc_ticks / (f64) p;
- else
- pmc_ticks_per_packet = 0.0;
-
/* Clocks per packet, per call or per suspend. */
x = 0;
if (p > 0)
@@ -221,9 +209,6 @@ format_vlib_node_stats (u8 * s, va_list * va)
s = format (s, "%-30v%=12U%16Ld%16Ld%16Ld%16.2e%16.2f", ns,
format_vlib_node_state, vm, n, c, p, d, x, v);
- if (pmc_ticks_per_packet > 0.0)
- s = format (s, "%16.2e", pmc_ticks_per_packet);
-
if (ns != n->name)
vec_free (ns);
diff --git a/src/vlibapi/node_serialize.c b/src/vlibapi/node_serialize.c
index 0774eea251e..b50d79e2922 100644
--- a/src/vlibapi/node_serialize.c
+++ b/src/vlibapi/node_serialize.c
@@ -57,7 +57,7 @@ vlib_node_serialize (vlib_main_t * vm, vlib_node_t *** node_dups, u8 * vector,
u8 *namep;
u32 name_bytes;
uword i, j, k;
- u64 l, v, c, d, pmc;
+ u64 l, v, c, d;
state_string_enum_t state_code;
serialize_open_vector (sm, vector);
@@ -77,8 +77,6 @@ vlib_node_serialize (vlib_main_t * vm, vlib_node_t *** node_dups, u8 * vector,
v = n->stats_total.vectors - n->stats_last_clear.vectors;
c = n->stats_total.calls - n->stats_last_clear.calls;
d = n->stats_total.suspends - n->stats_last_clear.suspends;
- pmc = n->stats_total.perf_counter_ticks
- - n->stats_last_clear.perf_counter_ticks;
state_code = STATE_INTERNAL;
@@ -151,8 +149,6 @@ vlib_node_serialize (vlib_main_t * vm, vlib_node_t *** node_dups, u8 * vector,
serialize_integer (sm, v, 8);
/* Total suspends */
serialize_integer (sm, d, 8);
- /* PMC counter */
- serialize_integer (sm, pmc, 8);
}
else /* no stats */
serialize_likely_small_unsigned_integer (sm, 0);
@@ -171,7 +167,7 @@ vlib_node_unserialize (u8 * vector)
vlib_node_t **nodes;
vlib_node_t ***nodes_by_thread = 0;
int i, j, k;
- u64 l, v, c, d, pmc;
+ u64 l, v, c, d;
state_string_enum_t state_code;
int stats_present;
@@ -229,9 +225,6 @@ vlib_node_unserialize (u8 * vector)
/* Total suspends */
unserialize_integer (sm, &d, 8);
node->stats_total.suspends = d;
- /* PMC counter */
- unserialize_integer (sm, &pmc, 8);
- node->stats_total.perf_counter_ticks = pmc;
}
}
}