/*
 * perfmon_periodic.c - perfmon plug-in periodic collection process
*
* Copyright (c) <current-year> <your-organization>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vlib/vlib.h>
#include <vppinfra/error.h>
#include <perfmon/perfmon.h>
#include <linux/perf_event.h>	/* perf_event_attr, PERF_*, mmap page */
#include <asm/unistd.h>		/* __NR_perf_event_open */
#include <sys/ioctl.h>
#include <sys/mman.h>		/* mmap / munmap of the event pages */
#include <unistd.h>		/* read / close / syscall */
/* "not in glibc" */
static long
perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu,
int group_fd, unsigned long flags)
{
int ret;
ret = syscall (__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
return ret;
}
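/*
 * Main-loop callback, invoked around node dispatch: snapshot the one
 * or two events currently being collected on this thread. Hardware
 * counters mapped for user-space access are read with rdpmc; software
 * events (and anything else rdpmc cannot reach) are read(2) from the
 * event fd.
 */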
static void
read_current_perf_counters (vlib_main_t * vm, u64 * c0, u64 * c1,
vlib_node_runtime_t * node,
vlib_frame_t * frame, int before_or_after)
{
int i;
u64 *cc;
perfmon_main_t *pm = &perfmon_main;
uword my_thread_index = vm->thread_index;
*c0 = *c1 = 0;
for (i = 0; i < pm->n_active; i++)
{
cc = (i == 0) ? c0 : c1;
if (pm->rdpmc_indices[i][my_thread_index] != ~0)
*cc = clib_rdpmc ((int) pm->rdpmc_indices[i][my_thread_index]);
else
{
	  u64 sw_value;
	  int read_result;
	  /* Parenthesize so read_result gets read()'s return value,
	     not the result of the comparison */
	  if ((read_result = read (pm->pm_fds[i][my_thread_index], &sw_value,
				   sizeof (sw_value))) != sizeof (sw_value))
	    {
	      clib_unix_warning
		("counter read returned %d, expected %d",
		 read_result, (int) sizeof (sw_value));
	      /* Reads are failing; stop the snapshot callback */
	      clib_callback_enable_disable
		(vm->vlib_node_runtime_perf_counter_cbs,
		 vm->vlib_node_runtime_perf_counter_cb_tmp,
		 vm->worker_thread_main_loop_callback_lock,
		 read_current_perf_counters, 0 /* disable */ );
	      return;
	    }
*cc = sw_value;
}
}
}
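/*
 * Zero the perfmon counters in every node on every thread, under a
 * worker-thread barrier, so the next collection round starts clean.
 */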
static void
clear_counters (perfmon_main_t * pm)
{
int i, j;
vlib_main_t *vm = pm->vlib_main;
vlib_main_t *stat_vm;
vlib_node_main_t *nm;
vlib_node_t *n;
vlib_worker_thread_barrier_sync (vm);
for (j = 0; j < vec_len (vlib_mains); j++)
{
stat_vm = vlib_mains[j];
if (stat_vm == 0)
continue;
nm = &stat_vm->node_main;
/* Clear the node runtime perfmon counters */
for (i = 0; i < vec_len (nm->nodes); i++)
{
n = nm->nodes[i];
vlib_node_sync_stats (stat_vm, n);
}
/* And clear the node perfmon counters */
for (i = 0; i < vec_len (nm->nodes); i++)
{
n = nm->nodes[i];
n->stats_total.perf_counter0_ticks = 0;
n->stats_total.perf_counter1_ticks = 0;
n->stats_total.perf_counter_vectors = 0;
n->stats_last_clear.perf_counter0_ticks = 0;
n->stats_last_clear.perf_counter1_ticks = 0;
n->stats_last_clear.perf_counter_vectors = 0;
}
}
vlib_worker_thread_barrier_release (vm);
}
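/*
 * Open, reset, and enable the next one or two events on the current
 * thread; mmap each hardware event's control page and record its
 * rdpmc index, or ~0 when the event must be read via read(2).
 */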
static void
enable_current_events (perfmon_main_t * pm)
{
struct perf_event_attr pe;
int fd;
struct perf_event_mmap_page *p = 0;
perfmon_event_config_t *c;
vlib_main_t *vm = vlib_get_main ();
u32 my_thread_index = vm->thread_index;
u32 index;
int i, limit = 1;
int cpu;
if ((pm->current_event + 1) < vec_len (pm->single_events_to_collect))
limit = 2;
for (i = 0; i < limit; i++)
{
vec_validate (pm->pm_fds[i], vec_len (vlib_mains) - 1);
vec_validate (pm->perf_event_pages[i], vec_len (vlib_mains) - 1);
vec_validate (pm->rdpmc_indices[i], vec_len (vlib_mains) - 1);
c = vec_elt_at_index (pm->single_events_to_collect,
pm->current_event + i);
memset (&pe, 0, sizeof (struct perf_event_attr));
pe.type = c->pe_type;
pe.size = sizeof (struct perf_event_attr);
pe.config = c->pe_config;
pe.disabled = 1;
pe.pinned = 1;
/*
* Note: excluding the kernel makes the
* (software) context-switch counter read 0...
*/
if (pe.type != PERF_TYPE_SOFTWARE)
{
/* Exclude kernel and hypervisor */
pe.exclude_kernel = 1;
pe.exclude_hv = 1;
}
cpu = vm->cpu_id;
fd = perf_event_open (&pe, 0, cpu, -1, 0);
      if (fd == -1)
	{
	  clib_unix_warning ("event open: type %d config %Lx", c->pe_type,
			     (u64) c->pe_config);
	  return;
	}
if (pe.type != PERF_TYPE_SOFTWARE)
{
p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0);
if (p == MAP_FAILED)
{
clib_unix_warning ("mmap");
close (fd);
return;
}
CLIB_MEM_UNPOISON (p, pm->page_size);
}
else
p = 0;
if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0)
clib_unix_warning ("reset ioctl");
if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
clib_unix_warning ("enable ioctl");
pm->perf_event_pages[i][my_thread_index] = (void *) p;
pm->pm_fds[i][my_thread_index] = fd;
}
  /*
   * All hardware events must be opened and enabled before acquiring
   * the rdpmc indices, otherwise the indices may be stale.
   */
for (i = 0; i < limit; i++)
{
      p = (struct perf_event_mmap_page *)
	pm->perf_event_pages[i][my_thread_index];
      /*
       * Software event counters - and others not capable of being
       * read via the "rdpmc" instruction - will be read
       * by system calls.
       */
      if (p == 0 || p->cap_user_rdpmc == 0)
	index = ~0;
      else
	/*
	 * Per perf_event_open(2), the mmap page's index field is the
	 * rdpmc counter number plus one; an index of zero (no counter
	 * assigned) wraps to ~0 here, selecting the read() fallback.
	 */
	index = p->index - 1;
pm->rdpmc_indices[i][my_thread_index] = index;
}
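  /*
   * Capturing the index once is a simplification. perf_event_open(2)
   * documents a full lock-free read sequence which rereads the index
   * under a seqlock; a sketch, assuming a clib_rdpmc-style helper:
   *
   *   u32 seq, idx;
   *   u64 count;
   *   do
   *     {
   *       seq = p->lock;
   *       CLIB_MEMORY_BARRIER ();
   *       idx = p->index;
   *       count = p->offset;
   *       if (idx)
   *         count += clib_rdpmc (idx - 1);
   *       CLIB_MEMORY_BARRIER ();
   *     }
   *   while (p->lock != seq);
   *
   * This plugin instead assumes the events stay pinned and scheduled
   * on this core for the whole interval, so the index cannot change.
   */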
pm->n_active = i;
/* Enable the main loop counter snapshot mechanism */
clib_callback_enable_disable
(vm->vlib_node_runtime_perf_counter_cbs,
vm->vlib_node_runtime_perf_counter_cb_tmp,
vm->worker_thread_main_loop_callback_lock,
read_current_perf_counters, 1 /* enable */ );
}
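/*
 * Undo enable_current_events () on the current thread: stop the
 * snapshot callback, then disable, unmap, and close each event.
 */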
static void
disable_events (perfmon_main_t * pm)
{
vlib_main_t *vm = vlib_get_main ();
u32 my_thread_index = vm->thread_index;
int i;
  /* Stop main-loop collection on this thread */
  clib_callback_enable_disable
    (vm->vlib_node_runtime_perf_counter_cbs,
     vm->vlib_node_runtime_perf_counter_cb_tmp,
     vm->worker_thread_main_loop_callback_lock,
     read_current_perf_counters, 0 /* disable */ );
for (i = 0; i < pm->n_active; i++)
{
if (pm->pm_fds[i][my_thread_index] == 0)
continue;
if (ioctl (pm->pm_fds[i][my_thread_index], PERF_EVENT_IOC_DISABLE, 0) <
0)
clib_unix_warning ("disable ioctl");
if (pm->perf_event_pages[i][my_thread_index])
{
if (munmap (pm->perf_event_pages[i][my_thread_index],
pm->page_size) < 0)
clib_unix_warning ("munmap");
CLIB_MEM_POISON (pm->perf_event_pages[i][my_thread_index],
pm->page_size);
pm->perf_event_pages[i][my_thread_index] = 0;
}
(void) close (pm->pm_fds[i][my_thread_index]);
pm->pm_fds[i][my_thread_index] = 0;
}
}
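/*
 * One-shot worker main-loop callback: unhook itself, then enable
 * events in the worker's own context, since perf fds, mmap pages and
 * rdpmc indices are all per-thread.
 */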
static void
worker_thread_start_event (vlib_main_t * vm)
{
perfmon_main_t *pm = &perfmon_main;
  clib_callback_enable_disable (vm->worker_thread_main_loop_callbacks,
				vm->worker_thread_main_loop_callback_tmp,
				vm->worker_thread_main_loop_callback_lock,
				worker_thread_start_event, 0 /* disable */ );
enable_current_events (pm);
}
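/* One-shot worker main-loop callback: unhook itself, then stop collecting */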
static void
worker_thread_stop_event (vlib_main_t * vm)
{
perfmon_main_t *pm = &perfmon_main;
  clib_callback_enable_disable (vm->worker_thread_main_loop_callbacks,
				vm->worker_thread_main_loop_callback_tmp,
				vm->worker_thread_main_loop_callback_lock,
				worker_thread_stop_event, 0 /* disable */ );
disable_events (pm);
}
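/*
 * Begin a collection run: rewind to the first configured event, clear
 * stale counters, and start collection on each thread selected by the
 * thread bitmap; an empty bitmap means all threads.
 */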
static void
start_event (perfmon_main_t * pm, f64 now, uword event_data)
{
int i;
  uword last_set;
int all = 0;
pm->current_event = 0;
if (vec_len (pm->single_events_to_collect) == 0)
{
pm->state = PERFMON_STATE_OFF;
return;
}
last_set = clib_bitmap_last_set (pm->thread_bitmap);
all = (last_set == ~0);
pm->state = PERFMON_STATE_RUNNING;
clear_counters (pm);
/* Start collection on thread 0? */
if (all || clib_bitmap_get (pm->thread_bitmap, 0))
{
/* Start collection on this thread */
enable_current_events (pm);
}
/* And also on worker threads */
for (i = 1; i < vec_len (vlib_mains); i++)
{
if (vlib_mains[i] == 0)
continue;
if (all || clib_bitmap_get (pm->thread_bitmap, i))
clib_callback_enable_disable
(vlib_mains[i]->worker_thread_main_loop_callbacks,
vlib_mains[i]->worker_thread_main_loop_callback_tmp,
vlib_mains[i]->worker_thread_main_loop_callback_lock,
(void *) worker_thread_start_event, 1 /* enable */ );
}
}
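/*
 * Fold each node's perfmon counters into the capture pool, keyed by
 * "t<thread>-<node-name>", then zero them for the next event pair.
 * Nodes are duplicated under the barrier and post-processed after it
 * is released, to keep the barrier hold time short.
 */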
void
scrape_and_clear_counters (perfmon_main_t * pm)
{
int i, j, k;
vlib_main_t *vm = pm->vlib_main;
vlib_main_t *stat_vm;
vlib_node_main_t *nm;
vlib_node_t ***node_dups = 0;
vlib_node_t **nodes;
vlib_node_t *n;
perfmon_capture_t *c;
perfmon_event_config_t *current_event;
uword *p;
u8 *counter_name;
u64 vectors_this_counter;
  /* Snapshot the nodes, including the perfmon counters */
vlib_worker_thread_barrier_sync (vm);
for (j = 0; j < vec_len (vlib_mains); j++)
{
stat_vm = vlib_mains[j];
if (stat_vm == 0)
continue;
nm = &stat_vm->node_main;
for (i = 0; i < vec_len (nm->nodes); i++)
{
n = nm->nodes[i];
vlib_node_sync_stats (stat_vm, n);
}
nodes = 0;
vec_validate (nodes, vec_len (nm->nodes) - 1);
vec_add1 (node_dups, nodes);
      /* Snapshot and clear the per-node perfmon counters */
for (i = 0; i < vec_len (nm->nodes); i++)
{
n = nm->nodes[i];
nodes[i] = clib_mem_alloc (sizeof (*n));
clib_memcpy_fast (nodes[i], n, sizeof (*n));
n->stats_total.perf_counter0_ticks = 0;
n->stats_total.perf_counter1_ticks = 0;
n->stats_total.perf_counter_vectors = 0;
n->stats_last_clear.perf_counter0_ticks = 0;
n->stats_last_clear.perf_counter1_ticks = 0;
n->stats_last_clear.perf_counter_vectors = 0;
}
}
vlib_worker_thread_barrier_release (vm);
for (j = 0; j < vec_len (vlib_mains); j++)
{
stat_vm = vlib_mains[j];
if (stat_vm == 0)
continue;
nodes = node_dups[j];
for (i = 0; i < vec_len (nodes); i++)
{
u8 *capture_name;
n = nodes[i];
if (n->stats_total.perf_counter0_ticks == 0 &&
n->stats_total.perf_counter1_ticks == 0)
goto skip_this_node;
for (k = 0; k < 2; k++)
{
u64 counter_value, counter_last_clear;
/*
* We collect 2 counters at once, except for the
* last counter when the user asks for an odd number of
* counters
*/
if ((pm->current_event + k)
>= vec_len (pm->single_events_to_collect))
break;
if (k == 0)
{
counter_value = n->stats_total.perf_counter0_ticks;
counter_last_clear =
n->stats_last_clear.perf_counter0_ticks;
}
else
{
counter_value = n->stats_total.perf_counter1_ticks;
counter_last_clear =
n->stats_last_clear.perf_counter1_ticks;
}
capture_name = format (0, "t%d-%v%c", j, n->name, 0);
p = hash_get_mem (pm->capture_by_thread_and_node_name,
capture_name);
if (p == 0)
{
pool_get (pm->capture_pool, c);
memset (c, 0, sizeof (*c));
c->thread_and_node_name = capture_name;
hash_set_mem (pm->capture_by_thread_and_node_name,
capture_name, c - pm->capture_pool);
}
else
{
c = pool_elt_at_index (pm->capture_pool, p[0]);
vec_free (capture_name);
}
	      /* Snapshot counters, etc. into the capture */
current_event = pm->single_events_to_collect
+ pm->current_event + k;
counter_name = (u8 *) current_event->name;
vectors_this_counter = n->stats_total.perf_counter_vectors -
n->stats_last_clear.perf_counter_vectors;
vec_add1 (c->counter_names, counter_name);
vec_add1 (c->counter_values,
counter_value - counter_last_clear);
vec_add1 (c->vectors_this_counter, vectors_this_counter);
}
skip_this_node:
clib_mem_free (n);
}
vec_free (nodes);
}
vec_free (node_dups);
}
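/*
 * End-of-interval handler: stop collection everywhere, wait (with a
 * one-second deadman timeout) for workers to quiesce, scrape results,
 * then either advance to the next event pair or end the run.
 */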
static void
handle_timeout (vlib_main_t * vm, perfmon_main_t * pm, f64 now)
{
int i;
  uword last_set;
  int all;
last_set = clib_bitmap_last_set (pm->thread_bitmap);
all = (last_set == ~0);
if (all || clib_bitmap_get (pm->thread_bitmap, 0))
disable_events (pm);
/* And also on worker threads */
for (i = 1; i < vec_len (vlib_mains); i++)
{
if (vlib_mains[i] == 0)
continue;
if (all || clib_bitmap_get (pm->thread_bitmap, i))
clib_callback_enable_disable
(vlib_mains[i]->worker_thread_main_loop_callbacks,
vlib_mains[i]->worker_thread_main_loop_callback_tmp,
vlib_mains[i]->worker_thread_main_loop_callback_lock,
(void *) worker_thread_stop_event, 1 /* enable */ );
}
/* Make sure workers have stopped collection */
if (i > 1)
{
f64 deadman = vlib_time_now (vm) + 1.0;
for (i = 1; i < vec_len (vlib_mains); i++)
{
	  /*
	   * Has the worker actually stopped collecting data?
	   * read_current_perf_counters is only ever registered in
	   * vlib_node_runtime_perf_counter_cbs, so that is the vector
	   * to poll.
	   */
	  while (clib_callback_is_set
		 (vlib_mains[i]->vlib_node_runtime_perf_counter_cbs,
		  vlib_mains[i]->worker_thread_main_loop_callback_lock,
		  read_current_perf_counters))
{
if (vlib_time_now (vm) > deadman)
{
clib_warning ("Thread %d deadman timeout!", i);
break;
}
vlib_process_suspend (pm->vlib_main, 1e-3);
}
}
}
scrape_and_clear_counters (pm);
pm->current_event += pm->n_active;
if (pm->current_event >= vec_len (pm->single_events_to_collect))
{
pm->current_event = 0;
pm->state = PERFMON_STATE_OFF;
return;
}
if (all || clib_bitmap_get (pm->thread_bitmap, 0))
enable_current_events (pm);
/* And also on worker threads */
for (i = 1; i < vec_len (vlib_mains); i++)
{
if (vlib_mains[i] == 0)
continue;
if (all || clib_bitmap_get (pm->thread_bitmap, i))
clib_callback_enable_disable
(vlib_mains[i]->worker_thread_main_loop_callbacks,
vlib_mains[i]->worker_thread_main_loop_callback_tmp,
vlib_mains[i]->worker_thread_main_loop_callback_lock,
worker_thread_start_event, 1 /* enable */ );
}
}
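/*
 * The periodic process: sleep until kicked (PERFMON_START) or, while
 * a run is active, until the collection interval expires.
 */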
static uword
perfmon_periodic_process (vlib_main_t * vm,
vlib_node_runtime_t * rt, vlib_frame_t * f)
{
perfmon_main_t *pm = &perfmon_main;
f64 now;
uword *event_data = 0;
uword event_type;
int i;
while (1)
{
if (pm->state == PERFMON_STATE_RUNNING)
vlib_process_wait_for_event_or_clock (vm, pm->timeout_interval);
else
vlib_process_wait_for_event (vm);
now = vlib_time_now (vm);
event_type = vlib_process_get_events (vm, (uword **) & event_data);
switch (event_type)
{
case PERFMON_START:
for (i = 0; i < vec_len (event_data); i++)
start_event (pm, now, event_data[i]);
break;
/* Handle timeout */
case ~0:
handle_timeout (vm, pm, now);
break;
default:
clib_warning ("Unexpected event %d", event_type);
break;
}
vec_reset_length (event_data);
}
  return 0;			/* not reached */
}
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (perfmon_periodic_node) =
{
.function = perfmon_periodic_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "perfmon-periodic-process",
};
/* *INDENT-ON* */
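/*
 * A sketch of how a control-plane handler elsewhere in the plugin is
 * assumed to kick off a run (PERFMON_START is this plugin's event
 * type, handled in the process loop above):
 *
 *   vlib_process_signal_event (pm->vlib_main,
 *                              perfmon_periodic_node.index,
 *                              PERFMON_START, 0);
 */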
/*
* fd.io coding-style-patch-verification: ON
*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
*/