/*
 * perfmon_periodic.c - skeleton plug-in periodic function
 *
 * Copyright (c) <current-year> <your-organization>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vlib/vlib.h>
#include <vppinfra/error.h>
#include <perfmon/perfmon.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

/* "not in glibc" */
static long
perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu,
		 int group_fd, unsigned long flags)
{
  int ret;

  ret = syscall (__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
  return ret;
}

static void
read_current_perf_counters (vlib_main_t * vm, u64 * c0, u64 * c1,
			    vlib_node_runtime_t * node,
			    vlib_frame_t * frame, int before_or_after)
{
  int i;
  u64 *cc;
  perfmon_main_t *pm = &perfmon_main;
  uword my_thread_index = vm->thread_index;

  *c0 = *c1 = 0;
  for (i = 0; i < pm->n_active; i++)
    {
      cc = (i == 0) ? c0 : c1;
      if (pm->rdpmc_indices[i][my_thread_index] != ~0)
	*cc = clib_rdpmc ((int) pm->rdpmc_indices[i][my_thread_index]);
      else
	{
	  u64 sw_value;
	  int read_result;
	  if ((read_result = read (pm->pm_fds[i][my_thread_index], &sw_value,
				   sizeof (sw_value))) != sizeof (sw_value))
	    {
	      clib_unix_warning
		("counter read returned %d, expected %d",
		 read_result, (int) sizeof (sw_value));
	      clib_callback_enable_disable
		(vm->vlib_node_runtime_perf_counter_cbs,
		 vm->vlib_node_runtime_perf_counter_cb_tmp,
		 vm->worker_thread_main_loop_callback_lock,
		 read_current_perf_counters, 0 /* enable */ );
	      return;
	    }
	  *cc = sw_value;
	}
    }
}

static void
clear_counters (perfmon_main_t * pm)
{
  int i, j;
  vlib_main_t *vm = pm->vlib_main;
  vlib_main_t *stat_vm;
  vlib_node_main_t *nm;
  vlib_node_t *n;

  vlib_worker_thread_barrier_sync (vm);

  for (j = 0; j < vec_len (vlib_mains); j++)
    {
      stat_vm = vlib_mains[j];
      if (stat_vm == 0)
	continue;

      nm = &stat_vm->node_main;

      /* Clear the node runtime perfmon counters */
      for (i = 0; i < vec_len (nm->nodes); i++)
	{
	  n = nm->nodes[i];
	  vlib_node_sync_stats (stat_vm, n);
	}

      /* And clear the node perfmon counters */
      for (i = 0; i < vec_len (nm->nodes); i++)
	{
	  n = nm->nodes[i];
	  n->stats_total.perf_counter0_ticks = 0;
	  n->stats_total.perf_counter1_ticks = 0;
	  n->stats_total.perf_counter_vectors = 0;
	  n->stats_last_clear.perf_counter0_ticks = 0;
	  n->stats_last_clear.perf_counter1_ticks = 0;
	  n->stats_last_clear.perf_counter_vectors = 0;
	}
    }
  vlib_worker_thread_barrier_release (vm);
}
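/*
 * Fast-path counter reads, in brief: perf_event_open() returns a file
 * descriptor; mmap()ing that descriptor yields a struct
 * perf_event_mmap_page whose "index" field - when cap_user_rdpmc is
 * set - identifies the hardware counter that the rdpmc instruction can
 * read directly, avoiding a read() system call per snapshot. A minimal
 * sketch of the pattern enable_current_events() implements below,
 * using a hypothetical hardware event, with error handling elided:
 *
 *   struct perf_event_attr pe = { 0 };
 *   pe.size = sizeof (pe);
 *   pe.type = PERF_TYPE_HARDWARE;
 *   pe.config = PERF_COUNT_HW_CPU_CYCLES;
 *   int fd = perf_event_open (&pe, 0, vm->cpu_id, -1, 0);
 *   struct perf_event_mmap_page *p =
 *     mmap (0, page_size, PROT_READ, MAP_SHARED, fd, 0);
 *   // index is biased by 1; 0 means "not readable via rdpmc"
 *   u64 count = p->cap_user_rdpmc ? clib_rdpmc ((int) (p->index - 1)) : 0;
 */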
static void
enable_current_events (perfmon_main_t * pm)
{
  struct perf_event_attr pe;
  int fd;
  struct perf_event_mmap_page *p = 0;
  perfmon_event_config_t *c;
  vlib_main_t *vm = vlib_get_main ();
  u32 my_thread_index = vm->thread_index;
  u32 index;
  int i, limit = 1;
  int cpu;

  if ((pm->current_event + 1) < vec_len (pm->single_events_to_collect))
    limit = 2;

  for (i = 0; i < limit; i++)
    {
      c = vec_elt_at_index (pm->single_events_to_collect,
			    pm->current_event + i);

      memset (&pe, 0, sizeof (struct perf_event_attr));
      pe.type = c->pe_type;
      pe.size = sizeof (struct perf_event_attr);
      pe.config = c->pe_config;
      pe.disabled = 1;
      pe.pinned = 1;
      /*
       * Note: excluding the kernel makes the
       * (software) context-switch counter read 0...
       */
      if (pe.type != PERF_TYPE_SOFTWARE)
	{
	  /* Exclude kernel and hypervisor */
	  pe.exclude_kernel = 1;
	  pe.exclude_hv = 1;
	}

      cpu = vm->cpu_id;

      fd = perf_event_open (&pe, 0, cpu, -1, 0);
      if (fd == -1)
	{
	  clib_unix_warning ("event open: type %d config %d", c->pe_type,
			     c->pe_config);
	  return;
	}

      if (pe.type != PERF_TYPE_SOFTWARE)
	{
	  p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0);
	  if (p == MAP_FAILED)
	    {
	      clib_unix_warning ("mmap");
	      close (fd);
	      return;
	    }
	}
      else
	p = 0;

      if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0)
	clib_unix_warning ("reset ioctl");

      if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
	clib_unix_warning ("enable ioctl");

      pm->perf_event_pages[i][my_thread_index] = (void *) p;
      pm->pm_fds[i][my_thread_index] = fd;
    }

  /*
   * Hardware events must all be opened and enabled before acquiring
   * pmc indices, otherwise the pmc indices might be outdated.
   */
  for (i = 0; i < limit; i++)
    {
      p = (struct perf_event_mmap_page *)
	pm->perf_event_pages[i][my_thread_index];

      /*
       * Software event counters - and others not capable of being
       * read via the "rdpmc" instruction - will be read
       * by system calls.
       */
      if (p == 0 || p->cap_user_rdpmc == 0)
	index = ~0;
      else
	index = p->index - 1;

      pm->rdpmc_indices[i][my_thread_index] = index;
    }

  pm->n_active = i;
  /* Enable the main loop counter snapshot mechanism */
  clib_callback_enable_disable (vm->vlib_node_runtime_perf_counter_cbs,
				vm->vlib_node_runtime_perf_counter_cb_tmp,
				vm->worker_thread_main_loop_callback_lock,
				read_current_perf_counters, 1 /* enable */ );
}

static void
disable_events (perfmon_main_t * pm)
{
  vlib_main_t *vm = vlib_get_main ();
  u32 my_thread_index = vm->thread_index;
  int i;

  /* Stop main loop collection */
  clib_callback_enable_disable (vm->vlib_node_runtime_perf_counter_cbs,
				vm->vlib_node_runtime_perf_counter_cb_tmp,
				vm->worker_thread_main_loop_callback_lock,
				read_current_perf_counters, 0 /* enable */ );

  for (i = 0; i < pm->n_active; i++)
    {
      if (pm->pm_fds[i][my_thread_index] == 0)
	continue;

      if (ioctl (pm->pm_fds[i][my_thread_index], PERF_EVENT_IOC_DISABLE, 0)
	  < 0)
	clib_unix_warning ("disable ioctl");

      if (pm->perf_event_pages[i][my_thread_index])
	if (munmap (pm->perf_event_pages[i][my_thread_index],
		    pm->page_size) < 0)
	  clib_unix_warning ("munmap");

      (void) close (pm->pm_fds[i][my_thread_index]);
      pm->pm_fds[i][my_thread_index] = 0;
    }
}

static void
worker_thread_start_event (vlib_main_t * vm)
{
  perfmon_main_t *pm = &perfmon_main;

  clib_callback_enable_disable (vm->worker_thread_main_loop_callbacks,
				vm->worker_thread_main_loop_callback_tmp,
				vm->worker_thread_main_loop_callback_lock,
				worker_thread_start_event, 0 /* enable */ );
  enable_current_events (pm);
}

static void
worker_thread_stop_event (vlib_main_t * vm)
{
  perfmon_main_t *pm = &perfmon_main;

  clib_callback_enable_disable (vm->worker_thread_main_loop_callbacks,
				vm->worker_thread_main_loop_callback_tmp,
				vm->worker_thread_main_loop_callback_lock,
				worker_thread_stop_event, 0 /* enable */ );
  disable_events (pm);
}
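/*
 * Why the start/stop callbacks above disarm themselves: perf fds and
 * rdpmc indices are per-cpu, so enable_current_events() and
 * disable_events() must execute on the thread being measured. Instead
 * of calling them directly, the process node arms a main-loop callback
 * on each selected worker; the worker runs it once in its own context
 * on its next main-loop iteration, and the callback's first act is to
 * disable itself. A sketch of arming the one-shot, mirroring the real
 * call in start_event() below:
 *
 *   clib_callback_enable_disable
 *     (vlib_mains[i]->worker_thread_main_loop_callbacks,
 *      vlib_mains[i]->worker_thread_main_loop_callback_tmp,
 *      vlib_mains[i]->worker_thread_main_loop_callback_lock,
 *      (void *) worker_thread_start_event, 1);   // 1 => arm, 0 => disarm
 */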
static void
start_event (perfmon_main_t * pm, f64 now, uword event_data)
{
  int i;
  int last_set;
  int all = 0;

  pm->current_event = 0;

  if (vec_len (pm->single_events_to_collect) == 0)
    {
      pm->state = PERFMON_STATE_OFF;
      return;
    }

  last_set = clib_bitmap_last_set (pm->thread_bitmap);
  all = (last_set == ~0);

  pm->state = PERFMON_STATE_RUNNING;
  clear_counters (pm);

  /* Start collection on thread 0? */
  if (all || clib_bitmap_get (pm->thread_bitmap, 0))
    {
      /* Start collection on this thread */
      enable_current_events (pm);
    }

  /* And also on worker threads */
  for (i = 1; i < vec_len (vlib_mains); i++)
    {
      if (vlib_mains[i] == 0)
	continue;
      if (all || clib_bitmap_get (pm->thread_bitmap, i))
	clib_callback_enable_disable
	  (vlib_mains[i]->worker_thread_main_loop_callbacks,
	   vlib_mains[i]->worker_thread_main_loop_callback_tmp,
	   vlib_mains[i]->worker_thread_main_loop_callback_lock,
	   (void *) worker_thread_start_event, 1 /* enable */ );
    }
}

void
scrape_and_clear_counters (perfmon_main_t * pm)
{
  int i, j, k;
  vlib_main_t *vm = pm->vlib_main;
  vlib_main_t *stat_vm;
  vlib_node_main_t *nm;
  vlib_node_t ***node_dups = 0;
  vlib_node_t **nodes;
  vlib_node_t *n;
  perfmon_capture_t *c;
  perfmon_event_config_t *current_event;
  uword *p;
  u8 *counter_name;
  u64 vectors_this_counter;

  /* Snapshot the nodes, including pm counters */
  vlib_worker_thread_barrier_sync (vm);

  for (j = 0; j < vec_len (vlib_mains); j++)
    {
      stat_vm = vlib_mains[j];
      if (stat_vm == 0)
	continue;

      nm = &stat_vm->node_main;

      for (i = 0; i < vec_len (nm->nodes); i++)
	{
	  n = nm->nodes[i];
	  vlib_node_sync_stats (stat_vm, n);
	}

      nodes = 0;
      vec_validate (nodes, vec_len (nm->nodes) - 1);
      vec_add1 (node_dups, nodes);

      /* Snapshot and clear the per-node perfmon counters */
      for (i = 0; i < vec_len (nm->nodes); i++)
	{
	  n = nm->nodes[i];
	  nodes[i] = clib_mem_alloc (sizeof (*n));
	  clib_memcpy_fast (nodes[i], n, sizeof (*n));
	  n->stats_total.perf_counter0_ticks = 0;
	  n->stats_total.perf_counter1_ticks = 0;
	  n->stats_total.perf_counter_vectors = 0;
	  n->stats_last_clear.perf_counter0_ticks = 0;
	  n->stats_last_clear.perf_counter1_ticks = 0;
	  n->stats_last_clear.perf_counter_vectors = 0;
	}
    }

  vlib_worker_thread_barrier_release (vm);

  for (j = 0; j < vec_len (vlib_mains); j++)
    {
      stat_vm = vlib_mains[j];
      if (stat_vm == 0)
	continue;

      nodes = node_dups[j];

      for (i = 0; i < vec_len (nodes); i++)
	{
	  u8 *capture_name;

	  n = nodes[i];

	  if (n->stats_total.perf_counter0_ticks == 0 &&
	      n->stats_total.perf_counter1_ticks == 0)
	    goto skip_this_node;

	  for (k = 0; k < 2; k++)
	    {
	      u64 counter_value, counter_last_clear;

	      /*
	       * We collect 2 counters at once, except for the
	       * last counter when the user asks for an odd number of
	       * counters
	       */
	      if ((pm->current_event + k)
		  >= vec_len (pm->single_events_to_collect))
		break;

	      if (k == 0)
		{
		  counter_value = n->stats_total.perf_counter0_ticks;
		  counter_last_clear =
		    n->stats_last_clear.perf_counter0_ticks;
		}
	      else
		{
		  counter_value = n->stats_total.perf_counter1_ticks;
		  counter_last_clear =
		    n->stats_last_clear.perf_counter1_ticks;
		}

	      capture_name = format (0, "t%d-%v%c", j, n->name, 0);

	      p = hash_get_mem (pm->capture_by_thread_and_node_name,
				capture_name);

	      if (p == 0)
		{
		  pool_get (pm->capture_pool, c);
		  memset (c, 0, sizeof (*c));
		  c->thread_and_node_name = capture_name;
		  hash_set_mem (pm->capture_by_thread_and_node_name,
				capture_name, c - pm->capture_pool);
		}
	      else
		{
		  c = pool_elt_at_index (pm->capture_pool, p[0]);
		  vec_free (capture_name);
		}

	      /* Snapshot counters, etc. into the capture */
	      current_event = pm->single_events_to_collect
		+ pm->current_event + k;
	      counter_name = (u8 *) current_event->name;
	      vectors_this_counter = n->stats_total.perf_counter_vectors -
		n->stats_last_clear.perf_counter_vectors;

	      vec_add1 (c->counter_names, counter_name);
	      vec_add1 (c->counter_values,
			counter_value - counter_last_clear);
	      vec_add1 (c->vectors_this_counter, vectors_this_counter);
	    }

	skip_this_node:
	  clib_mem_free (n);
	}
      vec_free (nodes);
    }
  vec_free (node_dups);
}
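/*
 * Design note on scrape_and_clear_counters(): the expensive work -
 * formatting capture names and updating the capture pool/hash - runs
 * outside the worker barrier. Under the barrier we only memcpy each
 * vlib_node_t and zero its counters, keeping the packet-processing
 * stall short. Captures are keyed by a "t<thread>-<node-name>" string,
 * so looking up the capture for a given node on a given thread amounts
 * to (hypothetical thread index and node-name vector, error handling
 * elided):
 *
 *   u8 *key = format (0, "t%d-%v%c", 2, node_name_vector, 0);
 *   uword *p = hash_get_mem (pm->capture_by_thread_and_node_name, key);
 *   if (p)
 *     c = pool_elt_at_index (pm->capture_pool, p[0]);
 */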
static void
handle_timeout (vlib_main_t * vm, perfmon_main_t * pm, f64 now)
{
  int i;
  int last_set, all;

  last_set = clib_bitmap_last_set (pm->thread_bitmap);
  all = (last_set == ~0);

  if (all || clib_bitmap_get (pm->thread_bitmap, 0))
    disable_events (pm);

  /* And also on worker threads */
  for (i = 1; i < vec_len (vlib_mains); i++)
    {
      if (vlib_mains[i] == 0)
	continue;
      if (all || clib_bitmap_get (pm->thread_bitmap, i))
	clib_callback_enable_disable
	  (vlib_mains[i]->worker_thread_main_loop_callbacks,
	   vlib_mains[i]->worker_thread_main_loop_callback_tmp,
	   vlib_mains[i]->worker_thread_main_loop_callback_lock,
	   (void *) worker_thread_stop_event, 1 /* enable */ );
    }

  /* Make sure workers have stopped collection */
  if (i > 1)
    {
      f64 deadman = vlib_time_now (vm) + 1.0;

      for (i = 1; i < vec_len (vlib_mains); i++)
	{
	  /* Has the worker actually stopped collecting data? */
	  while (clib_callback_is_set
		 (vlib_mains[i]->worker_thread_main_loop_callbacks,
		  vlib_mains[i]->worker_thread_main_loop_callback_lock,
		  read_current_perf_counters))
	    {
	      if (vlib_time_now (vm) > deadman)
		{
		  clib_warning ("Thread %d deadman timeout!", i);
		  break;
		}
	      vlib_process_suspend (pm->vlib_main, 1e-3);
	    }
	}
    }

  scrape_and_clear_counters (pm);
  pm->current_event += pm->n_active;
  if (pm->current_event >= vec_len (pm->single_events_to_collect))
    {
      pm->current_event = 0;
      pm->state = PERFMON_STATE_OFF;
      return;
    }

  if (all || clib_bitmap_get (pm->thread_bitmap, 0))
    enable_current_events (pm);

  /* And also on worker threads */
  for (i = 1; i < vec_len (vlib_mains); i++)
    {
      if (vlib_mains[i] == 0)
	continue;
      if (all || clib_bitmap_get (pm->thread_bitmap, i))
	clib_callback_enable_disable
	  (vlib_mains[i]->worker_thread_main_loop_callbacks,
	   vlib_mains[i]->worker_thread_main_loop_callback_tmp,
	   vlib_mains[i]->worker_thread_main_loop_callback_lock,
	   worker_thread_start_event, 1 /* enable */ );
    }
}

static uword
perfmon_periodic_process (vlib_main_t * vm,
			  vlib_node_runtime_t * rt, vlib_frame_t * f)
{
  perfmon_main_t *pm = &perfmon_main;
  f64 now;
  uword *event_data = 0;
  uword event_type;
  int i;

  while (1)
    {
      if (pm->state == PERFMON_STATE_RUNNING)
	vlib_process_wait_for_event_or_clock (vm, pm->timeout_interval);
      else
	vlib_process_wait_for_event (vm);

      now = vlib_time_now (vm);

      event_type = vlib_process_get_events (vm, (uword **) & event_data);

      switch (event_type)
	{
	case PERFMON_START:
	  for (i = 0; i < vec_len (event_data); i++)
	    start_event (pm, now, event_data[i]);
	  break;

	  /* Handle timeout */
	case ~0:
	  handle_timeout (vm, pm, now);
	  break;

	default:
	  clib_warning ("Unexpected event %d", event_type);
	  break;
	}
      vec_reset_length (event_data);
    }
  return 0;			/* or not */
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (perfmon_periodic_node) =
{
  .function = perfmon_periodic_process,
  .type = VLIB_NODE_TYPE_PROCESS,
  .name = "perfmon-periodic-process",
};
/* *INDENT-ON* */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */