diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/plugins/perfmon/perfmon.c | 62 | ||||
-rw-r--r-- | src/plugins/perfmon/perfmon.h | 14 | ||||
-rw-r--r-- | src/plugins/perfmon/perfmon_periodic.c | 295 | ||||
-rw-r--r-- | src/vlib/main.c | 86 | ||||
-rw-r--r-- | src/vlib/main.h | 5 | ||||
-rw-r--r-- | src/vlib/node.h | 6 | ||||
-rw-r--r-- | src/vlib/node_cli.c | 15 | ||||
-rw-r--r-- | src/vlibapi/node_serialize.c | 11 |
8 files changed, 284 insertions, 210 deletions
diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c index c6a80224e0e..359555705aa 100644 --- a/src/plugins/perfmon/perfmon.c +++ b/src/plugins/perfmon/perfmon.c @@ -157,10 +157,16 @@ perfmon_init (vlib_main_t * vm) pm->log_class = vlib_log_register_class ("perfmon", 0); /* Default data collection interval */ - pm->timeout_interval = 3.0; - vec_validate (pm->pm_fds, vec_len (vlib_mains) - 1); - vec_validate (pm->perf_event_pages, vec_len (vlib_mains) - 1); - vec_validate (pm->rdpmc_indices, vec_len (vlib_mains) - 1); + pm->timeout_interval = 2.0; /* seconds */ + vec_validate (pm->pm_fds, 1); + vec_validate (pm->pm_fds[0], vec_len (vlib_mains) - 1); + vec_validate (pm->pm_fds[1], vec_len (vlib_mains) - 1); + vec_validate (pm->perf_event_pages, 1); + vec_validate (pm->perf_event_pages[0], vec_len (vlib_mains) - 1); + vec_validate (pm->perf_event_pages[1], vec_len (vlib_mains) - 1); + vec_validate (pm->rdpmc_indices, 1); + vec_validate (pm->rdpmc_indices[0], vec_len (vlib_mains) - 1); + vec_validate (pm->rdpmc_indices[1], vec_len (vlib_mains) - 1); pm->page_size = getpagesize (); ht = pm->perfmon_table = 0; @@ -297,10 +303,12 @@ set_pmc_command_fn (vlib_main_t * vm, perfmon_main_t *pm = &perfmon_main; unformat_input_t _line_input, *line_input = &_line_input; perfmon_event_config_t ec; + f64 delay; u32 timeout_seconds; u32 deadman; - vec_reset_length (pm->events_to_collect); + vec_reset_length (pm->single_events_to_collect); + vec_reset_length (pm->paired_events_to_collect); pm->ipc_event_index = ~0; pm->mispredict_event_index = ~0; @@ -316,28 +324,28 @@ set_pmc_command_fn (vlib_main_t * vm, ec.name = "instructions"; ec.pe_type = PERF_TYPE_HARDWARE; ec.pe_config = PERF_COUNT_HW_INSTRUCTIONS; - pm->ipc_event_index = vec_len (pm->events_to_collect); - vec_add1 (pm->events_to_collect, ec); + pm->ipc_event_index = vec_len (pm->paired_events_to_collect); + vec_add1 (pm->paired_events_to_collect, ec); ec.name = "cpu-cycles"; ec.pe_type = PERF_TYPE_HARDWARE; ec.pe_config = PERF_COUNT_HW_CPU_CYCLES; - vec_add1 (pm->events_to_collect, ec); + vec_add1 (pm->paired_events_to_collect, ec); } else if (unformat (line_input, "branch-mispredict-rate")) { ec.name = "branch-misses"; ec.pe_type = PERF_TYPE_HARDWARE; ec.pe_config = PERF_COUNT_HW_BRANCH_MISSES; - pm->mispredict_event_index = vec_len (pm->events_to_collect); - vec_add1 (pm->events_to_collect, ec); + pm->mispredict_event_index = vec_len (pm->paired_events_to_collect); + vec_add1 (pm->paired_events_to_collect, ec); ec.name = "branches"; ec.pe_type = PERF_TYPE_HARDWARE; ec.pe_config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; - vec_add1 (pm->events_to_collect, ec); + vec_add1 (pm->paired_events_to_collect, ec); } else if (unformat (line_input, "%U", unformat_processor_event, pm, &ec)) { - vec_add1 (pm->events_to_collect, ec); + vec_add1 (pm->single_events_to_collect, ec); } #define _(type,event,str) \ else if (unformat (line_input, str)) \ @@ -345,7 +353,7 @@ set_pmc_command_fn (vlib_main_t * vm, ec.name = str; \ ec.pe_type = type; \ ec.pe_config = event; \ - vec_add1 (pm->events_to_collect, ec); \ + vec_add1 (pm->single_events_to_collect, ec); \ } foreach_perfmon_event #undef _ @@ -354,21 +362,33 @@ set_pmc_command_fn (vlib_main_t * vm, format_unformat_error, line_input); } - if (vec_len (pm->events_to_collect) == 0) + /* Stick paired events at the front of the (unified) list */ + if (vec_len (pm->paired_events_to_collect) > 0) + { + perfmon_event_config_t *tmp; + /* first 2n events are pairs... */ + vec_append (pm->paired_events_to_collect, pm->single_events_to_collect); + tmp = pm->single_events_to_collect; + pm->single_events_to_collect = pm->paired_events_to_collect; + pm->paired_events_to_collect = tmp; + } + + if (vec_len (pm->single_events_to_collect) == 0) return clib_error_return (0, "no events specified..."); + /* Figure out how long data collection will take */ + delay = + ((f64) vec_len (pm->single_events_to_collect)) * pm->timeout_interval; + delay /= 2.0; /* collect 2 stats at once */ + vlib_cli_output (vm, "Start collection for %d events, wait %.2f seconds", - vec_len (pm->events_to_collect), - (f64) (vec_len (pm->events_to_collect)) - * pm->timeout_interval); + vec_len (pm->single_events_to_collect), delay); vlib_process_signal_event (pm->vlib_main, perfmon_periodic_node.index, PERFMON_START, 0); /* Coarse-grained wait */ - vlib_process_suspend (vm, - ((f64) (vec_len (pm->events_to_collect) - * pm->timeout_interval))); + vlib_process_suspend (vm, delay); deadman = 0; /* Reasonable to guess that collection may not be quite done... */ @@ -438,7 +458,7 @@ format_capture (u8 * s, va_list * args) if (i == pm->ipc_event_index) { f64 ipc_rate; - ASSERT (i + 1 < vec_len (c->counter_names)); + ASSERT ((i + 1) < vec_len (c->counter_names)); if (c->counter_values[i + 1] > 0) ipc_rate = (f64) c->counter_values[i] diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h index 47ee471d5fc..9663dae36d1 100644 --- a/src/plugins/perfmon/perfmon.h +++ b/src/plugins/perfmon/perfmon.h @@ -97,8 +97,11 @@ typedef struct perfmon_cpuid_and_table_t *perfmon_tables; uword *perfmon_table; - /* vector of events to collect */ - perfmon_event_config_t *events_to_collect; + /* vector of single events to collect */ + perfmon_event_config_t *single_events_to_collect; + + /* vector of paired events to collect */ + perfmon_event_config_t *paired_events_to_collect; /* Base indices of synthetic event tuples */ u32 ipc_event_index; @@ -109,13 +112,14 @@ typedef struct /* Current event (index) being collected */ u32 current_event; - u32 *rdpmc_indices; + int n_active; + u32 **rdpmc_indices; /* mmap base / size of (mapped) struct perf_event_mmap_page */ - u8 **perf_event_pages; + u8 ***perf_event_pages; u32 page_size; /* Current perf_event file descriptors, per thread */ - int *pm_fds; + int **pm_fds; /* Logging */ vlib_log_class_t log_class; diff --git a/src/plugins/perfmon/perfmon_periodic.c b/src/plugins/perfmon/perfmon_periodic.c index 4e7e2378320..ae20ac4c62f 100644 --- a/src/plugins/perfmon/perfmon_periodic.c +++ b/src/plugins/perfmon/perfmon_periodic.c @@ -31,22 +31,34 @@ perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu, return ret; } -static u64 -read_current_perf_counter (vlib_main_t * vm) +static void +read_current_perf_counters (vlib_main_t * vm, u64 * c0, u64 * c1) { - if (vm->perf_counter_id) - return clib_rdpmc (vm->perf_counter_id); - else + int i; + u64 *cc; + perfmon_main_t *pm = &perfmon_main; + uword my_thread_index = vm->thread_index; + + *c0 = *c1 = 0; + + for (i = 0; i < pm->n_active; i++) { - u64 sw_value; - if (read (vm->perf_counter_fd, &sw_value, sizeof (sw_value)) != - sizeof (sw_value)) + cc = (i == 0) ? c0 : c1; + if (pm->rdpmc_indices[i][my_thread_index] != ~0) + *cc = clib_rdpmc ((int) pm->rdpmc_indices[i][my_thread_index]); + else { - clib_unix_warning ("counter read failed, disable collection..."); - vm->vlib_node_runtime_perf_counter_cb = 0; - return 0ULL; + u64 sw_value; + if (read (pm->pm_fds[i][my_thread_index], &sw_value, + sizeof (sw_value)) != sizeof (sw_value)) + { + clib_unix_warning + ("counter read failed, disable collection..."); + vm->vlib_node_runtime_perf_counter_cb = 0; + return; + } + *cc = sw_value; } - return sw_value; } } @@ -80,9 +92,11 @@ clear_counters (perfmon_main_t * pm) for (i = 0; i < vec_len (nm->nodes); i++) { n = nm->nodes[i]; - n->stats_total.perf_counter_ticks = 0; + n->stats_total.perf_counter0_ticks = 0; + n->stats_total.perf_counter1_ticks = 0; n->stats_total.perf_counter_vectors = 0; - n->stats_last_clear.perf_counter_ticks = 0; + n->stats_last_clear.perf_counter0_ticks = 0; + n->stats_last_clear.perf_counter1_ticks = 0; n->stats_last_clear.perf_counter_vectors = 0; } } @@ -90,7 +104,7 @@ clear_counters (perfmon_main_t * pm) } static void -enable_current_event (perfmon_main_t * pm) +enable_current_events (perfmon_main_t * pm) { struct perf_event_attr pe; int fd; @@ -98,91 +112,108 @@ enable_current_event (perfmon_main_t * pm) perfmon_event_config_t *c; vlib_main_t *vm = vlib_get_main (); u32 my_thread_index = vm->thread_index; + u32 index; + int i, limit = 1; - c = vec_elt_at_index (pm->events_to_collect, pm->current_event); - - memset (&pe, 0, sizeof (struct perf_event_attr)); - pe.type = c->pe_type; - pe.size = sizeof (struct perf_event_attr); - pe.config = c->pe_config; - pe.disabled = 1; - pe.pinned = 1; - /* - * Note: excluding the kernel makes the - * (software) context-switch counter read 0... - */ - if (pe.type != PERF_TYPE_SOFTWARE) - { - /* Exclude kernel and hypervisor */ - pe.exclude_kernel = 1; - pe.exclude_hv = 1; - } + if ((pm->current_event + 1) < vec_len (pm->single_events_to_collect)) + limit = 2; - fd = perf_event_open (&pe, 0, -1, -1, 0); - if (fd == -1) + for (i = 0; i < limit; i++) { - clib_unix_warning ("event open: type %d config %d", c->pe_type, - c->pe_config); - return; - } + c = vec_elt_at_index (pm->single_events_to_collect, + pm->current_event + i); + + memset (&pe, 0, sizeof (struct perf_event_attr)); + pe.type = c->pe_type; + pe.size = sizeof (struct perf_event_attr); + pe.config = c->pe_config; + pe.disabled = 1; + pe.pinned = 1; + /* + * Note: excluding the kernel makes the + * (software) context-switch counter read 0... + */ + if (pe.type != PERF_TYPE_SOFTWARE) + { + /* Exclude kernel and hypervisor */ + pe.exclude_kernel = 1; + pe.exclude_hv = 1; + } - if (pe.type != PERF_TYPE_SOFTWARE) - { - p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0); - if (p == MAP_FAILED) + fd = perf_event_open (&pe, 0, -1, -1, 0); + if (fd == -1) { - clib_unix_warning ("mmap"); - close (fd); + clib_unix_warning ("event open: type %d config %d", c->pe_type, + c->pe_config); return; } - } - if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0) - clib_unix_warning ("reset ioctl"); + if (pe.type != PERF_TYPE_SOFTWARE) + { + p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0); + if (p == MAP_FAILED) + { + clib_unix_warning ("mmap"); + close (fd); + return; + } + } + else + p = 0; + + /* + * Software event counters - and others not capable of being + * read via the "rdpmc" instruction - will be read + * by system calls. + */ + if (pe.type == PERF_TYPE_SOFTWARE || p->cap_user_rdpmc == 0) + index = ~0; + else + index = p->index - 1; - if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0) - clib_unix_warning ("enable ioctl"); + if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0) + clib_unix_warning ("reset ioctl"); - /* - * Software event counters - and others not capable of being - * read via the "rdpmc" instruction - will be read - * by system calls. - */ - if (pe.type == PERF_TYPE_SOFTWARE || p->cap_user_rdpmc == 0) - pm->rdpmc_indices[my_thread_index] = 0; - else /* use rdpmc instrs */ - pm->rdpmc_indices[my_thread_index] = p->index - 1; - pm->perf_event_pages[my_thread_index] = (void *) p; + if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0) + clib_unix_warning ("enable ioctl"); - pm->pm_fds[my_thread_index] = fd; + pm->rdpmc_indices[i][my_thread_index] = index; + pm->perf_event_pages[i][my_thread_index] = (void *) p; + pm->pm_fds[i][my_thread_index] = fd; + } + pm->n_active = i; /* Enable the main loop counter snapshot mechanism */ - vm->perf_counter_id = pm->rdpmc_indices[my_thread_index]; - vm->perf_counter_fd = fd; - vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counter; + vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counters; } static void -disable_event (perfmon_main_t * pm) +disable_events (perfmon_main_t * pm) { vlib_main_t *vm = vlib_get_main (); u32 my_thread_index = vm->thread_index; - - if (pm->pm_fds[my_thread_index] == 0) - return; + int i; /* Stop main loop collection */ vm->vlib_node_runtime_perf_counter_cb = 0; - if (ioctl (pm->pm_fds[my_thread_index], PERF_EVENT_IOC_DISABLE, 0) < 0) - clib_unix_warning ("disable ioctl"); + for (i = 0; i < pm->n_active; i++) + { + if (pm->pm_fds[i][my_thread_index] == 0) + continue; + + if (ioctl (pm->pm_fds[i][my_thread_index], PERF_EVENT_IOC_DISABLE, 0) < + 0) + clib_unix_warning ("disable ioctl"); - if (pm->perf_event_pages[my_thread_index]) - if (munmap (pm->perf_event_pages[my_thread_index], pm->page_size) < 0) - clib_unix_warning ("munmap"); + if (pm->perf_event_pages[i][my_thread_index]) + if (munmap (pm->perf_event_pages[i][my_thread_index], + pm->page_size) < 0) + clib_unix_warning ("munmap"); - (void) close (pm->pm_fds[my_thread_index]); - pm->pm_fds[my_thread_index] = 0; + (void) close (pm->pm_fds[i][my_thread_index]); + pm->pm_fds[i][my_thread_index] = 0; + } } static void @@ -190,7 +221,7 @@ worker_thread_start_event (vlib_main_t * vm) { perfmon_main_t *pm = &perfmon_main; - enable_current_event (pm); + enable_current_events (pm); vm->worker_thread_main_loop_callback = 0; } @@ -198,7 +229,7 @@ static void worker_thread_stop_event (vlib_main_t * vm) { perfmon_main_t *pm = &perfmon_main; - disable_event (pm); + disable_events (pm); vm->worker_thread_main_loop_callback = 0; } @@ -207,7 +238,7 @@ start_event (perfmon_main_t * pm, f64 now, uword event_data) { int i; pm->current_event = 0; - if (vec_len (pm->events_to_collect) == 0) + if (vec_len (pm->single_events_to_collect) == 0) { pm->state = PERFMON_STATE_OFF; return; @@ -216,7 +247,7 @@ start_event (perfmon_main_t * pm, f64 now, uword event_data) clear_counters (pm); /* Start collection on this thread */ - enable_current_event (pm); + enable_current_events (pm); /* And also on worker threads */ for (i = 1; i < vec_len (vlib_mains); i++) @@ -231,7 +262,7 @@ start_event (perfmon_main_t * pm, f64 now, uword event_data) void scrape_and_clear_counters (perfmon_main_t * pm) { - int i, j; + int i, j, k; vlib_main_t *vm = pm->vlib_main; vlib_main_t *stat_vm; vlib_node_main_t *nm; @@ -242,7 +273,6 @@ scrape_and_clear_counters (perfmon_main_t * pm) perfmon_event_config_t *current_event; uword *p; u8 *counter_name; - u64 counter_value; u64 vectors_this_counter; /* snapshoot the nodes, including pm counters */ @@ -272,17 +302,17 @@ scrape_and_clear_counters (perfmon_main_t * pm) n = nm->nodes[i]; nodes[i] = clib_mem_alloc (sizeof (*n)); clib_memcpy_fast (nodes[i], n, sizeof (*n)); - n->stats_total.perf_counter_ticks = 0; + n->stats_total.perf_counter0_ticks = 0; + n->stats_total.perf_counter1_ticks = 0; n->stats_total.perf_counter_vectors = 0; - n->stats_last_clear.perf_counter_ticks = 0; + n->stats_last_clear.perf_counter0_ticks = 0; + n->stats_last_clear.perf_counter1_ticks = 0; n->stats_last_clear.perf_counter_vectors = 0; } } vlib_worker_thread_barrier_release (vm); - current_event = pm->events_to_collect + pm->current_event; - for (j = 0; j < vec_len (vlib_mains); j++) { stat_vm = vlib_mains[j]; @@ -296,38 +326,69 @@ scrape_and_clear_counters (perfmon_main_t * pm) u8 *capture_name; n = nodes[i]; - if (n->stats_total.perf_counter_ticks == 0) - { - clib_mem_free (n); - continue; - } - - capture_name = format (0, "t%d-%v%c", j, n->name, 0); - p = hash_get_mem (pm->capture_by_thread_and_node_name, - capture_name); + if (n->stats_total.perf_counter0_ticks == 0 && + n->stats_total.perf_counter1_ticks == 0) + goto skip_this_node; - if (p == 0) + for (k = 0; k < 2; k++) { - pool_get (pm->capture_pool, c); - memset (c, 0, sizeof (*c)); - c->thread_and_node_name = capture_name; - hash_set_mem (pm->capture_by_thread_and_node_name, - capture_name, c - pm->capture_pool); + u64 counter_value, counter_last_clear; + + /* + * We collect 2 counters at once, except for the + * last counter when the user asks for an odd number of + * counters + */ + if ((pm->current_event + k) + >= vec_len (pm->single_events_to_collect)) + break; + + if (k == 0) + { + counter_value = n->stats_total.perf_counter0_ticks; + counter_last_clear = + n->stats_last_clear.perf_counter0_ticks; + } + else + { + counter_value = n->stats_total.perf_counter1_ticks; + counter_last_clear = + n->stats_last_clear.perf_counter1_ticks; + } + + capture_name = format (0, "t%d-%v%c", j, n->name, 0); + + p = hash_get_mem (pm->capture_by_thread_and_node_name, + capture_name); + + if (p == 0) + { + pool_get (pm->capture_pool, c); + memset (c, 0, sizeof (*c)); + c->thread_and_node_name = capture_name; + hash_set_mem (pm->capture_by_thread_and_node_name, + capture_name, c - pm->capture_pool); + } + else + { + c = pool_elt_at_index (pm->capture_pool, p[0]); + vec_free (capture_name); + } + + /* Snapshoot counters, etc. into the capture */ + current_event = pm->single_events_to_collect + + pm->current_event + k; + counter_name = (u8 *) current_event->name; + vectors_this_counter = n->stats_total.perf_counter_vectors - + n->stats_last_clear.perf_counter_vectors; + + vec_add1 (c->counter_names, counter_name); + vec_add1 (c->counter_values, + counter_value - counter_last_clear); + vec_add1 (c->vectors_this_counter, vectors_this_counter); } - else - c = pool_elt_at_index (pm->capture_pool, p[0]); - - /* Snapshoot counters, etc. into the capture */ - counter_name = (u8 *) current_event->name; - counter_value = n->stats_total.perf_counter_ticks - - n->stats_last_clear.perf_counter_ticks; - vectors_this_counter = n->stats_total.perf_counter_vectors - - n->stats_last_clear.perf_counter_vectors; - - vec_add1 (c->counter_names, counter_name); - vec_add1 (c->counter_values, counter_value); - vec_add1 (c->vectors_this_counter, vectors_this_counter); + skip_this_node: clib_mem_free (n); } vec_free (nodes); @@ -339,7 +400,7 @@ static void handle_timeout (perfmon_main_t * pm, f64 now) { int i; - disable_event (pm); + disable_events (pm); /* And also on worker threads */ for (i = 1; i < vec_len (vlib_mains); i++) @@ -354,14 +415,14 @@ handle_timeout (perfmon_main_t * pm, f64 now) if (i > 1) vlib_process_suspend (pm->vlib_main, 1e-3); scrape_and_clear_counters (pm); - pm->current_event++; - if (pm->current_event >= vec_len (pm->events_to_collect)) + pm->current_event += pm->n_active; + if (pm->current_event >= vec_len (pm->single_events_to_collect)) { pm->current_event = 0; pm->state = PERFMON_STATE_OFF; return; } - enable_current_event (pm); + enable_current_events (pm); /* And also on worker threads */ for (i = 1; i < vec_len (vlib_mains); i++) diff --git a/src/vlib/main.c b/src/vlib/main.c index 23c4e076e1f..0e480fabe2a 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -543,15 +543,17 @@ never_inline void vlib_node_runtime_sync_stats (vlib_main_t * vm, vlib_node_runtime_t * r, uword n_calls, uword n_vectors, uword n_clocks, - uword n_ticks) + uword n_ticks0, uword n_ticks1) { vlib_node_t *n = vlib_get_node (vm, r->node_index); n->stats_total.calls += n_calls + r->calls_since_last_overflow; n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow; n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow; - n->stats_total.perf_counter_ticks += n_ticks + - r->perf_counter_ticks_since_last_overflow; + n->stats_total.perf_counter0_ticks += n_ticks0 + + r->perf_counter0_ticks_since_last_overflow; + n->stats_total.perf_counter1_ticks += n_ticks1 + + r->perf_counter1_ticks_since_last_overflow; n->stats_total.perf_counter_vectors += n_vectors + r->perf_counter_vectors_since_last_overflow; n->stats_total.max_clock = r->max_clock; @@ -560,7 +562,8 @@ vlib_node_runtime_sync_stats (vlib_main_t * vm, r->calls_since_last_overflow = 0; r->vectors_since_last_overflow = 0; r->clocks_since_last_overflow = 0; - r->perf_counter_ticks_since_last_overflow = 0ULL; + r->perf_counter0_ticks_since_last_overflow = 0ULL; + r->perf_counter1_ticks_since_last_overflow = 0ULL; r->perf_counter_vectors_since_last_overflow = 0ULL; } @@ -568,12 +571,12 @@ always_inline void __attribute__ ((unused)) vlib_process_sync_stats (vlib_main_t * vm, vlib_process_t * p, uword n_calls, uword n_vectors, uword n_clocks, - uword n_ticks) + uword n_ticks0, uword n_ticks1) { vlib_node_runtime_t *rt = &p->node_runtime; vlib_node_t *n = vlib_get_node (vm, rt->node_index); vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks, - n_ticks); + n_ticks0, n_ticks1); n->stats_total.suspends += p->n_suspends; p->n_suspends = 0; } @@ -599,7 +602,7 @@ vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n) vec_elt_at_index (vm->node_main.nodes_by_type[n->type], n->runtime_index); - vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0, 0); + vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0, 0, 0); /* Sync up runtime next frame vector counters with main node structure. */ { @@ -620,27 +623,30 @@ vlib_node_runtime_update_stats (vlib_main_t * vm, vlib_node_runtime_t * node, uword n_calls, uword n_vectors, uword n_clocks, - uword n_ticks) + uword n_ticks0, uword n_ticks1) { u32 ca0, ca1, v0, v1, cl0, cl1, r; - u32 ptick0, ptick1, pvec0, pvec1; + u32 ptick00, ptick01, ptick10, ptick11, pvec0, pvec1; cl0 = cl1 = node->clocks_since_last_overflow; ca0 = ca1 = node->calls_since_last_overflow; v0 = v1 = node->vectors_since_last_overflow; - ptick0 = ptick1 = node->perf_counter_ticks_since_last_overflow; + ptick00 = ptick01 = node->perf_counter0_ticks_since_last_overflow; + ptick10 = ptick11 = node->perf_counter1_ticks_since_last_overflow; pvec0 = pvec1 = node->perf_counter_vectors_since_last_overflow; ca1 = ca0 + n_calls; v1 = v0 + n_vectors; cl1 = cl0 + n_clocks; - ptick1 = ptick0 + n_ticks; + ptick01 = ptick00 + n_ticks0; + ptick11 = ptick10 + n_ticks1; pvec1 = pvec0 + n_vectors; node->calls_since_last_overflow = ca1; node->clocks_since_last_overflow = cl1; node->vectors_since_last_overflow = v1; - node->perf_counter_ticks_since_last_overflow = ptick1; + node->perf_counter0_ticks_since_last_overflow = ptick01; + node->perf_counter1_ticks_since_last_overflow = ptick11; node->perf_counter_vectors_since_last_overflow = pvec1; node->max_clock_n = node->max_clock > n_clocks ? @@ -649,38 +655,39 @@ vlib_node_runtime_update_stats (vlib_main_t * vm, r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors); - if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0) || (ptick1 < ptick0) - || (pvec1 < pvec0)) + if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0) || (ptick01 < ptick00) + || (ptick11 < ptick10) || (pvec1 < pvec0)) { node->calls_since_last_overflow = ca0; node->clocks_since_last_overflow = cl0; node->vectors_since_last_overflow = v0; - node->perf_counter_ticks_since_last_overflow = ptick0; + node->perf_counter0_ticks_since_last_overflow = ptick00; + node->perf_counter1_ticks_since_last_overflow = ptick10; node->perf_counter_vectors_since_last_overflow = pvec0; vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks, - n_ticks); + n_ticks0, n_ticks1); } return r; } -static inline u64 -vlib_node_runtime_perf_counter (vlib_main_t * vm) +static inline void +vlib_node_runtime_perf_counter (vlib_main_t * vm, u64 * pmc0, u64 * pmc1) { + *pmc0 = 0; + *pmc1 = 0; if (PREDICT_FALSE (vm->vlib_node_runtime_perf_counter_cb != 0)) - return ((*vm->vlib_node_runtime_perf_counter_cb) (vm)); - return 0ULL; + (*vm->vlib_node_runtime_perf_counter_cb) (vm, pmc0, pmc1); } always_inline void vlib_process_update_stats (vlib_main_t * vm, vlib_process_t * p, - uword n_calls, uword n_vectors, uword n_clocks, - uword n_ticks) + uword n_calls, uword n_vectors, uword n_clocks) { vlib_node_runtime_update_stats (vm, &p->node_runtime, - n_calls, n_vectors, n_clocks, n_ticks); + n_calls, n_vectors, n_clocks, 0ULL, 0ULL); } static clib_error_t * @@ -1098,6 +1105,8 @@ dispatch_pcap_trace (vlib_main_t * vm, } } +u64 oingo0, oingo1; + static_always_inline u64 dispatch_node (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1146,18 +1155,14 @@ dispatch_node (vlib_main_t * vm, if (1 /* || vm->thread_index == node->thread_index */ ) { - u64 pmc_before, pmc_delta; + u64 pmc_before[2], pmc_after[2], pmc_delta[2]; vlib_elog_main_loop_event (vm, node->node_index, last_time_stamp, frame ? frame->n_vectors : 0, /* is_after */ 0); - /* - * To validate accounting: pmc_before = last_time_stamp - * perf ticks should equal clocks/pkt... - */ - pmc_before = vlib_node_runtime_perf_counter (vm); + vlib_node_runtime_perf_counter (vm, &pmc_before[0], &pmc_before[1]); /* * Turn this on if you run into @@ -1191,7 +1196,10 @@ dispatch_node (vlib_main_t * vm, * To validate accounting: pmc_delta = t - pmc_before; * perf ticks should equal clocks/pkt... */ - pmc_delta = vlib_node_runtime_perf_counter (vm) - pmc_before; + vlib_node_runtime_perf_counter (vm, &pmc_after[0], &pmc_after[1]); + + pmc_delta[0] = pmc_after[0] - pmc_before[0]; + pmc_delta[1] = pmc_after[1] - pmc_before[1]; vlib_elog_main_loop_event (vm, node->node_index, t, n, /* is_after */ 1); @@ -1199,11 +1207,18 @@ dispatch_node (vlib_main_t * vm, vm->main_loop_vectors_processed += n; vm->main_loop_nodes_processed += n > 0; + if (pmc_delta[0] || pmc_delta[1]) + { + oingo0 += pmc_delta[0]; + oingo1 += pmc_delta[1]; + } + v = vlib_node_runtime_update_stats (vm, node, /* n_calls */ 1, /* n_vectors */ n, /* n_clocks */ t - last_time_stamp, - pmc_delta /* PMC ticks */ ); + pmc_delta[0] /* PMC0 */ , + pmc_delta[1] /* PMC1 */ ); /* When in interrupt mode and vector rate crosses threshold switch to polling mode. */ @@ -1542,8 +1557,7 @@ dispatch_process (vlib_main_t * vm, vlib_process_update_stats (vm, p, /* n_calls */ !is_suspend, /* n_vectors */ n_vectors, - /* n_clocks */ t - last_time_stamp, - /* pmc_ticks */ 0ULL); + /* n_clocks */ t - last_time_stamp); return t; } @@ -1626,8 +1640,7 @@ dispatch_suspended_process (vlib_main_t * vm, vlib_process_update_stats (vm, p, /* n_calls */ !is_suspend, /* n_vectors */ n_vectors, - /* n_clocks */ t - last_time_stamp, - /* pmc_ticks */ 0ULL); + /* n_clocks */ t - last_time_stamp); return t; } @@ -1677,9 +1690,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (!nm->interrupt_threshold_vector_length) nm->interrupt_threshold_vector_length = 5; - /* Make sure the performance monitor counter is disabled */ - vm->perf_counter_id = ~0; - /* Start all processes. */ if (is_main) { diff --git a/src/vlib/main.h b/src/vlib/main.h index 91661fdaaad..4c6d0f490dd 100644 --- a/src/vlib/main.h +++ b/src/vlib/main.h @@ -84,9 +84,8 @@ typedef struct vlib_main_t u32 node_counts_per_main_loop[2]; /* Main loop hw / sw performance counters */ - u64 (*vlib_node_runtime_perf_counter_cb) (struct vlib_main_t *); - int perf_counter_id; - int perf_counter_fd; + void (*vlib_node_runtime_perf_counter_cb) (struct vlib_main_t *, + u64 *, u64 *); /* Every so often we switch to the next counter. */ #define VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE 7 diff --git a/src/vlib/node.h b/src/vlib/node.h index f41eb60aa2c..8bb89f44bc7 100644 --- a/src/vlib/node.h +++ b/src/vlib/node.h @@ -258,7 +258,8 @@ typedef struct u64 calls, vectors, clocks, suspends; u64 max_clock; u64 max_clock_n; - u64 perf_counter_ticks; + u64 perf_counter0_ticks; + u64 perf_counter1_ticks; u64 perf_counter_vectors; } vlib_node_stats_t; @@ -507,7 +508,8 @@ typedef struct vlib_node_runtime_t u32 vectors_since_last_overflow; /**< Number of vector elements processed by this node. */ - u32 perf_counter_ticks_since_last_overflow; /**< Perf counter ticks */ + u32 perf_counter0_ticks_since_last_overflow; /**< Perf counter 0 ticks */ + u32 perf_counter1_ticks_since_last_overflow; /**< Perf counter 1 ticks */ u32 perf_counter_vectors_since_last_overflow; /**< Perf counter vectors */ u32 next_frame_index; /**< Start of next frames for this diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c index 062854af5bc..ad17c1d6f4d 100644 --- a/src/vlib/node_cli.c +++ b/src/vlib/node_cli.c @@ -148,8 +148,6 @@ format_vlib_node_stats (u8 * s, va_list * va) f64 maxc, maxcn; u32 maxn; u32 indent; - u64 pmc_ticks; - f64 pmc_ticks_per_packet; if (!n) { @@ -163,9 +161,6 @@ format_vlib_node_stats (u8 * s, va_list * va) "%=30s%=12s%=16s%=16s%=16s%=16s%=16s", "Name", "State", "Calls", "Vectors", "Suspends", "Clocks", "Vectors/Call"); - if (vm->perf_counter_id) - s = format (s, "%=16s", "Perf Ticks"); - return s; } @@ -182,13 +177,6 @@ format_vlib_node_stats (u8 * s, va_list * va) else maxcn = 0.0; - pmc_ticks = n->stats_total.perf_counter_ticks - - n->stats_last_clear.perf_counter_ticks; - if (p > 0) - pmc_ticks_per_packet = (f64) pmc_ticks / (f64) p; - else - pmc_ticks_per_packet = 0.0; - /* Clocks per packet, per call or per suspend. */ x = 0; if (p > 0) @@ -221,9 +209,6 @@ format_vlib_node_stats (u8 * s, va_list * va) s = format (s, "%-30v%=12U%16Ld%16Ld%16Ld%16.2e%16.2f", ns, format_vlib_node_state, vm, n, c, p, d, x, v); - if (pmc_ticks_per_packet > 0.0) - s = format (s, "%16.2e", pmc_ticks_per_packet); - if (ns != n->name) vec_free (ns); diff --git a/src/vlibapi/node_serialize.c b/src/vlibapi/node_serialize.c index 0774eea251e..b50d79e2922 100644 --- a/src/vlibapi/node_serialize.c +++ b/src/vlibapi/node_serialize.c @@ -57,7 +57,7 @@ vlib_node_serialize (vlib_main_t * vm, vlib_node_t *** node_dups, u8 * vector, u8 *namep; u32 name_bytes; uword i, j, k; - u64 l, v, c, d, pmc; + u64 l, v, c, d; state_string_enum_t state_code; serialize_open_vector (sm, vector); @@ -77,8 +77,6 @@ vlib_node_serialize (vlib_main_t * vm, vlib_node_t *** node_dups, u8 * vector, v = n->stats_total.vectors - n->stats_last_clear.vectors; c = n->stats_total.calls - n->stats_last_clear.calls; d = n->stats_total.suspends - n->stats_last_clear.suspends; - pmc = n->stats_total.perf_counter_ticks - - n->stats_last_clear.perf_counter_ticks; state_code = STATE_INTERNAL; @@ -151,8 +149,6 @@ vlib_node_serialize (vlib_main_t * vm, vlib_node_t *** node_dups, u8 * vector, serialize_integer (sm, v, 8); /* Total suspends */ serialize_integer (sm, d, 8); - /* PMC counter */ - serialize_integer (sm, pmc, 8); } else /* no stats */ serialize_likely_small_unsigned_integer (sm, 0); @@ -171,7 +167,7 @@ vlib_node_unserialize (u8 * vector) vlib_node_t **nodes; vlib_node_t ***nodes_by_thread = 0; int i, j, k; - u64 l, v, c, d, pmc; + u64 l, v, c, d; state_string_enum_t state_code; int stats_present; @@ -229,9 +225,6 @@ vlib_node_unserialize (u8 * vector) /* Total suspends */ unserialize_integer (sm, &d, 8); node->stats_total.suspends = d; - /* PMC counter */ - unserialize_integer (sm, &pmc, 8); - node->stats_total.perf_counter_ticks = pmc; } } } |