From 4d1a866aff6ceb03025990b6e60b42faf09ef486 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Mon, 10 Sep 2018 12:31:15 -0400 Subject: X86_64 perf counter plugin Change-Id: Ie5a00c15ee9536cc61afab57f6cadc1aa1972f3c Signed-off-by: Dave Barach --- src/vlib/main.c | 83 ++++++++++++++++++++++++++++++++++++++++++----------- src/vlib/main.h | 9 ++++++ src/vlib/node.h | 5 ++++ src/vlib/node_cli.c | 32 +++++++++++++++------ 4 files changed, 105 insertions(+), 24 deletions(-) (limited to 'src/vlib') diff --git a/src/vlib/main.c b/src/vlib/main.c index 62599437f35..273ae5256a5 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -540,29 +540,38 @@ vlib_put_next_frame (vlib_main_t * vm, never_inline void vlib_node_runtime_sync_stats (vlib_main_t * vm, vlib_node_runtime_t * r, - uword n_calls, uword n_vectors, uword n_clocks) + uword n_calls, uword n_vectors, uword n_clocks, + uword n_ticks) { vlib_node_t *n = vlib_get_node (vm, r->node_index); n->stats_total.calls += n_calls + r->calls_since_last_overflow; n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow; n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow; + n->stats_total.perf_counter_ticks += n_ticks + + r->perf_counter_ticks_since_last_overflow; + n->stats_total.perf_counter_vectors += n_vectors + + r->perf_counter_vectors_since_last_overflow; n->stats_total.max_clock = r->max_clock; n->stats_total.max_clock_n = r->max_clock_n; r->calls_since_last_overflow = 0; r->vectors_since_last_overflow = 0; r->clocks_since_last_overflow = 0; + r->perf_counter_ticks_since_last_overflow = 0ULL; + r->perf_counter_vectors_since_last_overflow = 0ULL; } always_inline void __attribute__ ((unused)) vlib_process_sync_stats (vlib_main_t * vm, vlib_process_t * p, - uword n_calls, uword n_vectors, uword n_clocks) + uword n_calls, uword n_vectors, uword n_clocks, + uword n_ticks) { vlib_node_runtime_t *rt = &p->node_runtime; vlib_node_t *n = vlib_get_node (vm, rt->node_index); - vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks); + vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks, + n_ticks); n->stats_total.suspends += p->n_suspends; p->n_suspends = 0; } @@ -588,7 +597,7 @@ vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n) vec_elt_at_index (vm->node_main.nodes_by_type[n->type], n->runtime_index); - vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0); + vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0, 0); /* Sync up runtime next frame vector counters with main node structure. */ { @@ -608,45 +617,68 @@ always_inline u32 vlib_node_runtime_update_stats (vlib_main_t * vm, vlib_node_runtime_t * node, uword n_calls, - uword n_vectors, uword n_clocks) + uword n_vectors, uword n_clocks, + uword n_ticks) { u32 ca0, ca1, v0, v1, cl0, cl1, r; + u32 ptick0, ptick1, pvec0, pvec1; cl0 = cl1 = node->clocks_since_last_overflow; ca0 = ca1 = node->calls_since_last_overflow; v0 = v1 = node->vectors_since_last_overflow; + ptick0 = ptick1 = node->perf_counter_ticks_since_last_overflow; + pvec0 = pvec1 = node->perf_counter_vectors_since_last_overflow; ca1 = ca0 + n_calls; v1 = v0 + n_vectors; cl1 = cl0 + n_clocks; + ptick1 = ptick0 + n_ticks; + pvec1 = pvec0 + n_vectors; node->calls_since_last_overflow = ca1; node->clocks_since_last_overflow = cl1; node->vectors_since_last_overflow = v1; + node->perf_counter_ticks_since_last_overflow = ptick1; + node->perf_counter_vectors_since_last_overflow = pvec1; + node->max_clock_n = node->max_clock > n_clocks ? node->max_clock_n : n_vectors; node->max_clock = node->max_clock > n_clocks ? node->max_clock : n_clocks; r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors); - if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0)) + if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0) || (ptick1 < ptick0) + || (pvec1 < pvec0)) { node->calls_since_last_overflow = ca0; node->clocks_since_last_overflow = cl0; node->vectors_since_last_overflow = v0; - vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks); + node->perf_counter_ticks_since_last_overflow = ptick0; + node->perf_counter_vectors_since_last_overflow = pvec0; + + vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks, + n_ticks); } return r; } +static inline u64 +vlib_node_runtime_perf_counter (vlib_main_t * vm) +{ + if (PREDICT_FALSE (vm->vlib_node_runtime_perf_counter_cb != 0)) + return ((*vm->vlib_node_runtime_perf_counter_cb) (vm)); + return 0ULL; +} + always_inline void vlib_process_update_stats (vlib_main_t * vm, vlib_process_t * p, - uword n_calls, uword n_vectors, uword n_clocks) + uword n_calls, uword n_vectors, uword n_clocks, + uword n_ticks) { vlib_node_runtime_update_stats (vm, &p->node_runtime, - n_calls, n_vectors, n_clocks); + n_calls, n_vectors, n_clocks, n_ticks); } static clib_error_t * @@ -959,15 +991,19 @@ dispatch_node (vlib_main_t * vm, if (1 /* || vm->thread_index == node->thread_index */ ) { - vlib_main_t *stat_vm; - - stat_vm = /* vlib_mains ? vlib_mains[0] : */ vm; + u64 pmc_before, pmc_delta; vlib_elog_main_loop_event (vm, node->node_index, last_time_stamp, frame ? frame->n_vectors : 0, /* is_after */ 0); + /* + * To validate accounting: pmc_before = last_time_stamp + * perf ticks should equal clocks/pkt... + */ + pmc_before = vlib_node_runtime_perf_counter (vm); + /* * Turn this on if you run into * "bad monkey" contexts, and you want to know exactly @@ -990,16 +1026,23 @@ dispatch_node (vlib_main_t * vm, t = clib_cpu_time_now (); + /* + * To validate accounting: pmc_delta = t - pmc_before; + * perf ticks should equal clocks/pkt... + */ + pmc_delta = vlib_node_runtime_perf_counter (vm) - pmc_before; + vlib_elog_main_loop_event (vm, node->node_index, t, n, /* is_after */ 1); vm->main_loop_vectors_processed += n; vm->main_loop_nodes_processed += n > 0; - v = vlib_node_runtime_update_stats (stat_vm, node, + v = vlib_node_runtime_update_stats (vm, node, /* n_calls */ 1, /* n_vectors */ n, - /* n_clocks */ t - last_time_stamp); + /* n_clocks */ t - last_time_stamp, + pmc_delta /* PMC ticks */ ); /* When in interrupt mode and vector rate crosses threshold switch to polling mode. */ @@ -1338,7 +1381,8 @@ dispatch_process (vlib_main_t * vm, vlib_process_update_stats (vm, p, /* n_calls */ !is_suspend, /* n_vectors */ n_vectors, - /* n_clocks */ t - last_time_stamp); + /* n_clocks */ t - last_time_stamp, + /* pmc_ticks */ 0ULL); return t; } @@ -1421,7 +1465,8 @@ dispatch_suspended_process (vlib_main_t * vm, vlib_process_update_stats (vm, p, /* n_calls */ !is_suspend, /* n_vectors */ n_vectors, - /* n_clocks */ t - last_time_stamp); + /* n_clocks */ t - last_time_stamp, + /* pmc_ticks */ 0ULL); return t; } @@ -1471,6 +1516,9 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (!nm->interrupt_threshold_vector_length) nm->interrupt_threshold_vector_length = 5; + /* Make sure the performance monitor counter is disabled */ + vm->perf_counter_id = ~0; + /* Start all processes. */ if (is_main) { @@ -1493,6 +1541,9 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) vlib_worker_thread_barrier_check (); vec_foreach (fqm, tm->frame_queue_mains) vlib_frame_queue_dequeue (vm, fqm); + if (PREDICT_FALSE (vm->worker_thread_main_loop_callback != 0)) + ((void (*)(vlib_main_t *)) vm->worker_thread_main_loop_callback) + (vm); } /* Process pre-input nodes. */ diff --git a/src/vlib/main.h b/src/vlib/main.h index ce42b6ea442..ddc14df5360 100644 --- a/src/vlib/main.h +++ b/src/vlib/main.h @@ -46,6 +46,7 @@ #include #include #include +#include #include @@ -81,6 +82,11 @@ typedef struct vlib_main_t u32 vector_counts_per_main_loop[2]; u32 node_counts_per_main_loop[2]; + /* Main loop hw / sw performance counters */ + u64 (*vlib_node_runtime_perf_counter_cb) (struct vlib_main_t *); + int perf_counter_id; + int perf_counter_fd; + /* Every so often we switch to the next counter. */ #define VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE 7 @@ -192,6 +198,9 @@ typedef struct vlib_main_t void (*queue_signal_callback) (struct vlib_main_t *); u8 **argv; + /* Top of (worker) dispatch loop callback */ + volatile void (*worker_thread_main_loop_callback) (struct vlib_main_t *); + /* debugging */ volatile int parked_at_barrier; diff --git a/src/vlib/node.h b/src/vlib/node.h index 6efb6f3e4fe..fd245d59def 100644 --- a/src/vlib/node.h +++ b/src/vlib/node.h @@ -244,6 +244,8 @@ typedef struct u64 calls, vectors, clocks, suspends; u64 max_clock; u64 max_clock_n; + u64 perf_counter_ticks; + u64 perf_counter_vectors; } vlib_node_stats_t; #define foreach_vlib_node_state \ @@ -488,6 +490,9 @@ typedef struct vlib_node_runtime_t u32 vectors_since_last_overflow; /**< Number of vector elements processed by this node. */ + u32 perf_counter_ticks_since_last_overflow; /**< Perf counter ticks */ + u32 perf_counter_vectors_since_last_overflow; /**< Perf counter vectors */ + u32 next_frame_index; /**< Start of next frames for this node. */ diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c index 2523b41c404..062854af5bc 100644 --- a/src/vlib/node_cli.c +++ b/src/vlib/node_cli.c @@ -148,19 +148,25 @@ format_vlib_node_stats (u8 * s, va_list * va) f64 maxc, maxcn; u32 maxn; u32 indent; + u64 pmc_ticks; + f64 pmc_ticks_per_packet; if (!n) { if (max) - return format (s, - "%=30s%=17s%=16s%=16s%=16s%=16s", - "Name", "Max Node Clocks", "Vectors at Max", - "Max Clocks", "Avg Clocks", "Avg Vectors/Call"); + s = format (s, + "%=30s%=17s%=16s%=16s%=16s%=16s", + "Name", "Max Node Clocks", "Vectors at Max", + "Max Clocks", "Avg Clocks", "Avg Vectors/Call"); else - return format (s, - "%=30s%=12s%=16s%=16s%=16s%=16s%=16s", - "Name", "State", "Calls", "Vectors", "Suspends", - "Clocks", "Vectors/Call"); + s = format (s, + "%=30s%=12s%=16s%=16s%=16s%=16s%=16s", + "Name", "State", "Calls", "Vectors", "Suspends", + "Clocks", "Vectors/Call"); + if (vm->perf_counter_id) + s = format (s, "%=16s", "Perf Ticks"); + + return s; } indent = format_get_indent (s); @@ -176,6 +182,13 @@ format_vlib_node_stats (u8 * s, va_list * va) else maxcn = 0.0; + pmc_ticks = n->stats_total.perf_counter_ticks - + n->stats_last_clear.perf_counter_ticks; + if (p > 0) + pmc_ticks_per_packet = (f64) pmc_ticks / (f64) p; + else + pmc_ticks_per_packet = 0.0; + /* Clocks per packet, per call or per suspend. */ x = 0; if (p > 0) @@ -208,6 +221,9 @@ format_vlib_node_stats (u8 * s, va_list * va) s = format (s, "%-30v%=12U%16Ld%16Ld%16Ld%16.2e%16.2f", ns, format_vlib_node_state, vm, n, c, p, d, x, v); + if (pmc_ticks_per_packet > 0.0) + s = format (s, "%16.2e", pmc_ticks_per_packet); + if (ns != n->name) vec_free (ns); -- cgit 1.2.3-korg