summary | refs | log | tree | commit | diff | stats
path: root/src/vlib
diff options
context:
space:
mode:
author Dave Barach <dave@barachs.net> 2018-09-10 12:31:15 -0400
committer Damjan Marion <dmarion@me.com> 2018-10-22 12:02:04 +0000
commit 4d1a866aff6ceb03025990b6e60b42faf09ef486 (patch)
tree bec495932876d9649f26179b4c24b6938be43f38 /src/vlib
parent 115a3ac59a16f9dcfee92eaecc79cd1fa3320e29 (diff)
X86_64 perf counter plugin
Change-Id: Ie5a00c15ee9536cc61afab57f6cadc1aa1972f3c
Signed-off-by: Dave Barach <dave@barachs.net>
Diffstat (limited to 'src/vlib')
-rw-r--r-- src/vlib/main.c 83
-rw-r--r-- src/vlib/main.h 9
-rw-r--r-- src/vlib/node.h 5
-rw-r--r-- src/vlib/node_cli.c 32
4 files changed, 105 insertions, 24 deletions
diff --git a/src/vlib/main.c b/src/vlib/main.c
index 62599437f35..273ae5256a5 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -540,29 +540,38 @@ vlib_put_next_frame (vlib_main_t * vm,
never_inline void
vlib_node_runtime_sync_stats (vlib_main_t * vm,
vlib_node_runtime_t * r,
- uword n_calls, uword n_vectors, uword n_clocks)
+ uword n_calls, uword n_vectors, uword n_clocks,
+ uword n_ticks)
{
vlib_node_t *n = vlib_get_node (vm, r->node_index);
n->stats_total.calls += n_calls + r->calls_since_last_overflow;
n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow;
n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow;
+ n->stats_total.perf_counter_ticks += n_ticks +
+ r->perf_counter_ticks_since_last_overflow;
+ n->stats_total.perf_counter_vectors += n_vectors +
+ r->perf_counter_vectors_since_last_overflow;
n->stats_total.max_clock = r->max_clock;
n->stats_total.max_clock_n = r->max_clock_n;
r->calls_since_last_overflow = 0;
r->vectors_since_last_overflow = 0;
r->clocks_since_last_overflow = 0;
+ r->perf_counter_ticks_since_last_overflow = 0ULL;
+ r->perf_counter_vectors_since_last_overflow = 0ULL;
}
always_inline void __attribute__ ((unused))
vlib_process_sync_stats (vlib_main_t * vm,
vlib_process_t * p,
- uword n_calls, uword n_vectors, uword n_clocks)
+ uword n_calls, uword n_vectors, uword n_clocks,
+ uword n_ticks)
{
vlib_node_runtime_t *rt = &p->node_runtime;
vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks);
+ vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks,
+ n_ticks);
n->stats_total.suspends += p->n_suspends;
p->n_suspends = 0;
}
@@ -588,7 +597,7 @@ vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n)
vec_elt_at_index (vm->node_main.nodes_by_type[n->type],
n->runtime_index);
- vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0);
+ vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0, 0);
/* Sync up runtime next frame vector counters with main node structure. */
{
@@ -608,45 +617,68 @@ always_inline u32
vlib_node_runtime_update_stats (vlib_main_t * vm,
vlib_node_runtime_t * node,
uword n_calls,
- uword n_vectors, uword n_clocks)
+ uword n_vectors, uword n_clocks,
+ uword n_ticks)
{
u32 ca0, ca1, v0, v1, cl0, cl1, r;
+ u32 ptick0, ptick1, pvec0, pvec1;
cl0 = cl1 = node->clocks_since_last_overflow;
ca0 = ca1 = node->calls_since_last_overflow;
v0 = v1 = node->vectors_since_last_overflow;
+ ptick0 = ptick1 = node->perf_counter_ticks_since_last_overflow;
+ pvec0 = pvec1 = node->perf_counter_vectors_since_last_overflow;
ca1 = ca0 + n_calls;
v1 = v0 + n_vectors;
cl1 = cl0 + n_clocks;
+ ptick1 = ptick0 + n_ticks;
+ pvec1 = pvec0 + n_vectors;
node->calls_since_last_overflow = ca1;
node->clocks_since_last_overflow = cl1;
node->vectors_since_last_overflow = v1;
+ node->perf_counter_ticks_since_last_overflow = ptick1;
+ node->perf_counter_vectors_since_last_overflow = pvec1;
+
node->max_clock_n = node->max_clock > n_clocks ?
node->max_clock_n : n_vectors;
node->max_clock = node->max_clock > n_clocks ? node->max_clock : n_clocks;
r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors);
- if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0))
+ if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0) || (ptick1 < ptick0)
+ || (pvec1 < pvec0))
{
node->calls_since_last_overflow = ca0;
node->clocks_since_last_overflow = cl0;
node->vectors_since_last_overflow = v0;
- vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks);
+ node->perf_counter_ticks_since_last_overflow = ptick0;
+ node->perf_counter_vectors_since_last_overflow = pvec0;
+
+ vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks,
+ n_ticks);
}
return r;
}
+static inline u64
+vlib_node_runtime_perf_counter (vlib_main_t * vm)
+{
+ if (PREDICT_FALSE (vm->vlib_node_runtime_perf_counter_cb != 0))
+ return ((*vm->vlib_node_runtime_perf_counter_cb) (vm));
+ return 0ULL;
+}
+
always_inline void
vlib_process_update_stats (vlib_main_t * vm,
vlib_process_t * p,
- uword n_calls, uword n_vectors, uword n_clocks)
+ uword n_calls, uword n_vectors, uword n_clocks,
+ uword n_ticks)
{
vlib_node_runtime_update_stats (vm, &p->node_runtime,
- n_calls, n_vectors, n_clocks);
+ n_calls, n_vectors, n_clocks, n_ticks);
}
static clib_error_t *
@@ -959,9 +991,7 @@ dispatch_node (vlib_main_t * vm,
if (1 /* || vm->thread_index == node->thread_index */ )
{
- vlib_main_t *stat_vm;
-
- stat_vm = /* vlib_mains ? vlib_mains[0] : */ vm;
+ u64 pmc_before, pmc_delta;
vlib_elog_main_loop_event (vm, node->node_index,
last_time_stamp,
@@ -969,6 +999,12 @@ dispatch_node (vlib_main_t * vm,
/* is_after */ 0);
/*
+ * To validate accounting: pmc_before = last_time_stamp
+ * perf ticks should equal clocks/pkt...
+ */
+ pmc_before = vlib_node_runtime_perf_counter (vm);
+
+ /*
* Turn this on if you run into
* "bad monkey" contexts, and you want to know exactly
* which nodes they've visited... See ixge.c...
@@ -990,16 +1026,23 @@ dispatch_node (vlib_main_t * vm,
t = clib_cpu_time_now ();
+ /*
+ * To validate accounting: pmc_delta = t - pmc_before;
+ * perf ticks should equal clocks/pkt...
+ */
+ pmc_delta = vlib_node_runtime_perf_counter (vm) - pmc_before;
+
vlib_elog_main_loop_event (vm, node->node_index, t, n, /* is_after */
1);
vm->main_loop_vectors_processed += n;
vm->main_loop_nodes_processed += n > 0;
- v = vlib_node_runtime_update_stats (stat_vm, node,
+ v = vlib_node_runtime_update_stats (vm, node,
/* n_calls */ 1,
/* n_vectors */ n,
- /* n_clocks */ t - last_time_stamp);
+ /* n_clocks */ t - last_time_stamp,
+ pmc_delta /* PMC ticks */ );
/* When in interrupt mode and vector rate crosses threshold switch to
polling mode. */
@@ -1338,7 +1381,8 @@ dispatch_process (vlib_main_t * vm,
vlib_process_update_stats (vm, p,
/* n_calls */ !is_suspend,
/* n_vectors */ n_vectors,
- /* n_clocks */ t - last_time_stamp);
+ /* n_clocks */ t - last_time_stamp,
+ /* pmc_ticks */ 0ULL);
return t;
}
@@ -1421,7 +1465,8 @@ dispatch_suspended_process (vlib_main_t * vm,
vlib_process_update_stats (vm, p,
/* n_calls */ !is_suspend,
/* n_vectors */ n_vectors,
- /* n_clocks */ t - last_time_stamp);
+ /* n_clocks */ t - last_time_stamp,
+ /* pmc_ticks */ 0ULL);
return t;
}
@@ -1471,6 +1516,9 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
if (!nm->interrupt_threshold_vector_length)
nm->interrupt_threshold_vector_length = 5;
+ /* Make sure the performance monitor counter is disabled */
+ vm->perf_counter_id = ~0;
+
/* Start all processes. */
if (is_main)
{
@@ -1493,6 +1541,9 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
vlib_worker_thread_barrier_check ();
vec_foreach (fqm, tm->frame_queue_mains)
vlib_frame_queue_dequeue (vm, fqm);
+ if (PREDICT_FALSE (vm->worker_thread_main_loop_callback != 0))
+ ((void (*)(vlib_main_t *)) vm->worker_thread_main_loop_callback)
+ (vm);
}
/* Process pre-input nodes. */
diff --git a/src/vlib/main.h b/src/vlib/main.h
index ce42b6ea442..ddc14df5360 100644
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -46,6 +46,7 @@
#include <vppinfra/pool.h>
#include <vppinfra/random_buffer.h>
#include <vppinfra/time.h>
+#include <vppinfra/pmc.h>
#include <pthread.h>
@@ -81,6 +82,11 @@ typedef struct vlib_main_t
u32 vector_counts_per_main_loop[2];
u32 node_counts_per_main_loop[2];
+ /* Main loop hw / sw performance counters */
+ u64 (*vlib_node_runtime_perf_counter_cb) (struct vlib_main_t *);
+ int perf_counter_id;
+ int perf_counter_fd;
+
/* Every so often we switch to the next counter. */
#define VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE 7
@@ -192,6 +198,9 @@ typedef struct vlib_main_t
void (*queue_signal_callback) (struct vlib_main_t *);
u8 **argv;
+ /* Top of (worker) dispatch loop callback */
+ volatile void (*worker_thread_main_loop_callback) (struct vlib_main_t *);
+
/* debugging */
volatile int parked_at_barrier;
diff --git a/src/vlib/node.h b/src/vlib/node.h
index 6efb6f3e4fe..fd245d59def 100644
--- a/src/vlib/node.h
+++ b/src/vlib/node.h
@@ -244,6 +244,8 @@ typedef struct
u64 calls, vectors, clocks, suspends;
u64 max_clock;
u64 max_clock_n;
+ u64 perf_counter_ticks;
+ u64 perf_counter_vectors;
} vlib_node_stats_t;
#define foreach_vlib_node_state \
@@ -488,6 +490,9 @@ typedef struct vlib_node_runtime_t
u32 vectors_since_last_overflow; /**< Number of vector elements
processed by this node. */
+ u32 perf_counter_ticks_since_last_overflow; /**< Perf counter ticks */
+ u32 perf_counter_vectors_since_last_overflow; /**< Perf counter vectors */
+
u32 next_frame_index; /**< Start of next frames for this
node. */
diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c
index 2523b41c404..062854af5bc 100644
--- a/src/vlib/node_cli.c
+++ b/src/vlib/node_cli.c
@@ -148,19 +148,25 @@ format_vlib_node_stats (u8 * s, va_list * va)
f64 maxc, maxcn;
u32 maxn;
u32 indent;
+ u64 pmc_ticks;
+ f64 pmc_ticks_per_packet;
if (!n)
{
if (max)
- return format (s,
- "%=30s%=17s%=16s%=16s%=16s%=16s",
- "Name", "Max Node Clocks", "Vectors at Max",
- "Max Clocks", "Avg Clocks", "Avg Vectors/Call");
+ s = format (s,
+ "%=30s%=17s%=16s%=16s%=16s%=16s",
+ "Name", "Max Node Clocks", "Vectors at Max",
+ "Max Clocks", "Avg Clocks", "Avg Vectors/Call");
else
- return format (s,
- "%=30s%=12s%=16s%=16s%=16s%=16s%=16s",
- "Name", "State", "Calls", "Vectors", "Suspends",
- "Clocks", "Vectors/Call");
+ s = format (s,
+ "%=30s%=12s%=16s%=16s%=16s%=16s%=16s",
+ "Name", "State", "Calls", "Vectors", "Suspends",
+ "Clocks", "Vectors/Call");
+ if (vm->perf_counter_id)
+ s = format (s, "%=16s", "Perf Ticks");
+
+ return s;
}
indent = format_get_indent (s);
@@ -176,6 +182,13 @@ format_vlib_node_stats (u8 * s, va_list * va)
else
maxcn = 0.0;
+ pmc_ticks = n->stats_total.perf_counter_ticks -
+ n->stats_last_clear.perf_counter_ticks;
+ if (p > 0)
+ pmc_ticks_per_packet = (f64) pmc_ticks / (f64) p;
+ else
+ pmc_ticks_per_packet = 0.0;
+
/* Clocks per packet, per call or per suspend. */
x = 0;
if (p > 0)
@@ -208,6 +221,9 @@ format_vlib_node_stats (u8 * s, va_list * va)
s = format (s, "%-30v%=12U%16Ld%16Ld%16Ld%16.2e%16.2f", ns,
format_vlib_node_state, vm, n, c, p, d, x, v);
+ if (pmc_ticks_per_packet > 0.0)
+ s = format (s, "%16.2e", pmc_ticks_per_packet);
+
if (ns != n->name)
vec_free (ns);