From 6c81f5a2493ff65b4dacfef45db8a1ee459a738f Mon Sep 17 00:00:00 2001 From: Tom Seidenberg Date: Fri, 10 Jul 2020 15:49:03 +0000 Subject: misc: add callback hooks and refactor pmc Callbacks for monitoring and performance measurement: - Add new callback list type, with context - Add callbacks for API, CLI, and barrier sync - Modify node dispatch callback to pass plugin-specific context - Modify perfmon plugin to keep PMC samples local to the plugin - Include process nodes in dispatch callback - Pass dispatch function return value to callback Type: refactor Signed-off-by: Tom Seidenberg Change-Id: I28b06c58490611e08d76ff5b01b2347ba2109b22 --- src/vlib/cli.c | 7 ++++ src/vlib/cli.h | 8 ++++- src/vlib/init.h | 13 +++++++ src/vlib/main.c | 95 ++++++++++++++++----------------------------------- src/vlib/main.h | 79 ++++++++++++++++++++++++++++++++++++------ src/vlib/node.h | 7 ---- src/vlib/node_funcs.h | 4 +++ src/vlib/threads.c | 16 +++++++++ src/vlib/threads.h | 9 +++++ 9 files changed, 155 insertions(+), 83 deletions(-) (limited to 'src/vlib') diff --git a/src/vlib/cli.c b/src/vlib/cli.c index d14ea683fb9..2697c0ae083 100644 --- a/src/vlib/cli.c +++ b/src/vlib/cli.c @@ -39,6 +39,7 @@ #include #include +#include #include #include #include @@ -563,10 +564,16 @@ vlib_cli_dispatch_sub_commands (vlib_main_t * vm, if (!c->is_mp_safe) vlib_worker_thread_barrier_sync (vm); + if (PREDICT_FALSE (vec_len (cm->perf_counter_cbs) != 0)) + clib_call_callbacks (cm->perf_counter_cbs, cm, + c - cm->commands, 0 /* before */ ); c->hit_counter++; c_error = c->function (vm, si, c); + if (PREDICT_FALSE (vec_len (cm->perf_counter_cbs) != 0)) + clib_call_callbacks (cm->perf_counter_cbs, cm, + c - cm->commands, 1 /* after */ ); if (!c->is_mp_safe) vlib_worker_thread_barrier_release (vm); diff --git a/src/vlib/cli.h b/src/vlib/cli.h index df9ed7212bc..0a8ef9d78d7 100644 --- a/src/vlib/cli.h +++ b/src/vlib/cli.h @@ -132,7 +132,7 @@ typedef struct vlib_cli_command_t typedef void (vlib_cli_output_function_t) (uword arg, u8 * buffer, uword buffer_bytes); -typedef struct +typedef struct vlib_cli_main_t { /* Vector of all known commands. */ vlib_cli_command_t *commands; @@ -146,6 +146,12 @@ typedef struct /* index vector, to sort commands, etc. */ u32 *sort_vector; + + /* performance counter callback */ + void (**perf_counter_cbs) + (struct vlib_cli_main_t *, u32 id, int before_or_after); + void (**perf_counter_cbs_tmp) + (struct vlib_cli_main_t *, u32 id, int before_or_after); } vlib_cli_main_t; #ifndef CLIB_MARCH_VARIANT diff --git a/src/vlib/init.h b/src/vlib/init.h index fc638013efc..68ac2f36717 100644 --- a/src/vlib/init.h +++ b/src/vlib/init.h @@ -317,6 +317,19 @@ static void __vlib_rm_config_function_##x (void) \ _error; \ }) +#define vlib_call_main_loop_enter_function(vm, x) \ + ({ \ + extern vlib_init_function_t * VLIB_MAIN_LOOP_ENTER_FUNCTION_SYMBOL (x); \ + vlib_init_function_t * _f = VLIB_MAIN_LOOP_ENTER_FUNCTION_SYMBOL (x); \ + clib_error_t * _error = 0; \ + if (! hash_get (vm->init_functions_called, _f)) \ + { \ + hash_set1 (vm->init_functions_called, _f); \ + _error = _f (vm); \ + } \ + _error; \ + }) + /* External functions. */ clib_error_t *vlib_call_all_init_functions (struct vlib_main_t *vm); clib_error_t *vlib_call_all_config_functions (struct vlib_main_t *vm, diff --git a/src/vlib/main.c b/src/vlib/main.c index 8d7c6c09275..cb651e43a75 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -568,41 +568,29 @@ vlib_put_next_frame (vlib_main_t * vm, never_inline void vlib_node_runtime_sync_stats (vlib_main_t * vm, vlib_node_runtime_t * r, - uword n_calls, uword n_vectors, uword n_clocks, - uword n_ticks0, uword n_ticks1) + uword n_calls, uword n_vectors, uword n_clocks) { vlib_node_t *n = vlib_get_node (vm, r->node_index); n->stats_total.calls += n_calls + r->calls_since_last_overflow; n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow; n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow; - n->stats_total.perf_counter0_ticks += n_ticks0 + - r->perf_counter0_ticks_since_last_overflow; - n->stats_total.perf_counter1_ticks += n_ticks1 + - r->perf_counter1_ticks_since_last_overflow; - n->stats_total.perf_counter_vectors += n_vectors + - r->perf_counter_vectors_since_last_overflow; n->stats_total.max_clock = r->max_clock; n->stats_total.max_clock_n = r->max_clock_n; r->calls_since_last_overflow = 0; r->vectors_since_last_overflow = 0; r->clocks_since_last_overflow = 0; - r->perf_counter0_ticks_since_last_overflow = 0ULL; - r->perf_counter1_ticks_since_last_overflow = 0ULL; - r->perf_counter_vectors_since_last_overflow = 0ULL; } always_inline void __attribute__ ((unused)) vlib_process_sync_stats (vlib_main_t * vm, vlib_process_t * p, - uword n_calls, uword n_vectors, uword n_clocks, - uword n_ticks0, uword n_ticks1) + uword n_calls, uword n_vectors, uword n_clocks) { vlib_node_runtime_t *rt = &p->node_runtime; vlib_node_t *n = vlib_get_node (vm, rt->node_index); - vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks, - n_ticks0, n_ticks1); + vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks); n->stats_total.suspends += p->n_suspends; p->n_suspends = 0; } @@ -628,7 +616,7 @@ vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n) vec_elt_at_index (vm->node_main.nodes_by_type[n->type], n->runtime_index); - vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0, 0, 0); + vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0); /* Sync up runtime next frame vector counters with main node structure. */ { @@ -648,32 +636,21 @@ always_inline u32 vlib_node_runtime_update_stats (vlib_main_t * vm, vlib_node_runtime_t * node, uword n_calls, - uword n_vectors, uword n_clocks, - uword n_ticks0, uword n_ticks1) + uword n_vectors, uword n_clocks) { u32 ca0, ca1, v0, v1, cl0, cl1, r; - u32 ptick00, ptick01, ptick10, ptick11, pvec0, pvec1; cl0 = cl1 = node->clocks_since_last_overflow; ca0 = ca1 = node->calls_since_last_overflow; v0 = v1 = node->vectors_since_last_overflow; - ptick00 = ptick01 = node->perf_counter0_ticks_since_last_overflow; - ptick10 = ptick11 = node->perf_counter1_ticks_since_last_overflow; - pvec0 = pvec1 = node->perf_counter_vectors_since_last_overflow; ca1 = ca0 + n_calls; v1 = v0 + n_vectors; cl1 = cl0 + n_clocks; - ptick01 = ptick00 + n_ticks0; - ptick11 = ptick10 + n_ticks1; - pvec1 = pvec0 + n_vectors; node->calls_since_last_overflow = ca1; node->clocks_since_last_overflow = cl1; node->vectors_since_last_overflow = v1; - node->perf_counter0_ticks_since_last_overflow = ptick01; - node->perf_counter1_ticks_since_last_overflow = ptick11; - node->perf_counter_vectors_since_last_overflow = pvec1; node->max_clock_n = node->max_clock > n_clocks ? node->max_clock_n : n_vectors; @@ -681,42 +658,25 @@ vlib_node_runtime_update_stats (vlib_main_t * vm, r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors); - if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0) || (ptick01 < ptick00) - || (ptick11 < ptick10) || (pvec1 < pvec0)) + if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0)) { node->calls_since_last_overflow = ca0; node->clocks_since_last_overflow = cl0; node->vectors_since_last_overflow = v0; - node->perf_counter0_ticks_since_last_overflow = ptick00; - node->perf_counter1_ticks_since_last_overflow = ptick10; - node->perf_counter_vectors_since_last_overflow = pvec0; - vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks, - n_ticks0, n_ticks1); + vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks); } return r; } -always_inline void -vlib_node_runtime_perf_counter (vlib_main_t * vm, u64 * pmc0, u64 * pmc1, - vlib_node_runtime_t * node, - vlib_frame_t * frame, int before_or_after) -{ - *pmc0 = 0; - *pmc1 = 0; - if (PREDICT_FALSE (vec_len (vm->vlib_node_runtime_perf_counter_cbs) != 0)) - clib_call_callbacks (vm->vlib_node_runtime_perf_counter_cbs, vm, pmc0, - pmc1, node, frame, before_or_after); -} - always_inline void vlib_process_update_stats (vlib_main_t * vm, vlib_process_t * p, uword n_calls, uword n_vectors, uword n_clocks) { vlib_node_runtime_update_stats (vm, &p->node_runtime, - n_calls, n_vectors, n_clocks, 0ULL, 0ULL); + n_calls, n_vectors, n_clocks); } static clib_error_t * @@ -1166,7 +1126,6 @@ dispatch_node (vlib_main_t * vm, u64 t; vlib_node_main_t *nm = &vm->node_main; vlib_next_frame_t *nf; - u64 pmc_before[2], pmc_after[2], pmc_delta[2]; if (CLIB_DEBUG > 0) { @@ -1206,8 +1165,8 @@ dispatch_node (vlib_main_t * vm, last_time_stamp, frame ? frame->n_vectors : 0, /* is_after */ 0); - vlib_node_runtime_perf_counter (vm, &pmc_before[0], &pmc_before[1], - node, frame, 0 /* before */ ); + vlib_node_runtime_perf_counter (vm, node, frame, 0, last_time_stamp, + VLIB_NODE_RUNTIME_PERF_BEFORE); /* * Turn this on if you run into @@ -1237,15 +1196,8 @@ dispatch_node (vlib_main_t * vm, t = clib_cpu_time_now (); - /* - * To validate accounting: pmc_delta = t - pmc_before; - * perf ticks should equal clocks/pkt... - */ - vlib_node_runtime_perf_counter (vm, &pmc_after[0], &pmc_after[1], node, - frame, 1 /* after */ ); - - pmc_delta[0] = pmc_after[0] - pmc_before[0]; - pmc_delta[1] = pmc_after[1] - pmc_before[1]; + vlib_node_runtime_perf_counter (vm, node, frame, n, t, + VLIB_NODE_RUNTIME_PERF_AFTER); vlib_elog_main_loop_event (vm, node->node_index, t, n, 1 /* is_after */ ); @@ -1255,9 +1207,7 @@ dispatch_node (vlib_main_t * vm, v = vlib_node_runtime_update_stats (vm, node, /* n_calls */ 1, /* n_vectors */ n, - /* n_clocks */ t - last_time_stamp, - pmc_delta[0] /* PMC0 */ , - pmc_delta[1] /* PMC1 */ ); + /* n_clocks */ t - last_time_stamp); /* When in interrupt mode and vector rate crosses threshold switch to polling mode. */ @@ -1579,6 +1529,9 @@ dispatch_process (vlib_main_t * vm, old_process_index = nm->current_process_index; nm->current_process_index = node->runtime_index; + vlib_node_runtime_perf_counter (vm, node_runtime, f, 0, last_time_stamp, + VLIB_NODE_RUNTIME_PERF_BEFORE); + n_vectors = vlib_process_startup (vm, p, f); nm->current_process_index = old_process_index; @@ -1618,6 +1571,9 @@ dispatch_process (vlib_main_t * vm, vlib_elog_main_loop_event (vm, node_runtime->node_index, t, is_suspend, /* is_after */ 1); + vlib_node_runtime_perf_counter (vm, node_runtime, f, n_vectors, t, + VLIB_NODE_RUNTIME_PERF_AFTER); + vlib_process_update_stats (vm, p, /* n_calls */ !is_suspend, /* n_vectors */ n_vectors, @@ -1668,6 +1624,9 @@ dispatch_suspended_process (vlib_main_t * vm, /* Save away current process for suspend. */ nm->current_process_index = node->runtime_index; + vlib_node_runtime_perf_counter (vm, node_runtime, f, 0, last_time_stamp, + VLIB_NODE_RUNTIME_PERF_BEFORE); + n_vectors = vlib_process_resume (vm, p); t = clib_cpu_time_now (); @@ -1701,6 +1660,9 @@ dispatch_suspended_process (vlib_main_t * vm, vlib_elog_main_loop_event (vm, node_runtime->node_index, t, !is_suspend, /* is_after */ 1); + vlib_node_runtime_perf_counter (vm, node_runtime, f, n_vectors, t, + VLIB_NODE_RUNTIME_PERF_AFTER); + vlib_process_update_stats (vm, p, /* n_calls */ !is_suspend, /* n_vectors */ n_vectors, @@ -1831,11 +1793,14 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) else frame_queue_check_counter--; } - if (PREDICT_FALSE (vec_len (vm->worker_thread_main_loop_callbacks))) - clib_call_callbacks (vm->worker_thread_main_loop_callbacks, vm); } + if (PREDICT_FALSE (vec_len (vm->worker_thread_main_loop_callbacks))) + clib_call_callbacks (vm->worker_thread_main_loop_callbacks, vm, + cpu_time_now); + /* Process pre-input nodes. */ + cpu_time_now = clib_cpu_time_now (); vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_PRE_INPUT, diff --git a/src/vlib/main.h b/src/vlib/main.h index 2e070aa6d64..f7a4a1c912a 100644 --- a/src/vlib/main.h +++ b/src/vlib/main.h @@ -40,6 +40,7 @@ #ifndef included_vlib_main_h #define included_vlib_main_h +#include #include #include #include @@ -80,6 +81,42 @@ typedef struct u32 trace_filter_set_index; } vlib_trace_filter_t; +typedef enum +{ + VLIB_NODE_RUNTIME_PERF_BEFORE, + VLIB_NODE_RUNTIME_PERF_AFTER, + VLIB_NODE_RUNTIME_PERF_RESET, +} vlib_node_runtime_perf_call_type_t; + +typedef struct +{ + struct vlib_main_t *vm; + vlib_node_runtime_t *node; + vlib_frame_t *frame; + uword packets; + u64 cpu_time_now; + vlib_node_runtime_perf_call_type_t call_type; +} vlib_node_runtime_perf_callback_args_t; + +struct vlib_node_runtime_perf_callback_data_t; + +typedef void (*vlib_node_runtime_perf_callback_fp_t) + (struct vlib_node_runtime_perf_callback_data_t * data, + vlib_node_runtime_perf_callback_args_t * args); + +typedef struct vlib_node_runtime_perf_callback_data_t +{ + vlib_node_runtime_perf_callback_fp_t fp; + union + { + void *v; + u64 u; + } u[3]; +} vlib_node_runtime_perf_callback_data_t; + +clib_callback_data_typedef (vlib_node_runtime_perf_callback_set_t, + vlib_node_runtime_perf_callback_data_t); + typedef struct vlib_main_t { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); @@ -112,14 +149,8 @@ typedef struct vlib_main_t u32 internal_node_last_vectors_per_main_loop; /* Main loop hw / sw performance counters */ - void (**vlib_node_runtime_perf_counter_cbs) (struct vlib_main_t *, - u64 *, u64 *, - vlib_node_runtime_t *, - vlib_frame_t *, int); - void (**vlib_node_runtime_perf_counter_cb_tmp) (struct vlib_main_t *, - u64 *, u64 *, - vlib_node_runtime_t *, - vlib_frame_t *, int); + vlib_node_runtime_perf_callback_set_t vlib_node_runtime_perf_callbacks; + /* Every so often we switch to the next counter. */ #define VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE 7 @@ -234,9 +265,10 @@ typedef struct vlib_main_t u8 **argv; /* Top of (worker) dispatch loop callback */ - void (**volatile worker_thread_main_loop_callbacks) (struct vlib_main_t *); + void (**volatile worker_thread_main_loop_callbacks) + (struct vlib_main_t *, u64 t); void (**volatile worker_thread_main_loop_callback_tmp) - (struct vlib_main_t *); + (struct vlib_main_t *, u64 t); clib_spinlock_t worker_thread_main_loop_callback_lock; /* debugging */ @@ -268,6 +300,12 @@ typedef struct vlib_main_t /* Earliest barrier can be closed again */ f64 barrier_no_close_before; + /* Barrier counter callback */ + void (**volatile barrier_perf_callbacks) + (struct vlib_main_t *, u64 t, int leave); + void (**volatile barrier_perf_callbacks_tmp) + (struct vlib_main_t *, u64 t, int leave); + /* Need to check the frame queues */ volatile uword check_frame_queues; @@ -399,6 +437,27 @@ vlib_last_vectors_per_main_loop (vlib_main_t * vm) return vm->internal_node_last_vectors_per_main_loop; } +always_inline void +vlib_node_runtime_perf_counter (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, uword n, u64 t, + vlib_node_runtime_perf_call_type_t call_type) +{ + vlib_node_runtime_perf_callback_data_t *v = + clib_callback_data_check_and_get (&vm->vlib_node_runtime_perf_callbacks); + if (vec_len (v)) + { + vlib_node_runtime_perf_callback_args_t args = { + .vm = vm, + .node = node, + .frame = frame, + .packets = n, + .cpu_time_now = t, + .call_type = call_type, + }; + clib_callback_data_call_vec (v, &args); + } +} + always_inline void vlib_set_queue_signal_callback (vlib_main_t * vm, void (*fp) (vlib_main_t *)) { diff --git a/src/vlib/node.h b/src/vlib/node.h index 9c4cadd56f7..f7155aeda86 100644 --- a/src/vlib/node.h +++ b/src/vlib/node.h @@ -235,9 +235,6 @@ typedef struct u64 calls, vectors, clocks, suspends; u64 max_clock; u64 max_clock_n; - u64 perf_counter0_ticks; - u64 perf_counter1_ticks; - u64 perf_counter_vectors; } vlib_node_stats_t; #define foreach_vlib_node_state \ @@ -484,10 +481,6 @@ typedef struct vlib_node_runtime_t u32 vectors_since_last_overflow; /**< Number of vector elements processed by this node. */ - u32 perf_counter0_ticks_since_last_overflow; /**< Perf counter 0 ticks */ - u32 perf_counter1_ticks_since_last_overflow; /**< Perf counter 1 ticks */ - u32 perf_counter_vectors_since_last_overflow; /**< Perf counter vectors */ - u32 next_frame_index; /**< Start of next frames for this node. */ diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h index 89f212374e9..dfeba17ab09 100644 --- a/src/vlib/node_funcs.h +++ b/src/vlib/node_funcs.h @@ -201,6 +201,10 @@ vlib_node_set_state (vlib_main_t * vm, u32 node_index, nm->input_node_counts_by_state[new_state] += 1; } + if (PREDICT_FALSE (r->state == VLIB_NODE_STATE_DISABLED)) + vlib_node_runtime_perf_counter (vm, r, 0, 0, 0, + VLIB_NODE_RUNTIME_PERF_RESET); + n->state = new_state; r->state = new_state; } diff --git a/src/vlib/threads.c b/src/vlib/threads.c index a8c1a1a207c..4df550e61fb 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -700,6 +700,9 @@ start_workers (vlib_main_t * vm) clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); vm->elog_main.lock[0] = 0; + clib_callback_data_init (&vm->vlib_node_runtime_perf_callbacks, + &vm->worker_thread_main_loop_callback_lock); + if (n_vlib_mains > 1) { /* Replace hand-crafted length-1 vector with a real vector */ @@ -734,6 +737,7 @@ start_workers (vlib_main_t * vm) vm->barrier_no_close_before = 0; worker_thread_index = 1; + clib_spinlock_init (&vm->worker_thread_main_loop_callback_lock); for (i = 0; i < vec_len (tm->registrations); i++) { @@ -790,6 +794,11 @@ start_workers (vlib_main_t * vm) _vec_len (vm_clone->pending_rpc_requests) = 0; clib_memset (&vm_clone->random_buffer, 0, sizeof (vm_clone->random_buffer)); + clib_spinlock_init + (&vm_clone->worker_thread_main_loop_callback_lock); + clib_callback_data_init + (&vm_clone->vlib_node_runtime_perf_callbacks, + &vm_clone->worker_thread_main_loop_callback_lock); nm = &vlib_mains[0]->node_main; nm_clone = &vm_clone->node_main; @@ -1466,6 +1475,10 @@ vlib_worker_thread_barrier_sync_int (vlib_main_t * vm, const char *func_name) return; } + if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0)) + clib_call_callbacks (vm->barrier_perf_callbacks, vm, + vm->clib_time.last_cpu_time, 0 /* enter */ ); + /* * Need data to decide if we're working hard enough to honor * the barrier hold-down timer. @@ -1629,6 +1642,9 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) barrier_trace_release (t_entry, t_closed_total, t_update_main); + if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0)) + clib_call_callbacks (vm->barrier_perf_callbacks, vm, + vm->clib_time.last_cpu_time, 1 /* leave */ ); } /* diff --git a/src/vlib/threads.h b/src/vlib/threads.h index c1188cea933..e8d416997b0 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -16,6 +16,7 @@ #define included_vlib_threads_h #include +#include #include extern vlib_main_t **vlib_mains; @@ -400,6 +401,10 @@ vlib_worker_thread_barrier_check (void) u32 thread_index = vm->thread_index; f64 t = vlib_time_now (vm); + if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0)) + clib_call_callbacks (vm->barrier_perf_callbacks, vm, + vm->clib_time.last_cpu_time, 0 /* enter */ ); + if (PREDICT_FALSE (vlib_worker_threads->barrier_elog_enabled)) { vlib_worker_thread_t *w = vlib_worker_threads + thread_index; @@ -498,6 +503,10 @@ vlib_worker_thread_barrier_check (void) ed->thread_index = thread_index; ed->duration = (int) (1000000.0 * t); } + + if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0)) + clib_call_callbacks (vm->barrier_perf_callbacks, vm, + vm->clib_time.last_cpu_time, 1 /* leave */ ); } } -- cgit 1.2.3-korg