From 6c81f5a2493ff65b4dacfef45db8a1ee459a738f Mon Sep 17 00:00:00 2001 From: Tom Seidenberg Date: Fri, 10 Jul 2020 15:49:03 +0000 Subject: misc: add callback hooks and refactor pmc Callbacks for monitoring and performance measurement: - Add new callback list type, with context - Add callbacks for API, CLI, and barrier sync - Modify node dispatch callback to pass plugin-specific context - Modify perfmon plugin to keep PMC samples local to the plugin - Include process nodes in dispatch callback - Pass dispatch function return value to callback Type: refactor Signed-off-by: Tom Seidenberg Change-Id: I28b06c58490611e08d76ff5b01b2347ba2109b22 --- src/plugins/mdata/mdata.c | 22 ++- src/plugins/perfmon/CMakeLists.txt | 18 +- src/plugins/perfmon/perfmon.c | 33 ++-- src/plugins/perfmon/perfmon.h | 38 +++- src/plugins/perfmon/perfmon_intel.h | 4 + src/plugins/perfmon/perfmon_intel_skl.c | 59 ++++++ src/plugins/perfmon/perfmon_intel_skx.c | 59 ++++++ src/plugins/perfmon/perfmon_periodic.c | 219 +++++++++------------- src/plugins/perfmon/perfmon_plugin.c | 38 ++++ src/vlib/cli.c | 7 + src/vlib/cli.h | 8 +- src/vlib/init.h | 13 ++ src/vlib/main.c | 95 +++------- src/vlib/main.h | 79 +++++++- src/vlib/node.h | 7 - src/vlib/node_funcs.h | 4 + src/vlib/threads.c | 16 ++ src/vlib/threads.h | 9 + src/vlibapi/api_common.h | 8 +- src/vlibapi/api_shared.c | 14 ++ src/vnet/interface.c | 4 + src/vppinfra/CMakeLists.txt | 1 + src/vppinfra/callback.h | 7 +- src/vppinfra/callback_data.h | 315 ++++++++++++++++++++++++++++++++ 24 files changed, 825 insertions(+), 252 deletions(-) create mode 100644 src/plugins/perfmon/perfmon_plugin.c create mode 100644 src/vppinfra/callback_data.h diff --git a/src/plugins/mdata/mdata.c b/src/plugins/mdata/mdata.c index fc5bbfbb571..f74564eb33c 100644 --- a/src/plugins/mdata/mdata.c +++ b/src/plugins/mdata/mdata.c @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -42,9 +43,8 @@ static mdata_t mdata_none; before_or_after: 0 => before, 1=> 
after */ static void -mdata_trace_callback (vlib_main_t * vm, u64 * c0, u64 * c1, - vlib_node_runtime_t * node, - vlib_frame_t * frame, int before_or_after) +mdata_trace_callback (vlib_node_runtime_perf_callback_data_t * data, + vlib_node_runtime_perf_callback_args_t * args) { int i; mdata_main_t *mm = &mdata_main; @@ -53,6 +53,12 @@ mdata_trace_callback (vlib_main_t * vm, u64 * c0, u64 * c1, u32 n_left_from; mdata_t *before, *modifies; u8 *after; + vlib_main_t *vm = args->vm; + vlib_frame_t *frame = args->frame; + vlib_node_runtime_t *node = args->node; + + if (PREDICT_FALSE (args->call_type == VLIB_NODE_RUNTIME_PERF_RESET)) + return; /* Input nodes don't have frames, etc. */ if (frame == 0) @@ -68,7 +74,7 @@ mdata_trace_callback (vlib_main_t * vm, u64 * c0, u64 * c1, vlib_get_buffers (vm, from, bufs, n_left_from); b = bufs; - if (before_or_after == 1 /* after */ ) + if (args->call_type == VLIB_NODE_RUNTIME_PERF_AFTER) goto after_pass; /* Resize the per-thread "before" vector to cover the current frame */ @@ -152,11 +158,9 @@ mdata_enable_disable (mdata_main_t * mmp, int enable_disable) if (vlib_mains[i] == 0) continue; - clib_callback_enable_disable - (vlib_mains[i]->vlib_node_runtime_perf_counter_cbs, - vlib_mains[i]->vlib_node_runtime_perf_counter_cb_tmp, - vlib_mains[i]->worker_thread_main_loop_callback_lock, - (void *) mdata_trace_callback, enable_disable); + clib_callback_data_enable_disable + (&vlib_mains[i]->vlib_node_runtime_perf_callbacks, + mdata_trace_callback, enable_disable); } return rv; diff --git a/src/plugins/perfmon/CMakeLists.txt b/src/plugins/perfmon/CMakeLists.txt index a3f045f75f3..69e225b4a3f 100644 --- a/src/plugins/perfmon/CMakeLists.txt +++ b/src/plugins/perfmon/CMakeLists.txt @@ -11,7 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-add_vpp_plugin(perfmon +add_vpp_library (perfcore SOURCES perfmon.c perfmon_periodic.c @@ -32,6 +32,22 @@ add_vpp_plugin(perfmon perfmon_intel_wsm_ep_dp.c perfmon_intel_wsm_ep_sp.c perfmon_intel_wsm_ex.c + + INSTALL_HEADERS + perfmon.h + + LINK_LIBRARIES + vppinfra + vlib + vnet +) + +add_vpp_plugin(perfmon + SOURCES + perfmon_plugin.c + + LINK_LIBRARIES + perfcore ) option(VPP_BUILD_MAPFILE_TOOL "Build perfmon mapfile utility." OFF) diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c index 7e276c30810..525a864b584 100644 --- a/src/plugins/perfmon/perfmon.c +++ b/src/plugins/perfmon/perfmon.c @@ -16,7 +16,6 @@ */ #include -#include #include #include @@ -98,6 +97,7 @@ perfmon_init (vlib_main_t * vm) u32 cpuid; u8 model, stepping; perfmon_intel_pmc_event_t *ev; + int i; pm->vlib_main = vm; pm->vnet_main = vnet_get_main (); @@ -109,9 +109,17 @@ perfmon_init (vlib_main_t * vm) /* Default data collection interval */ pm->timeout_interval = 2.0; /* seconds */ - vec_validate (pm->pm_fds, 1); - vec_validate (pm->perf_event_pages, 1); - vec_validate (pm->rdpmc_indices, 1); + + vec_validate (pm->threads, vlib_get_thread_main ()->n_vlib_mains - 1); + for (i = 0; i < vec_len (pm->threads); i++) + { + perfmon_thread_t *pt = clib_mem_alloc_aligned + (sizeof (perfmon_thread_t), CLIB_CACHE_LINE_BYTES); + clib_memset (pt, 0, sizeof (*pt)); + pm->threads[i] = pt; + pt->pm_fds[0] = -1; + pt->pm_fds[1] = -1; + } pm->page_size = getpagesize (); pm->perfmon_table = 0; @@ -147,18 +155,7 @@ perfmon_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (perfmon_init); -/* *INDENT-OFF* */ -VLIB_PLUGIN_REGISTER () = -{ - .version = VPP_BUILD_VER, - .description = "Performance Monitor", -#if !defined(__x86_64__) - .default_disabled = 1, -#endif -}; -/* *INDENT-ON* */ - -static uword +uword unformat_processor_event (unformat_input_t * input, va_list * args) { perfmon_main_t *pm = va_arg (*args, perfmon_main_t *); @@ -185,6 +182,10 @@ unformat_processor_event (unformat_input_t * 
input, va_list * args) pe_config |= pm->perfmon_table[idx].event_code[0]; pe_config |= pm->perfmon_table[idx].umask << 8; + pe_config |= pm->perfmon_table[idx].edge << 18; + pe_config |= pm->perfmon_table[idx].anyt << 21; + pe_config |= pm->perfmon_table[idx].inv << 23; + pe_config |= pm->perfmon_table[idx].cmask << 24; ep->name = (char *) hp->key; ep->pe_type = PERF_TYPE_RAW; diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h index 000e3c2849c..c8782023597 100644 --- a/src/plugins/perfmon/perfmon.h +++ b/src/plugins/perfmon/perfmon.h @@ -76,6 +76,32 @@ typedef struct u8 *value; } name_value_pair_t; +typedef struct +{ + u64 ticks[2]; + u64 vectors; +} perfmon_counters_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + + /* Current counters */ + u64 c[2]; + + /* Current perf_event file descriptors, per thread */ + int pm_fds[2]; + + /* mmap base of mapped struct perf_event_mmap_page */ + u8 *perf_event_pages[2]; + + u32 rdpmc_indices[2]; + + /* vector of counters by node index */ + perfmon_counters_t *counters; + +} perfmon_thread_t; + typedef struct { /* API message ID base */ @@ -112,17 +138,15 @@ typedef struct /* Current event (index) being collected */ u32 current_event; int n_active; - u32 **rdpmc_indices; - /* mmap base / size of (mapped) struct perf_event_mmap_page */ - u8 ***perf_event_pages; + /* mmap size of (mapped) struct perf_event_mmap_page */ u32 page_size; - /* Current perf_event file descriptors, per thread */ - int **pm_fds; - /* thread bitmap */ uword *thread_bitmap; + /* per-thread data */ + perfmon_thread_t **threads; + /* Logging */ vlib_log_class_t log_class; @@ -137,6 +161,8 @@ extern perfmon_main_t perfmon_main; extern vlib_node_registration_t perfmon_periodic_node; uword *perfmon_parse_table (perfmon_main_t * pm, char *path, char *filename); +uword unformat_processor_event (unformat_input_t * input, va_list * args); + /* Periodic function events */ #define PERFMON_START 1 diff --git 
a/src/plugins/perfmon/perfmon_intel.h b/src/plugins/perfmon/perfmon_intel.h index 6bb849244d5..475309124ea 100644 --- a/src/plugins/perfmon/perfmon_intel.h +++ b/src/plugins/perfmon/perfmon_intel.h @@ -25,6 +25,10 @@ typedef struct { u8 event_code[2]; u8 umask; + u8 cmask; + u8 inv; + u8 anyt; + u8 edge; char *event_name; } perfmon_intel_pmc_event_t; diff --git a/src/plugins/perfmon/perfmon_intel_skl.c b/src/plugins/perfmon/perfmon_intel_skl.c index 726dbb4dd8c..b1c03140651 100644 --- a/src/plugins/perfmon/perfmon_intel_skl.c +++ b/src/plugins/perfmon/perfmon_intel_skl.c @@ -88,6 +88,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x0D}, .umask = 0x01, + .anyt = 1, .event_name = "int_misc.recovery_cycles_any", }, { @@ -103,6 +104,8 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x0E}, .umask = 0x01, + .cmask = 1, + .inv = 1, .event_name = "uops_issued.stall_cycles", }, { @@ -233,6 +236,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x3C}, .umask = 0x00, + .anyt = 1, .event_name = "cpu_clk_unhalted.thread_p_any", }, { @@ -248,6 +252,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x3C}, .umask = 0x01, + .anyt = 1, .event_name = "cpu_clk_thread_unhalted.ref_xclk_any", }, { @@ -268,6 +273,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x48}, .umask = 0x01, + .cmask = 1, .event_name = "l1d_pend_miss.pending", }, { @@ -305,6 +311,12 @@ static perfmon_intel_pmc_event_t event_table[] = { .umask = 0x0E, .event_name = "dtlb_store_misses.walk_completed", }, + { + .event_code = {0x49}, + .umask = 0x10, + .cmask = 1, + .event_name = "dtlb_store_misses.walk_active", + }, { .event_code = {0x49}, .umask = 0x10, @@ -403,6 +415,8 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x5E}, .umask = 0x01, + .cmask = 1, + .inv = 1, .event_name = "rs_events.empty_end", }, { @@ -413,6 +427,7 @@ static perfmon_intel_pmc_event_t event_table[] = { 
{ .event_code = {0x60}, .umask = 0x01, + .cmask = 1, .event_name = "offcore_requests_outstanding.cycles_with_demand_data_rd", }, { @@ -423,6 +438,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x60}, .umask = 0x02, + .cmask = 1, .event_name = "offcore_requests_outstanding.cycles_with_demand_code_rd", }, { @@ -433,6 +449,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x60}, .umask = 0x04, + .cmask = 1, .event_name = "offcore_requests_outstanding.cycles_with_demand_rfo", }, { @@ -443,6 +460,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x60}, .umask = 0x08, + .cmask = 1, .event_name = "offcore_requests_outstanding.cycles_with_data_rd", }, { @@ -458,6 +476,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x79}, .umask = 0x04, + .cmask = 1, .event_name = "idq.mite_cycles", }, { @@ -468,6 +487,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x79}, .umask = 0x08, + .cmask = 1, .event_name = "idq.dsb_cycles", }, { @@ -478,11 +498,13 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x79}, .umask = 0x18, + .cmask = 4, .event_name = "idq.all_dsb_cycles_4_uops", }, { .event_code = {0x79}, .umask = 0x18, + .cmask = 1, .event_name = "idq.all_dsb_cycles_any_uops", }, { @@ -503,11 +525,13 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x79}, .umask = 0x30, + .cmask = 1, .event_name = "idq.ms_cycles", }, { .event_code = {0x79}, .umask = 0x30, + .edge = 1, .event_name = "idq.ms_switches", }, { @@ -588,26 +612,32 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x9C}, .umask = 0x01, + .cmask = 4, .event_name = "idq_uops_not_delivered.cycles_0_uops_deliv.core", }, { .event_code = {0x9C}, .umask = 0x01, + .cmask = 3, .event_name = "idq_uops_not_delivered.cycles_le_1_uop_deliv.core", }, { .event_code = {0x9C}, .umask = 0x01, + .cmask = 2, .event_name = 
"idq_uops_not_delivered.cycles_le_2_uop_deliv.core", }, { .event_code = {0x9C}, .umask = 0x01, + .cmask = 1, .event_name = "idq_uops_not_delivered.cycles_le_3_uop_deliv.core", }, { .event_code = {0x9C}, .umask = 0x01, + .cmask = 1, + .inv = 1, .event_name = "idq_uops_not_delivered.cycles_fe_was_ok", }, { @@ -663,36 +693,43 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0xA3}, .umask = 0x01, + .cmask = 1, .event_name = "cycle_activity.cycles_l2_miss", }, { .event_code = {0xA3}, .umask = 0x04, + .cmask = 4, .event_name = "cycle_activity.stalls_total", }, { .event_code = {0xA3}, .umask = 0x05, + .cmask = 5, .event_name = "cycle_activity.stalls_l2_miss", }, { .event_code = {0xA3}, .umask = 0x08, + .cmask = 8, .event_name = "cycle_activity.cycles_l1d_miss", }, { .event_code = {0xA3}, .umask = 0x0C, + .cmask = 12, .event_name = "cycle_activity.stalls_l1d_miss", }, { .event_code = {0xA3}, .umask = 0x10, + .cmask = 16, .event_name = "cycle_activity.cycles_mem_any", }, { .event_code = {0xA3}, .umask = 0x14, + .cmask = 20, .event_name = "cycle_activity.stalls_mem_any", }, { @@ -733,11 +770,13 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0xA8}, .umask = 0x01, + .cmask = 1, .event_name = "lsd.cycles_active", }, { .event_code = {0xA8}, .umask = 0x01, + .cmask = 4, .event_name = "lsd.cycles_4_uops", }, { @@ -788,26 +827,32 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0xB1}, .umask = 0x01, + .cmask = 1, + .inv = 1, .event_name = "uops_executed.stall_cycles", }, { .event_code = {0xB1}, .umask = 0x01, + .cmask = 1, .event_name = "uops_executed.cycles_ge_1_uop_exec", }, { .event_code = {0xB1}, .umask = 0x01, + .cmask = 2, .event_name = "uops_executed.cycles_ge_2_uops_exec", }, { .event_code = {0xB1}, .umask = 0x01, + .cmask = 3, .event_name = "uops_executed.cycles_ge_3_uops_exec", }, { .event_code = {0xB1}, .umask = 0x01, + .cmask = 4, .event_name = "uops_executed.cycles_ge_4_uops_exec", }, { @@ -818,26 
+863,32 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0xB1}, .umask = 0x02, + .cmask = 1, .event_name = "uops_executed.core_cycles_ge_1", }, { .event_code = {0xB1}, .umask = 0x02, + .cmask = 2, .event_name = "uops_executed.core_cycles_ge_2", }, { .event_code = {0xB1}, .umask = 0x02, + .cmask = 3, .event_name = "uops_executed.core_cycles_ge_3", }, { .event_code = {0xB1}, .umask = 0x02, + .cmask = 4, .event_name = "uops_executed.core_cycles_ge_4", }, { .event_code = {0xB1}, .umask = 0x02, + .cmask = 1, + .inv = 1, .event_name = "uops_executed.core_cycles_none", }, { @@ -873,6 +924,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0xC0}, .umask = 0x01, + .cmask = 10, .event_name = "inst_retired.total_cycles_ps", }, { @@ -883,16 +935,22 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0xC2}, .umask = 0x02, + .cmask = 1, + .inv = 1, .event_name = "uops_retired.stall_cycles", }, { .event_code = {0xC2}, .umask = 0x02, + .cmask = 10, + .inv = 1, .event_name = "uops_retired.total_cycles", }, { .event_code = {0xC3}, .umask = 0x01, + .cmask = 1, + .edge = 1, .event_name = "machine_clears.count", }, { @@ -1083,6 +1141,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0xCA}, .umask = 0x1E, + .cmask = 1, .event_name = "fp_assist.any", }, { diff --git a/src/plugins/perfmon/perfmon_intel_skx.c b/src/plugins/perfmon/perfmon_intel_skx.c index 399174477ac..9de202d22a3 100644 --- a/src/plugins/perfmon/perfmon_intel_skx.c +++ b/src/plugins/perfmon/perfmon_intel_skx.c @@ -88,6 +88,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x0D}, .umask = 0x01, + .anyt = 1, .event_name = "int_misc.recovery_cycles_any", }, { @@ -98,6 +99,8 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x0E}, .umask = 0x01, + .cmask = 1, + .inv = 1, .event_name = "uops_issued.stall_cycles", }, { @@ -253,6 +256,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { 
.event_code = {0x3C}, .umask = 0x00, + .anyt = 1, .event_name = "cpu_clk_unhalted.thread_p_any", }, { @@ -268,6 +272,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x3C}, .umask = 0x01, + .anyt = 1, .event_name = "cpu_clk_thread_unhalted.ref_xclk_any", }, { @@ -288,6 +293,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x48}, .umask = 0x01, + .cmask = 1, .event_name = "l1d_pend_miss.pending_cycles", }, { @@ -325,6 +331,12 @@ static perfmon_intel_pmc_event_t event_table[] = { .umask = 0x0E, .event_name = "dtlb_store_misses.walk_completed", }, + { + .event_code = {0x49}, + .umask = 0x10, + .cmask = 1, + .event_name = "dtlb_store_misses.walk_active", + }, { .event_code = {0x49}, .umask = 0x10, @@ -418,6 +430,8 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x5E}, .umask = 0x01, + .cmask = 1, + .inv = 1, .event_name = "rs_events.empty_end", }, { @@ -428,6 +442,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x60}, .umask = 0x01, + .cmask = 1, .event_name = "offcore_requests_outstanding.cycles_with_demand_data_rd", }, { @@ -443,6 +458,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x60}, .umask = 0x02, + .cmask = 1, .event_name = "offcore_requests_outstanding.cycles_with_demand_code_rd", }, { @@ -453,11 +469,13 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x60}, .umask = 0x04, + .cmask = 1, .event_name = "offcore_requests_outstanding.cycles_with_demand_rfo", }, { .event_code = {0x60}, .umask = 0x08, + .cmask = 1, .event_name = "offcore_requests_outstanding.cycles_with_data_rd", }, { @@ -473,6 +491,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x79}, .umask = 0x04, + .cmask = 1, .event_name = "idq.mite_cycles", }, { @@ -483,6 +502,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x79}, .umask = 0x08, + .cmask = 1, .event_name = "idq.dsb_cycles", }, { @@ -498,11 +518,13 @@ 
static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x79}, .umask = 0x18, + .cmask = 1, .event_name = "idq.all_dsb_cycles_any_uops", }, { .event_code = {0x79}, .umask = 0x18, + .cmask = 4, .event_name = "idq.all_dsb_cycles_4_uops", }, { @@ -523,6 +545,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x79}, .umask = 0x30, + .cmask = 1, .event_name = "idq.ms_cycles", }, { @@ -533,6 +556,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x79}, .umask = 0x30, + .edge = 1, .event_name = "idq.ms_switches", }, { @@ -603,26 +627,32 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0x9C}, .umask = 0x01, + .cmask = 1, + .inv = 1, .event_name = "idq_uops_not_delivered.cycles_fe_was_ok", }, { .event_code = {0x9C}, .umask = 0x01, + .cmask = 1, .event_name = "idq_uops_not_delivered.cycles_le_3_uop_deliv.core", }, { .event_code = {0x9C}, .umask = 0x01, + .cmask = 2, .event_name = "idq_uops_not_delivered.cycles_le_2_uop_deliv.core", }, { .event_code = {0x9C}, .umask = 0x01, + .cmask = 3, .event_name = "idq_uops_not_delivered.cycles_le_1_uop_deliv.core", }, { .event_code = {0x9C}, .umask = 0x01, + .cmask = 4, .event_name = "idq_uops_not_delivered.cycles_0_uops_deliv.core", }, { @@ -683,36 +713,43 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0xA3}, .umask = 0x01, + .cmask = 1, .event_name = "cycle_activity.cycles_l2_miss", }, { .event_code = {0xA3}, .umask = 0x04, + .cmask = 4, .event_name = "cycle_activity.stalls_total", }, { .event_code = {0xA3}, .umask = 0x05, + .cmask = 5, .event_name = "cycle_activity.stalls_l2_miss", }, { .event_code = {0xA3}, .umask = 0x08, + .cmask = 8, .event_name = "cycle_activity.cycles_l1d_miss", }, { .event_code = {0xA3}, .umask = 0x0C, + .cmask = 12, .event_name = "cycle_activity.stalls_l1d_miss", }, { .event_code = {0xA3}, .umask = 0x10, + .cmask = 16, .event_name = "cycle_activity.cycles_mem_any", }, { .event_code = {0xA3}, .umask = 0x14, + 
.cmask = 20, .event_name = "cycle_activity.stalls_mem_any", }, { @@ -753,11 +790,13 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0xA8}, .umask = 0x01, + .cmask = 4, .event_name = "lsd.cycles_4_uops", }, { .event_code = {0xA8}, .umask = 0x01, + .cmask = 1, .event_name = "lsd.cycles_active", }, { @@ -803,26 +842,32 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0xB1}, .umask = 0x01, + .cmask = 4, .event_name = "uops_executed.cycles_ge_4_uops_exec", }, { .event_code = {0xB1}, .umask = 0x01, + .cmask = 3, .event_name = "uops_executed.cycles_ge_3_uops_exec", }, { .event_code = {0xB1}, .umask = 0x01, + .cmask = 2, .event_name = "uops_executed.cycles_ge_2_uops_exec", }, { .event_code = {0xB1}, .umask = 0x01, + .cmask = 1, .event_name = "uops_executed.cycles_ge_1_uop_exec", }, { .event_code = {0xB1}, .umask = 0x01, + .cmask = 1, + .inv = 1, .event_name = "uops_executed.stall_cycles", }, { @@ -838,26 +883,32 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0xB1}, .umask = 0x02, + .cmask = 1, + .inv = 1, .event_name = "uops_executed.core_cycles_none", }, { .event_code = {0xB1}, .umask = 0x02, + .cmask = 4, .event_name = "uops_executed.core_cycles_ge_4", }, { .event_code = {0xB1}, .umask = 0x02, + .cmask = 3, .event_name = "uops_executed.core_cycles_ge_3", }, { .event_code = {0xB1}, .umask = 0x02, + .cmask = 2, .event_name = "uops_executed.core_cycles_ge_2", }, { .event_code = {0xB1}, .umask = 0x02, + .cmask = 1, .event_name = "uops_executed.core_cycles_ge_1", }, { @@ -898,16 +949,21 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0xC0}, .umask = 0x01, + .cmask = 10, .event_name = "inst_retired.total_cycles_ps", }, { .event_code = {0xC2}, .umask = 0x02, + .cmask = 10, + .inv = 1, .event_name = "uops_retired.total_cycles", }, { .event_code = {0xC2}, .umask = 0x02, + .cmask = 1, + .inv = 1, .event_name = "uops_retired.stall_cycles", }, { @@ -918,6 +974,8 @@ static 
perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0xC3}, .umask = 0x01, + .cmask = 1, + .edge = 1, .event_name = "machine_clears.count", }, { @@ -1118,6 +1176,7 @@ static perfmon_intel_pmc_event_t event_table[] = { { .event_code = {0xCA}, .umask = 0x1E, + .cmask = 1, .event_name = "fp_assist.any", }, { diff --git a/src/plugins/perfmon/perfmon_periodic.c b/src/plugins/perfmon/perfmon_periodic.c index 37d669b8d13..de31221f6f4 100644 --- a/src/plugins/perfmon/perfmon_periodic.c +++ b/src/plugins/perfmon/perfmon_periodic.c @@ -33,52 +33,65 @@ perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu, } static void -read_current_perf_counters (vlib_main_t * vm, u64 * c0, u64 * c1, - vlib_node_runtime_t * node, - vlib_frame_t * frame, int before_or_after) +read_current_perf_counters (vlib_node_runtime_perf_callback_data_t * data, + vlib_node_runtime_perf_callback_args_t * args) { int i; - u64 *cc; perfmon_main_t *pm = &perfmon_main; - uword my_thread_index = vm->thread_index; + perfmon_thread_t *pt = data->u[0].v; + u64 c[2] = { 0, 0 }; + u64 *cc; - *c0 = *c1 = 0; + if (PREDICT_FALSE (args->call_type == VLIB_NODE_RUNTIME_PERF_RESET)) + return; + + if (args->call_type == VLIB_NODE_RUNTIME_PERF_BEFORE) + cc = pt->c; + else + cc = c; for (i = 0; i < pm->n_active; i++) { - cc = (i == 0) ? 
c0 : c1; - if (pm->rdpmc_indices[i][my_thread_index] != ~0) - *cc = clib_rdpmc ((int) pm->rdpmc_indices[i][my_thread_index]); + if (pt->rdpmc_indices[i] != ~0) + cc[i] = clib_rdpmc ((int) pt->rdpmc_indices[i]); else { u64 sw_value; int read_result; - if ((read_result = read (pm->pm_fds[i][my_thread_index], &sw_value, - sizeof (sw_value)) != sizeof (sw_value))) + if ((read_result = read (pt->pm_fds[i], &sw_value, + sizeof (sw_value))) != sizeof (sw_value)) { clib_unix_warning ("counter read returned %d, expected %d", read_result, sizeof (sw_value)); - clib_callback_enable_disable - (vm->vlib_node_runtime_perf_counter_cbs, - vm->vlib_node_runtime_perf_counter_cb_tmp, - vm->worker_thread_main_loop_callback_lock, + clib_callback_data_enable_disable + (&args->vm->vlib_node_runtime_perf_callbacks, read_current_perf_counters, 0 /* enable */ ); return; } - *cc = sw_value; + cc[i] = sw_value; } } + + if (args->call_type == VLIB_NODE_RUNTIME_PERF_AFTER) + { + u32 node_index = args->node->node_index; + vec_validate (pt->counters, node_index); + pt->counters[node_index].ticks[0] += c[0] - pt->c[0]; + pt->counters[node_index].ticks[1] += c[1] - pt->c[1]; + pt->counters[node_index].vectors += args->packets; + } } static void clear_counters (perfmon_main_t * pm) { - int i, j; + int j; vlib_main_t *vm = pm->vlib_main; vlib_main_t *stat_vm; - vlib_node_main_t *nm; - vlib_node_t *n; + perfmon_thread_t *pt; + u32 len; + vlib_worker_thread_barrier_sync (vm); @@ -88,26 +101,12 @@ clear_counters (perfmon_main_t * pm) if (stat_vm == 0) continue; - nm = &stat_vm->node_main; - - /* Clear the node runtime perfmon counters */ - for (i = 0; i < vec_len (nm->nodes); i++) - { - n = nm->nodes[i]; - vlib_node_sync_stats (stat_vm, n); - } + pt = pm->threads[j]; + len = vec_len (pt->counters); + if (!len) + continue; - /* And clear the node perfmon counters */ - for (i = 0; i < vec_len (nm->nodes); i++) - { - n = nm->nodes[i]; - n->stats_total.perf_counter0_ticks = 0; - 
n->stats_total.perf_counter1_ticks = 0; - n->stats_total.perf_counter_vectors = 0; - n->stats_last_clear.perf_counter0_ticks = 0; - n->stats_last_clear.perf_counter1_ticks = 0; - n->stats_last_clear.perf_counter_vectors = 0; - } + clib_memset (pt->counters, 0, len * sizeof (pt->counters[0])); } vlib_worker_thread_barrier_release (vm); } @@ -121,19 +120,20 @@ enable_current_events (perfmon_main_t * pm) perfmon_event_config_t *c; vlib_main_t *vm = vlib_get_main (); u32 my_thread_index = vm->thread_index; + perfmon_thread_t *pt = pm->threads[my_thread_index]; u32 index; int i, limit = 1; int cpu; + vlib_node_runtime_perf_callback_data_t cbdata = { 0 }; + cbdata.fp = read_current_perf_counters; + cbdata.u[0].v = pt; + cbdata.u[1].v = vm; if ((pm->current_event + 1) < vec_len (pm->single_events_to_collect)) limit = 2; for (i = 0; i < limit; i++) { - vec_validate (pm->pm_fds[i], vec_len (vlib_mains) - 1); - vec_validate (pm->perf_event_pages[i], vec_len (vlib_mains) - 1); - vec_validate (pm->rdpmc_indices[i], vec_len (vlib_mains) - 1); - c = vec_elt_at_index (pm->single_events_to_collect, pm->current_event + i); @@ -184,8 +184,8 @@ enable_current_events (perfmon_main_t * pm) if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0) clib_unix_warning ("enable ioctl"); - pm->perf_event_pages[i][my_thread_index] = (void *) p; - pm->pm_fds[i][my_thread_index] = fd; + pt->perf_event_pages[i] = (void *) p; + pt->pm_fds[i] = fd; } /* @@ -194,9 +194,7 @@ enable_current_events (perfmon_main_t * pm) */ for (i = 0; i < limit; i++) { - p = - (struct perf_event_mmap_page *) - pm->perf_event_pages[i][my_thread_index]; + p = (struct perf_event_mmap_page *) pt->perf_event_pages[i]; /* * Software event counters - and others not capable of being @@ -208,16 +206,12 @@ enable_current_events (perfmon_main_t * pm) else index = p->index - 1; - pm->rdpmc_indices[i][my_thread_index] = index; + pt->rdpmc_indices[i] = index; } pm->n_active = i; /* Enable the main loop counter snapshot mechanism */ - 
clib_callback_enable_disable - (vm->vlib_node_runtime_perf_counter_cbs, - vm->vlib_node_runtime_perf_counter_cb_tmp, - vm->worker_thread_main_loop_callback_lock, - read_current_perf_counters, 1 /* enable */ ); + clib_callback_data_add (&vm->vlib_node_runtime_perf_callbacks, cbdata); } static void @@ -225,35 +219,30 @@ disable_events (perfmon_main_t * pm) { vlib_main_t *vm = vlib_get_main (); u32 my_thread_index = vm->thread_index; + perfmon_thread_t *pt = pm->threads[my_thread_index]; int i; /* Stop main loop collection */ - clib_callback_enable_disable - (vm->vlib_node_runtime_perf_counter_cbs, - vm->vlib_node_runtime_perf_counter_cb_tmp, - vm->worker_thread_main_loop_callback_lock, - read_current_perf_counters, 0 /* enable */ ); + clib_callback_data_remove (&vm->vlib_node_runtime_perf_callbacks, + read_current_perf_counters); for (i = 0; i < pm->n_active; i++) { - if (pm->pm_fds[i][my_thread_index] == 0) + if (pt->pm_fds[i] == 0) continue; - if (ioctl (pm->pm_fds[i][my_thread_index], PERF_EVENT_IOC_DISABLE, 0) < - 0) + if (ioctl (pt->pm_fds[i], PERF_EVENT_IOC_DISABLE, 0) < 0) clib_unix_warning ("disable ioctl"); - if (pm->perf_event_pages[i][my_thread_index]) + if (pt->perf_event_pages[i]) { - if (munmap (pm->perf_event_pages[i][my_thread_index], - pm->page_size) < 0) + if (munmap (pt->perf_event_pages[i], pm->page_size) < 0) clib_unix_warning ("munmap"); - pm->perf_event_pages[i][my_thread_index] = 0; + pt->perf_event_pages[i] = 0; } - (void) close (pm->pm_fds[i][my_thread_index]); - pm->pm_fds[i][my_thread_index] = 0; - + (void) close (pt->pm_fds[i]); + pt->pm_fds[i] = 0; } } @@ -265,7 +254,7 @@ worker_thread_start_event (vlib_main_t * vm) clib_callback_enable_disable (vm->worker_thread_main_loop_callbacks, vm->worker_thread_main_loop_callback_tmp, vm->worker_thread_main_loop_callback_lock, - worker_thread_start_event, 0 /* enable */ ); + worker_thread_start_event, 0 /* disable */ ); enable_current_events (pm); } @@ -276,7 +265,7 @@ worker_thread_stop_event 
(vlib_main_t * vm) clib_callback_enable_disable (vm->worker_thread_main_loop_callbacks, vm->worker_thread_main_loop_callback_tmp, vm->worker_thread_main_loop_callback_lock, - worker_thread_stop_event, 0 /* enable */ ); + worker_thread_stop_event, 0 /* disable */ ); disable_events (pm); } @@ -329,14 +318,15 @@ scrape_and_clear_counters (perfmon_main_t * pm) vlib_main_t *vm = pm->vlib_main; vlib_main_t *stat_vm; vlib_node_main_t *nm; - vlib_node_t ***node_dups = 0; - vlib_node_t **nodes; - vlib_node_t *n; + perfmon_counters_t *ctr; + perfmon_counters_t *ctrs; + perfmon_counters_t **ctr_dups = 0; + perfmon_thread_t *pt; perfmon_capture_t *c; perfmon_event_config_t *current_event; uword *p; u8 *counter_name; - u64 vectors_this_counter; + u32 len; /* snapshoot the nodes, including pm counters */ vlib_worker_thread_barrier_sync (vm); @@ -347,31 +337,16 @@ scrape_and_clear_counters (perfmon_main_t * pm) if (stat_vm == 0) continue; - nm = &stat_vm->node_main; - - for (i = 0; i < vec_len (nm->nodes); i++) - { - n = nm->nodes[i]; - vlib_node_sync_stats (stat_vm, n); - } - - nodes = 0; - vec_validate (nodes, vec_len (nm->nodes) - 1); - vec_add1 (node_dups, nodes); - - /* Snapshoot and clear the per-node perfmon counters */ - for (i = 0; i < vec_len (nm->nodes); i++) + pt = pm->threads[j]; + len = vec_len (pt->counters); + ctrs = 0; + if (len) { - n = nm->nodes[i]; - nodes[i] = clib_mem_alloc (sizeof (*n)); - clib_memcpy_fast (nodes[i], n, sizeof (*n)); - n->stats_total.perf_counter0_ticks = 0; - n->stats_total.perf_counter1_ticks = 0; - n->stats_total.perf_counter_vectors = 0; - n->stats_last_clear.perf_counter0_ticks = 0; - n->stats_last_clear.perf_counter1_ticks = 0; - n->stats_last_clear.perf_counter_vectors = 0; + vec_validate (ctrs, len - 1); + clib_memcpy (ctrs, pt->counters, len * sizeof (pt->counters[0])); + clib_memset (pt->counters, 0, len * sizeof (pt->counters[0])); } + vec_add1 (ctr_dups, ctrs); } vlib_worker_thread_barrier_release (vm); @@ -382,22 +357,21 @@ 
scrape_and_clear_counters (perfmon_main_t * pm) if (stat_vm == 0) continue; - nodes = node_dups[j]; + pt = pm->threads[j]; + ctrs = ctr_dups[j]; - for (i = 0; i < vec_len (nodes); i++) + for (i = 0; i < vec_len (ctrs); i++) { u8 *capture_name; - n = nodes[i]; + ctr = &ctrs[i]; + nm = &stat_vm->node_main; - if (n->stats_total.perf_counter0_ticks == 0 && - n->stats_total.perf_counter1_ticks == 0) - goto skip_this_node; + if (ctr->ticks[0] == 0 && ctr->ticks[1] == 0) + continue; for (k = 0; k < 2; k++) { - u64 counter_value, counter_last_clear; - /* * We collect 2 counters at once, except for the * last counter when the user asks for an odd number of @@ -407,20 +381,7 @@ scrape_and_clear_counters (perfmon_main_t * pm) >= vec_len (pm->single_events_to_collect)) break; - if (k == 0) - { - counter_value = n->stats_total.perf_counter0_ticks; - counter_last_clear = - n->stats_last_clear.perf_counter0_ticks; - } - else - { - counter_value = n->stats_total.perf_counter1_ticks; - counter_last_clear = - n->stats_last_clear.perf_counter1_ticks; - } - - capture_name = format (0, "t%d-%v%c", j, n->name, 0); + capture_name = format (0, "t%d-%v%c", j, nm->nodes[i]->name, 0); p = hash_get_mem (pm->capture_by_thread_and_node_name, capture_name); @@ -443,20 +404,15 @@ scrape_and_clear_counters (perfmon_main_t * pm) current_event = pm->single_events_to_collect + pm->current_event + k; counter_name = (u8 *) current_event->name; - vectors_this_counter = n->stats_total.perf_counter_vectors - - n->stats_last_clear.perf_counter_vectors; vec_add1 (c->counter_names, counter_name); - vec_add1 (c->counter_values, - counter_value - counter_last_clear); - vec_add1 (c->vectors_this_counter, vectors_this_counter); + vec_add1 (c->counter_values, ctr->ticks[k]); + vec_add1 (c->vectors_this_counter, ctr->vectors); } - skip_this_node: - clib_mem_free (n); } - vec_free (nodes); + vec_free (ctrs); } - vec_free (node_dups); + vec_free (ctr_dups); } static void @@ -492,9 +448,8 @@ handle_timeout 
(vlib_main_t * vm, perfmon_main_t * pm, f64 now) for (i = 1; i < vec_len (vlib_mains); i++) { /* Has the worker actually stopped collecting data? */ - while (clib_callback_is_set - (vlib_mains[i]->worker_thread_main_loop_callbacks, - vlib_mains[i]->worker_thread_main_loop_callback_lock, + while (clib_callback_data_is_set + (&vlib_mains[i]->vlib_node_runtime_perf_callbacks, read_current_perf_counters)) { if (vlib_time_now (vm) > deadman) @@ -528,7 +483,7 @@ handle_timeout (vlib_main_t * vm, perfmon_main_t * pm, f64 now) (vlib_mains[i]->worker_thread_main_loop_callbacks, vlib_mains[i]->worker_thread_main_loop_callback_tmp, vlib_mains[i]->worker_thread_main_loop_callback_lock, - worker_thread_start_event, 1 /* enable */ ); + worker_thread_start_event, 0 /* disable */ ); } } diff --git a/src/plugins/perfmon/perfmon_plugin.c b/src/plugins/perfmon/perfmon_plugin.c new file mode 100644 index 00000000000..1d56573abd5 --- /dev/null +++ b/src/plugins/perfmon/perfmon_plugin.c @@ -0,0 +1,38 @@ +/* + * perfmon_plugin.c - perf monitor plugin + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include +#include + +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = +{ + .version = VPP_BUILD_VER, + .description = "Performance Monitor", +#if !defined(__x86_64__) + .default_disabled = 1, +#endif +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/cli.c b/src/vlib/cli.c index d14ea683fb9..2697c0ae083 100644 --- a/src/vlib/cli.c +++ b/src/vlib/cli.c @@ -39,6 +39,7 @@ #include #include +#include #include #include #include @@ -563,10 +564,16 @@ vlib_cli_dispatch_sub_commands (vlib_main_t * vm, if (!c->is_mp_safe) vlib_worker_thread_barrier_sync (vm); + if (PREDICT_FALSE (vec_len (cm->perf_counter_cbs) != 0)) + clib_call_callbacks (cm->perf_counter_cbs, cm, + c - cm->commands, 0 /* before */ ); c->hit_counter++; c_error = c->function (vm, si, c); + if (PREDICT_FALSE (vec_len (cm->perf_counter_cbs) != 0)) + clib_call_callbacks (cm->perf_counter_cbs, cm, + c - cm->commands, 1 /* after */ ); if (!c->is_mp_safe) vlib_worker_thread_barrier_release (vm); diff --git a/src/vlib/cli.h b/src/vlib/cli.h index df9ed7212bc..0a8ef9d78d7 100644 --- a/src/vlib/cli.h +++ b/src/vlib/cli.h @@ -132,7 +132,7 @@ typedef struct vlib_cli_command_t typedef void (vlib_cli_output_function_t) (uword arg, u8 * buffer, uword buffer_bytes); -typedef struct +typedef struct vlib_cli_main_t { /* Vector of all known commands. */ vlib_cli_command_t *commands; @@ -146,6 +146,12 @@ typedef struct /* index vector, to sort commands, etc. 
*/ u32 *sort_vector; + + /* performance counter callback */ + void (**perf_counter_cbs) + (struct vlib_cli_main_t *, u32 id, int before_or_after); + void (**perf_counter_cbs_tmp) + (struct vlib_cli_main_t *, u32 id, int before_or_after); } vlib_cli_main_t; #ifndef CLIB_MARCH_VARIANT diff --git a/src/vlib/init.h b/src/vlib/init.h index fc638013efc..68ac2f36717 100644 --- a/src/vlib/init.h +++ b/src/vlib/init.h @@ -317,6 +317,19 @@ static void __vlib_rm_config_function_##x (void) \ _error; \ }) +#define vlib_call_main_loop_enter_function(vm, x) \ + ({ \ + extern vlib_init_function_t * VLIB_MAIN_LOOP_ENTER_FUNCTION_SYMBOL (x); \ + vlib_init_function_t * _f = VLIB_MAIN_LOOP_ENTER_FUNCTION_SYMBOL (x); \ + clib_error_t * _error = 0; \ + if (! hash_get (vm->init_functions_called, _f)) \ + { \ + hash_set1 (vm->init_functions_called, _f); \ + _error = _f (vm); \ + } \ + _error; \ + }) + /* External functions. */ clib_error_t *vlib_call_all_init_functions (struct vlib_main_t *vm); clib_error_t *vlib_call_all_config_functions (struct vlib_main_t *vm, diff --git a/src/vlib/main.c b/src/vlib/main.c index 8d7c6c09275..cb651e43a75 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -568,41 +568,29 @@ vlib_put_next_frame (vlib_main_t * vm, never_inline void vlib_node_runtime_sync_stats (vlib_main_t * vm, vlib_node_runtime_t * r, - uword n_calls, uword n_vectors, uword n_clocks, - uword n_ticks0, uword n_ticks1) + uword n_calls, uword n_vectors, uword n_clocks) { vlib_node_t *n = vlib_get_node (vm, r->node_index); n->stats_total.calls += n_calls + r->calls_since_last_overflow; n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow; n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow; - n->stats_total.perf_counter0_ticks += n_ticks0 + - r->perf_counter0_ticks_since_last_overflow; - n->stats_total.perf_counter1_ticks += n_ticks1 + - r->perf_counter1_ticks_since_last_overflow; - n->stats_total.perf_counter_vectors += n_vectors + - 
r->perf_counter_vectors_since_last_overflow; n->stats_total.max_clock = r->max_clock; n->stats_total.max_clock_n = r->max_clock_n; r->calls_since_last_overflow = 0; r->vectors_since_last_overflow = 0; r->clocks_since_last_overflow = 0; - r->perf_counter0_ticks_since_last_overflow = 0ULL; - r->perf_counter1_ticks_since_last_overflow = 0ULL; - r->perf_counter_vectors_since_last_overflow = 0ULL; } always_inline void __attribute__ ((unused)) vlib_process_sync_stats (vlib_main_t * vm, vlib_process_t * p, - uword n_calls, uword n_vectors, uword n_clocks, - uword n_ticks0, uword n_ticks1) + uword n_calls, uword n_vectors, uword n_clocks) { vlib_node_runtime_t *rt = &p->node_runtime; vlib_node_t *n = vlib_get_node (vm, rt->node_index); - vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks, - n_ticks0, n_ticks1); + vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks); n->stats_total.suspends += p->n_suspends; p->n_suspends = 0; } @@ -628,7 +616,7 @@ vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n) vec_elt_at_index (vm->node_main.nodes_by_type[n->type], n->runtime_index); - vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0, 0, 0); + vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0); /* Sync up runtime next frame vector counters with main node structure. 
*/ { @@ -648,32 +636,21 @@ always_inline u32 vlib_node_runtime_update_stats (vlib_main_t * vm, vlib_node_runtime_t * node, uword n_calls, - uword n_vectors, uword n_clocks, - uword n_ticks0, uword n_ticks1) + uword n_vectors, uword n_clocks) { u32 ca0, ca1, v0, v1, cl0, cl1, r; - u32 ptick00, ptick01, ptick10, ptick11, pvec0, pvec1; cl0 = cl1 = node->clocks_since_last_overflow; ca0 = ca1 = node->calls_since_last_overflow; v0 = v1 = node->vectors_since_last_overflow; - ptick00 = ptick01 = node->perf_counter0_ticks_since_last_overflow; - ptick10 = ptick11 = node->perf_counter1_ticks_since_last_overflow; - pvec0 = pvec1 = node->perf_counter_vectors_since_last_overflow; ca1 = ca0 + n_calls; v1 = v0 + n_vectors; cl1 = cl0 + n_clocks; - ptick01 = ptick00 + n_ticks0; - ptick11 = ptick10 + n_ticks1; - pvec1 = pvec0 + n_vectors; node->calls_since_last_overflow = ca1; node->clocks_since_last_overflow = cl1; node->vectors_since_last_overflow = v1; - node->perf_counter0_ticks_since_last_overflow = ptick01; - node->perf_counter1_ticks_since_last_overflow = ptick11; - node->perf_counter_vectors_since_last_overflow = pvec1; node->max_clock_n = node->max_clock > n_clocks ? 
node->max_clock_n : n_vectors; @@ -681,42 +658,25 @@ vlib_node_runtime_update_stats (vlib_main_t * vm, r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors); - if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0) || (ptick01 < ptick00) - || (ptick11 < ptick10) || (pvec1 < pvec0)) + if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0)) { node->calls_since_last_overflow = ca0; node->clocks_since_last_overflow = cl0; node->vectors_since_last_overflow = v0; - node->perf_counter0_ticks_since_last_overflow = ptick00; - node->perf_counter1_ticks_since_last_overflow = ptick10; - node->perf_counter_vectors_since_last_overflow = pvec0; - vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks, - n_ticks0, n_ticks1); + vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks); } return r; } -always_inline void -vlib_node_runtime_perf_counter (vlib_main_t * vm, u64 * pmc0, u64 * pmc1, - vlib_node_runtime_t * node, - vlib_frame_t * frame, int before_or_after) -{ - *pmc0 = 0; - *pmc1 = 0; - if (PREDICT_FALSE (vec_len (vm->vlib_node_runtime_perf_counter_cbs) != 0)) - clib_call_callbacks (vm->vlib_node_runtime_perf_counter_cbs, vm, pmc0, - pmc1, node, frame, before_or_after); -} - always_inline void vlib_process_update_stats (vlib_main_t * vm, vlib_process_t * p, uword n_calls, uword n_vectors, uword n_clocks) { vlib_node_runtime_update_stats (vm, &p->node_runtime, - n_calls, n_vectors, n_clocks, 0ULL, 0ULL); + n_calls, n_vectors, n_clocks); } static clib_error_t * @@ -1166,7 +1126,6 @@ dispatch_node (vlib_main_t * vm, u64 t; vlib_node_main_t *nm = &vm->node_main; vlib_next_frame_t *nf; - u64 pmc_before[2], pmc_after[2], pmc_delta[2]; if (CLIB_DEBUG > 0) { @@ -1206,8 +1165,8 @@ dispatch_node (vlib_main_t * vm, last_time_stamp, frame ? 
frame->n_vectors : 0, /* is_after */ 0); - vlib_node_runtime_perf_counter (vm, &pmc_before[0], &pmc_before[1], - node, frame, 0 /* before */ ); + vlib_node_runtime_perf_counter (vm, node, frame, 0, last_time_stamp, + VLIB_NODE_RUNTIME_PERF_BEFORE); /* * Turn this on if you run into @@ -1237,15 +1196,8 @@ dispatch_node (vlib_main_t * vm, t = clib_cpu_time_now (); - /* - * To validate accounting: pmc_delta = t - pmc_before; - * perf ticks should equal clocks/pkt... - */ - vlib_node_runtime_perf_counter (vm, &pmc_after[0], &pmc_after[1], node, - frame, 1 /* after */ ); - - pmc_delta[0] = pmc_after[0] - pmc_before[0]; - pmc_delta[1] = pmc_after[1] - pmc_before[1]; + vlib_node_runtime_perf_counter (vm, node, frame, n, t, + VLIB_NODE_RUNTIME_PERF_AFTER); vlib_elog_main_loop_event (vm, node->node_index, t, n, 1 /* is_after */ ); @@ -1255,9 +1207,7 @@ dispatch_node (vlib_main_t * vm, v = vlib_node_runtime_update_stats (vm, node, /* n_calls */ 1, /* n_vectors */ n, - /* n_clocks */ t - last_time_stamp, - pmc_delta[0] /* PMC0 */ , - pmc_delta[1] /* PMC1 */ ); + /* n_clocks */ t - last_time_stamp); /* When in interrupt mode and vector rate crosses threshold switch to polling mode. */ @@ -1579,6 +1529,9 @@ dispatch_process (vlib_main_t * vm, old_process_index = nm->current_process_index; nm->current_process_index = node->runtime_index; + vlib_node_runtime_perf_counter (vm, node_runtime, f, 0, last_time_stamp, + VLIB_NODE_RUNTIME_PERF_BEFORE); + n_vectors = vlib_process_startup (vm, p, f); nm->current_process_index = old_process_index; @@ -1618,6 +1571,9 @@ dispatch_process (vlib_main_t * vm, vlib_elog_main_loop_event (vm, node_runtime->node_index, t, is_suspend, /* is_after */ 1); + vlib_node_runtime_perf_counter (vm, node_runtime, f, n_vectors, t, + VLIB_NODE_RUNTIME_PERF_AFTER); + vlib_process_update_stats (vm, p, /* n_calls */ !is_suspend, /* n_vectors */ n_vectors, @@ -1668,6 +1624,9 @@ dispatch_suspended_process (vlib_main_t * vm, /* Save away current process for suspend. 
*/ nm->current_process_index = node->runtime_index; + vlib_node_runtime_perf_counter (vm, node_runtime, f, 0, last_time_stamp, + VLIB_NODE_RUNTIME_PERF_BEFORE); + n_vectors = vlib_process_resume (vm, p); t = clib_cpu_time_now (); @@ -1701,6 +1660,9 @@ dispatch_suspended_process (vlib_main_t * vm, vlib_elog_main_loop_event (vm, node_runtime->node_index, t, !is_suspend, /* is_after */ 1); + vlib_node_runtime_perf_counter (vm, node_runtime, f, n_vectors, t, + VLIB_NODE_RUNTIME_PERF_AFTER); + vlib_process_update_stats (vm, p, /* n_calls */ !is_suspend, /* n_vectors */ n_vectors, @@ -1831,11 +1793,14 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) else frame_queue_check_counter--; } - if (PREDICT_FALSE (vec_len (vm->worker_thread_main_loop_callbacks))) - clib_call_callbacks (vm->worker_thread_main_loop_callbacks, vm); } + if (PREDICT_FALSE (vec_len (vm->worker_thread_main_loop_callbacks))) + clib_call_callbacks (vm->worker_thread_main_loop_callbacks, vm, + cpu_time_now); + /* Process pre-input nodes. 
*/ + cpu_time_now = clib_cpu_time_now (); vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_PRE_INPUT, diff --git a/src/vlib/main.h b/src/vlib/main.h index 2e070aa6d64..f7a4a1c912a 100644 --- a/src/vlib/main.h +++ b/src/vlib/main.h @@ -40,6 +40,7 @@ #ifndef included_vlib_main_h #define included_vlib_main_h +#include #include #include #include @@ -80,6 +81,42 @@ typedef struct u32 trace_filter_set_index; } vlib_trace_filter_t; +typedef enum +{ + VLIB_NODE_RUNTIME_PERF_BEFORE, + VLIB_NODE_RUNTIME_PERF_AFTER, + VLIB_NODE_RUNTIME_PERF_RESET, +} vlib_node_runtime_perf_call_type_t; + +typedef struct +{ + struct vlib_main_t *vm; + vlib_node_runtime_t *node; + vlib_frame_t *frame; + uword packets; + u64 cpu_time_now; + vlib_node_runtime_perf_call_type_t call_type; +} vlib_node_runtime_perf_callback_args_t; + +struct vlib_node_runtime_perf_callback_data_t; + +typedef void (*vlib_node_runtime_perf_callback_fp_t) + (struct vlib_node_runtime_perf_callback_data_t * data, + vlib_node_runtime_perf_callback_args_t * args); + +typedef struct vlib_node_runtime_perf_callback_data_t +{ + vlib_node_runtime_perf_callback_fp_t fp; + union + { + void *v; + u64 u; + } u[3]; +} vlib_node_runtime_perf_callback_data_t; + +clib_callback_data_typedef (vlib_node_runtime_perf_callback_set_t, + vlib_node_runtime_perf_callback_data_t); + typedef struct vlib_main_t { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); @@ -112,14 +149,8 @@ typedef struct vlib_main_t u32 internal_node_last_vectors_per_main_loop; /* Main loop hw / sw performance counters */ - void (**vlib_node_runtime_perf_counter_cbs) (struct vlib_main_t *, - u64 *, u64 *, - vlib_node_runtime_t *, - vlib_frame_t *, int); - void (**vlib_node_runtime_perf_counter_cb_tmp) (struct vlib_main_t *, - u64 *, u64 *, - vlib_node_runtime_t *, - vlib_frame_t *, int); + vlib_node_runtime_perf_callback_set_t vlib_node_runtime_perf_callbacks; + /* Every so often we switch to the next counter. 
*/ #define VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE 7 @@ -234,9 +265,10 @@ typedef struct vlib_main_t u8 **argv; /* Top of (worker) dispatch loop callback */ - void (**volatile worker_thread_main_loop_callbacks) (struct vlib_main_t *); + void (**volatile worker_thread_main_loop_callbacks) + (struct vlib_main_t *, u64 t); void (**volatile worker_thread_main_loop_callback_tmp) - (struct vlib_main_t *); + (struct vlib_main_t *, u64 t); clib_spinlock_t worker_thread_main_loop_callback_lock; /* debugging */ @@ -268,6 +300,12 @@ typedef struct vlib_main_t /* Earliest barrier can be closed again */ f64 barrier_no_close_before; + /* Barrier counter callback */ + void (**volatile barrier_perf_callbacks) + (struct vlib_main_t *, u64 t, int leave); + void (**volatile barrier_perf_callbacks_tmp) + (struct vlib_main_t *, u64 t, int leave); + /* Need to check the frame queues */ volatile uword check_frame_queues; @@ -399,6 +437,27 @@ vlib_last_vectors_per_main_loop (vlib_main_t * vm) return vm->internal_node_last_vectors_per_main_loop; } +always_inline void +vlib_node_runtime_perf_counter (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, uword n, u64 t, + vlib_node_runtime_perf_call_type_t call_type) +{ + vlib_node_runtime_perf_callback_data_t *v = + clib_callback_data_check_and_get (&vm->vlib_node_runtime_perf_callbacks); + if (vec_len (v)) + { + vlib_node_runtime_perf_callback_args_t args = { + .vm = vm, + .node = node, + .frame = frame, + .packets = n, + .cpu_time_now = t, + .call_type = call_type, + }; + clib_callback_data_call_vec (v, &args); + } +} + always_inline void vlib_set_queue_signal_callback (vlib_main_t * vm, void (*fp) (vlib_main_t *)) { diff --git a/src/vlib/node.h b/src/vlib/node.h index 9c4cadd56f7..f7155aeda86 100644 --- a/src/vlib/node.h +++ b/src/vlib/node.h @@ -235,9 +235,6 @@ typedef struct u64 calls, vectors, clocks, suspends; u64 max_clock; u64 max_clock_n; - u64 perf_counter0_ticks; - u64 perf_counter1_ticks; - u64 
perf_counter_vectors; } vlib_node_stats_t; #define foreach_vlib_node_state \ @@ -484,10 +481,6 @@ typedef struct vlib_node_runtime_t u32 vectors_since_last_overflow; /**< Number of vector elements processed by this node. */ - u32 perf_counter0_ticks_since_last_overflow; /**< Perf counter 0 ticks */ - u32 perf_counter1_ticks_since_last_overflow; /**< Perf counter 1 ticks */ - u32 perf_counter_vectors_since_last_overflow; /**< Perf counter vectors */ - u32 next_frame_index; /**< Start of next frames for this node. */ diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h index 89f212374e9..dfeba17ab09 100644 --- a/src/vlib/node_funcs.h +++ b/src/vlib/node_funcs.h @@ -201,6 +201,10 @@ vlib_node_set_state (vlib_main_t * vm, u32 node_index, nm->input_node_counts_by_state[new_state] += 1; } + if (PREDICT_FALSE (r->state == VLIB_NODE_STATE_DISABLED)) + vlib_node_runtime_perf_counter (vm, r, 0, 0, 0, + VLIB_NODE_RUNTIME_PERF_RESET); + n->state = new_state; r->state = new_state; } diff --git a/src/vlib/threads.c b/src/vlib/threads.c index a8c1a1a207c..4df550e61fb 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -700,6 +700,9 @@ start_workers (vlib_main_t * vm) clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); vm->elog_main.lock[0] = 0; + clib_callback_data_init (&vm->vlib_node_runtime_perf_callbacks, + &vm->worker_thread_main_loop_callback_lock); + if (n_vlib_mains > 1) { /* Replace hand-crafted length-1 vector with a real vector */ @@ -734,6 +737,7 @@ start_workers (vlib_main_t * vm) vm->barrier_no_close_before = 0; worker_thread_index = 1; + clib_spinlock_init (&vm->worker_thread_main_loop_callback_lock); for (i = 0; i < vec_len (tm->registrations); i++) { @@ -790,6 +794,11 @@ start_workers (vlib_main_t * vm) _vec_len (vm_clone->pending_rpc_requests) = 0; clib_memset (&vm_clone->random_buffer, 0, sizeof (vm_clone->random_buffer)); + clib_spinlock_init + (&vm_clone->worker_thread_main_loop_callback_lock); + clib_callback_data_init + 
(&vm_clone->vlib_node_runtime_perf_callbacks, + &vm_clone->worker_thread_main_loop_callback_lock); nm = &vlib_mains[0]->node_main; nm_clone = &vm_clone->node_main; @@ -1466,6 +1475,10 @@ vlib_worker_thread_barrier_sync_int (vlib_main_t * vm, const char *func_name) return; } + if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0)) + clib_call_callbacks (vm->barrier_perf_callbacks, vm, + vm->clib_time.last_cpu_time, 0 /* enter */ ); + /* * Need data to decide if we're working hard enough to honor * the barrier hold-down timer. @@ -1629,6 +1642,9 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) barrier_trace_release (t_entry, t_closed_total, t_update_main); + if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0)) + clib_call_callbacks (vm->barrier_perf_callbacks, vm, + vm->clib_time.last_cpu_time, 1 /* leave */ ); } /* diff --git a/src/vlib/threads.h b/src/vlib/threads.h index c1188cea933..e8d416997b0 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -16,6 +16,7 @@ #define included_vlib_threads_h #include +#include #include extern vlib_main_t **vlib_mains; @@ -400,6 +401,10 @@ vlib_worker_thread_barrier_check (void) u32 thread_index = vm->thread_index; f64 t = vlib_time_now (vm); + if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0)) + clib_call_callbacks (vm->barrier_perf_callbacks, vm, + vm->clib_time.last_cpu_time, 0 /* enter */ ); + if (PREDICT_FALSE (vlib_worker_threads->barrier_elog_enabled)) { vlib_worker_thread_t *w = vlib_worker_threads + thread_index; @@ -498,6 +503,10 @@ vlib_worker_thread_barrier_check (void) ed->thread_index = thread_index; ed->duration = (int) (1000000.0 * t); } + + if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0)) + clib_call_callbacks (vm->barrier_perf_callbacks, vm, + vm->clib_time.last_cpu_time, 1 /* leave */ ); } } diff --git a/src/vlibapi/api_common.h b/src/vlibapi/api_common.h index 86b1c5ac3ee..915ddabaca1 100644 --- a/src/vlibapi/api_common.h +++ b/src/vlibapi/api_common.h 
@@ -224,7 +224,7 @@ typedef struct } api_version_t; /** API main structure, used by both vpp and binary API clients */ -typedef struct +typedef struct api_main_t { /** Message handler vector */ void (**msg_handlers) (void *); @@ -374,6 +374,12 @@ typedef struct elog_main_t *elog_main; int elog_trace_api_messages; + /** performance counter callback **/ + void (**perf_counter_cbs) + (struct api_main_t *, u32 id, int before_or_after); + void (**perf_counter_cbs_tmp) + (struct api_main_t *, u32 id, int before_or_after); + } api_main_t; extern __thread api_main_t *my_api_main; diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c index caad6e54828..5e715d6f829 100644 --- a/src/vlibapi/api_shared.c +++ b/src/vlibapi/api_shared.c @@ -30,6 +30,7 @@ #include #include #include +#include /* *INDENT-OFF* */ api_main_t api_global_main = @@ -493,7 +494,15 @@ msg_handler_internal (api_main_t * am, (*endian_fp) (the_msg); } + if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0)) + clib_call_callbacks (am->perf_counter_cbs, am, id, + 0 /* before */ ); + (*am->msg_handlers[id]) (the_msg); + + if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0)) + clib_call_callbacks (am->perf_counter_cbs, am, id, + 1 /* after */ ); if (!am->is_mp_safe[id]) vl_msg_api_barrier_release (); } @@ -620,8 +629,13 @@ vl_msg_api_handler_with_vm_node (api_main_t * am, svm_region_t * vlib_rp, endian_fp = am->msg_endian_handlers[id]; (*endian_fp) (the_msg); } + if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0)) + clib_call_callbacks (am->perf_counter_cbs, am, id, 0 /* before */ ); (*handler) (the_msg, vm, node); + + if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0)) + clib_call_callbacks (am->perf_counter_cbs, am, id, 1 /* after */ ); if (is_private) { am->vlib_rp = old_vlib_rp; diff --git a/src/vnet/interface.c b/src/vnet/interface.c index dfefdbac921..6d5b3561f19 100644 --- a/src/vnet/interface.c +++ b/src/vnet/interface.c @@ -872,6 +872,8 @@ vnet_register_interface (vnet_main_t 
* vnm, foreach_vlib_main ({ nrt = vlib_node_get_runtime (this_vlib_main, hw->output_node_index); nrt->function = node->function; + vlib_node_runtime_perf_counter (this_vlib_main, nrt, 0, 0, 0, + VLIB_NODE_RUNTIME_PERF_RESET); }); /* *INDENT-ON* */ @@ -882,6 +884,8 @@ vnet_register_interface (vnet_main_t * vnm, foreach_vlib_main ({ nrt = vlib_node_get_runtime (this_vlib_main, hw->tx_node_index); nrt->function = node->function; + vlib_node_runtime_perf_counter (this_vlib_main, nrt, 0, 0, 0, + VLIB_NODE_RUNTIME_PERF_RESET); }); /* *INDENT-ON* */ diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt index a10f335dd52..8648275e0da 100644 --- a/src/vppinfra/CMakeLists.txt +++ b/src/vppinfra/CMakeLists.txt @@ -102,6 +102,7 @@ set(VPPINFRA_HEADERS byte_order.h cache.h callback.h + callback_data.h clib_error.h clib.h cpu.h diff --git a/src/vppinfra/callback.h b/src/vppinfra/callback.h index 595d69d72ab..a938ea326c9 100644 --- a/src/vppinfra/callback.h +++ b/src/vppinfra/callback.h @@ -70,12 +70,11 @@ do { \ * Note: fp exists to shut up gcc-6, which \ * produces a warning not seen with gcc-7 or 8 \ */ \ - void (*fp)(void *a1, ...); \ + typeof (h) h_ = (h); \ int i; \ - for (i = 0; i < vec_len (h); i++) \ + for (i = 0; i < vec_len (h_); i++) \ { \ - fp = (void *)(h[i]); \ - (*fp) (__VA_ARGS__); \ + (h_[i]) (__VA_ARGS__); \ } \ } while (0); diff --git a/src/vppinfra/callback_data.h b/src/vppinfra/callback_data.h new file mode 100644 index 00000000000..9a1ad0a9778 --- /dev/null +++ b/src/vppinfra/callback_data.h @@ -0,0 +1,315 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** @file + * @brief Callback multiplex scheme + */ + +#ifndef included_callback_data_h +#define included_callback_data_h +#include + +/** @brief Declare and define a callback set type + * @param set_t_ The set type to define + * @param cb_t_ The callback type to use + */ +#define clib_callback_data_typedef(set_t_, cb_t_) \ +typedef struct set_t_ \ +{ \ + cb_t_* curr; \ + cb_t_* volatile next; \ + cb_t_* spare; \ + clib_spinlock_t* lock; \ +} set_t_ + +/** @brief Initialize a callback set + * @param set_ The callback set to initialize + * @param lock_ The lock to use, if any + */ +#define clib_callback_data_init(set_,lock_) \ +do { \ + (set_)->lock = (lock_); \ + (set_)->curr = 0; \ + (set_)->next = 0; \ + (set_)->spare = 0; \ +} while (0) + +/** @brief Add a callback to the specified callback set + * @param set_ The callback set + * @param value_ The value_ to assign the callback + * + * Add a callback to the indicated callback set. If the set is + * currently being iterated, then the change will be applied after the + * current full iteration, and prior to the next full iteration.
+ */ +#define clib_callback_data_add(set_,value_) \ +do { \ + clib_spinlock_lock_if_init ((set_)->lock); \ + typeof ((set_)->next) next_ = (set_)->next; \ + if (PREDICT_TRUE (next_ == 0)) \ + { \ + next_ = (set_)->spare; \ + (set_)->spare = 0; \ + vec_append (next_, (set_)->curr); \ + } \ + u32 sz_ = vec_len (next_); \ + vec_validate (next_, sz_); \ + next_[sz_] = (value_); \ + (set_)->next = next_; \ + clib_spinlock_unlock_if_init ((set_)->lock); \ +} while (0) + +/** @brief Remove a callback from the specified callback set + * @param set_ The callback set + * @param fp_ The current callback function + * @return 1 if the function was removed, 0 if not + * + * Remove a callback from the indicated callback set. Idempotent. If + * the set is currently being iterated, then the change will be applied + * after the current full iteration, and prior to the next full + * iteration. + */ +#define clib_callback_data_remove(set_,fp_) \ +({ \ + int found_ = 0; \ + clib_spinlock_lock_if_init ((set_)->lock); \ + typeof ((set_)->next) next_ = (set_)->next; \ + if (PREDICT_TRUE (next_ == 0)) \ + { \ + next_ = (set_)->spare; \ + (set_)->spare = 0; \ + vec_append (next_, (set_)->curr); \ + } \ + u32 sz_ = vec_len (next_); \ + u32 i_; \ + for (i_ = 0; i_ < sz_; i_++) \ + if (next_[i_].fp == (fp_)) \ + { \ + vec_delete (next_, 1, i_); \ + found_ = 1; \ + break; \ + } \ + (set_)->next = next_; \ + clib_spinlock_unlock_if_init ((set_)->lock); \ + found_; \ +}) + +/** @brief Swap a callback in the specified callback set + * @param set_ The callback set + * @param fp_ The current callback function + * @param value_ The value_ to assign the callback + * @return 1 if the function was swapped, 0 if not + * + * Swap a callback in the indicated callback set. If the callback is + * not found, then nothing is done. If the set is currently being + * iterated, then the change will be applied after the current full + * iteration, and prior to the next full iteration. 
+ */ +#define clib_callback_data_swap(set_,fp_,value_) \ +({ \ + int found_ = 0; \ + clib_spinlock_lock_if_init ((set_)->lock); \ + typeof ((set_)->next) next_ = (set_)->next; \ + if (PREDICT_TRUE (next_ == 0)) \ + { \ + next_ = (set_)->spare; \ + (set_)->spare = 0; \ + vec_append (next_, (set_)->curr); \ + } \ + u32 sz_ = vec_len (next_); \ + u32 i_; \ + for (i_ = 0; i_ < sz_; i_++) \ + if (next_[i_].fp == (fp_)) \ + { \ + next_[i_] = (value_); \ + found_ = 1; \ + break; \ + } \ + (set_)->next = next_; \ + clib_spinlock_unlock_if_init ((set_)->lock); \ + found_; \ +}) + +/** @brief Ensure a callback is in the specified callback set + * @param set_ The callback set + * @param value_ The value_ to assign the callback + * @note Expands to a do/while statement; it yields no value + * + * Add or swap a callback in the indicated callback set. If the + * callback is already in the set, it is replaced. If the callback is + * not found, then it is added. If the set is currently being + * iterated, then the change will be applied after the current full + * iteration, and prior to the next full iteration. + */ +#define clib_callback_data_ensure(set_,value_) \ +do { \ + int found_ = 0; \ + clib_spinlock_lock_if_init ((set_)->lock); \ + typeof ((set_)->next) next_ = (set_)->next; \ + if (PREDICT_TRUE (next_ == 0)) \ + { \ + next_ = (set_)->spare; \ + (set_)->spare = 0; \ + vec_append (next_, (set_)->curr); \ + } \ + u32 sz_ = vec_len (next_); \ + u32 i_; \ + for (i_ = 0; i_ < sz_; i_++) \ + if (next_[i_].fp == (value_).fp) \ + { \ + found_ = 1; \ + break; \ + } \ + if (!found_) \ + vec_validate (next_, i_); \ + next_[i_] = (value_); \ + (set_)->next = next_; \ + clib_spinlock_unlock_if_init ((set_)->lock); \ +} while(0) + +/** @brief Enable/Disable the specified callback + * @param set_ The callback set + * @param fp_ The callback function + * @param ena_ 1 to enable, 0 to disable + * + * Enable or disable a callback function, with no data.
+ */ +#define clib_callback_data_enable_disable(set_,fp_,ena_) \ +do { \ + if (ena_) \ + { \ + typeof ((set_)->next[0]) data_ = { .fp = (fp_) }; \ + clib_callback_data_add ((set_), data_); \ + } \ + else \ + clib_callback_data_remove ((set_), (fp_)); \ +} while (0) + +/** @brief Get the value of a callback, if set. + * @param set_ The callback set + * @param fp_ The callback function + * @param v_ Set to the callback's current value + * @return 1 if the function is in the set, 0 if not + */ +#define clib_callback_data_get_value(set_,fp_,v_) \ +({ \ + int found_ = 0; \ + clib_spinlock_lock_if_init ((set_)->lock); \ + typeof ((set_)->next) search_ = (set_)->next; \ + if (PREDICT_TRUE (search_ == 0)) \ + search_ = (set_)->curr; \ + u32 sz_ = vec_len (search_); \ + u32 i_; \ + for (i_ = 0; i_ < sz_; i_++) \ + if (search_[i_].fp == (fp_)) \ + { \ + (v_) = search_[i_]; \ + found_ = 1; \ + break; \ + } \ + clib_spinlock_unlock_if_init ((set_)->lock); \ + found_; \ +}) + +/** @brief Check if callback is set + * @param set_ The callback set + * @param fp_ The callback function + * @return 1 if the function is in the set, 0 if not + */ +#define clib_callback_data_is_set(set_,fp_) \ +({ \ + int found_ = 0; \ + clib_spinlock_lock_if_init ((set_)->lock); \ + typeof ((set_)->next) search_ = (set_)->next; \ + if (PREDICT_TRUE (search_ == 0)) \ + search_ = (set_)->curr; \ + u32 sz_ = vec_len (search_); \ + u32 i_; \ + for (i_ = 0; i_ < sz_; i_++) \ + if (search_[i_].fp == (fp_)) \ + { \ + found_ = 1; \ + break; \ + } \ + clib_spinlock_unlock_if_init ((set_)->lock); \ + found_; \ +}) + +/** @brief Check for and get current callback set + * @param set_ the callback set + * @return The callback vector to iterate for this pass + */ +#define clib_callback_data_check_and_get(set_) \ +({ \ + typeof ((set_)->curr) curr_ = (set_)->curr; \ + if (PREDICT_FALSE ((set_)->next != 0)) \ + { \ + clib_spinlock_lock_if_init ((set_)->lock); \ + vec_reset_length (curr_); \ + (set_)->spare = curr_; \ + curr_
= (set_)->next; \ + (set_)->next = 0; \ + if (PREDICT_FALSE (0 == vec_len (curr_))) \ + vec_free (curr_); \ + (set_)->curr = curr_; \ + clib_spinlock_unlock_if_init ((set_)->lock); \ + } \ + curr_; \ +}) + +/** @brief Iterate and call a callback vector + * @param vec_ the callback vector + * @param varargs additional callback parameters + */ +#define clib_callback_data_call_vec(vec_, ...) \ +do { \ + u32 sz_ = vec_len (vec_); \ + u32 i_; \ + for (i_ = 0; i_ < sz_; i_++) \ + { \ + CLIB_PREFETCH (&vec_[i_+1], CLIB_CACHE_LINE_BYTES, STORE); \ + (vec_[i_].fp) (&vec_[i_], __VA_ARGS__); \ + } \ +} while (0) + +/** @brief Call the specified callback set + * @param set_ the callback set + * @param varargs additional callback parameters + */ +#define clib_callback_data_call(set_, ...) \ +do { \ + typeof ((set_)->curr) v_ = clib_callback_data_check_and_get(set_); \ + clib_callback_data_call_vec (v_, __VA_ARGS__); \ +} while (0) + +/** @brief prefetch the callback set + * @param set_ The callback set + */ +#define clib_callback_data_prefetch(set_) \ +do { \ + if (PREDICT_FALSE ((set_)->curr)) \ + CLIB_PREFETCH ((set_)->curr, CLIB_CACHE_LINE_BYTES, STORE); \ +} while (0) + + +#endif /* included_callback_data_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg