summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/plugins/mdata/mdata.c 22
-rw-r--r-- src/plugins/perfmon/CMakeLists.txt 18
-rw-r--r-- src/plugins/perfmon/perfmon.c 33
-rw-r--r-- src/plugins/perfmon/perfmon.h 38
-rw-r--r-- src/plugins/perfmon/perfmon_intel.h 4
-rw-r--r-- src/plugins/perfmon/perfmon_intel_skl.c 59
-rw-r--r-- src/plugins/perfmon/perfmon_intel_skx.c 59
-rw-r--r-- src/plugins/perfmon/perfmon_periodic.c 219
-rw-r--r-- src/plugins/perfmon/perfmon_plugin.c 38
-rw-r--r-- src/vlib/cli.c 7
-rw-r--r-- src/vlib/cli.h 8
-rw-r--r-- src/vlib/init.h 13
-rw-r--r-- src/vlib/main.c 95
-rw-r--r-- src/vlib/main.h 79
-rw-r--r-- src/vlib/node.h 7
-rw-r--r-- src/vlib/node_funcs.h 4
-rw-r--r-- src/vlib/threads.c 16
-rw-r--r-- src/vlib/threads.h 9
-rw-r--r-- src/vlibapi/api_common.h 8
-rw-r--r-- src/vlibapi/api_shared.c 14
-rw-r--r-- src/vnet/interface.c 4
-rw-r--r-- src/vppinfra/CMakeLists.txt 1
-rw-r--r-- src/vppinfra/callback.h 7
-rw-r--r-- src/vppinfra/callback_data.h 315
24 files changed, 825 insertions, 252 deletions
diff --git a/src/plugins/mdata/mdata.c b/src/plugins/mdata/mdata.c
index fc5bbfbb571..f74564eb33c 100644
--- a/src/plugins/mdata/mdata.c
+++ b/src/plugins/mdata/mdata.c
@@ -21,6 +21,7 @@
#include <vlibapi/api.h>
#include <vlibmemory/api.h>
+#include <vppinfra/callback_data.h>
#include <vpp/app/version.h>
#include <stdbool.h>
@@ -42,9 +43,8 @@ static mdata_t mdata_none;
before_or_after: 0 => before, 1=> after
*/
static void
-mdata_trace_callback (vlib_main_t * vm, u64 * c0, u64 * c1,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, int before_or_after)
+mdata_trace_callback (vlib_node_runtime_perf_callback_data_t * data,
+ vlib_node_runtime_perf_callback_args_t * args)
{
int i;
mdata_main_t *mm = &mdata_main;
@@ -53,6 +53,12 @@ mdata_trace_callback (vlib_main_t * vm, u64 * c0, u64 * c1,
u32 n_left_from;
mdata_t *before, *modifies;
u8 *after;
+ vlib_main_t *vm = args->vm;
+ vlib_frame_t *frame = args->frame;
+ vlib_node_runtime_t *node = args->node;
+
+ if (PREDICT_FALSE (args->call_type == VLIB_NODE_RUNTIME_PERF_RESET))
+ return;
/* Input nodes don't have frames, etc. */
if (frame == 0)
@@ -68,7 +74,7 @@ mdata_trace_callback (vlib_main_t * vm, u64 * c0, u64 * c1,
vlib_get_buffers (vm, from, bufs, n_left_from);
b = bufs;
- if (before_or_after == 1 /* after */ )
+ if (args->call_type == VLIB_NODE_RUNTIME_PERF_AFTER)
goto after_pass;
/* Resize the per-thread "before" vector to cover the current frame */
@@ -152,11 +158,9 @@ mdata_enable_disable (mdata_main_t * mmp, int enable_disable)
if (vlib_mains[i] == 0)
continue;
- clib_callback_enable_disable
- (vlib_mains[i]->vlib_node_runtime_perf_counter_cbs,
- vlib_mains[i]->vlib_node_runtime_perf_counter_cb_tmp,
- vlib_mains[i]->worker_thread_main_loop_callback_lock,
- (void *) mdata_trace_callback, enable_disable);
+ clib_callback_data_enable_disable
+ (&vlib_mains[i]->vlib_node_runtime_perf_callbacks,
+ mdata_trace_callback, enable_disable);
}
return rv;
diff --git a/src/plugins/perfmon/CMakeLists.txt b/src/plugins/perfmon/CMakeLists.txt
index a3f045f75f3..69e225b4a3f 100644
--- a/src/plugins/perfmon/CMakeLists.txt
+++ b/src/plugins/perfmon/CMakeLists.txt
@@ -11,7 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-add_vpp_plugin(perfmon
+add_vpp_library (perfcore
SOURCES
perfmon.c
perfmon_periodic.c
@@ -32,6 +32,22 @@ add_vpp_plugin(perfmon
perfmon_intel_wsm_ep_dp.c
perfmon_intel_wsm_ep_sp.c
perfmon_intel_wsm_ex.c
+
+ INSTALL_HEADERS
+ perfmon.h
+
+ LINK_LIBRARIES
+ vppinfra
+ vlib
+ vnet
+)
+
+add_vpp_plugin(perfmon
+ SOURCES
+ perfmon_plugin.c
+
+ LINK_LIBRARIES
+ perfcore
)
option(VPP_BUILD_MAPFILE_TOOL "Build perfmon mapfile utility." OFF)
diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c
index 7e276c30810..525a864b584 100644
--- a/src/plugins/perfmon/perfmon.c
+++ b/src/plugins/perfmon/perfmon.c
@@ -16,7 +16,6 @@
*/
#include <vnet/vnet.h>
-#include <vnet/plugin/plugin.h>
#include <perfmon/perfmon.h>
#include <perfmon/perfmon_intel.h>
@@ -98,6 +97,7 @@ perfmon_init (vlib_main_t * vm)
u32 cpuid;
u8 model, stepping;
perfmon_intel_pmc_event_t *ev;
+ int i;
pm->vlib_main = vm;
pm->vnet_main = vnet_get_main ();
@@ -109,9 +109,17 @@ perfmon_init (vlib_main_t * vm)
/* Default data collection interval */
pm->timeout_interval = 2.0; /* seconds */
- vec_validate (pm->pm_fds, 1);
- vec_validate (pm->perf_event_pages, 1);
- vec_validate (pm->rdpmc_indices, 1);
+
+ vec_validate (pm->threads, vlib_get_thread_main ()->n_vlib_mains - 1);
+ for (i = 0; i < vec_len (pm->threads); i++)
+ {
+ perfmon_thread_t *pt = clib_mem_alloc_aligned
+ (sizeof (perfmon_thread_t), CLIB_CACHE_LINE_BYTES);
+ clib_memset (pt, 0, sizeof (*pt));
+ pm->threads[i] = pt;
+ pt->pm_fds[0] = -1;
+ pt->pm_fds[1] = -1;
+ }
pm->page_size = getpagesize ();
pm->perfmon_table = 0;
@@ -147,18 +155,7 @@ perfmon_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (perfmon_init);
-/* *INDENT-OFF* */
-VLIB_PLUGIN_REGISTER () =
-{
- .version = VPP_BUILD_VER,
- .description = "Performance Monitor",
-#if !defined(__x86_64__)
- .default_disabled = 1,
-#endif
-};
-/* *INDENT-ON* */
-
-static uword
+uword
unformat_processor_event (unformat_input_t * input, va_list * args)
{
perfmon_main_t *pm = va_arg (*args, perfmon_main_t *);
@@ -185,6 +182,10 @@ unformat_processor_event (unformat_input_t * input, va_list * args)
pe_config |= pm->perfmon_table[idx].event_code[0];
pe_config |= pm->perfmon_table[idx].umask << 8;
+ pe_config |= pm->perfmon_table[idx].edge << 18;
+ pe_config |= pm->perfmon_table[idx].anyt << 21;
+ pe_config |= pm->perfmon_table[idx].inv << 23;
+ pe_config |= ((u32) pm->perfmon_table[idx].cmask) << 24; /* widen first: a u8 cmask >= 0x80 shifted as int overflows */
ep->name = (char *) hp->key;
ep->pe_type = PERF_TYPE_RAW;
diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h
index 000e3c2849c..c8782023597 100644
--- a/src/plugins/perfmon/perfmon.h
+++ b/src/plugins/perfmon/perfmon.h
@@ -78,6 +78,32 @@ typedef struct
typedef struct
{
+ u64 ticks[2]; /* accumulated (after - before) counter deltas, one per event slot */
+ u64 vectors; /* accumulated args->packets over the AFTER callbacks */
+} perfmon_counters_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+
+ /* Counter values captured by the BEFORE callback, one per event slot */
+ u64 c[2];
+
+ /* Current perf_event file descriptors for this thread, one per event slot */
+ int pm_fds[2];
+
+ /* mmap base of mapped struct perf_event_mmap_page */
+ u8 *perf_event_pages[2];
+
+ u32 rdpmc_indices[2]; /* rdpmc index per slot; ~0 => read pm_fds[i] instead */
+
+ /* vector of counters by node index */
+ perfmon_counters_t *counters;
+
+} perfmon_thread_t;
+
+typedef struct
+{
/* API message ID base */
u16 msg_id_base;
@@ -112,17 +138,15 @@ typedef struct
/* Current event (index) being collected */
u32 current_event;
int n_active;
- u32 **rdpmc_indices;
- /* mmap base / size of (mapped) struct perf_event_mmap_page */
- u8 ***perf_event_pages;
+ /* mmap size of (mapped) struct perf_event_mmap_page */
u32 page_size;
- /* Current perf_event file descriptors, per thread */
- int **pm_fds;
-
/* thread bitmap */
uword *thread_bitmap;
+ /* per-thread data */
+ perfmon_thread_t **threads;
+
/* Logging */
vlib_log_class_t log_class;
@@ -137,6 +161,8 @@ extern perfmon_main_t perfmon_main;
extern vlib_node_registration_t perfmon_periodic_node;
uword *perfmon_parse_table (perfmon_main_t * pm, char *path, char *filename);
+uword unformat_processor_event (unformat_input_t * input, va_list * args);
+
/* Periodic function events */
#define PERFMON_START 1
diff --git a/src/plugins/perfmon/perfmon_intel.h b/src/plugins/perfmon/perfmon_intel.h
index 6bb849244d5..475309124ea 100644
--- a/src/plugins/perfmon/perfmon_intel.h
+++ b/src/plugins/perfmon/perfmon_intel.h
@@ -25,6 +25,10 @@ typedef struct
{
u8 event_code[2];
u8 umask;
+ u8 cmask;
+ u8 inv;
+ u8 anyt;
+ u8 edge;
char *event_name;
} perfmon_intel_pmc_event_t;
diff --git a/src/plugins/perfmon/perfmon_intel_skl.c b/src/plugins/perfmon/perfmon_intel_skl.c
index 726dbb4dd8c..b1c03140651 100644
--- a/src/plugins/perfmon/perfmon_intel_skl.c
+++ b/src/plugins/perfmon/perfmon_intel_skl.c
@@ -88,6 +88,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x0D},
.umask = 0x01,
+ .anyt = 1,
.event_name = "int_misc.recovery_cycles_any",
},
{
@@ -103,6 +104,8 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x0E},
.umask = 0x01,
+ .cmask = 1,
+ .inv = 1,
.event_name = "uops_issued.stall_cycles",
},
{
@@ -233,6 +236,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x3C},
.umask = 0x00,
+ .anyt = 1,
.event_name = "cpu_clk_unhalted.thread_p_any",
},
{
@@ -248,6 +252,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x3C},
.umask = 0x01,
+ .anyt = 1,
.event_name = "cpu_clk_thread_unhalted.ref_xclk_any",
},
{
@@ -268,6 +273,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x48},
.umask = 0x01,
+ .cmask = 1,
.event_name = "l1d_pend_miss.pending",
},
{
@@ -308,6 +314,12 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x49},
.umask = 0x10,
+ .cmask = 1,
+ .event_name = "dtlb_store_misses.walk_active",
+ },
+ {
+ .event_code = {0x49},
+ .umask = 0x10,
.event_name = "dtlb_store_misses.walk_pending",
},
{
@@ -403,6 +415,8 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x5E},
.umask = 0x01,
+ .cmask = 1,
+ .inv = 1,
.event_name = "rs_events.empty_end",
},
{
@@ -413,6 +427,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x60},
.umask = 0x01,
+ .cmask = 1,
.event_name = "offcore_requests_outstanding.cycles_with_demand_data_rd",
},
{
@@ -423,6 +438,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x60},
.umask = 0x02,
+ .cmask = 1,
.event_name = "offcore_requests_outstanding.cycles_with_demand_code_rd",
},
{
@@ -433,6 +449,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x60},
.umask = 0x04,
+ .cmask = 1,
.event_name = "offcore_requests_outstanding.cycles_with_demand_rfo",
},
{
@@ -443,6 +460,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x60},
.umask = 0x08,
+ .cmask = 1,
.event_name = "offcore_requests_outstanding.cycles_with_data_rd",
},
{
@@ -458,6 +476,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x79},
.umask = 0x04,
+ .cmask = 1,
.event_name = "idq.mite_cycles",
},
{
@@ -468,6 +487,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x79},
.umask = 0x08,
+ .cmask = 1,
.event_name = "idq.dsb_cycles",
},
{
@@ -478,11 +498,13 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x79},
.umask = 0x18,
+ .cmask = 4,
.event_name = "idq.all_dsb_cycles_4_uops",
},
{
.event_code = {0x79},
.umask = 0x18,
+ .cmask = 1,
.event_name = "idq.all_dsb_cycles_any_uops",
},
{
@@ -503,11 +525,13 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x79},
.umask = 0x30,
+ .cmask = 1,
.event_name = "idq.ms_cycles",
},
{
.event_code = {0x79},
.umask = 0x30,
+ .edge = 1,
.event_name = "idq.ms_switches",
},
{
@@ -588,26 +612,32 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x9C},
.umask = 0x01,
+ .cmask = 4,
.event_name = "idq_uops_not_delivered.cycles_0_uops_deliv.core",
},
{
.event_code = {0x9C},
.umask = 0x01,
+ .cmask = 3,
.event_name = "idq_uops_not_delivered.cycles_le_1_uop_deliv.core",
},
{
.event_code = {0x9C},
.umask = 0x01,
+ .cmask = 2, /* le_2 => cmask 2 (4 is cycles_0_uops_deliv) */
.event_name = "idq_uops_not_delivered.cycles_le_2_uop_deliv.core",
},
{
.event_code = {0x9C},
.umask = 0x01,
+ .cmask = 1,
.event_name = "idq_uops_not_delivered.cycles_le_3_uop_deliv.core",
},
{
.event_code = {0x9C},
.umask = 0x01,
+ .cmask = 1,
+ .inv = 1,
.event_name = "idq_uops_not_delivered.cycles_fe_was_ok",
},
{
@@ -663,36 +693,43 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0xA3},
.umask = 0x01,
+ .cmask = 1,
.event_name = "cycle_activity.cycles_l2_miss",
},
{
.event_code = {0xA3},
.umask = 0x04,
+ .cmask = 4,
.event_name = "cycle_activity.stalls_total",
},
{
.event_code = {0xA3},
.umask = 0x05,
+ .cmask = 5,
.event_name = "cycle_activity.stalls_l2_miss",
},
{
.event_code = {0xA3},
.umask = 0x08,
+ .cmask = 8,
.event_name = "cycle_activity.cycles_l1d_miss",
},
{
.event_code = {0xA3},
.umask = 0x0C,
+ .cmask = 12,
.event_name = "cycle_activity.stalls_l1d_miss",
},
{
.event_code = {0xA3},
.umask = 0x10,
+ .cmask = 16,
.event_name = "cycle_activity.cycles_mem_any",
},
{
.event_code = {0xA3},
.umask = 0x14,
+ .cmask = 20,
.event_name = "cycle_activity.stalls_mem_any",
},
{
@@ -733,11 +770,13 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0xA8},
.umask = 0x01,
+ .cmask = 1,
.event_name = "lsd.cycles_active",
},
{
.event_code = {0xA8},
.umask = 0x01,
+ .cmask = 4,
.event_name = "lsd.cycles_4_uops",
},
{
@@ -788,26 +827,32 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0xB1},
.umask = 0x01,
+ .cmask = 1,
+ .inv = 1,
.event_name = "uops_executed.stall_cycles",
},
{
.event_code = {0xB1},
.umask = 0x01,
+ .cmask = 1,
.event_name = "uops_executed.cycles_ge_1_uop_exec",
},
{
.event_code = {0xB1},
.umask = 0x01,
+ .cmask = 2,
.event_name = "uops_executed.cycles_ge_2_uops_exec",
},
{
.event_code = {0xB1},
.umask = 0x01,
+ .cmask = 3,
.event_name = "uops_executed.cycles_ge_3_uops_exec",
},
{
.event_code = {0xB1},
.umask = 0x01,
+ .cmask = 4,
.event_name = "uops_executed.cycles_ge_4_uops_exec",
},
{
@@ -818,26 +863,32 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0xB1},
.umask = 0x02,
+ .cmask = 1,
.event_name = "uops_executed.core_cycles_ge_1",
},
{
.event_code = {0xB1},
.umask = 0x02,
+ .cmask = 2,
.event_name = "uops_executed.core_cycles_ge_2",
},
{
.event_code = {0xB1},
.umask = 0x02,
+ .cmask = 3,
.event_name = "uops_executed.core_cycles_ge_3",
},
{
.event_code = {0xB1},
.umask = 0x02,
+ .cmask = 4,
.event_name = "uops_executed.core_cycles_ge_4",
},
{
.event_code = {0xB1},
.umask = 0x02,
+ .cmask = 1,
+ .inv = 1,
.event_name = "uops_executed.core_cycles_none",
},
{
@@ -873,6 +924,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0xC0},
.umask = 0x01,
+ .cmask = 10,
.event_name = "inst_retired.total_cycles_ps",
},
{
@@ -883,16 +935,22 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0xC2},
.umask = 0x02,
+ .cmask = 1,
+ .inv = 1,
.event_name = "uops_retired.stall_cycles",
},
{
.event_code = {0xC2},
.umask = 0x02,
+ .cmask = 10,
+ .inv = 1,
.event_name = "uops_retired.total_cycles",
},
{
.event_code = {0xC3},
.umask = 0x01,
+ .cmask = 1,
+ .edge = 1,
.event_name = "machine_clears.count",
},
{
@@ -1083,6 +1141,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0xCA},
.umask = 0x1E,
+ .cmask = 1,
.event_name = "fp_assist.any",
},
{
diff --git a/src/plugins/perfmon/perfmon_intel_skx.c b/src/plugins/perfmon/perfmon_intel_skx.c
index 399174477ac..9de202d22a3 100644
--- a/src/plugins/perfmon/perfmon_intel_skx.c
+++ b/src/plugins/perfmon/perfmon_intel_skx.c
@@ -88,6 +88,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x0D},
.umask = 0x01,
+ .anyt = 1,
.event_name = "int_misc.recovery_cycles_any",
},
{
@@ -98,6 +99,8 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x0E},
.umask = 0x01,
+ .cmask = 1,
+ .inv = 1,
.event_name = "uops_issued.stall_cycles",
},
{
@@ -253,6 +256,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x3C},
.umask = 0x00,
+ .anyt = 1,
.event_name = "cpu_clk_unhalted.thread_p_any",
},
{
@@ -268,6 +272,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x3C},
.umask = 0x01,
+ .anyt = 1,
.event_name = "cpu_clk_thread_unhalted.ref_xclk_any",
},
{
@@ -288,6 +293,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x48},
.umask = 0x01,
+ .cmask = 1,
.event_name = "l1d_pend_miss.pending_cycles",
},
{
@@ -328,6 +334,12 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x49},
.umask = 0x10,
+ .cmask = 1,
+ .event_name = "dtlb_store_misses.walk_active",
+ },
+ {
+ .event_code = {0x49},
+ .umask = 0x10,
.event_name = "dtlb_store_misses.walk_pending",
},
{
@@ -418,6 +430,8 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x5E},
.umask = 0x01,
+ .cmask = 1,
+ .inv = 1,
.event_name = "rs_events.empty_end",
},
{
@@ -428,6 +442,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x60},
.umask = 0x01,
+ .cmask = 1,
.event_name = "offcore_requests_outstanding.cycles_with_demand_data_rd",
},
{
@@ -443,6 +458,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x60},
.umask = 0x02,
+ .cmask = 1,
.event_name = "offcore_requests_outstanding.cycles_with_demand_code_rd",
},
{
@@ -453,11 +469,13 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x60},
.umask = 0x04,
+ .cmask = 1,
.event_name = "offcore_requests_outstanding.cycles_with_demand_rfo",
},
{
.event_code = {0x60},
.umask = 0x08,
+ .cmask = 1,
.event_name = "offcore_requests_outstanding.cycles_with_data_rd",
},
{
@@ -473,6 +491,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x79},
.umask = 0x04,
+ .cmask = 1,
.event_name = "idq.mite_cycles",
},
{
@@ -483,6 +502,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x79},
.umask = 0x08,
+ .cmask = 1,
.event_name = "idq.dsb_cycles",
},
{
@@ -498,11 +518,13 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x79},
.umask = 0x18,
+ .cmask = 1,
.event_name = "idq.all_dsb_cycles_any_uops",
},
{
.event_code = {0x79},
.umask = 0x18,
+ .cmask = 4,
.event_name = "idq.all_dsb_cycles_4_uops",
},
{
@@ -523,6 +545,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x79},
.umask = 0x30,
+ .cmask = 1,
.event_name = "idq.ms_cycles",
},
{
@@ -533,6 +556,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x79},
.umask = 0x30,
+ .edge = 1,
.event_name = "idq.ms_switches",
},
{
@@ -603,26 +627,32 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0x9C},
.umask = 0x01,
+ .cmask = 1,
+ .inv = 1,
.event_name = "idq_uops_not_delivered.cycles_fe_was_ok",
},
{
.event_code = {0x9C},
.umask = 0x01,
+ .cmask = 1,
.event_name = "idq_uops_not_delivered.cycles_le_3_uop_deliv.core",
},
{
.event_code = {0x9C},
.umask = 0x01,
+ .cmask = 2,
.event_name = "idq_uops_not_delivered.cycles_le_2_uop_deliv.core",
},
{
.event_code = {0x9C},
.umask = 0x01,
+ .cmask = 3,
.event_name = "idq_uops_not_delivered.cycles_le_1_uop_deliv.core",
},
{
.event_code = {0x9C},
.umask = 0x01,
+ .cmask = 4,
.event_name = "idq_uops_not_delivered.cycles_0_uops_deliv.core",
},
{
@@ -683,36 +713,43 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0xA3},
.umask = 0x01,
+ .cmask = 1,
.event_name = "cycle_activity.cycles_l2_miss",
},
{
.event_code = {0xA3},
.umask = 0x04,
+ .cmask = 4,
.event_name = "cycle_activity.stalls_total",
},
{
.event_code = {0xA3},
.umask = 0x05,
+ .cmask = 5,
.event_name = "cycle_activity.stalls_l2_miss",
},
{
.event_code = {0xA3},
.umask = 0x08,
+ .cmask = 8,
.event_name = "cycle_activity.cycles_l1d_miss",
},
{
.event_code = {0xA3},
.umask = 0x0C,
+ .cmask = 12,
.event_name = "cycle_activity.stalls_l1d_miss",
},
{
.event_code = {0xA3},
.umask = 0x10,
+ .cmask = 16,
.event_name = "cycle_activity.cycles_mem_any",
},
{
.event_code = {0xA3},
.umask = 0x14,
+ .cmask = 20,
.event_name = "cycle_activity.stalls_mem_any",
},
{
@@ -753,11 +790,13 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0xA8},
.umask = 0x01,
+ .cmask = 4,
.event_name = "lsd.cycles_4_uops",
},
{
.event_code = {0xA8},
.umask = 0x01,
+ .cmask = 1,
.event_name = "lsd.cycles_active",
},
{
@@ -803,26 +842,32 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0xB1},
.umask = 0x01,
+ .cmask = 4,
.event_name = "uops_executed.cycles_ge_4_uops_exec",
},
{
.event_code = {0xB1},
.umask = 0x01,
+ .cmask = 3,
.event_name = "uops_executed.cycles_ge_3_uops_exec",
},
{
.event_code = {0xB1},
.umask = 0x01,
+ .cmask = 2,
.event_name = "uops_executed.cycles_ge_2_uops_exec",
},
{
.event_code = {0xB1},
.umask = 0x01,
+ .cmask = 1,
.event_name = "uops_executed.cycles_ge_1_uop_exec",
},
{
.event_code = {0xB1},
.umask = 0x01,
+ .cmask = 1,
+ .inv = 1,
.event_name = "uops_executed.stall_cycles",
},
{
@@ -838,26 +883,32 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0xB1},
.umask = 0x02,
+ .cmask = 1,
+ .inv = 1,
.event_name = "uops_executed.core_cycles_none",
},
{
.event_code = {0xB1},
.umask = 0x02,
+ .cmask = 4,
.event_name = "uops_executed.core_cycles_ge_4",
},
{
.event_code = {0xB1},
.umask = 0x02,
+ .cmask = 3,
.event_name = "uops_executed.core_cycles_ge_3",
},
{
.event_code = {0xB1},
.umask = 0x02,
+ .cmask = 2,
.event_name = "uops_executed.core_cycles_ge_2",
},
{
.event_code = {0xB1},
.umask = 0x02,
+ .cmask = 1,
.event_name = "uops_executed.core_cycles_ge_1",
},
{
@@ -898,16 +949,21 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0xC0},
.umask = 0x01,
+ .cmask = 10,
.event_name = "inst_retired.total_cycles_ps",
},
{
.event_code = {0xC2},
.umask = 0x02,
+ .cmask = 10,
+ .inv = 1,
.event_name = "uops_retired.total_cycles",
},
{
.event_code = {0xC2},
.umask = 0x02,
+ .cmask = 1,
+ .inv = 1,
.event_name = "uops_retired.stall_cycles",
},
{
@@ -918,6 +974,8 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0xC3},
.umask = 0x01,
+ .cmask = 1,
+ .edge = 1,
.event_name = "machine_clears.count",
},
{
@@ -1118,6 +1176,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
{
.event_code = {0xCA},
.umask = 0x1E,
+ .cmask = 1,
.event_name = "fp_assist.any",
},
{
diff --git a/src/plugins/perfmon/perfmon_periodic.c b/src/plugins/perfmon/perfmon_periodic.c
index 37d669b8d13..de31221f6f4 100644
--- a/src/plugins/perfmon/perfmon_periodic.c
+++ b/src/plugins/perfmon/perfmon_periodic.c
@@ -33,52 +33,65 @@ perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu,
}
static void
-read_current_perf_counters (vlib_main_t * vm, u64 * c0, u64 * c1,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, int before_or_after)
+read_current_perf_counters (vlib_node_runtime_perf_callback_data_t * data,
+ vlib_node_runtime_perf_callback_args_t * args)
{
int i;
- u64 *cc;
perfmon_main_t *pm = &perfmon_main;
- uword my_thread_index = vm->thread_index;
+ perfmon_thread_t *pt = data->u[0].v;
+ u64 c[2] = { 0, 0 };
+ u64 *cc;
- *c0 = *c1 = 0;
+ if (PREDICT_FALSE (args->call_type == VLIB_NODE_RUNTIME_PERF_RESET))
+ return;
+
+ if (args->call_type == VLIB_NODE_RUNTIME_PERF_BEFORE)
+ cc = pt->c;
+ else
+ cc = c;
for (i = 0; i < pm->n_active; i++)
{
- cc = (i == 0) ? c0 : c1;
- if (pm->rdpmc_indices[i][my_thread_index] != ~0)
- *cc = clib_rdpmc ((int) pm->rdpmc_indices[i][my_thread_index]);
+ if (pt->rdpmc_indices[i] != ~0)
+ cc[i] = clib_rdpmc ((int) pt->rdpmc_indices[i]);
else
{
u64 sw_value;
int read_result;
- if ((read_result = read (pm->pm_fds[i][my_thread_index], &sw_value,
- sizeof (sw_value)) != sizeof (sw_value)))
+ if ((read_result = read (pt->pm_fds[i], &sw_value,
+ sizeof (sw_value))) != sizeof (sw_value))
{
clib_unix_warning
("counter read returned %d, expected %d",
read_result, sizeof (sw_value));
- clib_callback_enable_disable
- (vm->vlib_node_runtime_perf_counter_cbs,
- vm->vlib_node_runtime_perf_counter_cb_tmp,
- vm->worker_thread_main_loop_callback_lock,
+ clib_callback_data_enable_disable
+ (&args->vm->vlib_node_runtime_perf_callbacks,
read_current_perf_counters, 0 /* enable */ );
return;
}
- *cc = sw_value;
+ cc[i] = sw_value;
}
}
+
+ if (args->call_type == VLIB_NODE_RUNTIME_PERF_AFTER)
+ {
+ u32 node_index = args->node->node_index;
+ vec_validate (pt->counters, node_index);
+ pt->counters[node_index].ticks[0] += c[0] - pt->c[0];
+ pt->counters[node_index].ticks[1] += c[1] - pt->c[1];
+ pt->counters[node_index].vectors += args->packets;
+ }
}
static void
clear_counters (perfmon_main_t * pm)
{
- int i, j;
+ int j;
vlib_main_t *vm = pm->vlib_main;
vlib_main_t *stat_vm;
- vlib_node_main_t *nm;
- vlib_node_t *n;
+ perfmon_thread_t *pt;
+ u32 len;
+
vlib_worker_thread_barrier_sync (vm);
@@ -88,26 +101,12 @@ clear_counters (perfmon_main_t * pm)
if (stat_vm == 0)
continue;
- nm = &stat_vm->node_main;
-
- /* Clear the node runtime perfmon counters */
- for (i = 0; i < vec_len (nm->nodes); i++)
- {
- n = nm->nodes[i];
- vlib_node_sync_stats (stat_vm, n);
- }
+ pt = pm->threads[j];
+ len = vec_len (pt->counters);
+ if (!len)
+ continue;
- /* And clear the node perfmon counters */
- for (i = 0; i < vec_len (nm->nodes); i++)
- {
- n = nm->nodes[i];
- n->stats_total.perf_counter0_ticks = 0;
- n->stats_total.perf_counter1_ticks = 0;
- n->stats_total.perf_counter_vectors = 0;
- n->stats_last_clear.perf_counter0_ticks = 0;
- n->stats_last_clear.perf_counter1_ticks = 0;
- n->stats_last_clear.perf_counter_vectors = 0;
- }
+ clib_memset (pt->counters, 0, len * sizeof (pt->counters[0]));
}
vlib_worker_thread_barrier_release (vm);
}
@@ -121,19 +120,20 @@ enable_current_events (perfmon_main_t * pm)
perfmon_event_config_t *c;
vlib_main_t *vm = vlib_get_main ();
u32 my_thread_index = vm->thread_index;
+ perfmon_thread_t *pt = pm->threads[my_thread_index];
u32 index;
int i, limit = 1;
int cpu;
+ vlib_node_runtime_perf_callback_data_t cbdata = { 0 };
+ cbdata.fp = read_current_perf_counters;
+ cbdata.u[0].v = pt;
+ cbdata.u[1].v = vm;
if ((pm->current_event + 1) < vec_len (pm->single_events_to_collect))
limit = 2;
for (i = 0; i < limit; i++)
{
- vec_validate (pm->pm_fds[i], vec_len (vlib_mains) - 1);
- vec_validate (pm->perf_event_pages[i], vec_len (vlib_mains) - 1);
- vec_validate (pm->rdpmc_indices[i], vec_len (vlib_mains) - 1);
-
c = vec_elt_at_index (pm->single_events_to_collect,
pm->current_event + i);
@@ -184,8 +184,8 @@ enable_current_events (perfmon_main_t * pm)
if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
clib_unix_warning ("enable ioctl");
- pm->perf_event_pages[i][my_thread_index] = (void *) p;
- pm->pm_fds[i][my_thread_index] = fd;
+ pt->perf_event_pages[i] = (void *) p;
+ pt->pm_fds[i] = fd;
}
/*
@@ -194,9 +194,7 @@ enable_current_events (perfmon_main_t * pm)
*/
for (i = 0; i < limit; i++)
{
- p =
- (struct perf_event_mmap_page *)
- pm->perf_event_pages[i][my_thread_index];
+ p = (struct perf_event_mmap_page *) pt->perf_event_pages[i];
/*
* Software event counters - and others not capable of being
@@ -208,16 +206,12 @@ enable_current_events (perfmon_main_t * pm)
else
index = p->index - 1;
- pm->rdpmc_indices[i][my_thread_index] = index;
+ pt->rdpmc_indices[i] = index;
}
pm->n_active = i;
/* Enable the main loop counter snapshot mechanism */
- clib_callback_enable_disable
- (vm->vlib_node_runtime_perf_counter_cbs,
- vm->vlib_node_runtime_perf_counter_cb_tmp,
- vm->worker_thread_main_loop_callback_lock,
- read_current_perf_counters, 1 /* enable */ );
+ clib_callback_data_add (&vm->vlib_node_runtime_perf_callbacks, cbdata);
}
static void
@@ -225,35 +219,30 @@ disable_events (perfmon_main_t * pm)
{
vlib_main_t *vm = vlib_get_main ();
u32 my_thread_index = vm->thread_index;
+ perfmon_thread_t *pt = pm->threads[my_thread_index];
int i;
/* Stop main loop collection */
- clib_callback_enable_disable
- (vm->vlib_node_runtime_perf_counter_cbs,
- vm->vlib_node_runtime_perf_counter_cb_tmp,
- vm->worker_thread_main_loop_callback_lock,
- read_current_perf_counters, 0 /* enable */ );
+ clib_callback_data_remove (&vm->vlib_node_runtime_perf_callbacks,
+ read_current_perf_counters);
for (i = 0; i < pm->n_active; i++)
{
- if (pm->pm_fds[i][my_thread_index] == 0)
+ if (pt->pm_fds[i] < 0) /* -1 == not open, same sentinel perfmon_init stores */
continue;
- if (ioctl (pm->pm_fds[i][my_thread_index], PERF_EVENT_IOC_DISABLE, 0) <
- 0)
+ if (ioctl (pt->pm_fds[i], PERF_EVENT_IOC_DISABLE, 0) < 0)
clib_unix_warning ("disable ioctl");
- if (pm->perf_event_pages[i][my_thread_index])
+ if (pt->perf_event_pages[i])
{
- if (munmap (pm->perf_event_pages[i][my_thread_index],
- pm->page_size) < 0)
+ if (munmap (pt->perf_event_pages[i], pm->page_size) < 0)
clib_unix_warning ("munmap");
- pm->perf_event_pages[i][my_thread_index] = 0;
+ pt->perf_event_pages[i] = 0;
}
- (void) close (pm->pm_fds[i][my_thread_index]);
- pm->pm_fds[i][my_thread_index] = 0;
-
+ (void) close (pt->pm_fds[i]);
+ pt->pm_fds[i] = -1; /* mark closed; 0 is a valid descriptor number */
}
}
@@ -265,7 +254,7 @@ worker_thread_start_event (vlib_main_t * vm)
clib_callback_enable_disable (vm->worker_thread_main_loop_callbacks,
vm->worker_thread_main_loop_callback_tmp,
vm->worker_thread_main_loop_callback_lock,
- worker_thread_start_event, 0 /* enable */ );
+ worker_thread_start_event, 0 /* disable */ );
enable_current_events (pm);
}
@@ -276,7 +265,7 @@ worker_thread_stop_event (vlib_main_t * vm)
clib_callback_enable_disable (vm->worker_thread_main_loop_callbacks,
vm->worker_thread_main_loop_callback_tmp,
vm->worker_thread_main_loop_callback_lock,
- worker_thread_stop_event, 0 /* enable */ );
+ worker_thread_stop_event, 0 /* disable */ );
disable_events (pm);
}
@@ -329,14 +318,15 @@ scrape_and_clear_counters (perfmon_main_t * pm)
vlib_main_t *vm = pm->vlib_main;
vlib_main_t *stat_vm;
vlib_node_main_t *nm;
- vlib_node_t ***node_dups = 0;
- vlib_node_t **nodes;
- vlib_node_t *n;
+ perfmon_counters_t *ctr;
+ perfmon_counters_t *ctrs;
+ perfmon_counters_t **ctr_dups = 0;
+ perfmon_thread_t *pt;
perfmon_capture_t *c;
perfmon_event_config_t *current_event;
uword *p;
u8 *counter_name;
- u64 vectors_this_counter;
+ u32 len;
/* snapshoot the nodes, including pm counters */
vlib_worker_thread_barrier_sync (vm);
@@ -347,31 +337,16 @@ scrape_and_clear_counters (perfmon_main_t * pm)
if (stat_vm == 0)
continue;
- nm = &stat_vm->node_main;
-
- for (i = 0; i < vec_len (nm->nodes); i++)
- {
- n = nm->nodes[i];
- vlib_node_sync_stats (stat_vm, n);
- }
-
- nodes = 0;
- vec_validate (nodes, vec_len (nm->nodes) - 1);
- vec_add1 (node_dups, nodes);
-
- /* Snapshoot and clear the per-node perfmon counters */
- for (i = 0; i < vec_len (nm->nodes); i++)
+ pt = pm->threads[j];
+ len = vec_len (pt->counters);
+ ctrs = 0;
+ if (len)
{
- n = nm->nodes[i];
- nodes[i] = clib_mem_alloc (sizeof (*n));
- clib_memcpy_fast (nodes[i], n, sizeof (*n));
- n->stats_total.perf_counter0_ticks = 0;
- n->stats_total.perf_counter1_ticks = 0;
- n->stats_total.perf_counter_vectors = 0;
- n->stats_last_clear.perf_counter0_ticks = 0;
- n->stats_last_clear.perf_counter1_ticks = 0;
- n->stats_last_clear.perf_counter_vectors = 0;
+ vec_validate (ctrs, len - 1);
+ clib_memcpy (ctrs, pt->counters, len * sizeof (pt->counters[0]));
+ clib_memset (pt->counters, 0, len * sizeof (pt->counters[0]));
}
+ vec_add1 (ctr_dups, ctrs);
}
vlib_worker_thread_barrier_release (vm);
@@ -382,22 +357,21 @@ scrape_and_clear_counters (perfmon_main_t * pm)
if (stat_vm == 0)
continue;
- nodes = node_dups[j];
+ pt = pm->threads[j];
+ ctrs = ctr_dups[j];
- for (i = 0; i < vec_len (nodes); i++)
+ for (i = 0; i < vec_len (ctrs); i++)
{
u8 *capture_name;
- n = nodes[i];
+ ctr = &ctrs[i];
+ nm = &stat_vm->node_main;
- if (n->stats_total.perf_counter0_ticks == 0 &&
- n->stats_total.perf_counter1_ticks == 0)
- goto skip_this_node;
+ if (ctr->ticks[0] == 0 && ctr->ticks[1] == 0)
+ continue;
for (k = 0; k < 2; k++)
{
- u64 counter_value, counter_last_clear;
-
/*
* We collect 2 counters at once, except for the
* last counter when the user asks for an odd number of
@@ -407,20 +381,7 @@ scrape_and_clear_counters (perfmon_main_t * pm)
>= vec_len (pm->single_events_to_collect))
break;
- if (k == 0)
- {
- counter_value = n->stats_total.perf_counter0_ticks;
- counter_last_clear =
- n->stats_last_clear.perf_counter0_ticks;
- }
- else
- {
- counter_value = n->stats_total.perf_counter1_ticks;
- counter_last_clear =
- n->stats_last_clear.perf_counter1_ticks;
- }
-
- capture_name = format (0, "t%d-%v%c", j, n->name, 0);
+ capture_name = format (0, "t%d-%v%c", j, nm->nodes[i]->name, 0);
p = hash_get_mem (pm->capture_by_thread_and_node_name,
capture_name);
@@ -443,20 +404,15 @@ scrape_and_clear_counters (perfmon_main_t * pm)
current_event = pm->single_events_to_collect
+ pm->current_event + k;
counter_name = (u8 *) current_event->name;
- vectors_this_counter = n->stats_total.perf_counter_vectors -
- n->stats_last_clear.perf_counter_vectors;
vec_add1 (c->counter_names, counter_name);
- vec_add1 (c->counter_values,
- counter_value - counter_last_clear);
- vec_add1 (c->vectors_this_counter, vectors_this_counter);
+ vec_add1 (c->counter_values, ctr->ticks[k]);
+ vec_add1 (c->vectors_this_counter, ctr->vectors);
}
- skip_this_node:
- clib_mem_free (n);
}
- vec_free (nodes);
+ vec_free (ctrs);
}
- vec_free (node_dups);
+ vec_free (ctr_dups);
}
static void
@@ -492,9 +448,8 @@ handle_timeout (vlib_main_t * vm, perfmon_main_t * pm, f64 now)
for (i = 1; i < vec_len (vlib_mains); i++)
{
/* Has the worker actually stopped collecting data? */
- while (clib_callback_is_set
- (vlib_mains[i]->worker_thread_main_loop_callbacks,
- vlib_mains[i]->worker_thread_main_loop_callback_lock,
+ while (clib_callback_data_is_set
+ (&vm->vlib_node_runtime_perf_callbacks,
read_current_perf_counters))
{
if (vlib_time_now (vm) > deadman)
@@ -528,7 +483,7 @@ handle_timeout (vlib_main_t * vm, perfmon_main_t * pm, f64 now)
(vlib_mains[i]->worker_thread_main_loop_callbacks,
vlib_mains[i]->worker_thread_main_loop_callback_tmp,
vlib_mains[i]->worker_thread_main_loop_callback_lock,
- worker_thread_start_event, 1 /* enable */ );
+ worker_thread_start_event, 0 /* disable */ );
}
}
diff --git a/src/plugins/perfmon/perfmon_plugin.c b/src/plugins/perfmon/perfmon_plugin.c
new file mode 100644
index 00000000000..1d56573abd5
--- /dev/null
+++ b/src/plugins/perfmon/perfmon_plugin.c
@@ -0,0 +1,38 @@
+/*
+ * perfmon_plugin.c - perf monitor plugin
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = /* plugin registration record for the perfmon plugin */
+{
+ .version = VPP_BUILD_VER,
+ .description = "Performance Monitor",
+#if !defined(__x86_64__)
+ .default_disabled = 1, /* only set when the build target is not x86_64 */
+#endif
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/cli.c b/src/vlib/cli.c
index d14ea683fb9..2697c0ae083 100644
--- a/src/vlib/cli.c
+++ b/src/vlib/cli.c
@@ -39,6 +39,7 @@
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
+#include <vppinfra/callback.h>
#include <vppinfra/cpu.h>
#include <vppinfra/elog.h>
#include <unistd.h>
@@ -563,10 +564,16 @@ vlib_cli_dispatch_sub_commands (vlib_main_t * vm,
if (!c->is_mp_safe)
vlib_worker_thread_barrier_sync (vm);
+ if (PREDICT_FALSE (vec_len (cm->perf_counter_cbs) != 0))
+ clib_call_callbacks (cm->perf_counter_cbs, cm,
+ c - cm->commands, 0 /* before */ );
c->hit_counter++;
c_error = c->function (vm, si, c);
+ if (PREDICT_FALSE (vec_len (cm->perf_counter_cbs) != 0))
+ clib_call_callbacks (cm->perf_counter_cbs, cm,
+ c - cm->commands, 1 /* after */ );
if (!c->is_mp_safe)
vlib_worker_thread_barrier_release (vm);
diff --git a/src/vlib/cli.h b/src/vlib/cli.h
index df9ed7212bc..0a8ef9d78d7 100644
--- a/src/vlib/cli.h
+++ b/src/vlib/cli.h
@@ -132,7 +132,7 @@ typedef struct vlib_cli_command_t
typedef void (vlib_cli_output_function_t) (uword arg,
u8 * buffer, uword buffer_bytes);
-typedef struct
+typedef struct vlib_cli_main_t
{
/* Vector of all known commands. */
vlib_cli_command_t *commands;
@@ -146,6 +146,12 @@ typedef struct
/* index vector, to sort commands, etc. */
u32 *sort_vector;
+
+ /* performance counter callback */
+ void (**perf_counter_cbs)
+ (struct vlib_cli_main_t *, u32 id, int before_or_after);
+ void (**perf_counter_cbs_tmp)
+ (struct vlib_cli_main_t *, u32 id, int before_or_after);
} vlib_cli_main_t;
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vlib/init.h b/src/vlib/init.h
index fc638013efc..68ac2f36717 100644
--- a/src/vlib/init.h
+++ b/src/vlib/init.h
@@ -317,6 +317,19 @@ static void __vlib_rm_config_function_##x (void) \
_error; \
})
+#define vlib_call_main_loop_enter_function(vm, x) /* call main-loop-enter function x at most once; yields its clib_error_t * (0 if already called) */ \
+ ({ \
+ extern vlib_init_function_t * VLIB_MAIN_LOOP_ENTER_FUNCTION_SYMBOL (x); \
+ vlib_init_function_t * _f = VLIB_MAIN_LOOP_ENTER_FUNCTION_SYMBOL (x); \
+ clib_error_t * _error = 0; \
+ if (! hash_get (vm->init_functions_called, _f)) /* skip functions already recorded as called */ \
+ { \
+ hash_set1 (vm->init_functions_called, _f); /* record before calling, so the call is never repeated */ \
+ _error = _f (vm); \
+ } \
+ _error; /* value of the statement expression */ \
+ })
+
/* External functions. */
clib_error_t *vlib_call_all_init_functions (struct vlib_main_t *vm);
clib_error_t *vlib_call_all_config_functions (struct vlib_main_t *vm,
diff --git a/src/vlib/main.c b/src/vlib/main.c
index 8d7c6c09275..cb651e43a75 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -568,41 +568,29 @@ vlib_put_next_frame (vlib_main_t * vm,
never_inline void
vlib_node_runtime_sync_stats (vlib_main_t * vm,
vlib_node_runtime_t * r,
- uword n_calls, uword n_vectors, uword n_clocks,
- uword n_ticks0, uword n_ticks1)
+ uword n_calls, uword n_vectors, uword n_clocks)
{
vlib_node_t *n = vlib_get_node (vm, r->node_index);
n->stats_total.calls += n_calls + r->calls_since_last_overflow;
n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow;
n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow;
- n->stats_total.perf_counter0_ticks += n_ticks0 +
- r->perf_counter0_ticks_since_last_overflow;
- n->stats_total.perf_counter1_ticks += n_ticks1 +
- r->perf_counter1_ticks_since_last_overflow;
- n->stats_total.perf_counter_vectors += n_vectors +
- r->perf_counter_vectors_since_last_overflow;
n->stats_total.max_clock = r->max_clock;
n->stats_total.max_clock_n = r->max_clock_n;
r->calls_since_last_overflow = 0;
r->vectors_since_last_overflow = 0;
r->clocks_since_last_overflow = 0;
- r->perf_counter0_ticks_since_last_overflow = 0ULL;
- r->perf_counter1_ticks_since_last_overflow = 0ULL;
- r->perf_counter_vectors_since_last_overflow = 0ULL;
}
always_inline void __attribute__ ((unused))
vlib_process_sync_stats (vlib_main_t * vm,
vlib_process_t * p,
- uword n_calls, uword n_vectors, uword n_clocks,
- uword n_ticks0, uword n_ticks1)
+ uword n_calls, uword n_vectors, uword n_clocks)
{
vlib_node_runtime_t *rt = &p->node_runtime;
vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks,
- n_ticks0, n_ticks1);
+ vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks);
n->stats_total.suspends += p->n_suspends;
p->n_suspends = 0;
}
@@ -628,7 +616,7 @@ vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n)
vec_elt_at_index (vm->node_main.nodes_by_type[n->type],
n->runtime_index);
- vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0, 0, 0);
+ vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0);
/* Sync up runtime next frame vector counters with main node structure. */
{
@@ -648,32 +636,21 @@ always_inline u32
vlib_node_runtime_update_stats (vlib_main_t * vm,
vlib_node_runtime_t * node,
uword n_calls,
- uword n_vectors, uword n_clocks,
- uword n_ticks0, uword n_ticks1)
+ uword n_vectors, uword n_clocks)
{
u32 ca0, ca1, v0, v1, cl0, cl1, r;
- u32 ptick00, ptick01, ptick10, ptick11, pvec0, pvec1;
cl0 = cl1 = node->clocks_since_last_overflow;
ca0 = ca1 = node->calls_since_last_overflow;
v0 = v1 = node->vectors_since_last_overflow;
- ptick00 = ptick01 = node->perf_counter0_ticks_since_last_overflow;
- ptick10 = ptick11 = node->perf_counter1_ticks_since_last_overflow;
- pvec0 = pvec1 = node->perf_counter_vectors_since_last_overflow;
ca1 = ca0 + n_calls;
v1 = v0 + n_vectors;
cl1 = cl0 + n_clocks;
- ptick01 = ptick00 + n_ticks0;
- ptick11 = ptick10 + n_ticks1;
- pvec1 = pvec0 + n_vectors;
node->calls_since_last_overflow = ca1;
node->clocks_since_last_overflow = cl1;
node->vectors_since_last_overflow = v1;
- node->perf_counter0_ticks_since_last_overflow = ptick01;
- node->perf_counter1_ticks_since_last_overflow = ptick11;
- node->perf_counter_vectors_since_last_overflow = pvec1;
node->max_clock_n = node->max_clock > n_clocks ?
node->max_clock_n : n_vectors;
@@ -681,42 +658,25 @@ vlib_node_runtime_update_stats (vlib_main_t * vm,
r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors);
- if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0) || (ptick01 < ptick00)
- || (ptick11 < ptick10) || (pvec1 < pvec0))
+ if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0))
{
node->calls_since_last_overflow = ca0;
node->clocks_since_last_overflow = cl0;
node->vectors_since_last_overflow = v0;
- node->perf_counter0_ticks_since_last_overflow = ptick00;
- node->perf_counter1_ticks_since_last_overflow = ptick10;
- node->perf_counter_vectors_since_last_overflow = pvec0;
- vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks,
- n_ticks0, n_ticks1);
+ vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks);
}
return r;
}
always_inline void
-vlib_node_runtime_perf_counter (vlib_main_t * vm, u64 * pmc0, u64 * pmc1,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, int before_or_after)
-{
- *pmc0 = 0;
- *pmc1 = 0;
- if (PREDICT_FALSE (vec_len (vm->vlib_node_runtime_perf_counter_cbs) != 0))
- clib_call_callbacks (vm->vlib_node_runtime_perf_counter_cbs, vm, pmc0,
- pmc1, node, frame, before_or_after);
-}
-
-always_inline void
vlib_process_update_stats (vlib_main_t * vm,
vlib_process_t * p,
uword n_calls, uword n_vectors, uword n_clocks)
{
vlib_node_runtime_update_stats (vm, &p->node_runtime,
- n_calls, n_vectors, n_clocks, 0ULL, 0ULL);
+ n_calls, n_vectors, n_clocks);
}
static clib_error_t *
@@ -1166,7 +1126,6 @@ dispatch_node (vlib_main_t * vm,
u64 t;
vlib_node_main_t *nm = &vm->node_main;
vlib_next_frame_t *nf;
- u64 pmc_before[2], pmc_after[2], pmc_delta[2];
if (CLIB_DEBUG > 0)
{
@@ -1206,8 +1165,8 @@ dispatch_node (vlib_main_t * vm,
last_time_stamp, frame ? frame->n_vectors : 0,
/* is_after */ 0);
- vlib_node_runtime_perf_counter (vm, &pmc_before[0], &pmc_before[1],
- node, frame, 0 /* before */ );
+ vlib_node_runtime_perf_counter (vm, node, frame, 0, last_time_stamp,
+ VLIB_NODE_RUNTIME_PERF_BEFORE);
/*
* Turn this on if you run into
@@ -1237,15 +1196,8 @@ dispatch_node (vlib_main_t * vm,
t = clib_cpu_time_now ();
- /*
- * To validate accounting: pmc_delta = t - pmc_before;
- * perf ticks should equal clocks/pkt...
- */
- vlib_node_runtime_perf_counter (vm, &pmc_after[0], &pmc_after[1], node,
- frame, 1 /* after */ );
-
- pmc_delta[0] = pmc_after[0] - pmc_before[0];
- pmc_delta[1] = pmc_after[1] - pmc_before[1];
+ vlib_node_runtime_perf_counter (vm, node, frame, n, t,
+ VLIB_NODE_RUNTIME_PERF_AFTER);
vlib_elog_main_loop_event (vm, node->node_index, t, n, 1 /* is_after */ );
@@ -1255,9 +1207,7 @@ dispatch_node (vlib_main_t * vm,
v = vlib_node_runtime_update_stats (vm, node,
/* n_calls */ 1,
/* n_vectors */ n,
- /* n_clocks */ t - last_time_stamp,
- pmc_delta[0] /* PMC0 */ ,
- pmc_delta[1] /* PMC1 */ );
+ /* n_clocks */ t - last_time_stamp);
/* When in interrupt mode and vector rate crosses threshold switch to
polling mode. */
@@ -1579,6 +1529,9 @@ dispatch_process (vlib_main_t * vm,
old_process_index = nm->current_process_index;
nm->current_process_index = node->runtime_index;
+ vlib_node_runtime_perf_counter (vm, node_runtime, f, 0, last_time_stamp,
+ VLIB_NODE_RUNTIME_PERF_BEFORE);
+
n_vectors = vlib_process_startup (vm, p, f);
nm->current_process_index = old_process_index;
@@ -1618,6 +1571,9 @@ dispatch_process (vlib_main_t * vm,
vlib_elog_main_loop_event (vm, node_runtime->node_index, t, is_suspend,
/* is_after */ 1);
+ vlib_node_runtime_perf_counter (vm, node_runtime, f, n_vectors, t,
+ VLIB_NODE_RUNTIME_PERF_AFTER);
+
vlib_process_update_stats (vm, p,
/* n_calls */ !is_suspend,
/* n_vectors */ n_vectors,
@@ -1668,6 +1624,9 @@ dispatch_suspended_process (vlib_main_t * vm,
/* Save away current process for suspend. */
nm->current_process_index = node->runtime_index;
+ vlib_node_runtime_perf_counter (vm, node_runtime, f, 0, last_time_stamp,
+ VLIB_NODE_RUNTIME_PERF_BEFORE);
+
n_vectors = vlib_process_resume (vm, p);
t = clib_cpu_time_now ();
@@ -1701,6 +1660,9 @@ dispatch_suspended_process (vlib_main_t * vm,
vlib_elog_main_loop_event (vm, node_runtime->node_index, t, !is_suspend,
/* is_after */ 1);
+ vlib_node_runtime_perf_counter (vm, node_runtime, f, n_vectors, t,
+ VLIB_NODE_RUNTIME_PERF_AFTER);
+
vlib_process_update_stats (vm, p,
/* n_calls */ !is_suspend,
/* n_vectors */ n_vectors,
@@ -1831,11 +1793,14 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
else
frame_queue_check_counter--;
}
- if (PREDICT_FALSE (vec_len (vm->worker_thread_main_loop_callbacks)))
- clib_call_callbacks (vm->worker_thread_main_loop_callbacks, vm);
}
+ if (PREDICT_FALSE (vec_len (vm->worker_thread_main_loop_callbacks)))
+ clib_call_callbacks (vm->worker_thread_main_loop_callbacks, vm,
+ cpu_time_now);
+
/* Process pre-input nodes. */
+ cpu_time_now = clib_cpu_time_now ();
vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
cpu_time_now = dispatch_node (vm, n,
VLIB_NODE_TYPE_PRE_INPUT,
diff --git a/src/vlib/main.h b/src/vlib/main.h
index 2e070aa6d64..f7a4a1c912a 100644
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -40,6 +40,7 @@
#ifndef included_vlib_main_h
#define included_vlib_main_h
+#include <vppinfra/callback_data.h>
#include <vppinfra/elog.h>
#include <vppinfra/format.h>
#include <vppinfra/longjmp.h>
@@ -80,6 +81,42 @@ typedef struct
u32 trace_filter_set_index;
} vlib_trace_filter_t;
+typedef enum /* reason a node-runtime perf callback is being invoked */
+{
+ VLIB_NODE_RUNTIME_PERF_BEFORE, /* passed just before a node/process is dispatched */
+ VLIB_NODE_RUNTIME_PERF_AFTER, /* passed after dispatch, with the vector count and timestamp */
+ VLIB_NODE_RUNTIME_PERF_RESET, /* passed with frame, packets and time all 0; callbacks may treat it specially */
+} vlib_node_runtime_perf_call_type_t;
+
+typedef struct /* per-invocation arguments handed to every node-runtime perf callback */
+{
+ struct vlib_main_t *vm; /* vlib main the callback set belongs to */
+ vlib_node_runtime_t *node; /* node runtime being measured */
+ vlib_frame_t *frame; /* frame being dispatched; callers may pass 0 */
+ uword packets; /* the "n" argument of vlib_node_runtime_perf_counter */
+ u64 cpu_time_now; /* the "t" argument of vlib_node_runtime_perf_counter */
+ vlib_node_runtime_perf_call_type_t call_type; /* BEFORE / AFTER / RESET */
+} vlib_node_runtime_perf_callback_args_t;
+
+struct vlib_node_runtime_perf_callback_data_t; /* forward declaration: the fp typedef below refers to it */
+
+typedef void (*vlib_node_runtime_perf_callback_fp_t) /* callback signature: (own set entry, call arguments) */
+ (struct vlib_node_runtime_perf_callback_data_t * data,
+ vlib_node_runtime_perf_callback_args_t * args);
+
+typedef struct vlib_node_runtime_perf_callback_data_t /* one entry of the perf callback set */
+{
+ vlib_node_runtime_perf_callback_fp_t fp; /* function to call; also the key used to find the entry */
+ union
+ {
+ void *v;
+ u64 u;
+ } u[3]; /* three slots, each a pointer or a u64; presumably private to the registrant -- not interpreted in the code shown */
+} vlib_node_runtime_perf_callback_data_t;
+
+clib_callback_data_typedef (vlib_node_runtime_perf_callback_set_t, /* set type holding vectors of the entries above */
+ vlib_node_runtime_perf_callback_data_t);
+
typedef struct vlib_main_t
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -112,14 +149,8 @@ typedef struct vlib_main_t
u32 internal_node_last_vectors_per_main_loop;
/* Main loop hw / sw performance counters */
- void (**vlib_node_runtime_perf_counter_cbs) (struct vlib_main_t *,
- u64 *, u64 *,
- vlib_node_runtime_t *,
- vlib_frame_t *, int);
- void (**vlib_node_runtime_perf_counter_cb_tmp) (struct vlib_main_t *,
- u64 *, u64 *,
- vlib_node_runtime_t *,
- vlib_frame_t *, int);
+ vlib_node_runtime_perf_callback_set_t vlib_node_runtime_perf_callbacks;
+
/* Every so often we switch to the next counter. */
#define VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE 7
@@ -234,9 +265,10 @@ typedef struct vlib_main_t
u8 **argv;
/* Top of (worker) dispatch loop callback */
- void (**volatile worker_thread_main_loop_callbacks) (struct vlib_main_t *);
+ void (**volatile worker_thread_main_loop_callbacks)
+ (struct vlib_main_t *, u64 t);
void (**volatile worker_thread_main_loop_callback_tmp)
- (struct vlib_main_t *);
+ (struct vlib_main_t *, u64 t);
clib_spinlock_t worker_thread_main_loop_callback_lock;
/* debugging */
@@ -268,6 +300,12 @@ typedef struct vlib_main_t
/* Earliest barrier can be closed again */
f64 barrier_no_close_before;
+ /* Barrier counter callback */
+ void (**volatile barrier_perf_callbacks)
+ (struct vlib_main_t *, u64 t, int leave);
+ void (**volatile barrier_perf_callbacks_tmp)
+ (struct vlib_main_t *, u64 t, int leave);
+
/* Need to check the frame queues */
volatile uword check_frame_queues;
@@ -399,6 +437,27 @@ vlib_last_vectors_per_main_loop (vlib_main_t * vm)
return vm->internal_node_last_vectors_per_main_loop;
}
+always_inline void
+vlib_node_runtime_perf_counter (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, uword n, u64 t,
+ vlib_node_runtime_perf_call_type_t call_type)
+{ /* Run every callback registered in vm->vlib_node_runtime_perf_callbacks; n and t are forwarded as args.packets / args.cpu_time_now. */
+ vlib_node_runtime_perf_callback_data_t *v =
+ clib_callback_data_check_and_get (&vm->vlib_node_runtime_perf_callbacks); /* also applies any pending set update */
+ if (vec_len (v)) /* nothing registered: skip building the argument block */
+ {
+ vlib_node_runtime_perf_callback_args_t args = {
+ .vm = vm,
+ .node = node,
+ .frame = frame,
+ .packets = n,
+ .cpu_time_now = t,
+ .call_type = call_type,
+ };
+ clib_callback_data_call_vec (v, &args); /* each callback gets its own entry plus &args */
+ }
+}
+
always_inline void vlib_set_queue_signal_callback
(vlib_main_t * vm, void (*fp) (vlib_main_t *))
{
diff --git a/src/vlib/node.h b/src/vlib/node.h
index 9c4cadd56f7..f7155aeda86 100644
--- a/src/vlib/node.h
+++ b/src/vlib/node.h
@@ -235,9 +235,6 @@ typedef struct
u64 calls, vectors, clocks, suspends;
u64 max_clock;
u64 max_clock_n;
- u64 perf_counter0_ticks;
- u64 perf_counter1_ticks;
- u64 perf_counter_vectors;
} vlib_node_stats_t;
#define foreach_vlib_node_state \
@@ -484,10 +481,6 @@ typedef struct vlib_node_runtime_t
u32 vectors_since_last_overflow; /**< Number of vector elements
processed by this node. */
- u32 perf_counter0_ticks_since_last_overflow; /**< Perf counter 0 ticks */
- u32 perf_counter1_ticks_since_last_overflow; /**< Perf counter 1 ticks */
- u32 perf_counter_vectors_since_last_overflow; /**< Perf counter vectors */
-
u32 next_frame_index; /**< Start of next frames for this
node. */
diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h
index 89f212374e9..dfeba17ab09 100644
--- a/src/vlib/node_funcs.h
+++ b/src/vlib/node_funcs.h
@@ -201,6 +201,10 @@ vlib_node_set_state (vlib_main_t * vm, u32 node_index,
nm->input_node_counts_by_state[new_state] += 1;
}
+ if (PREDICT_FALSE (r->state == VLIB_NODE_STATE_DISABLED))
+ vlib_node_runtime_perf_counter (vm, r, 0, 0, 0,
+ VLIB_NODE_RUNTIME_PERF_RESET);
+
n->state = new_state;
r->state = new_state;
}
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index a8c1a1a207c..4df550e61fb 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -700,6 +700,9 @@ start_workers (vlib_main_t * vm)
clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
vm->elog_main.lock[0] = 0;
+ clib_callback_data_init (&vm->vlib_node_runtime_perf_callbacks,
+ &vm->worker_thread_main_loop_callback_lock);
+
if (n_vlib_mains > 1)
{
/* Replace hand-crafted length-1 vector with a real vector */
@@ -734,6 +737,7 @@ start_workers (vlib_main_t * vm)
vm->barrier_no_close_before = 0;
worker_thread_index = 1;
+ clib_spinlock_init (&vm->worker_thread_main_loop_callback_lock);
for (i = 0; i < vec_len (tm->registrations); i++)
{
@@ -790,6 +794,11 @@ start_workers (vlib_main_t * vm)
_vec_len (vm_clone->pending_rpc_requests) = 0;
clib_memset (&vm_clone->random_buffer, 0,
sizeof (vm_clone->random_buffer));
+ clib_spinlock_init
+ (&vm_clone->worker_thread_main_loop_callback_lock);
+ clib_callback_data_init
+ (&vm_clone->vlib_node_runtime_perf_callbacks,
+ &vm_clone->worker_thread_main_loop_callback_lock);
nm = &vlib_mains[0]->node_main;
nm_clone = &vm_clone->node_main;
@@ -1466,6 +1475,10 @@ vlib_worker_thread_barrier_sync_int (vlib_main_t * vm, const char *func_name)
return;
}
+ if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0))
+ clib_call_callbacks (vm->barrier_perf_callbacks, vm,
+ vm->clib_time.last_cpu_time, 0 /* enter */ );
+
/*
* Need data to decide if we're working hard enough to honor
* the barrier hold-down timer.
@@ -1629,6 +1642,9 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
barrier_trace_release (t_entry, t_closed_total, t_update_main);
+ if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0))
+ clib_call_callbacks (vm->barrier_perf_callbacks, vm,
+ vm->clib_time.last_cpu_time, 1 /* leave */ );
}
/*
diff --git a/src/vlib/threads.h b/src/vlib/threads.h
index c1188cea933..e8d416997b0 100644
--- a/src/vlib/threads.h
+++ b/src/vlib/threads.h
@@ -16,6 +16,7 @@
#define included_vlib_threads_h
#include <vlib/main.h>
+#include <vppinfra/callback.h>
#include <linux/sched.h>
extern vlib_main_t **vlib_mains;
@@ -400,6 +401,10 @@ vlib_worker_thread_barrier_check (void)
u32 thread_index = vm->thread_index;
f64 t = vlib_time_now (vm);
+ if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0))
+ clib_call_callbacks (vm->barrier_perf_callbacks, vm,
+ vm->clib_time.last_cpu_time, 0 /* enter */ );
+
if (PREDICT_FALSE (vlib_worker_threads->barrier_elog_enabled))
{
vlib_worker_thread_t *w = vlib_worker_threads + thread_index;
@@ -498,6 +503,10 @@ vlib_worker_thread_barrier_check (void)
ed->thread_index = thread_index;
ed->duration = (int) (1000000.0 * t);
}
+
+ if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0))
+ clib_call_callbacks (vm->barrier_perf_callbacks, vm,
+ vm->clib_time.last_cpu_time, 1 /* leave */ );
}
}
diff --git a/src/vlibapi/api_common.h b/src/vlibapi/api_common.h
index 86b1c5ac3ee..915ddabaca1 100644
--- a/src/vlibapi/api_common.h
+++ b/src/vlibapi/api_common.h
@@ -224,7 +224,7 @@ typedef struct
} api_version_t;
/** API main structure, used by both vpp and binary API clients */
-typedef struct
+typedef struct api_main_t
{
/** Message handler vector */
void (**msg_handlers) (void *);
@@ -374,6 +374,12 @@ typedef struct
elog_main_t *elog_main;
int elog_trace_api_messages;
+ /** performance counter callback **/
+ void (**perf_counter_cbs)
+ (struct api_main_t *, u32 id, int before_or_after);
+ void (**perf_counter_cbs_tmp)
+ (struct api_main_t *, u32 id, int before_or_after);
+
} api_main_t;
extern __thread api_main_t *my_api_main;
diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c
index caad6e54828..5e715d6f829 100644
--- a/src/vlibapi/api_shared.c
+++ b/src/vlibapi/api_shared.c
@@ -30,6 +30,7 @@
#include <vlib/unix/unix.h>
#include <vlibapi/api.h>
#include <vppinfra/elog.h>
+#include <vppinfra/callback.h>
/* *INDENT-OFF* */
api_main_t api_global_main =
@@ -493,7 +494,15 @@ msg_handler_internal (api_main_t * am,
(*endian_fp) (the_msg);
}
+ if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0))
+ clib_call_callbacks (am->perf_counter_cbs, am, id,
+ 0 /* before */ );
+
(*am->msg_handlers[id]) (the_msg);
+
+ if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0))
+ clib_call_callbacks (am->perf_counter_cbs, am, id,
+ 1 /* after */ );
if (!am->is_mp_safe[id])
vl_msg_api_barrier_release ();
}
@@ -620,8 +629,13 @@ vl_msg_api_handler_with_vm_node (api_main_t * am, svm_region_t * vlib_rp,
endian_fp = am->msg_endian_handlers[id];
(*endian_fp) (the_msg);
}
+ if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0))
+ clib_call_callbacks (am->perf_counter_cbs, am, id, 0 /* before */ );
(*handler) (the_msg, vm, node);
+
+ if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0))
+ clib_call_callbacks (am->perf_counter_cbs, am, id, 1 /* after */ );
if (is_private)
{
am->vlib_rp = old_vlib_rp;
diff --git a/src/vnet/interface.c b/src/vnet/interface.c
index dfefdbac921..6d5b3561f19 100644
--- a/src/vnet/interface.c
+++ b/src/vnet/interface.c
@@ -872,6 +872,8 @@ vnet_register_interface (vnet_main_t * vnm,
foreach_vlib_main ({
nrt = vlib_node_get_runtime (this_vlib_main, hw->output_node_index);
nrt->function = node->function;
+ vlib_node_runtime_perf_counter (this_vlib_main, nrt, 0, 0, 0,
+ VLIB_NODE_RUNTIME_PERF_RESET);
});
/* *INDENT-ON* */
@@ -882,6 +884,8 @@ vnet_register_interface (vnet_main_t * vnm,
foreach_vlib_main ({
nrt = vlib_node_get_runtime (this_vlib_main, hw->tx_node_index);
nrt->function = node->function;
+ vlib_node_runtime_perf_counter (this_vlib_main, nrt, 0, 0, 0,
+ VLIB_NODE_RUNTIME_PERF_RESET);
});
/* *INDENT-ON* */
diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt
index a10f335dd52..8648275e0da 100644
--- a/src/vppinfra/CMakeLists.txt
+++ b/src/vppinfra/CMakeLists.txt
@@ -102,6 +102,7 @@ set(VPPINFRA_HEADERS
byte_order.h
cache.h
callback.h
+ callback_data.h
clib_error.h
clib.h
cpu.h
diff --git a/src/vppinfra/callback.h b/src/vppinfra/callback.h
index 595d69d72ab..a938ea326c9 100644
--- a/src/vppinfra/callback.h
+++ b/src/vppinfra/callback.h
@@ -70,12 +70,11 @@ do { \
* Note: fp exists to shut up gcc-6, which \
* produces a warning not seen with gcc-7 or 8 \
*/ \
- void (*fp)(void *a1, ...); \
+ typeof (h) h_ = (h); \
int i; \
- for (i = 0; i < vec_len (h); i++) \
+ for (i = 0; i < vec_len (h_); i++) \
{ \
- fp = (void *)(h[i]); \
- (*fp) (__VA_ARGS__); \
+ (h_[i]) (__VA_ARGS__); \
} \
} while (0);
diff --git a/src/vppinfra/callback_data.h b/src/vppinfra/callback_data.h
new file mode 100644
index 00000000000..9a1ad0a9778
--- /dev/null
+++ b/src/vppinfra/callback_data.h
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** @file
+ * @brief Callback multiplex scheme
+ */
+
+#ifndef included_callback_data_h
+#define included_callback_data_h
+#include <vppinfra/clib.h>
+
+/** @brief Declare and define a callback set type
+ * @param set_t_ The set type to define (used as both struct tag and typedef name)
+ * @param cb_t_ The callback entry type; the other macros expect it to have an fp member
+ */
+#define clib_callback_data_typedef(set_t_, cb_t_) \
+typedef struct set_t_ \
+{ \
+ cb_t_* curr; /* vector returned for iteration by clib_callback_data_check_and_get */ \
+ cb_t_* volatile next; /* pending replacement for curr, built by add/remove/swap/ensure; 0 if none */ \
+ cb_t_* spare; /* emptied previous curr, reused as the base of the next pending vector */ \
+ clib_spinlock_t* lock; /* lock pointer supplied to clib_callback_data_init */ \
+} set_t_
+
+/** @brief Initialize a callback set to empty (vectors already held are not freed)
+ * @param set_ The callback set to initialize
+ * @param lock_ The lock to use, if any; only the pointer is stored
+ */
+#define clib_callback_data_init(set_,lock_) \
+do { \
+ (set_)->lock = (lock_); \
+ (set_)->curr = 0; \
+ (set_)->next = 0; \
+ (set_)->spare = 0; \
+} while (0)
+
+/** @brief Add a callback to the specified callback set
+ * @param set_ The callback set
+ * @param value_ The callback entry to append (no check for duplicates)
+ *
+ * Add a callback to the indicated callback set. If the set is
+ * currently being iterated, then the change will be applied after the
+ * current full iteration, and prior to the next full iteration.
+ */
+#define clib_callback_data_add(set_,value_) \
+do { \
+ clib_spinlock_lock_if_init ((set_)->lock); \
+ typeof ((set_)->next) next_ = (set_)->next; \
+ if (PREDICT_TRUE (next_ == 0)) /* no update pending: build one from spare + a copy of curr */ \
+ { \
+ next_ = (set_)->spare; \
+ (set_)->spare = 0; \
+ vec_append (next_, (set_)->curr); \
+ } \
+ u32 sz_ = vec_len (next_); \
+ vec_validate (next_, sz_); /* grow by one element */ \
+ next_[sz_] = (value_); \
+ (set_)->next = next_; /* publish; picked up by clib_callback_data_check_and_get */ \
+ clib_spinlock_unlock_if_init ((set_)->lock); \
+} while (0)
+
+/** @brief Remove a callback from the specified callback set
+ * @param set_ The callback set
+ * @param fp_ The current callback function
+ * @return 1 if an entry was removed, 0 if not (only the first match is removed)
+ *
+ * Remove a callback from the indicated callback set. Idempotent. If
+ * the set is currently being iterated, then the change will be applied
+ * after the current full iteration, and prior to the next full
+ * iteration.
+ */
+#define clib_callback_data_remove(set_,fp_) \
+({ \
+ int found_ = 0; \
+ clib_spinlock_lock_if_init ((set_)->lock); \
+ typeof ((set_)->next) next_ = (set_)->next; \
+ if (PREDICT_TRUE (next_ == 0)) /* no update pending: build one from spare + a copy of curr */ \
+ { \
+ next_ = (set_)->spare; \
+ (set_)->spare = 0; \
+ vec_append (next_, (set_)->curr); \
+ } \
+ u32 sz_ = vec_len (next_); \
+ u32 i_; \
+ for (i_ = 0; i_ < sz_; i_++) \
+ if (next_[i_].fp == (fp_)) /* entries are matched by function pointer */ \
+ { \
+ vec_delete (next_, 1, i_); \
+ found_ = 1; \
+ break; \
+ } \
+ (set_)->next = next_; /* published even when nothing matched */ \
+ clib_spinlock_unlock_if_init ((set_)->lock); \
+ found_; \
+})
+
+/** @brief Swap a callback in the specified callback set
+ * @param set_ The callback set
+ * @param fp_ The current callback function
+ * @param value_ The entry that replaces the first one whose fp equals fp_
+ * @return 1 if the function was swapped, 0 if not
+ *
+ * Swap a callback in the indicated callback set. If the callback is
+ * not found, then the set contents are left unchanged. If the set is
+ * currently being iterated, then the change will be applied after the
+ * current full iteration, and prior to the next full iteration.
+ */
+#define clib_callback_data_swap(set_,fp_,value_) \
+({ \
+ int found_ = 0; \
+ clib_spinlock_lock_if_init ((set_)->lock); \
+ typeof ((set_)->next) next_ = (set_)->next; \
+ if (PREDICT_TRUE (next_ == 0)) /* no update pending: build one from spare + a copy of curr */ \
+ { \
+ next_ = (set_)->spare; \
+ (set_)->spare = 0; \
+ vec_append (next_, (set_)->curr); \
+ } \
+ u32 sz_ = vec_len (next_); \
+ u32 i_; \
+ for (i_ = 0; i_ < sz_; i_++) \
+ if (next_[i_].fp == (fp_)) /* entries are matched by function pointer */ \
+ { \
+ next_[i_] = (value_); \
+ found_ = 1; \
+ break; \
+ } \
+ (set_)->next = next_; /* published even when nothing matched */ \
+ clib_spinlock_unlock_if_init ((set_)->lock); \
+ found_; \
+})
+
+/** @brief Ensure a callback is in the specified callback set
+ * @param set_ The callback set
+ * @param value_ The callback entry to store; its fp member is the lookup key
+ * (This macro is a do/while statement and yields no value.)
+ *
+ * Add or swap a callback in the indicated callback set. If the
+ * callback is already in the set, it is replaced. If the callback is
+ * not found, then it is added. If the set is currently being
+ * iterated, then the change will be applied after the current full
+ * iteration, and prior to the next full iteration.
+ */
+#define clib_callback_data_ensure(set_,value_) \
+do { \
+ int found_ = 0; \
+ clib_spinlock_lock_if_init ((set_)->lock); \
+ typeof ((set_)->next) next_ = (set_)->next; \
+ if (PREDICT_TRUE (next_ == 0)) /* no update pending: build one from spare + a copy of curr */ \
+ { \
+ next_ = (set_)->spare; \
+ (set_)->spare = 0; \
+ vec_append (next_, (set_)->curr); \
+ } \
+ u32 sz_ = vec_len (next_); \
+ u32 i_; \
+ for (i_ = 0; i_ < sz_; i_++) \
+ if (next_[i_].fp == (value_).fp) \
+ { \
+ found_ = 1; \
+ break; \
+ } \
+ if (!found_) /* loop ran to completion, so i_ == sz_: grow by one slot */ \
+ vec_validate (next_, i_); \
+ next_[i_] = (value_); /* overwrites the match, or fills the new slot */ \
+ (set_)->next = next_; \
+ clib_spinlock_unlock_if_init ((set_)->lock); \
+} while(0)
+
+/** @brief Enable/Disable the specified callback
+ * @param set_ The callback set
+ * @param fp_ The callback function
+ * @param ena_ nonzero to enable, 0 to disable
+ *
+ * Enable or disable a callback function, with no data. Enabling goes through clib_callback_data_add, so repeated enables append repeated entries.
+ */
+#define clib_callback_data_enable_disable(set_,fp_,ena_) \
+do { \
+ if (ena_) \
+ { \
+ typeof ((set_)->next[0]) data_ = { .fp = (fp_) }; /* members other than fp are zero-initialized */ \
+ clib_callback_data_add ((set_), data_); \
+ } \
+ else \
+ clib_callback_data_remove ((set_), (fp_)); /* result (found or not) is discarded */ \
+} while (0)
+
+/** @brief Get the value of a callback, if set.
+ * @param set_ The callback set
+ * @param fp_ The callback function
+ * @param v_ lvalue that receives a copy of the matching entry (untouched if none)
+ * @return 1 if the function is in the set, 0 if not
+ */
+#define clib_callback_data_get_value(set_,fp_,v_) \
+({ \
+ int found_ = 0; \
+ clib_spinlock_lock_if_init ((set_)->lock); \
+ typeof ((set_)->next) search_ = (set_)->next; /* prefer the pending vector: it reflects the latest updates */ \
+ if (PREDICT_TRUE (search_ == 0)) \
+ search_ = (set_)->curr; \
+ u32 sz_ = vec_len (search_); \
+ u32 i_; \
+ for (i_ = 0; i_ < sz_; i_++) \
+ if (search_[i_].fp == (fp_)) \
+ { \
+ (v_) = search_[i_]; /* index with the macro-local i_, not a caller-scope i */ \
+ found_ = 1; \
+ break; \
+ } \
+ clib_spinlock_unlock_if_init ((set_)->lock); \
+ found_; \
+})
+
+/** @brief Check if callback is set (in the pending vector if one exists, else in the current one)
+ * @param set_ The callback set
+ * @param fp_ The callback function
+ * @return 1 if the function is in the set, 0 if not
+ */
+#define clib_callback_data_is_set(set_,fp_) \
+({ \
+ int found_ = 0; \
+ clib_spinlock_lock_if_init ((set_)->lock); \
+ typeof ((set_)->next) search_ = (set_)->next; /* prefer the pending vector: it reflects the latest updates */ \
+ if (PREDICT_TRUE (search_ == 0)) \
+ search_ = (set_)->curr; \
+ u32 sz_ = vec_len (search_); \
+ u32 i_; \
+ for (i_ = 0; i_ < sz_; i_++) \
+ if (search_[i_].fp == (fp_)) \
+ { \
+ found_ = 1; \
+ break; \
+ } \
+ clib_spinlock_unlock_if_init ((set_)->lock); \
+ found_; \
+})
+
+/** @brief Check for and get current callback set
+ * @param set_ the callback set
+ * @return the callback vector that is current once any pending update
+ *         has been applied; the same value is stored in set_->curr
+ *
+ * When set_->next is zero the existing set_->curr is returned and no
+ * lock is taken. Otherwise, with the set's lock held via
+ * clib_spinlock_lock_if_init(), the old current vector has its length
+ * reset and is parked in set_->spare, set_->next becomes the current
+ * vector and set_->next is cleared. A pending vector of length zero is
+ * passed to vec_free() instead of being kept as-is.
+ *
+ * NOTE(review): set_->curr and set_->next are first read before the lock
+ * is acquired; presumably only one thread consumes a given set - confirm
+ * against callers.
+ */
+#define clib_callback_data_check_and_get(set_) \
+({ \
+ typeof ((set_)->curr) curr_ = (set_)->curr; \
+ if (PREDICT_FALSE ((set_)->next != 0)) \
+ { \
+ clib_spinlock_lock_if_init ((set_)->lock); \
+ vec_reset_length (curr_); \
+ (set_)->spare = curr_; /* old current vector is kept as the spare */ \
+ curr_ = (set_)->next; /* pending vector becomes the current one */ \
+ (set_)->next = 0; \
+ if (PREDICT_FALSE (0 == vec_len (curr_))) \
+ vec_free (curr_); \
+ (set_)->curr = curr_; \
+ clib_spinlock_unlock_if_init ((set_)->lock); \
+ } \
+ curr_; \
+})
+
+/** @brief Iterate and call a callback vector
+ * @param vec_ the callback vector
+ * @param varargs additional callback parameters
+ *
+ * Every entry's fp is invoked in index order with a pointer to the entry
+ * itself as first argument, followed by the additional parameters.
+ * vec_ is evaluated exactly once and bound to a local, so any pointer
+ * expression may be passed. The length is sampled once before the loop.
+ */
+#define clib_callback_data_call_vec(vec_, ...) \
+do { \
+  typeof (vec_) cb_vec_ = (vec_); \
+  u32 sz_ = vec_len (cb_vec_); \
+  u32 i_; \
+  for (i_ = 0; i_ < sz_; i_++) \
+    { \
+      /* prefetch the following entry while this one is being called */ \
+      CLIB_PREFETCH (&cb_vec_[i_ + 1], CLIB_CACHE_LINE_BYTES, STORE); \
+      (cb_vec_[i_].fp) (&cb_vec_[i_], __VA_ARGS__); \
+    } \
+} while (0)
+
+/** @brief Call the specified callback set
+ * @param set_ the callback set
+ * @param varargs additional callback parameters
+ *
+ * Obtains the current vector with clib_callback_data_check_and_get()
+ * and invokes each of its entries through clib_callback_data_call_vec().
+ */
+#define clib_callback_data_call(set_, ...) \
+do { \
+  typeof ((set_)->curr) v_ = clib_callback_data_check_and_get (set_); \
+  clib_callback_data_call_vec (v_, __VA_ARGS__); \
+} while (0)
+
+/** @brief Prefetch the current vector of a callback set
+ * @param set_ The callback set
+ *
+ * Issues a CLIB_PREFETCH (one cache line, STORE) on set_->curr when that
+ * pointer is non-zero; does nothing otherwise.
+ */
+#define clib_callback_data_prefetch(set_) \
+do { \
+  typeof ((set_)->curr) cur_ = (set_)->curr; \
+  if (PREDICT_FALSE (cur_ != 0)) \
+    CLIB_PREFETCH (cur_, CLIB_CACHE_LINE_BYTES, STORE); \
+} while (0)
+
+
+#endif /* included_callback_data_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */