/*
 * Copyright (c) 2020 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "vppinfra/string.h"
#include <vnet/vnet.h>

#include <vlibapi/api.h>
#include <vlibmemory/api.h>
#include <vppinfra/format.h>
#include <linux/limits.h>
#include <sys/ioctl.h>

#include <perfmon/perfmon.h>
#include <perfmon/intel/dispatch_wrapper.h>

vlib_node_function_t *perfmon_dispatch_wrappers[PERF_MAX_EVENTS + 1];

static_always_inline void
perfmon_read_pmcs (u64 *counters, u32 *indexes, u8 n_counters)
{
  /* the mmap page index is 1-based (0 means "event off"), while rdpmc
   * takes the 0-based hw counter number, hence the - 1 */
  for (int i = 0; i < n_counters; i++)
    counters[i] = _rdpmc (indexes[i] - 1);
}

static_always_inline uword
perfmon_dispatch_wrapper_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
                                 vlib_frame_t *frame, u8 n_events)
{
  perfmon_main_t *pm = &perfmon_main;
  perfmon_thread_runtime_t *rt =
    vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
  perfmon_node_stats_t *s =
    vec_elt_at_index (rt->node_stats, node->node_index);

  struct
  {
    u64 t[2][PERF_MAX_EVENTS];
  } samples;
  uword rv;

  clib_prefetch_load (s);

  /* sample the counters immediately before and after the node dispatch */
  perfmon_read_pmcs (&samples.t[0][0], &rt->indexes[0], n_events);
  rv = node->function (vm, node, frame);
  perfmon_read_pmcs (&samples.t[1][0], &rt->indexes[0], n_events);

  /* nothing was processed, so don't pollute the stats */
  if (rv == 0)
    return rv;

  s->n_calls += 1;
  s->n_packets += rv;

  for (int i = 0; i < n_events; i++)
    {
      if (!(rt->preserve_samples & 1 << i))
        {
          /* accumulate the delta across the dispatch */
          s->value[i] += samples.t[1][i] - samples.t[0][i];
        }
      else
        {
          /* this bundle wants the raw before/after samples preserved */
          s->t[0].value[i] = samples.t[0][i];
          s->t[1].value[i] = samples.t[1][i];
        }
    }

  return rv;
}

static_always_inline u32
perfmon_mmap_read_index (const struct perf_event_mmap_page *mmap_page)
{
  u32 idx;
  u32 seq;

  /* See the documentation in /usr/include/linux/perf_event.h for details;
   * the two important points are:
   * 1) if seq != mmap_page->lock after the read, the kernel was updating
   *    the page concurrently and we need to read it again
   * 2) if idx == 0, the perf event is currently turned off and we only
   *    need to read the kernel-updated 'offset'; otherwise we must also
   *    add the current hw counter value (hence rdpmc)
   */
  do
    {
      seq = mmap_page->lock;
      CLIB_COMPILER_BARRIER ();

      idx = mmap_page->index;

      CLIB_COMPILER_BARRIER ();
    }
  while (mmap_page->lock != seq);

  return idx;
}
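/* For reference only: a minimal sketch (not part of this plugin; the name
 * below is hypothetical) of the full self-monitoring read described in
 * /usr/include/linux/perf_event.h, where the kernel-maintained 'offset' is
 * combined with the live hw counter value when the event is on. This plugin
 * instead caches just the index (above) and defers the rdpmc to
 * perfmon_read_pmcs () on the dispatch fast path. Kept under #if 0 so it is
 * never compiled. */
#if 0
static u64
perfmon_mmap_read_value_sketch (const struct perf_event_mmap_page *mmap_page)
{
  u32 seq, idx;
  u64 count;

  do
    {
      seq = mmap_page->lock;
      CLIB_COMPILER_BARRIER ();

      idx = mmap_page->index;
      count = mmap_page->offset;
      if (idx) /* event is live on hw counter 'idx - 1' */
        count += _rdpmc (idx - 1);

      CLIB_COMPILER_BARRIER ();
    }
  while (mmap_page->lock != seq);

  return count;
}
#endif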
static_always_inline clib_error_t *
read_mmap_indexes (perfmon_bundle_t *b)
{
  perfmon_main_t *pm = &perfmon_main;
  for (int i = 0; i < vec_len (pm->thread_runtimes); i++)
    {
      perfmon_thread_runtime_t *tr;
      tr = vec_elt_at_index (pm->thread_runtimes, i);

      for (int j = 0; j < b->n_events; j++)
        {
          tr->indexes[j] = perfmon_mmap_read_index (tr->mmap_pages[j]);

          /* if a zero index is returned, generate an error */
          if (!tr->indexes[j])
            {
              return clib_error_return (0, "invalid rdpmc index");
            }
        }
    }
  return 0;
}

clib_error_t *
intel_config_dispatch_wrapper (perfmon_bundle_t *b,
                               vlib_node_function_t **dispatch_wrapper)
{
  clib_error_t *err = 0;

  if ((err = read_mmap_indexes (b)) != 0)
    return err;

  (*dispatch_wrapper) = perfmon_dispatch_wrappers[b->n_events];
  return 0;
}

#define foreach_n_events                                                      \
  _ (1) _ (2) _ (3) _ (4) _ (5) _ (6) _ (7) _ (8) _ (9) _ (10) _ (11) _ (12)

#define _(x)                                                                  \
  static uword perfmon_dispatch_wrapper##x (                                  \
    vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)          \
  {                                                                           \
    return perfmon_dispatch_wrapper_inline (vm, node, frame, x);              \
  }

foreach_n_events
#undef _

vlib_node_function_t *perfmon_dispatch_wrappers[PERF_MAX_EVENTS + 1] = {
#define _(x) [x] = &perfmon_dispatch_wrapper##x,
  foreach_n_events
#undef _
};
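/* For clarity, each expansion of the generator macro above produces one
 * specialized wrapper; e.g. for x = 3 it expands to roughly:
 *
 *   static uword
 *   perfmon_dispatch_wrapper3 (vlib_main_t *vm, vlib_node_runtime_t *node,
 *                              vlib_frame_t *frame)
 *   {
 *     return perfmon_dispatch_wrapper_inline (vm, node, frame, 3);
 *   }
 *
 * so perfmon_dispatch_wrappers[n] holds the wrapper specialized for a
 * bundle of n events, and slot [0] is left null by the designated
 * initializers above. */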