/*
 * Copyright (c) 2020 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "vppinfra/string.h"
#include <vnet/vnet.h>

#include <vlibapi/api.h>
#include <vlibmemory/api.h>
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
#include <linux/limits.h>
#include <sys/ioctl.h>

#include <perfmon/perfmon.h>

static_always_inline u64
perfmon_mmap_read_pmc1 (const struct perf_event_mmap_page *mmap_page)
{
  u64 count;
  u32 seq;

  /* See the documentation in /usr/include/linux/perf_event.h for more
   * details, but the 2 main important things are:
   * 1) if seq != mmap_page->lock, it means the kernel is currently updating
   *    the user page and we need to read it again
   * 2) if idx == 0, it means the perf event is currently turned off and we
   *    just need to read the kernel-updated 'offset', otherwise we must also
   *    add the current hw value (hence rdpmc)
   */
  do
    {
      u32 idx;

      seq = mmap_page->lock;
      CLIB_COMPILER_BARRIER ();

      idx = mmap_page->index;
      count = mmap_page->offset;

      if (idx) /* event is enabled: add the live hw counter value */
	count += _rdpmc (idx - 1);

      CLIB_COMPILER_BARRIER ();
    }
  while (mmap_page->lock != seq);

  return count;
}

static_always_inline void
perfmon_mmap_read_pmcs (u64 *counters,
			struct perf_event_mmap_page **mmap_pages,
			u8 n_counters)
{
  /* unrolled reads for 1..12 counters; cases deliberately fall through */
  switch (n_counters)
    {
    default:
    case 12:
      counters[11] = perfmon_mmap_read_pmc1 (mmap_pages[11]);
    case 11:
      counters[10] = perfmon_mmap_read_pmc1 (mmap_pages[10]);
    case 10:
      counters[9] = perfmon_mmap_read_pmc1 (mmap_pages[9]);
    case 9:
      counters[8] = perfmon_mmap_read_pmc1 (mmap_pages[8]);
    case 8:
      counters[7] = perfmon_mmap_read_pmc1 (mmap_pages[7]);
    case 7:
      counters[6] = perfmon_mmap_read_pmc1 (mmap_pages[6]);
    case 6:
      counters[5] = perfmon_mmap_read_pmc1 (mmap_pages[5]);
    case 5:
      counters[4] = perfmon_mmap_read_pmc1 (mmap_pages[4]);
    case 4:
      counters[3] = perfmon_mmap_read_pmc1 (mmap_pages[3]);
    case 3:
      counters[2] = perfmon_mmap_read_pmc1 (mmap_pages[2]);
    case 2:
      counters[1] = perfmon_mmap_read_pmc1 (mmap_pages[1]);
    case 1:
      counters[0] = perfmon_mmap_read_pmc1 (mmap_pages[0]);
      break;
    }
}

uword
perfmon_dispatch_wrapper_mmap (vlib_main_t *vm, vlib_node_runtime_t *node,
			       vlib_frame_t *frame)
{
  perfmon_main_t *pm = &perfmon_main;
  perfmon_thread_runtime_t *rt =
    vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
  perfmon_node_stats_t *s =
    vec_elt_at_index (rt->node_stats, node->node_index);

  u8 n_events = rt->n_events;

  u64 before[PERF_MAX_EVENTS];
  u64 after[PERF_MAX_EVENTS];
  uword rv;

  clib_prefetch_load (s);

  /* snapshot the counters on both sides of the node's dispatch function */
  perfmon_mmap_read_pmcs (&before[0], rt->mmap_pages, n_events);
  rv = node->function (vm, node, frame);
  perfmon_mmap_read_pmcs (&after[0], rt->mmap_pages, n_events);

  if (rv == 0) /* nothing was processed; don't skew the stats */
    return rv;

  s->n_calls += 1;
  s->n_packets += rv;
  for (int i = 0; i < n_events; i++)
    s->value[i] += after[i] - before[i];

  return rv;
}
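/* For context, a minimal sketch (not part of the original file) of where a
 * struct perf_event_mmap_page comes from: perf_event_open(2) returns an
 * event fd, and mmap()ing its first page yields the read-only metadata page
 * polled by perfmon_mmap_read_pmc1 () above. The helper name is
 * illustrative and error handling is reduced to returning 0. */
#include <linux/perf_event.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

static __clib_unused struct perf_event_mmap_page *
perfmon_example_open_mmap_page (u64 config)
{
  struct perf_event_attr attr = {
    .size = sizeof (struct perf_event_attr),
    .type = PERF_TYPE_HARDWARE,
    .config = config, /* e.g. PERF_COUNT_HW_CPU_CYCLES */
  };
  /* count on the calling thread, any CPU, no group leader, no flags */
  int fd = syscall (__NR_perf_event_open, &attr, 0, -1, -1, 0);
  void *p;

  if (fd < 0)
    return 0;

  /* page 0 of the mapping is the metadata page; additional pages would
   * map the sample ring buffer, which is not needed for counting */
  p = mmap (0, sysconf (_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0);
  return p == MAP_FAILED ? 0 : p;
}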
static_always_inline void
perfmon_metric_read_pmcs (u64 *counters, int *pmc_index, u8 n_counters)
{
  /* unrolled rdpmc reads for 1..12 counters; cases deliberately fall
   * through */
  switch (n_counters)
    {
    default:
    case 12:
      counters[11] = _rdpmc (pmc_index[11]);
    case 11:
      counters[10] = _rdpmc (pmc_index[10]);
    case 10:
      counters[9] = _rdpmc (pmc_index[9]);
    case 9:
      counters[8] = _rdpmc (pmc_index[8]);
    case 8:
      counters[7] = _rdpmc (pmc_index[7]);
    case 7:
      counters[6] = _rdpmc (pmc_index[6]);
    case 6:
      counters[5] = _rdpmc (pmc_index[5]);
    case 5:
      counters[4] = _rdpmc (pmc_index[4]);
    case 4:
      counters[3] = _rdpmc (pmc_index[3]);
    case 3:
      counters[2] = _rdpmc (pmc_index[2]);
    case 2:
      counters[1] = _rdpmc (pmc_index[1]);
    case 1:
      counters[0] = _rdpmc (pmc_index[0]);
      break;
    }
}

static_always_inline int
perfmon_metric_index (perfmon_bundle_t *b, u8 i)
{
  return (int) (b->metrics[i]);
}

uword
perfmon_dispatch_wrapper_metrics (vlib_main_t *vm, vlib_node_runtime_t *node,
				  vlib_frame_t *frame)
{
  perfmon_main_t *pm = &perfmon_main;
  perfmon_thread_runtime_t *rt =
    vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
  perfmon_node_stats_t *s =
    vec_elt_at_index (rt->node_stats, node->node_index);

  u8 n_events = rt->n_events;

  u64 before[PERF_MAX_EVENTS];
  int pmc_index[PERF_MAX_EVENTS];
  uword rv;

  clib_prefetch_load (s);

  /* resolve the rdpmc index for each metric in the active bundle;
   * cases deliberately fall through */
  switch (n_events)
    {
    default:
    case 12:
      pmc_index[11] = perfmon_metric_index (rt->bundle, 11);
    case 11:
      pmc_index[10] = perfmon_metric_index (rt->bundle, 10);
    case 10:
      pmc_index[9] = perfmon_metric_index (rt->bundle, 9);
    case 9:
      pmc_index[8] = perfmon_metric_index (rt->bundle, 8);
    case 8:
      pmc_index[7] = perfmon_metric_index (rt->bundle, 7);
    case 7:
      pmc_index[6] = perfmon_metric_index (rt->bundle, 6);
    case 6:
      pmc_index[5] = perfmon_metric_index (rt->bundle, 5);
    case 5:
      pmc_index[4] = perfmon_metric_index (rt->bundle, 4);
    case 4:
      pmc_index[3] = perfmon_metric_index (rt->bundle, 3);
    case 3:
      pmc_index[2] = perfmon_metric_index (rt->bundle, 2);
    case 2:
      pmc_index[1] = perfmon_metric_index (rt->bundle, 1);
    case 1:
      pmc_index[0] = perfmon_metric_index (rt->bundle, 0);
      break;
    }

  perfmon_metric_read_pmcs (&before[0], pmc_index, n_events);
  rv = node->function (vm, node, frame);

  /* the 'before' snapshot is copied out only after dispatch, so the copy
   * itself is not part of the measured interval */
  clib_memcpy_fast (&s->t[0].value[0], &before, sizeof (before));
  perfmon_metric_read_pmcs (&s->t[1].value[0], pmc_index, n_events);

  if (rv == 0) /* nothing was processed; don't skew the stats */
    return rv;

  s->n_calls += 1;
  s->n_packets += rv;

  return rv;
}
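/* Usage sketch, not part of the original file: how one of the wrappers
 * above would be attached so that every node dispatch on a worker thread is
 * bracketed by PMC reads. This assumes vlib_node_set_dispatch_wrapper ()
 * (present in recent vlib) as the install hook; the helper name and the
 * use_metrics selector are illustrative assumptions - the real perfmon code
 * derives the choice from the active bundle. */
static __clib_unused int
perfmon_example_enable_wrapper (vlib_main_t *vm, int use_metrics)
{
  vlib_node_function_t *fn = use_metrics ? perfmon_dispatch_wrapper_metrics :
					   perfmon_dispatch_wrapper_mmap;

  /* passing 0 instead of fn would detach the wrapper again */
  return vlib_node_set_dispatch_wrapper (vm, fn);
}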