From d255d2545ee6cdc871bc35314fad72c3c48b225b Mon Sep 17 00:00:00 2001 From: pmikus Date: Mon, 19 Apr 2021 12:22:20 +0000 Subject: Framework: Telemetry retake Signed-off-by: pmikus Change-Id: I2f019a083916aec9f7816266f6ad5b92dcc31fa0 --- resources/templates/telemetry/bpf_runtime.yaml | 130 ++++++ resources/templates/telemetry/vpp_clear_stats.yaml | 171 +++++++ resources/templates/telemetry/vpp_runtime.yaml | 508 +++++++++++++++++++++ resources/templates/telemetry/vpp_show_stats.yaml | 154 +++++++ 4 files changed, 963 insertions(+) create mode 100644 resources/templates/telemetry/bpf_runtime.yaml create mode 100644 resources/templates/telemetry/vpp_clear_stats.yaml create mode 100644 resources/templates/telemetry/vpp_runtime.yaml create mode 100644 resources/templates/telemetry/vpp_show_stats.yaml (limited to 'resources/templates/telemetry') diff --git a/resources/templates/telemetry/bpf_runtime.yaml b/resources/templates/telemetry/bpf_runtime.yaml new file mode 100644 index 0000000000..88ad7eb64f --- /dev/null +++ b/resources/templates/telemetry/bpf_runtime.yaml @@ -0,0 +1,130 @@ +--- +logging: + version: 1 + formatters: + console: + format: '%(asctime)s - %(name)s - %(message)s' + prom: + format: '%(message)s' + handlers: + console: + class: logging.StreamHandler + level: INFO + formatter: console + stream: ext://sys.stdout + prom: + class: logging.handlers.RotatingFileHandler + level: INFO + formatter: prom + filename: /tmp/metric.prom + mode: w + loggers: + prom: + handlers: [prom] + level: INFO + propagate: False + root: + level: INFO + handlers: [console] +scheduler: + duration: 1 +programs: + - name: bundle_bpf + metrics: + counter: + - name: cpu_cycle + documentation: Cycles processed by CPUs + namespace: bpf + labelnames: + - name + - cpu + - pid + - name: cpu_instruction + documentation: Instructions retired by CPUs + namespace: bpf + labelnames: + - name + - cpu + - pid + - name: llc_reference + documentation: Last level cache operations by type + 
namespace: bpf + labelnames: + - name + - cpu + - pid + - name: llc_miss + documentation: Last level cache operations by type + namespace: bpf + labelnames: + - name + - cpu + - pid + events: + - type: 0x0 # HARDWARE + name: 0x0 # PERF_COUNT_HW_CPU_CYCLES + target: on_cpu_cycle + table: cpu_cycle + - type: 0x0 # HARDWARE + name: 0x1 # PERF_COUNT_HW_INSTRUCTIONS + target: on_cpu_instruction + table: cpu_instruction + - type: 0x0 # HARDWARE + name: 0x2 # PERF_COUNT_HW_CACHE_REFERENCES + target: on_cache_reference + table: llc_reference + - type: 0x0 # HARDWARE + name: 0x3 # PERF_COUNT_HW_CACHE_MISSES + target: on_cache_miss + table: llc_miss + code: | + #include <linux/ptrace.h> + #include <uapi/linux/bpf_perf_event.h> + + const int max_cpus = 256; /* NOTE(review): not referenced elsewhere in this program */ + + struct key_t { /* map key shared by the four BPF_HASH tables below */ + int cpu; + int pid; + char name[TASK_COMM_LEN]; + }; + + BPF_HASH(llc_miss, struct key_t); + BPF_HASH(llc_reference, struct key_t); + BPF_HASH(cpu_instruction, struct key_t); + BPF_HASH(cpu_cycle, struct key_t); + + static inline __attribute__((always_inline)) void get_key(struct key_t* key) { /* fills *key with the current CPU id, the bpf_get_current_pid_tgid() result narrowed to int, and the current task comm */ + key->cpu = bpf_get_smp_processor_id(); + key->pid = bpf_get_current_pid_tgid(); /* NOTE(review): 64-bit helper result is narrowed to int here; confirm the low half is the id wanted for the "pid" label */ + bpf_get_current_comm(&(key->name), sizeof(key->name)); + } + + int on_cpu_cycle(struct bpf_perf_event_data *ctx) { /* adds ctx->sample_period to the cpu_cycle slot of the current key */ + struct key_t key = {}; + get_key(&key); + + cpu_cycle.increment(key, ctx->sample_period); + return 0; + } + int on_cpu_instruction(struct bpf_perf_event_data *ctx) { /* adds ctx->sample_period to the cpu_instruction slot of the current key */ + struct key_t key = {}; + get_key(&key); + + cpu_instruction.increment(key, ctx->sample_period); + return 0; + } + int on_cache_reference(struct bpf_perf_event_data *ctx) { /* adds ctx->sample_period to the llc_reference slot of the current key */ + struct key_t key = {}; + get_key(&key); + + llc_reference.increment(key, ctx->sample_period); + return 0; + } + int on_cache_miss(struct bpf_perf_event_data *ctx) { /* adds ctx->sample_period to the llc_miss slot of the current key */ + struct key_t key = {}; + get_key(&key); + + llc_miss.increment(key, ctx->sample_period); + return 0; + } diff --git a/resources/templates/telemetry/vpp_clear_stats.yaml b/resources/templates/telemetry/vpp_clear_stats.yaml new file mode 100644 index 
0000000000..9391502654 --- /dev/null +++ b/resources/templates/telemetry/vpp_clear_stats.yaml @@ -0,0 +1,171 @@ +--- +logging: + version: 1 + formatters: + console: + format: '%(asctime)s - %(name)s - %(message)s' + prom: + format: '%(message)s' + handlers: + console: + class: logging.StreamHandler + level: INFO + formatter: console + stream: ext://sys.stdout + prom: + class: logging.handlers.RotatingFileHandler + level: INFO + formatter: prom + filename: /tmp/metric.prom + mode: w + loggers: + prom: + handlers: [prom] + level: INFO + propagate: False + root: + level: INFO + handlers: [console] +scheduler: + duration: 1 +programs: + - name: bundle_vpp + metrics: + gauge: + - name: calls + documentation: Number of calls total + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: vectors + documentation: Number of vectors total + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: suspends + documentation: Number of suspends total + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: clocks + documentation: Number of clocks total + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: vectors_calls + documentation: Number of vectors per call + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + code: | + clear runtime + - name: bundle_vpp + metrics: + gauge: + - name: rx_packets + documentation: Number of received packets for interface + labelnames: + - name + - index + - name: rx_bytes + documentation: Number of received bytes for interface + labelnames: + - name + - index + - name: rx_error + documentation: Number of errors on interface + labelnames: + - name + - index + - name: tx_packets + documentation: Number of transmitted packets for interface + labelnames: + - name + - index + - name: tx_bytes + documentation: Number of transmitted bytes for interface + labelnames: + - name + - index + - name: 
tx_error + documentation: Number of errors on interface + labelnames: + - name + - index + - name: drops + documentation: Number of dropped packets for interface + labelnames: + - name + - index + - name: punt + documentation: Number of punted packets for interface + labelnames: + - name + - index + - name: ip4 + documentation: Number of IPv4 packets for interface + labelnames: + - name + - index + - name: ip6 + documentation: Number of IPv6 packets for interface + labelnames: + - name + - index + - name: rx_no_buf + documentation: Number of out of buffer RX packets on interface + labelnames: + - name + - index + - name: rx_miss + documentation: Number of missed RX packets on interface + labelnames: + - name + - index + code: | + clear interfaces + - name: bundle_vpp + metrics: + gauge: + - name: node_counter + documentation: Node counter + labelnames: + - name + - reason + - severity + - thread_name + - thread_id + code: | + clear node counters + - name: bundle_vpp + metrics: + gauge: + - name: calls + documentation: Load operations + labelnames: + - name + - thread_name + - thread_id + - name: packets + documentation: Load operations + labelnames: + - name + - thread_name + - thread_id + code: | + perfmon reset diff --git a/resources/templates/telemetry/vpp_runtime.yaml b/resources/templates/telemetry/vpp_runtime.yaml new file mode 100644 index 0000000000..4f0f6d7a7d --- /dev/null +++ b/resources/templates/telemetry/vpp_runtime.yaml @@ -0,0 +1,508 @@ +--- +logging: + version: 1 + formatters: + console: + format: '%(asctime)s - %(name)s - %(message)s' + prom: + format: '%(message)s' + handlers: + console: + class: logging.StreamHandler + level: INFO + formatter: console + stream: ext://sys.stdout + prom: + class: logging.handlers.RotatingFileHandler + level: INFO + formatter: prom + filename: /tmp/metric.prom + mode: w + loggers: + prom: + handlers: [prom] + level: INFO + propagate: False + root: + level: INFO + handlers: [console] +scheduler: + duration: 1 
+programs: + - name: bundle_vpp + metrics: + info: + - name: version + documentation: VPP version + namespace: vpp + subsystem: version + labelnames: + - version + code: | + show version + - name: bundle_vpp + metrics: + gauge: + - name: calls + documentation: Number of calls total + namespace: vpp + subsystem: runtime + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: vectors + documentation: Number of vectors total + namespace: vpp + subsystem: runtime + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: suspends + documentation: Number of suspends total + namespace: vpp + subsystem: runtime + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: clocks + documentation: Number of clocks total + namespace: vpp + subsystem: runtime + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: vectors_calls + documentation: Number of vectors per call + namespace: vpp + subsystem: runtime + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + code: | + clear runtime + wait {duration} + show runtime + - name: bundle_vpp + metrics: + gauge: + - name: rx_packets + documentation: Number of received packets for interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: rx_bytes + documentation: Number of received bytes for interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: rx_error + documentation: Number of errors on interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: tx_packets + documentation: Number of transmitted packets for interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: tx_bytes + documentation: Number of transmitted bytes for interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: tx_error + documentation: 
Number of errors on interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: drops + documentation: Number of dropped packets for interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: punt + documentation: Number of punted packets for interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: ip4 + documentation: Number of IPv4 packets for interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: ip6 + documentation: Number of IPv6 packets for interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: rx_no_buf + documentation: Number of out of buffer RX packets on interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: rx_miss + documentation: Number of missed RX packets on interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + code: | + clear interfaces + wait {duration} + show interface + - name: bundle_vpp + metrics: + gauge: + - name: node_counter + documentation: Node counter + namespace: vpp + subsystem: counters + labelnames: + - name + - reason + - severity + - thread_name + - thread_id + code: | + clear node counters + wait {duration} + show node counters verbose + - name: bundle_vpp + metrics: + gauge: + - name: context_switches + documentation: Per-thread context switches + namespace: vpp + subsystem: context_switches + labelnames: + - name + - id + code: | + perfmon reset + perfmon start bundle context-switches + wait {duration} + perfmon stop + show perfmon statistics + - name: bundle_vpp + metrics: + gauge: + - name: minor_page_faults + documentation: Per-thread page faults (minor) + namespace: vpp + subsystem: page_faults + labelnames: + - name + - id + - name: major_page_faults + documentation: Per-thread page faults (major) + namespace: vpp + subsystem: page_faults + labelnames: + - name + - id + code: | + 
perfmon reset + perfmon start bundle page-faults + wait {duration} + perfmon stop + show perfmon statistics + - name: bundle_vpp + metrics: + gauge: + - name: calls + documentation: Instructions/packet, cycles/packet and IPC (calls) + namespace: vpp + subsystem: inst_and_clock + labelnames: + - name + - thread_name + - thread_id + - name: packets + documentation: Instructions/packet, cycles/packet and IPC (packets) + namespace: vpp + subsystem: inst_and_clock + labelnames: + - name + - thread_name + - thread_id + - name: packets_per_call + documentation: Instructions/packet, cycles/packet and IPC (packets/call) + namespace: vpp + subsystem: inst_and_clock + labelnames: + - name + - thread_name + - thread_id + - name: clocks_per_packets + documentation: Instructions/packet, cycles/packet and IPC (clocks/packets) + namespace: vpp + subsystem: inst_and_clock + labelnames: + - name + - thread_name + - thread_id + - name: instructions_per_packets + documentation: Instructions/packet, cycles/packet and IPC (instructions/packets) + namespace: vpp + subsystem: inst_and_clock + labelnames: + - name + - thread_name + - thread_id + - name: ipc + documentation: Instructions/packet, cycles/packet and IPC (IPC) + namespace: vpp + subsystem: inst_and_clock + labelnames: + - name + - thread_name + - thread_id + code: | + perfmon reset + perfmon start bundle inst-and-clock + wait {duration} + perfmon stop + show perfmon statistics + - name: bundle_vpp + metrics: + gauge: + - name: l1_hit + documentation: Cache hits and misses (L1 hit) + namespace: vpp + subsystem: cache_hierarchy + labelnames: + - name + - thread_name + - thread_id + - name: l1_miss + documentation: Cache hits and misses (L1 miss) + namespace: vpp + subsystem: cache_hierarchy + labelnames: + - name + - thread_name + - thread_id + - name: l2_hit + documentation: Cache hits and misses (L2 hit) + namespace: vpp + subsystem: cache_hierarchy + labelnames: + - name + - thread_name + - thread_id + - name: l2_miss + 
documentation: Cache hits and misses (L2 miss) + namespace: vpp + subsystem: cache_hierarchy + labelnames: + - name + - thread_name + - thread_id + - name: l3_hit + documentation: Cache hits and misses (L3 hit) + namespace: vpp + subsystem: cache_hierarchy + labelnames: + - name + - thread_name + - thread_id + - name: l3_miss + documentation: Cache hits and misses (L3 miss) + namespace: vpp + subsystem: cache_hierarchy + labelnames: + - name + - thread_name + - thread_id + code: | + perfmon reset + perfmon start bundle cache-hierarchy + wait {duration} + perfmon stop + show perfmon statistics + - name: bundle_vpp + metrics: + gauge: + - name: calls + documentation: Load operations (calls) + namespace: vpp + subsystem: load_blocks + labelnames: + - name + - thread_name + - thread_id + - name: packets + documentation: Load operations (packets) + namespace: vpp + subsystem: load_blocks + labelnames: + - name + - thread_name + - thread_id + code: | + perfmon reset + perfmon start bundle load-blocks + wait {duration} + perfmon stop + show perfmon statistics + - name: bundle_vpp + metrics: + gauge: + - name: branches_per_call + documentation: Branches/call + namespace: vpp + subsystem: branch_mispred + labelnames: + - name + - thread_name + - thread_id + - name: branches_per_packet + documentation: Branches/packet + namespace: vpp + subsystem: branch_mispred + labelnames: + - name + - thread_name + - thread_id + - name: taken_per_call + documentation: Taken/call + namespace: vpp + subsystem: branch_mispred + labelnames: + - name + - thread_name + - thread_id + - name: taken_per_packet + documentation: Taken/packet + namespace: vpp + subsystem: branch_mispred + labelnames: + - name + - thread_name + - thread_id + - name: mis_predictions + documentation: Mis-predictions + namespace: vpp + subsystem: branch_mispred + labelnames: + - name + - thread_name + - thread_id + code: | + perfmon reset + perfmon start bundle branch-mispred + wait {duration} + perfmon stop + show 
perfmon statistics + - name: bundle_vpp + metrics: + gauge: + - name: lvl0 + documentation: Power licensing (LVL0) + namespace: vpp + subsystem: power_licensing + labelnames: + - name + - thread_name + - thread_id + - name: lvl1 + documentation: Power licensing (LVL1) + namespace: vpp + subsystem: power_licensing + labelnames: + - name + - thread_name + - thread_id + - name: lvl2 + documentation: Power licensing (LVL2) + namespace: vpp + subsystem: power_licensing + labelnames: + - name + - thread_name + - thread_id + - name: throttle + documentation: Power licensing (throttle) + namespace: vpp + subsystem: power_licensing + labelnames: + - name + - thread_name + - thread_id + code: | + perfmon reset + perfmon start bundle power-licensing + wait {duration} + perfmon stop + show perfmon statistics + - name: bundle_vpp + metrics: + gauge: + - name: runtime + documentation: RunTime + namespace: vpp + subsystem: memory_bandwidth + labelnames: + - name + - name: reads_mbs + documentation: Reads (MB/s) + namespace: vpp + subsystem: memory_bandwidth + labelnames: + - name + - name: writes_mbs + documentation: Writes (MB/s) + namespace: vpp + subsystem: memory_bandwidth + labelnames: + - name + - name: total_mbs + documentation: Total (MB/s) + namespace: vpp + subsystem: memory_bandwidth + labelnames: + - name + code: | + perfmon reset + perfmon start bundle memory-bandwidth + wait {duration} + perfmon stop + show perfmon statistics diff --git a/resources/templates/telemetry/vpp_show_stats.yaml b/resources/templates/telemetry/vpp_show_stats.yaml new file mode 100644 index 0000000000..16c895447c --- /dev/null +++ b/resources/templates/telemetry/vpp_show_stats.yaml @@ -0,0 +1,154 @@ +--- +logging: + version: 1 + formatters: + console: + format: '%(asctime)s - %(name)s - %(message)s' + prom: + format: '%(message)s' + handlers: + console: + class: logging.StreamHandler + level: INFO + formatter: console + stream: ext://sys.stdout + prom: + class: logging.handlers.RotatingFileHandler + level: INFO + formatter: prom + 
filename: /tmp/metric.prom + mode: w + loggers: + prom: + handlers: [prom] + level: INFO + propagate: False + root: + level: INFO + handlers: [console] +scheduler: + duration: 1 +programs: + - name: bundle_vpp + metrics: + counter: + - name: calls + documentation: Number of calls total + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: vectors + documentation: Number of vectors total + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: suspends + documentation: Number of suspends total + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: clocks + documentation: Number of clocks total + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: vectors_calls + documentation: Number of vectors per call + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + code: | + show runtime + - name: bundle_vpp + metrics: + counter: + - name: rx_packets + documentation: Number of received packets for interface + labelnames: + - name + - index + - name: rx_bytes + documentation: Number of received bytes for interface + labelnames: + - name + - index + - name: rx_error + documentation: Number of errors on interface + labelnames: + - name + - index + - name: tx_packets + documentation: Number of transmitted packets for interface + labelnames: + - name + - index + - name: tx_bytes + documentation: Number of transmitted bytes for interface + labelnames: + - name + - index + - name: tx_error + documentation: Number of errors on interface + labelnames: + - name + - index + - name: drops + documentation: Number of dropped packets for interface + labelnames: + - name + - index + - name: punt + documentation: Number of punted packets for interface + labelnames: + - name + - index + - name: ip4 + documentation: Number of IPv4 packets for interface + labelnames: + - name + - index + - name: ip6 + documentation: Number of IPv6 
packets for interface + labelnames: + - name + - index + - name: rx_no_buf + documentation: Number of out of buffer RX packets on interface + labelnames: + - name + - index + - name: rx_miss + documentation: Number of missed RX packets on interface + labelnames: + - name + - index + code: | + show interface + - name: bundle_vpp + metrics: + counter: + - name: node_counter + documentation: Node counter + labelnames: + - name + - reason + - severity + - thread_name + - thread_id + code: | + show node counters verbose -- cgit 1.2.3-korg