diff options
Diffstat (limited to 'resources')
17 files changed, 2718 insertions, 126 deletions
diff --git a/resources/libraries/python/Constants.py b/resources/libraries/python/Constants.py index be9fe34915..f6e4f58006 100644 --- a/resources/libraries/python/Constants.py +++ b/resources/libraries/python/Constants.py @@ -133,24 +133,30 @@ class Constants: # python scripts location RESOURCES_LIB_PY = u"resources/libraries/python" + # shell scripts location + RESOURCES_TOOLS = u"resources/tools" + # Python API provider location RESOURCES_PAPI_PROVIDER = u"resources/tools/papi/vpp_papi_provider.py" + # Templates location + RESOURCES_TPL = u"resources/templates" + # vat templates location RESOURCES_TPL_VAT = u"resources/templates/vat" # Kubernetes templates location RESOURCES_TPL_K8S = u"resources/templates/kubernetes" - # Templates location - RESOURCES_TPL = u"resources/templates" - # Container templates location RESOURCES_TPL_CONTAINER = u"resources/templates/container" # VPP Communications Library templates location RESOURCES_TPL_VCL = u"resources/templates/vcl" + # VPP Communications Library templates location + RESOURCES_TPL_TELEMETRY = u"resources/templates/telemetry" + # VPP Communications Library LD_PRELOAD library VCL_LDPRELOAD_LIBRARY = u"/usr/lib/x86_64-linux-gnu/libvcl_ldpreload.so" @@ -202,8 +208,12 @@ class Constants: # TRex set number of RX/TX descriptors # Set to 0 to use default values - TREX_TX_DESCRIPTORS_COUNT = get_int_from_env(u"TREX_TX_DESCRIPTORS_COUNT", 0) - TREX_RX_DESCRIPTORS_COUNT = get_int_from_env(u"TREX_RX_DESCRIPTORS_COUNT", 0) + TREX_TX_DESCRIPTORS_COUNT = get_int_from_env( + u"TREX_TX_DESCRIPTORS_COUNT", 0 + ) + TREX_RX_DESCRIPTORS_COUNT = get_int_from_env( + u"TREX_RX_DESCRIPTORS_COUNT", 0 + ) # Trex force start regardless ports state TREX_SEND_FORCE = get_pessimistic_bool_from_env(u"TREX_SEND_FORCE") diff --git a/resources/libraries/python/ContainerUtils.py b/resources/libraries/python/ContainerUtils.py index f3b92a8b1f..75acf0027a 100644 --- a/resources/libraries/python/ContainerUtils.py +++ b/resources/libraries/python/ContainerUtils.py @@ -700,6 +700,7 @@ class ContainerEngine: vpp_config.add_buffers_per_numa(215040) vpp_config.add_plugin(u"disable", u"default") vpp_config.add_plugin(u"enable", u"memif_plugin.so") + vpp_config.add_plugin(u"enable", u"perfmon_plugin.so") vpp_config.add_main_heap_size(u"2G") vpp_config.add_main_heap_page_size(u"2M") vpp_config.add_statseg_size(u"2G") @@ -736,6 +737,7 @@ class ContainerEngine: vpp_config.add_dpdk_no_tx_checksum_offload() vpp_config.add_dpdk_dev_default_rxq(rxq) vpp_config.add_plugin(u"enable", u"dpdk_plugin.so") + vpp_config.add_plugin(u"enable", u"perfmon_plugin.so") # Apply configuration self.execute(u"mkdir -p /etc/vpp/") @@ -753,6 +755,7 @@ class ContainerEngine: vpp_config.add_plugin(u"enable", u"crypto_native_plugin.so") vpp_config.add_plugin(u"enable", u"crypto_ipsecmb_plugin.so") vpp_config.add_plugin(u"enable", u"crypto_openssl_plugin.so") + vpp_config.add_plugin(u"enable", u"perfmon_plugin.so") # Apply configuration self.execute(u"mkdir -p /etc/vpp/") diff --git a/resources/libraries/python/PerfUtil.py b/resources/libraries/python/TelemetryUtil.py index 6444cc595f..3031647915 100644 --- a/resources/libraries/python/PerfUtil.py +++ b/resources/libraries/python/TelemetryUtil.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 Cisco and/or its affiliates. +# Copyright (c) 2021 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -15,14 +15,14 @@ from resources.libraries.python.Constants import Constants from resources.libraries.python.OptionString import OptionString -from resources.libraries.python.ssh import exec_cmd +from resources.libraries.python.ssh import exec_cmd, exec_cmd_no_error from resources.libraries.python.topology import NodeType -__all__ = [u"PerfUtil"] +__all__ = [u"TelemetryUtil"] -class PerfUtil: - """Class contains methods for perf utility.""" +class TelemetryUtil: + """Class contains methods for telemetry utility.""" @staticmethod def perf_stat(node, cpu_list=None, duration=1): @@ -79,4 +79,52 @@ class PerfUtil: """ for node in nodes.values(): if node[u"type"] == NodeType.DUT: - PerfUtil.perf_stat(node, cpu_list=cpu_list, duration=duration) + TelemetryUtil.perf_stat( + node, cpu_list=cpu_list, duration=duration + ) + + @staticmethod + def run_telemetry(node, profile, hook=None): + """Get telemetry stat read for duration. + + :param node: Node in the topology. + :param profile: Telemetry configuration profile. + :param hook: Process ID or socket path (optional). + :type node: dict + :type profile: str + :type hook: str + """ + config = u"" + config += f"{Constants.REMOTE_FW_DIR}/" + config += f"{Constants.RESOURCES_TPL_TELEMETRY}/" + config += f"{profile}" + + cd_cmd = u"" + cd_cmd += f"sh -c \"cd {Constants.REMOTE_FW_DIR}/" + cd_cmd += f"{Constants.RESOURCES_TOOLS}" + + bin_cmd = f"python3 -m telemetry --config {config} --hook {hook}\"" + + exec_cmd_no_error(node, f"{cd_cmd} && {bin_cmd}", sudo=True) + exec_cmd_no_error(node, f"cat /tmp/metric.prom", sudo=True) + + @staticmethod + def run_telemetry_on_all_duts(nodes, profile): + """Get telemetry stat read on all DUTs. + + :param nodes: Nodes in the topology. + :param profile: Telemetry configuration profile. + :param hooks: Dict of Process IDs or socket paths (optional). + :type nodes: dict + :type profile: str + :type hooks: dict + """ + for node in nodes.values(): + if node[u"type"] == NodeType.DUT: + try: + for socket in node[u"sockets"][u"PAPI"].values(): + TelemetryUtil.run_telemetry( + node, profile=profile, hook=socket + ) + except IndexError: + pass diff --git a/resources/libraries/robot/performance/performance_actions.robot b/resources/libraries/robot/performance/performance_actions.robot index c28b2ebdc8..bb6881ef7b 100644 --- a/resources/libraries/robot/performance/performance_actions.robot +++ b/resources/libraries/robot/performance/performance_actions.robot @@ -26,23 +26,66 @@ | | Run Keyword If | ${extended_debug}==${True} | | ... | Perf Stat On All DUTs | ${nodes} | cpu_list=${cpu_alloc_str} -| Additional Statistics Action For clear-show-runtime-with-traffic +| Additional Statistics Action For vpp-runtime | | [Documentation] | | ... | Additional Statistics Action for clear and show runtime counters with | | ... | running traffic. | | | | ... | See documentation of the called keyword for required test variables. | | -| | Clear and show runtime counters with running traffic +| | ${ppta} = | Get Packets Per Transaction Aggregated +| | ${ramp_up_duration} = | Get Ramp Up Duration +| | ${ramp_up_rate} = | Get Ramp Up Rate +| | ${runtime_duration} = | Get Runtime Duration +| | ${runtime_rate} = | Get Runtime Rate +| | ${traffic_directions} = | Get Traffic Directions +| | ${transaction_duration} = | Get Transaction Duration +| | ${transaction_scale} = | Get Transaction Scale +| | ${transaction_type} = | Get Transaction Type +| | ${use_latency} = | Get Use Latency +| | Send traffic on tg +| | ... | duration=${-1} +| | ... | rate=${runtime_rate} +| | ... | frame_size=${frame_size} +| | ... | traffic_profile=${traffic_profile} +| | ... | async_call=${True} +| | ... | ppta=${ppta} +| | ... | use_latency=${use_latency} +| | ... | traffic_directions=${traffic_directions} +| | ... | transaction_duration=${transaction_duration} +| | ... | transaction_scale=${transaction_scale} +| | ... | transaction_type=${transaction_type} +| | ... | duration_limit=${0.0} +| | ... | ramp_up_duration=${ramp_up_duration} +| | ... | ramp_up_rate=${ramp_up_rate} +| | Run Telemetry On All DUTs +| | ... | ${nodes} | profile=vpp_runtime.yaml +| | Stop traffic on tg -| Additional Statistics Action For clear-show-runtime-with-iperf3 +| Additional Statistics Action For vpp-runtime-iperf3 | | [Documentation] | | ... | Additional Statistics Action for clear and show runtime counters with | | ... | iPerf3 running traffic. | | | | ... | See documentation of the called keyword for required test variables. | | -| | Clear and show runtime counters with running iperf3 +| | ${runtime_duration} = | Get Runtime Duration +| | ${pids}= | iPerf Client Start Remote Exec +| | | ... | ${nodes['${iperf_client_node}']} +| | | ... | duration=${-1} +| | | ... | rate=${None} +| | | ... | frame_size=${None} +| | | ... | async_call=True +| | | ... | warmup_time=0 +| | | ... | traffic_directions=${1} +| | | ... | namespace=${iperf_client_namespace} +| | | ... | udp=${iperf_client_udp} +| | | ... | host=${iperf_server_bind} +| | | ... | bind=${iperf_client_bind} +| | | ... | affinity=${iperf_client_affinity} +| | Run Telemetry On All DUTs +| | ... | ${nodes} | profile=vpp_runtime.yaml +| | iPerf Client Stop Remote Exec | ${nodes['${iperf_client_node}']} | ${pids} | Additional Statistics Action For noop | | [Documentation] @@ -50,23 +93,12 @@ | | | | No operation -| Additional Statistics Action For vpp-clear-runtime -| | [Documentation] -| | ... | Additional Statistics Action for clear VPP runtime. -| | -| | VPP Clear Runtime On All DUTs | ${nodes} - | Additional Statistics Action For vpp-clear-stats | | [Documentation] | | ... | Additional Statistics Action for clear VPP statistics. | | -| | Clear Statistics On All DUTs | ${nodes} - -| Additional Statistics Action For vpp-enable-elog -| | [Documentation] -| | ... | Additional Statistics Action for enable VPP elog trace. -| | -| | VPP Enable Elog Traces On All DUTs | ${nodes} +| | Run Telemetry On All DUTs +| | ... | ${nodes} | profile=vpp_clear_stats.yaml | Additional Statistics Action For vpp-enable-packettrace | | [Documentation] @@ -75,12 +107,6 @@ | | Run Keyword If | ${extended_debug}==${True} | | ... | VPP Enable Traces On All DUTs | ${nodes} | fail_on_error=${False} -| Additional Statistics Action For vpp-show-elog -| | [Documentation] -| | ... | Additional Statistics Action for show VPP elog trace. -| | -| | Show Event Logger On All DUTs | ${nodes} - | Additional Statistics Action For vpp-show-packettrace | | [Documentation] | | ... | Additional Statistics Action for show VPP packet trace. @@ -88,14 +114,9 @@ | | Run Keyword If | ${extended_debug}==${True} | | ... | Show Packet Trace On All Duts | ${nodes} | maximum=${100} -| Additional Statistics Action For vpp-show-runtime -| | [Documentation] -| | ... | Additional Statistics Action for show VPP runtime. -| | -| | VPP Show Runtime On All DUTs | ${nodes} - | Additional Statistics Action For vpp-show-stats | | [Documentation] | | ... | Additional Statistics Action for show VPP statistics. | | -| | Show Statistics On All DUTs | ${nodes} +| | Run Telemetry On All DUTs +| | ... | ${nodes} | profile=vpp_show_stats.yaml diff --git a/resources/libraries/robot/performance/performance_utils.robot b/resources/libraries/robot/performance/performance_utils.robot index ef268ff663..8f29d975a1 100644 --- a/resources/libraries/robot/performance/performance_utils.robot +++ b/resources/libraries/robot/performance/performance_utils.robot @@ -15,9 +15,9 @@ | Library | Collections | Library | resources.libraries.python.topology.Topology | Library | resources.libraries.python.NodePath -| Library | resources.libraries.python.PerfUtil | Library | resources.libraries.python.InterfaceUtil | Library | resources.libraries.python.Iperf3 +| Library | resources.libraries.python.TelemetryUtil | Library | resources.libraries.python.TrafficGenerator | Library | resources.libraries.python.TrafficGenerator.OptimizedSearch | Library | resources.libraries.python.TrafficGenerator.TGDropRateSearchImpl @@ -38,53 +38,6 @@ | ${heap_size_mult}= | ${1} *** Keywords *** -| Clear and show runtime counters with running traffic -| | [Documentation] -| | ... | Start traffic at specified rate then clear runtime counters on all -| | ... | DUTs. Wait for specified amount of time and capture runtime counters -| | ... | on all DUTs. Finally stop traffic. -| | -| | ... | TODO: Support resetter if this is not the first trial-ish action? -| | -| | ... | *Example:* -| | -| | ... | \| Clear and show runtime counters with running traffic \| -| | -| | ${ppta} = | Get Packets Per Transaction Aggregated -| | ${ramp_up_duration} = | Get Ramp Up Duration -| | ${ramp_up_rate} = | Get Ramp Up Rate -| | ${runtime_duration} = | Get Runtime Duration -| | ${runtime_rate} = | Get Runtime Rate -| | ${traffic_directions} = | Get Traffic Directions -| | ${transaction_duration} = | Get Transaction Duration -| | ${transaction_scale} = | Get Transaction Scale -| | ${transaction_type} = | Get Transaction Type -| | ${use_latency} = | Get Use Latency -| | # Duration of -1 means we will stop traffic manually. -| | Send traffic on tg -| | ... | duration=${-1} -| | ... | rate=${runtime_rate} -| | ... | frame_size=${frame_size} -| | ... | traffic_profile=${traffic_profile} -| | ... | async_call=${True} -| | ... | ppta=${ppta} -| | ... | use_latency=${use_latency} -| | ... | traffic_directions=${traffic_directions} -| | ... | transaction_duration=${transaction_duration} -| | ... | transaction_scale=${transaction_scale} -| | ... | transaction_type=${transaction_type} -| | ... | duration_limit=${0.0} -| | ... | ramp_up_duration=${ramp_up_duration} -| | ... | ramp_up_rate=${ramp_up_rate} -| | FOR | ${action} | IN | @{pre_run_stats} -| | | Run Keyword | Additional Statistics Action For ${action} -| | END -| | Sleep | ${runtime_duration} -| | FOR | ${action} | IN | @{post_run_stats} -| | | Run Keyword | Additional Statistics Action For ${action} -| | END -| | Stop traffic on tg - | Find critical load using PLRsearch | | [Documentation] | | ... | Find boundaries for troughput (of hardcoded target loss ratio) @@ -435,7 +388,10 @@ | | ${transaction_scale} = | Get Transaction Scale | | ${transaction_type} = | Get Transaction Type | | Set Test Variable | \${rate_for_teardown} | ${rate} -| | FOR | ${action} | IN | @{pre_stats} +| | FOR | ${action} | IN | @{stat_runtime} +| | | Run Keyword | Additional Statistics Action For ${action} +| | END +| | FOR | ${action} | IN | @{stat_pre_trial} | | | Run Keyword | Additional Statistics Action For ${action} | | END | | ${results} = | Create List @@ -460,44 +416,11 @@ | | | # the approximated receive rate is the best estimate we have. | | | Append To List | ${results} | ${result.approximated_receive_rate} | | END -| | FOR | ${action} | IN | @{post_stats} +| | FOR | ${action} | IN | @{stat_post_trial} | | | Run Keyword | Additional Statistics Action For ${action} | | END | | Return From Keyword | ${results} -| Clear and show runtime counters with running iperf3 -| | [Documentation] -| | ... | Start traffic at specified rate then clear runtime counters on all -| | ... | DUTs. Wait for specified amount of time and capture runtime counters -| | ... | on all DUTs. Finally stop traffic. -| | -| | ... | *Example:* -| | -| | ... | \| Clear and show runtime counters with running traffic \| -| | -| | ${runtime_duration} = | Get Runtime Duration -| | ${pids}= | iPerf Client Start Remote Exec -| | | ... | ${nodes['${iperf_client_node}']} -| | | ... | duration=${-1} -| | | ... | rate=${None} -| | | ... | frame_size=${None} -| | | ... | async_call=True -| | | ... | warmup_time=0 -| | | ... | traffic_directions=${1} -| | | ... | namespace=${iperf_client_namespace} -| | | ... | udp=${iperf_client_udp} -| | | ... | host=${iperf_server_bind} -| | | ... | bind=${iperf_client_bind} -| | | ... | affinity=${iperf_client_affinity} -| | FOR | ${action} | IN | @{pre_run_stats} -| | | Run Keyword | Additional Statistics Action For ${action} -| | END -| | Sleep | ${runtime_duration} -| | FOR | ${action} | IN | @{post_run_stats} -| | | Run Keyword | Additional Statistics Action For ${action} -| | END -| | iPerf Client Stop Remote Exec | ${nodes['${iperf_client_node}']} | ${pids} - | Traffic should pass with maximum rate on iPerf3 | | [Documentation] | | ... | Send traffic at maximum rate on iPerf3. @@ -592,7 +515,10 @@ | | ${pre_stats}= | Create List | | ... | clear-show-runtime-with-iperf3 | | ... | vpp-clear-stats | vpp-enable-packettrace | vpp-enable-elog -| | FOR | ${action} | IN | @{pre_stats} +| | FOR | ${action} | IN | @{stat_runtime} +| | | Run Keyword | Additional Statistics Action For ${action} +| | END +| | FOR | ${action} | IN | @{stat_pre_trial} | | | Run Keyword | Additional Statistics Action For ${action} | | END | | ${results} = | Create List @@ -616,7 +542,7 @@ | | | Append To List | | | ... | ${results} | ${conv} | | END -| | FOR | ${action} | IN | @{post_stats} +| | FOR | ${action} | IN | @{stat_post_trial} | | | Run Keyword | Additional Statistics Action For ${action} | | END | | Return From Keyword | ${results} diff --git a/resources/templates/telemetry/bpf_runtime.yaml b/resources/templates/telemetry/bpf_runtime.yaml new file mode 100644 index 0000000000..88ad7eb64f --- /dev/null +++ b/resources/templates/telemetry/bpf_runtime.yaml @@ -0,0 +1,130 @@ +--- +logging: + version: 1 + formatters: + console: + format: '%(asctime)s - %(name)s - %(message)s' + prom: + format: '%(message)s' + handlers: + console: + class: logging.StreamHandler + level: INFO + formatter: console + stream: ext://sys.stdout + prom: + class: logging.handlers.RotatingFileHandler + level: INFO + formatter: prom + filename: /tmp/metric.prom + mode: w + loggers: + prom: + handlers: [prom] + level: INFO + propagate: False + root: + level: INFO + handlers: [console] +scheduler: + duration: 1 +programs: + - name: bundle_bpf + metrics: + counter: + - name: cpu_cycle + documentation: Cycles processed by CPUs + namespace: bpf + labelnames: + - name + - cpu + - pid + - name: cpu_instruction + documentation: Instructions retired by CPUs + namespace: bpf + labelnames: + - name + - cpu + - pid + - name: llc_reference + documentation: Last level cache operations by type + namespace: bpf + labelnames: + - name + - cpu + - pid + - name: llc_miss + documentation: Last level cache operations by type + namespace: bpf + labelnames: + - name + - cpu + - pid + events: + - type: 0x0 # HARDWARE + name: 0x0 # PERF_COUNT_HW_CPU_CYCLES + target: on_cpu_cycle + table: cpu_cycle + - type: 0x0 # HARDWARE + name: 0x1 # PERF_COUNT_HW_INSTRUCTIONS + target: on_cpu_instruction + table: cpu_instruction + - type: 0x0 # HARDWARE + name: 0x2 # PERF_COUNT_HW_CACHE_REFERENCES + target: on_cache_reference + table: llc_reference + - type: 0x0 # HARDWARE + name: 0x3 # PERF_COUNT_HW_CACHE_MISSES + target: on_cache_miss + table: llc_miss + code: | + #include <linux/ptrace.h> + #include <uapi/linux/bpf_perf_event.h> + + const int max_cpus = 256; + + struct key_t { + int cpu; + int pid; + char name[TASK_COMM_LEN]; + }; + + BPF_HASH(llc_miss, struct key_t); + BPF_HASH(llc_reference, struct key_t); + BPF_HASH(cpu_instruction, struct key_t); + BPF_HASH(cpu_cycle, struct key_t); + + static inline __attribute__((always_inline)) void get_key(struct key_t* key) { + key->cpu = bpf_get_smp_processor_id(); + key->pid = bpf_get_current_pid_tgid(); + bpf_get_current_comm(&(key->name), sizeof(key->name)); + } + + int on_cpu_cycle(struct bpf_perf_event_data *ctx) { + struct key_t key = {}; + get_key(&key); + + cpu_cycle.increment(key, ctx->sample_period); + return 0; + } + int on_cpu_instruction(struct bpf_perf_event_data *ctx) { + struct key_t key = {}; + get_key(&key); + + cpu_instruction.increment(key, ctx->sample_period); + return 0; + } + int on_cache_reference(struct bpf_perf_event_data *ctx) { + struct key_t key = {}; + get_key(&key); + + llc_reference.increment(key, ctx->sample_period); + return 0; + } + int on_cache_miss(struct bpf_perf_event_data *ctx) { + struct key_t key = {}; + get_key(&key); + + llc_miss.increment(key, ctx->sample_period); + return 0; + } diff --git a/resources/templates/telemetry/vpp_clear_stats.yaml b/resources/templates/telemetry/vpp_clear_stats.yaml new file mode 100644 index 0000000000..9391502654 --- /dev/null +++ b/resources/templates/telemetry/vpp_clear_stats.yaml @@ -0,0 +1,171 @@ +--- +logging: + version: 1 + formatters: + console: + format: '%(asctime)s - %(name)s - %(message)s' + prom: + format: '%(message)s' + handlers: + console: + class: logging.StreamHandler + level: INFO + formatter: console + stream: ext://sys.stdout + prom: + class: logging.handlers.RotatingFileHandler + level: INFO + formatter: prom + filename: /tmp/metric.prom + mode: w + loggers: + prom: + handlers: [prom] + level: INFO + propagate: False + root: + level: INFO + handlers: [console] +scheduler: + duration: 1 +programs: + - name: bundle_vpp + metrics: + gauge: + - name: calls + documentation: Number of calls total + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: vectors + documentation: Number of vectors total + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: suspends + documentation: Number of suspends total + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: clocks + documentation: Number of clocks total + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: vectors_calls + documentation: Number of vectors per call + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + code: | + clear runtime + - name: bundle_vpp + metrics: + gauge: + - name: rx_packets + documentation: Number of received packets for interface + labelnames: + - name + - index + - name: rx_bytes + documentation: Number of received bytes for interface + labelnames: + - name + - index + - name: rx_error + documentation: Number of errors on interface + labelnames: + - name + - index + - name: tx_packets + documentation: Number of transitted packets for interface + labelnames: + - name + - index + - name: tx_bytes + documentation: Number of transitted bytes for interface + labelnames: + - name + - index + - name: tx_error + documentation: Number of errors on interface + labelnames: + - name + - index + - name: drops + documentation: Number of dropped packets for interface + labelnames: + - name + - index + - name: punt + documentation: Number of punted packets for interface + labelnames: + - name + - index + - name: ip4 + documentation: Number of IPv4 packets for interface + labelnames: + - name + - index + - name: ip6 + documentation: Number of IPv6 packets for interface + labelnames: + - name + - index + - name: rx_no_buf + documentation: Number of out of buffer RX packets on interface + labelnames: + - name + - index + - name: rx_miss + documentation: Number of missed RX packets on interface + labelnames: + - name + - index + code: | + clear interfaces + - name: bundle_vpp + metrics: + gauge: + - name: node_counter + documentation: Node counter + labelnames: + - name + - reason + - severity + - thread_name + - thread_id + code: | + clear node counters + - name: bundle_vpp + metrics: + gauge: + - name: calls + documentation: Load operations + labelnames: + - name + - thread_name + - thread_id + - name: packets + documentation: Load operations + labelnames: + - name + - thread_name + - thread_id + code: | + perfmon reset diff --git a/resources/templates/telemetry/vpp_runtime.yaml b/resources/templates/telemetry/vpp_runtime.yaml new file mode 100644 index 0000000000..4f0f6d7a7d --- /dev/null +++ b/resources/templates/telemetry/vpp_runtime.yaml @@ -0,0 +1,508 @@ +--- +logging: + version: 1 + formatters: + console: + format: '%(asctime)s - %(name)s - %(message)s' + prom: + format: '%(message)s' + handlers: + console: + class: logging.StreamHandler + level: INFO + formatter: console + stream: ext://sys.stdout + prom: + class: logging.handlers.RotatingFileHandler + level: INFO + formatter: prom + filename: /tmp/metric.prom + mode: w + loggers: + prom: + handlers: [prom] + level: INFO + propagate: False + root: + level: INFO + handlers: [console] +scheduler: + duration: 1 +programs: + - name: bundle_vpp + metrics: + info: + - name: version + documentation: VPP version + namespace: vpp + subsystem: version + labelnames: + - version + code: | + show version + - name: bundle_vpp + metrics: + gauge: + - name: calls + documentation: Number of calls total + namespace: vpp + subsystem: runtime + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: vectors + documentation: Number of vectors total + namespace: vpp + subsystem: runtime + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: suspends + documentation: Number of suspends total + namespace: vpp + subsystem: runtime + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: clocks + documentation: Number of clocks total + namespace: vpp + subsystem: runtime + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: vectors_calls + documentation: Number of vectors per call + namespace: vpp + subsystem: runtime + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + code: | + clear runtime + wait {duration} + show runtime + - name: bundle_vpp + metrics: + gauge: + - name: rx_packets + documentation: Number of received packets for interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: rx_bytes + documentation: Number of received bytes for interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: rx_error + documentation: Number of errors on interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: tx_packets + documentation: Number of transitted packets for interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: tx_bytes + documentation: Number of transitted bytes for interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: tx_error + documentation: Number of errors on interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: drops + documentation: Number of dropped packets for interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: punt + documentation: Number of punted packets for interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: ip4 + documentation: Number of IPv4 packets for interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: ip6 + documentation: Number of IPv6 packets for interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: rx_no_buf + documentation: Number of out of buffer RX packets on interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + - name: rx_miss + documentation: Number of missed RX packets on interface + namespace: vpp + subsystem: interface + labelnames: + - name + - index + code: | + clear interfaces + wait {duration} + show interface + - name: bundle_vpp + metrics: + gauge: + - name: node_counter + documentation: Node counter + namespace: vpp + subsystem: counters + labelnames: + - name + - reason + - severity + - thread_name + - thread_id + code: | + clear node counters + wait {duration} + show node counters verbose + - name: bundle_vpp + metrics: + gauge: + - name: context_switches + documentation: Per-thread context switches + namespace: vpp + subsystem: context_switches + labelnames: + - name + - id + code: | + perfmon reset + perfmon start bundle context-switches + wait {duration} + perfmon stop + show perfmon statistics + - name: bundle_vpp + metrics: + gauge: + - name: minor_page_faults + documentation: Per-thread page faults (minor) + namespace: vpp + subsystem: page_faults + labelnames: + - name + - id + - name: major_page_faults + documentation: Per-thread page faults (major) + namespace: vpp + subsystem: page_faults + labelnames: + - name + - id + code: | + perfmon reset + perfmon start bundle page-faults + wait {duration} + perfmon stop + show perfmon statistics + - name: bundle_vpp + metrics: + gauge: + - name: calls + documentation: Instructions/packet, cycles/packet and IPC (calls) + namespace: vpp + subsystem: inst_and_clock + labelnames: + - name + - thread_name + - thread_id + - name: packets + documentation: Instructions/packet, cycles/packet and IPC (packets) + namespace: vpp + subsystem: inst_and_clock + labelnames: + - name + - thread_name + - thread_id + - name: packets_per_call + documentation: Instructions/packet, cycles/packet and IPC (packets/call) + namespace: vpp + subsystem: inst_and_clock + labelnames: + - name + - thread_name + - thread_id + - name: clocks_per_packets + documentation: Instructions/packet, cycles/packet and IPC (clocks/packets) + namespace: vpp + subsystem: inst_and_clock + labelnames: + - name + - thread_name + - thread_id + - name: instructions_per_packets + documentation: Instructions/packet, cycles/packet and IPC (clocks/packets) + namespace: vpp + subsystem: inst_and_clock + labelnames: + - name + - thread_name + - thread_id + - name: ipc + documentation: Instructions/packet, cycles/packet and IPC (clocks/packets) + namespace: vpp + subsystem: inst_and_clock + labelnames: + - name + - thread_name + - thread_id + code: | + perfmon reset + perfmon start bundle inst-and-clock + wait {duration} + perfmon stop + show perfmon statistics + - name: bundle_vpp + metrics: + gauge: + - name: l1_hit + documentation: Cache hits and misses (L1 hit) + namespace: vpp + subsystem: cache_hierarchy + labelnames: + - name + - thread_name + - thread_id + - name: l1_miss + documentation: Cache hits and misses (L1 miss) + namespace: vpp + subsystem: cache_hierarchy + labelnames: + - name + - thread_name + - thread_id + - name: l2_hit + documentation: Cache hits and misses (L2 hit) + namespace: vpp + subsystem: cache_hierarchy + labelnames: + - name + - thread_name + - thread_id + - name: l2_miss + documentation: Cache hits and misses (L2 miss) + namespace: vpp + subsystem: cache_hierarchy + labelnames: + - name + - thread_name + - thread_id + - name: l3_hit + documentation: Cache hits and misses (L3 hit) + namespace: vpp + subsystem: cache_hierarchy + labelnames: + - name + - thread_name + - thread_id + - name: l3_miss + documentation: Cache hits and misses (L3 miss) + namespace: vpp + subsystem: cache_hierarchy + labelnames: + - name + - thread_name + - thread_id + code: | + perfmon reset + perfmon start bundle cache-hierarchy + wait {duration} + perfmon stop + show perfmon statistics + - name: bundle_vpp + metrics: + gauge: + - name: calls + documentation: Load operations (calls) + namespace: vpp + subsystem: load_blocks + labelnames: + - name + - thread_name + - thread_id + - name: packets + documentation: Load operations (packets) + namespace: vpp + subsystem: load_blocks + labelnames: + - name + - thread_name + - thread_id + code: | + perfmon reset + perfmon start bundle load-blocks + wait {duration} + perfmon stop + show perfmon statistics + - name: bundle_vpp + metrics: + gauge: + - name: branches_per_call + documentation: Branches/call + namespace: vpp + subsystem: branch_mispred + labelnames: + - name + - thread_name + - thread_id + - name: branches_per_packet + documentation: Branches/packet + namespace: vpp + subsystem: branch_mispred + labelnames: + - name + - thread_name + - thread_id + - name: taken_per_call + documentation: Taken/call + namespace: vpp + subsystem: branch_mispred + labelnames: + - name + - thread_name + - thread_id + - name: taken_per_packet + documentation: Taken/packet + namespace: vpp + subsystem: branch_mispred + labelnames: + - name + - thread_name + - thread_id + - name: mis_predictions + documentation: Mis-predictions + namespace: vpp + subsystem: branch_mispred + labelnames: + - name + - thread_name + - thread_id + code: | + perfmon reset + perfmon start bundle branch-mispred + wait {duration} + perfmon stop + show perfmon statistics + - name: bundle_vpp + metrics: + gauge: + - name: lvl0 + documentation: Branches/call + namespace: vpp + subsystem: power_licensing + labelnames: + - name + - thread_name + - thread_id + - name: lvl1 + documentation: Branches/packet + namespace: vpp + subsystem: power_licensing + labelnames: + - name + - thread_name + - thread_id + - name: lvl2 + documentation: Taken/call + namespace: vpp + subsystem: power_licensing + labelnames: + - name + - thread_name + - thread_id + - name: throttle + documentation: Taken/packet + namespace: vpp + subsystem: power_licensing + labelnames: + - name + - thread_name + - thread_id + code: | + perfmon reset + perfmon start bundle power-licensing + wait {duration} + perfmon stop + show perfmon statistics + - name: bundle_vpp + metrics: + gauge: + - name: runtime + documentation: RunTime + namespace: vpp + subsystem: memory_bandwidth + labelnames: + - name + - name: reads_mbs + documentation: Reads (MB/s) + namespace: vpp + subsystem: memory_bandwidth + labelnames: + - name + - name: writes_mbs + documentation: Writes (MB/s) + namespace: vpp + subsystem: memory_bandwidth + labelnames: + - name + - name: total_mbs + documentation: Total (MB/s) + namespace: vpp + subsystem: memory_bandwidth + labelnames: + - name + code: | + perfmon reset + perfmon start bundle memory-bandwidth + wait {duration} + perfmon stop + show perfmon statistics diff --git a/resources/templates/telemetry/vpp_show_stats.yaml b/resources/templates/telemetry/vpp_show_stats.yaml new file mode 100644 index 0000000000..16c895447c --- /dev/null +++ b/resources/templates/telemetry/vpp_show_stats.yaml @@ -0,0 +1,154 @@ +--- +logging: + version: 1 + formatters: + console: + format: '%(asctime)s - %(name)s - %(message)s' + prom: + format: '%(message)s' + handlers: + console: + class: logging.StreamHandler + level: INFO + formatter: console + stream: ext://sys.stdout + prom: + class: logging.handlers.RotatingFileHandler + level: INFO + formatter: prom + filename: /tmp/metric.prom + mode: w + loggers: + prom: + handlers: [prom] + level: INFO + propagate: False + root: + level: INFO + handlers: [console] +scheduler: + duration: 1 +programs: + - name: bundle_vpp + metrics: + counter: + - name: calls + documentation: Number of calls total + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: vectors + documentation: Number of vectors total + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: suspends + documentation: Number of suspends total + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: clocks + documentation: Number of clocks total + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + - name: vectors_calls + documentation: Number of vectors per call + labelnames: + - name + - state + - thread_name + - thread_id + - thread_lcore + code: | + show runtime + - name: bundle_vpp + metrics: + counter: + - name: rx_packets + documentation: Number of received packets for interface + labelnames: + - name + - index + - name: rx_bytes + documentation: Number of received bytes for interface + labelnames: + - name + - index + - name: rx_error + documentation: Number of errors on interface + labelnames: + - name + - index + - name: tx_packets + documentation: Number of transitted packets for interface + labelnames: + - name + - index + - name: tx_bytes + documentation: Number of transitted bytes for interface + labelnames: + - name + - index + - name: tx_error + documentation: Number of errors on interface + labelnames: + - name + - index + - name: drops + documentation: Number of dropped packets for interface + labelnames: + - name + - index + - name: punt + documentation: Number of punted packets for interface + labelnames: + - name + - index + - name: ip4 + documentation: Number of IPv4 packets for interface + labelnames: + - name + - index + - name: ip6 + documentation: Number of IPv6 packets for interface + labelnames: + - name + - index + - name: rx_no_buf + documentation: Number of out of buffer RX packets on interface + labelnames: + - name + - index + - name: rx_miss + documentation: Number of missed RX packets on interface + labelnames: + - name + - index + code: | + show interface + - name: bundle_vpp + metrics: + counter: + - name: node_counter + documentation: Node counter + labelnames: + - name + - reason + - severity + - thread_name + - thread_id + code: | + show node counters verbose diff --git a/resources/tools/telemetry/__init__.py b/resources/tools/telemetry/__init__.py new file mode 100644 index 0000000000..284a1ce972 --- /dev/null +++ b/resources/tools/telemetry/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) 2021 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/resources/tools/telemetry/__main__.py b/resources/tools/telemetry/__main__.py new file mode 100755 index 0000000000..2ab87b661a --- /dev/null +++ b/resources/tools/telemetry/__main__.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2021 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Telemetry Exporter.""" + +from argparse import ArgumentParser, RawDescriptionHelpFormatter + +from .executor import Executor + +def main(): + """ + Main entry function when called from cli + """ + parser = ArgumentParser( + description=u"Telemetry Exporter.", + formatter_class=RawDescriptionHelpFormatter + ) + parser.add_argument( + u"--config", required=True, type=str, + help=u"YAML configuration file." + ) + parser.add_argument( + u"--hook", required=False, type=str, + help=u"Process ID or socket." + ) + parser.add_argument( + u"--daemon", required=False, type=bool, + help=u"Run as daemon." + ) + args = parser.parse_args() + if args.daemon: + Executor(args.config).execute_daemon(args.hook) + else: + Executor(args.config).execute(args.hook) + +if __name__ == u"__main__": + main() diff --git a/resources/tools/telemetry/bundle_bpf.py b/resources/tools/telemetry/bundle_bpf.py new file mode 100644 index 0000000000..77bc9acf91 --- /dev/null +++ b/resources/tools/telemetry/bundle_bpf.py @@ -0,0 +1,112 @@ +# Copyright (c) 2021 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""BPF performance bundle.""" + +from logging import getLogger +import sys + +from bcc import BPF + + +class BundleBpf: + """ + Creates a BPF object. This is the main object for defining a BPF program, + and interacting with its output. + + Syntax: BPF({text=BPF_program | src_file=filename} + [, usdt_contexts=[USDT_object, ...]] + [, cflags=[arg1, ...]] [, debug=int] + ) + + Exactly one of text or src_file must be supplied (not both). + """ + def __init__(self, program, serializer, hook): + """Initialize Bundle BPF Perf event class. + + :param program: BPF C code. + :param serializer: Metric serializer. + :param hook: Process ID. + :type program: dict + :type serializer: Serializer + :type hook: int + """ + self.obj = None + self.code = program[u"code"] + self.metrics = program[u"metrics"] + self.events = program[u"events"] + self.api_replies_list = list() + self.serializer = serializer + self.hook = hook + + self.obj = BPF(text=self.code) + + def attach(self, duration): + """ + Attach events to BPF. + + :param duration: Trial duration. + :type duration: int + """ + try: + for event in self.events: + self.obj.attach_perf_event( + ev_type=event[u"type"], + ev_config=event[u"name"], + fn_name=event[u"target"], + sample_period=duration + ) + except AttributeError: + getLogger(__name__).error(u"Cannot attach BPF events!") + sys.exit(1) + + def detach(self): + """ + Dettach events from BPF. + """ + try: + for event in self.events: + self.obj.detach_perf_event( + ev_type=event[u"type"], + ev_config=event[u"name"] + ) + except AttributeError: + getLogger(__name__).error(u"Cannot dettach BPF events!") + sys.exit(1) + + def fetch_data(self): + """ + Fetch data by invoking API calls to BPF. + """ + self.serializer.create(metrics=self.metrics) + for _, metric_list in self.metrics.items(): + for metric in metric_list: + for (key, val) in self.obj.get_table(metric[u"name"]).items(): + item = dict() + labels = dict() + item[u"name"] = metric[u"name"] + item[u"value"] = val.value + for label in metric[u"labels"]: + labels[label] = getattr(key, label) + item[u"labels"] = labels + self.api_replies_list.append(item) + getLogger(__name__).info(item) + + def process_data(self): + """ + Post process API replies. + """ + for item in self.api_replies_list: + self.serializer.serialize( + metric=item[u"name"], labels=item[u"labels"], item=item + ) diff --git a/resources/tools/telemetry/bundle_vpp.py b/resources/tools/telemetry/bundle_vpp.py new file mode 100644 index 0000000000..01526fe83f --- /dev/null +++ b/resources/tools/telemetry/bundle_vpp.py @@ -0,0 +1,505 @@ +# Copyright (c) 2021 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""VPP execution bundle.""" + +from copy import deepcopy +from logging import getLogger +from re import fullmatch, sub +import struct +import sys + +from vpp_papi.vpp_papi import VPPApiClient as vpp_class + + +M_RUN_THREAD = ( + r"Thread\s" + r"(?P<thread_id>\d+)\s" + r"(?P<thread_name>\S+)\s.*" + r"(?P<thread_lcore>\d+).*" +) +M_RUN_SEPARATOR = ( + r"(-)+" +) +M_RUN_NODES = ( + r"(?P<name>\S+)\s+" + r"(?P<state>\S+\s\S+|\S+)\s+" + r"(?P<calls>\d+)\s+" + r"(?P<vectors>\d+)\s+" + r"(?P<suspends>\d+)\s+" + r"(?P<clocks>\S+)\s+" + r"(?P<vectors_calls>\S+)" +) +M_RUN_TIME = ( + r"Time\s\S+,\s\d+\ssec\sinternal\snode\svector\srate\s" + r"(?P<rate>\S+)\sloops/sec\s" + r"(?P<loops>\S+)" +) +M_INT_BEGIN = ( + r"(?P<name>\S+)\s+" + r"(?P<index>\S+)\s+" + r"(?P<state>\S+)\s+" + r"(?P<mtu>\S+)\s+" + r"(?P<counter>\S+\s\S+|\S+)\s+" + r"(?P<count>\d+)" +) +M_INT_CONT = ( + r"\s+" + r"(?P<counter>\S+\s\S+|\S+)\s+" + r"(?P<count>\d+)" +) +M_NODE_COUNTERS_THREAD = ( + r"Thread\s" + r"(?P<thread_id>\d+)\s\(" + r"(?P<thread_name>\S+)\):\s*" +) +M_NODE_COUNTERS = ( + r"\s*" + r"(?P<count>\d+)\s+" + r"(?P<name>\S+)\s+" + r"(?P<reason>(\S+\s)+)\s+" + r"(?P<severity>\S+)\s+" + r"(?P<index>\d+)\s*" +) +M_PMB_CS_HEADER = ( + r"\s*per-thread\s+context\s+switches.*" +) +M_PMB_CS = ( + r"(?P<thread_name>\S+)\s+\(" + r"(?P<thread_id>\S+)\)\s+\S+\s+" + r"(?P<context_switches>[\d\.]+)" +) +M_PMB_PF_HEADER = ( + r"\s*per-thread\s+page\s+faults.*" +) +M_PMB_PF = ( + r"(?P<thread_name>\S+)\s+\(" + r"(?P<thread_id>\S+)\)\s+\S+\s+" + r"(?P<minor_page_faults>[\d\.]+)\s+" + r"(?P<major_page_faults>[\d\.]+)" +) +M_PMB_THREAD = ( + r"\s*" + r"(?P<thread_name>\S+)\s+\(" + r"(?P<thread_id>\d+)\)\s*" +) +M_PMB_IC_HEADER = ( + r"\s*instructions/packet,\s+cycles/packet\s+and\s+IPC.*" +) +M_PMB_IC_NODE = ( + r"\s*" + r"(?P<node_name>\S+)\s+" + r"(?P<calls>[\d\.]+)\s+" + r"(?P<packets>[\d\.]+)\s+" + r"(?P<packets_per_call>[\d\.]+)\s+" + r"(?P<clocks_per_packets>[\d\.]+)\s+" + r"(?P<instructions_per_packets>[\d\.]+)\s+" + r"(?P<ipc>[\d\.]+)" +) +M_PMB_CM_HEADER = ( + r"\s*cache\s+hits\s+and\s+misses.*" +) +M_PMB_CM_NODE = ( + r"\s*" + r"(?P<node_name>\S+)\s+" + r"(?P<l1_hit>[\d\.]+)\s+" + r"(?P<l1_miss>[\d\.]+)\s+" + r"(?P<l2_hit>[\d\.]+)\s+" + r"(?P<l2_miss>[\d\.]+)\s+" + r"(?P<l3_hit>[\d\.]+)\s+" + r"(?P<l3_miss>[\d\.]+)" +) +M_PMB_LO_HEADER = ( + r"\s*load\s+operations.*" +) +M_PMB_LO_NODE = ( + r"\s*" + r"(?P<node_name>\S+)\s+" + r"(?P<calls>[\d\.]+)\s+" + r"(?P<packets>[\d\.]+)\s+" + r"(?P<one>[\d\.]+)\s+" + r"(?P<two>[\d\.]+)\s+" + r"(?P<three>[\d\.]+)" +) +M_PMB_BM_HEADER = ( + r"\s*Branches,\s+branches\s+taken\s+and\s+mis-predictions.*" +) +M_PMB_BM_NODE = ( + r"\s*" + r"(?P<node_name>\S+)\s+" + r"(?P<branches_per_call>[\d\.]+)\s+" + r"(?P<branches_per_packet>[\d\.]+)\s+" + r"(?P<taken_per_call>[\d\.]+)\s+" + r"(?P<taken_per_packet>[\d\.]+)\s+" + r"(?P<mis_predictions>[\d\.]+)" +) +M_PMB_PL_HEADER = ( + r"\s*Thread\s+power\s+licensing.*" +) +M_PMB_PL_NODE = ( + r"\s*" + r"(?P<node_name>\S+)\s+" + r"(?P<lvl0>[\d\.]+)\s+" + r"(?P<lvl1>[\d\.]+)\s+" + r"(?P<lvl2>[\d\.]+)\s+" + r"(?P<throttle>[\d\.]+)" +) +M_PMB_MB_HEADER = ( + r"\s*memory\s+reads\s+and\s+writes\s+per\s+memory\s+controller.*" +) +M_PMB_MB = ( + r"\s*" + r"(?P<name>\S+)\s+" + r"(?P<runtime>[\d\.]+)\s+" + r"(?P<reads_mbs>[\d\.]+)\s+" + r"(?P<writes_mbs>[\d\.]+)\s+" + r"(?P<total_mbs>[\d\.]+)" +) + + +class BundleVpp: + """ + Creates a VPP object. This is the main object for defining a VPP program, + and interacting with its output. + """ + def __init__(self, program, serializer, hook): + """ + Initialize Bundle VPP class. + + :param program: VPP instructions. + :param serializer: Metric serializer. + :param hook: VPP API socket. + :type program: dict + :type serializer: Serializer + :type hook: int + """ + self.obj = None + self.code = program[u"code"] + self.metrics = program[u"metrics"] + self.api_command_list = list() + self.api_replies_list = list() + self.serializer = serializer + + vpp_class.apidir = u"/usr/share/vpp/api" + self.obj = vpp_class( + use_socket=True, + server_address=hook, + async_thread=False, + read_timeout=14, + logger=getLogger(__name__) + ) + + def attach(self, duration): + """ + Attach events to VPP. + + :param duration: Trial duration. + :type duration: int + """ + try: + self.obj.connect(name=u"telemetry") + except (ConnectionRefusedError, OSError): + getLogger(__name__).error(u"Cannot connect to VPP!") + sys.exit(1) + + for command in self.code.splitlines(): + api_name = u"cli_inband" + api_args = dict(cmd=command.format(duration=duration)) + self.api_command_list.append( + dict(api_name=api_name, api_args=deepcopy(api_args)) + ) + + def detach(self): + """ + Detach from VPP. + """ + self.obj.disconnect() + + def fetch_data(self): + """ + Fetch data by invoking API calls to VPP socket. + """ + for command in self.api_command_list: + try: + papi_fn = getattr(self.obj.api, command[u"api_name"]) + getLogger(__name__).info(command[u"api_args"][u"cmd"]) + replies = papi_fn(**command[u"api_args"]) + except (AttributeError, IOError, struct.error) as err: + raise AssertionError(err) + + if not isinstance(replies, list): + replies = [replies] + for reply in replies: + self.api_replies_list.append(reply) + reply = sub(r"\x1b[^m]*m", u"", reply.reply) + if reply: + getLogger(__name__).info(reply) + else: + getLogger(__name__).info(u"<no reply>") + self.serializer.create(metrics=self.metrics) + + def process_data(self): + """ + Post process command reply. + """ + for command in zip(self.api_command_list, self.api_replies_list): + self_fn = command[0][u"api_args"][u"cmd"] + try: + self_fn = getattr(self, self_fn.replace(u" ", u"_")) + self_fn(command[1].reply) + except AttributeError: + pass + + def show_interface(self, reply): + """ + Parse the show interface output. + + Output format: + { + "name": "rx_packets", + "labels": { + "name": "tap0", + "index": "0", + }, + "value": "31", + }, + + :param reply: API reply. + :type reply: str + """ + for line in reply.splitlines(): + item = dict() + labels = dict() + if fullmatch(M_INT_BEGIN, line): + ifc = fullmatch(M_INT_BEGIN, line).groupdict() + metric = ifc[u"counter"].replace(" ", "_").replace("-", "_") + item[u"name"] = metric + item[u"value"] = ifc[u"count"] + if fullmatch(M_INT_CONT, line): + ifc_cnt = fullmatch(M_INT_CONT, line).groupdict() + metric = ifc_cnt[u"counter"].replace(" ", "_").replace("-", "_") + item[u"name"] = metric + item[u"value"] = ifc_cnt[u"count"] + if fullmatch(M_INT_BEGIN, line) or fullmatch(M_INT_CONT, line): + labels[u"name"] = ifc[u"name"] + labels[u"index"] = ifc[u"index"] + item[u"labels"] = labels + self.serializer.serialize( + metric=metric, labels=labels, item=item + ) + + def show_runtime(self, reply): + """ + Parse the show runtime output. + + Output format: + { + "name": "clocks", + "labels": { + "name": "virtio-input", + "state": "polling", + "thread_name": "vpp_wk_1", + "thread_id": "2", + "thread_lcore": "3", + }, + "value": "3.17e2", + }, + + :param reply: API reply. + :type reply: str + """ + for line in reply.splitlines(): + if fullmatch(M_RUN_THREAD, line): + thread = fullmatch(M_RUN_THREAD, line).groupdict() + if fullmatch(M_RUN_NODES, line): + nodes = fullmatch(M_RUN_NODES, line).groupdict() + for metric in self.serializer.metric_registry: + item = dict() + labels = dict() + item[u"name"] = metric + labels[u"name"] = nodes[u"name"] + labels[u"state"] = nodes[u"state"] + try: + labels[u"thread_name"] = thread[u"thread_name"] + labels[u"thread_id"] = thread[u"thread_id"] + labels[u"thread_lcore"] = thread[u"thread_lcore"] + except UnboundLocalError: + labels[u"thread_name"] = u"vpp_main" + labels[u"thread_id"] = u"0" + labels[u"thread_lcore"] = u"0" + item[u"labels"] = labels + item[u"value"] = nodes[metric] + self.serializer.serialize( + metric=metric, labels=labels, item=item + ) + + def show_node_counters_verbose(self, reply): + """ + Parse the show node conuter output. + + Output format: + { + "name": "node_counters", + "labels": { + "name": "dpdk-input", + "reason": "no_error", + "severity": "error", + "thread_name": "vpp_wk_1", + "thread_id": "2", + }, + "value": "1", + }, + + :param reply: API reply. + :type reply: str + """ + for line in reply.splitlines(): + if fullmatch(M_NODE_COUNTERS_THREAD, line): + thread = fullmatch(M_NODE_COUNTERS_THREAD, line).groupdict() + if fullmatch(M_NODE_COUNTERS, line): + nodes = fullmatch(M_NODE_COUNTERS, line).groupdict() + for metric in self.serializer.metric_registry_registry: + item = dict() + labels = dict() + item[u"name"] = metric + labels[u"name"] = nodes[u"name"] + labels[u"reason"] = nodes[u"reason"] + labels[u"severity"] = nodes[u"severity"] + try: + labels[u"thread_name"] = thread[u"thread_name"] + labels[u"thread_id"] = thread[u"thread_id"] + except UnboundLocalError: + labels[u"thread_name"] = u"vpp_main" + labels[u"thread_id"] = u"0" + item[u"labels"] = labels + item[u"value"] = nodes[u"count"] + self.serializer.serialize( + metric=metric, labels=labels, item=item + ) + + def show_perfmon_statistics(self, reply): + """ + Parse the permon output. + + Output format: + { + "name": "clocks", + "labels": { + "name": "virtio-input", + "state": "polling", + "thread_name": "vpp_wk_1", + "thread_id": "2", + "thread_lcore": "3", + }, + "value": "3.17e2", + }, + + :param reply: API reply. + :type reply: str + """ + def perfmon_threads(reply, regex_threads): + for line in reply.splitlines(): + if fullmatch(regex_threads, line): + threads = fullmatch(regex_threads, line).groupdict() + for metric in self.serializer.metric_registry: + item = dict() + labels = dict() + item[u"name"] = metric + labels[u"name"] = threads[u"thread_name"] + labels[u"id"] = threads[u"thread_id"] + item[u"labels"] = labels + item[u"value"] = threads[metric] + self.serializer.serialize( + metric=metric, labels=labels, item=item + ) + + def perfmon_nodes(reply, regex_threads, regex_nodes): + for line in reply.splitlines(): + if fullmatch(regex_threads, line): + thread = fullmatch(regex_threads, line).groupdict() + if fullmatch(regex_nodes, line): + node = fullmatch(regex_nodes, line).groupdict() + for metric in self.serializer.metric_registry: + item = dict() + labels = dict() + item[u"name"] = metric + labels[u"name"] = node[u"node_name"] + labels[u"thread_name"] = thread[u"thread_name"] + labels[u"thread_id"] = thread[u"thread_id"] + item[u"labels"] = labels + item[u"value"] = node[metric] + self.serializer.serialize( + metric=metric, labels=labels, item=item + ) + + def perfmon_system(reply, regex_line): + for line in reply.splitlines(): + if fullmatch(regex_line, line): + name = fullmatch(regex_line, line).groupdict() + for metric in self.serializer.metric_registry: + item = dict() + labels = dict() + item[u"name"] = metric + labels[u"name"] = name[u"name"] + item[u"labels"] = labels + item[u"value"] = name[metric] + self.serializer.serialize( + metric=metric, labels=labels, item=item + ) + + reply = sub(r"\x1b[^m]*m", u"", reply) + + if fullmatch(M_PMB_CS_HEADER, reply.splitlines()[0]): + perfmon_threads(reply, M_PMB_CS) + if fullmatch(M_PMB_PF_HEADER, reply.splitlines()[0]): + perfmon_threads(reply, M_PMB_PF) + if fullmatch(M_PMB_IC_HEADER, reply.splitlines()[0]): + perfmon_nodes(reply, M_PMB_THREAD, M_PMB_IC_NODE) + if fullmatch(M_PMB_CM_HEADER, reply.splitlines()[0]): + perfmon_nodes(reply, M_PMB_THREAD, M_PMB_CM_NODE) + if fullmatch(M_PMB_LO_HEADER, reply.splitlines()[0]): + perfmon_nodes(reply, M_PMB_THREAD, M_PMB_LO_NODE) + if fullmatch(M_PMB_BM_HEADER, reply.splitlines()[0]): + perfmon_nodes(reply, M_PMB_THREAD, M_PMB_BM_NODE) + if fullmatch(M_PMB_PL_HEADER, reply.splitlines()[0]): + perfmon_nodes(reply, M_PMB_THREAD, M_PMB_PL_NODE) + if fullmatch(M_PMB_MB_HEADER, reply.splitlines()[0]): + perfmon_system(reply, M_PMB_MB) + + def show_version(self, reply): + """ + Parse the version output. + + Output format: + { + "name": "version", + "labels": { + "version": "v21.06-rc0~596-g1ca6c65e5~b1065", + }, + "value": 1.0, + }, + + :param reply: API reply. + :type reply: str + """ + for metric in self.serializer.metric_registry: + version = reply.split()[1] + item = dict() + labels = dict() + item[u"name"] = metric + labels[u"version"] = version + item[u"labels"] = labels + item[u"value"] = 1.0 + self.serializer.serialize( + metric=metric, labels=labels, item=item + ) diff --git a/resources/tools/telemetry/executor.py b/resources/tools/telemetry/executor.py new file mode 100644 index 0000000000..75db4b6a40 --- /dev/null +++ b/resources/tools/telemetry/executor.py @@ -0,0 +1,107 @@ +# Copyright (c) 2021 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Config executor library.""" + +from importlib import import_module +from logging.config import dictConfig +from logging import getLogger +import sys + +from .parser import Parser +from .serializer import Serializer + + +class Executor: + """ + Executor class reponsible for executing configuration. + """ + def __init__(self, configuration_file): + """ + Config Executor init. + + :param configuration_file: Telemetry configuration file path. + :type configuration_file: str + """ + self.parser = Parser(configuration_file) + self.log = self.parser.config[u"logging"] + self.programs = self.parser.config[u"programs"] + self.scheduler = self.parser.config[u"scheduler"] + + dictConfig(self.log) + + def execute(self, hook=None): + """ + Main executor function will run programs from all bundles in a loop. + + Function call: + attach(duration) + fetch_data() + process_data() + detach() + + :param hook: Process ID or socket to attach. None by default. + :type hook: int + """ + for program in self.programs: + serializer = Serializer() + try: + package = program[u"name"] + name = f"telemetry.{package}" + package = package.replace("_", " ").title().replace(" ", "") + module = import_module( + name=name, + package=package + ) + bundle = getattr(module, package)( + program=program, + serializer=serializer, + hook=hook + ) + bundle.attach(duration=self.scheduler[u"duration"]) + bundle.fetch_data() + bundle.process_data() + bundle.detach() + except (ImportError, AttributeError) as exc: + raise ExecutorError( + f"Error executing bundle {package!r}! - {exc}" + ) + serializer.publish() + + def execute_daemon(self, hook=None): + """ + Daemon executor will execute endless loop. + + :param hook: Process ID to attach. None by default. + :type hook: int + """ + while True: + self.execute(hook=hook) + + +class ExecutorError(Exception): + """ + Creates a Executor Error Exception. This exception is supposed to handle + all the errors raised during executing. + """ + def __init__(self, message): + """ + Execute Error Excpetion init. + + :param message: Exception error message. + :type message: str + """ + super().__init__() + self.message = message + getLogger(__name__).error(message) + sys.exit(1) diff --git a/resources/tools/telemetry/metrics.py b/resources/tools/telemetry/metrics.py new file mode 100644 index 0000000000..e5a66b3e0c --- /dev/null +++ b/resources/tools/telemetry/metrics.py @@ -0,0 +1,619 @@ +# Copyright (c) 2021 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Metric library.""" + +from collections import namedtuple +from threading import Lock +from time import monotonic +import re + + +class Value: + """ + A value storage protected by a mutex. + """ + def __init__(self): + """ + Initialize value to default and create a lock. + """ + self._value = 0.0 + self._lock = Lock() + self._timestamp = None + + def inc(self, amount): + """ + Increment value by amount under mutex. + Add a timestamp of capturing value. + + :param amount: Amount of increment. + :type amount: int or float + """ + with self._lock: + self._value += amount + self._timestamp = monotonic() + + def set(self, value): + """ + Set to a specific value under mutex. + Add a timestamp of capturing value. + + :param value: Amount of increment. + :type value: int or float + """ + with self._lock: + self._value = value + self._timestamp = monotonic() + + def get(self): + """ + Get a value under mutex. + + :returns: Stored value. + :rtype: int or float + """ + with self._lock: + return self._value + + def get_timestamp(self): + """ + Get a timestamp under mutex. + + :returns: Stored timestamp. + :rtype: str + """ + with self._lock: + return self._timestamp + + +class Metric: + """ + A single metric parent and its samples. + """ + def __init__(self, name, documentation, typ): + """ + Initialize class and do basic sanitize. + + :param name: Full metric name. + :param documentation: Metric HELP string. + :param typ: Metric type [counter|gauge|info]. + :type name: str + :type documentation: str + :type typ: str + """ + self.metric_types = ( + u"counter", u"gauge", u"info" + ) + self.metric_sample = namedtuple( + u"Sample", [u"name", u"labels", u"value", u"timestamp"] + ) + + if not re.compile(r"^[a-zA-Z_:][a-zA-Z0-9_:]*$").match(name): + raise ValueError(f"Invalid metric name: {name}!") + if typ not in self.metric_types: + raise ValueError(f"Invalid metric type: {typ}!") + + self.name = name + self.documentation = documentation + self.type = typ + self.samples = [] + + def add_sample(self, name, labels, value, timestamp): + """ + Add a sample (entry) to the metric. + + :param name: Full metric name. + :param labels: Metric labels. + :param value: Metric value. + :param timestamp: Timestamp. Default to be when accessed. + :type name: str + :type lables: tuple + :type value: int or float + :type timestamp: float + """ + self.samples.append( + self.metric_sample(name, labels, value, timestamp) + ) + + def __eq__(self, other): + """ + Check equality of added metric. + + :param other: Metric to compare. + :type other: Metric + """ + return (isinstance(other, Metric) + and self.name == other.name + and self.documentation == other.documentation + and self.type == other.type + and self.samples == other.samples) + + def __repr__(self): + """ + Represantation as a string for a debug print. + """ + return ( + f"Metric({self.name}, " + f"{self.documentation}, " + f"{self.type}, " + f"{self.samples})" + ) + + +class MetricBase: + """ + Abstract class for Metric implementation. + """ + _type = None + + def __init__( + self, name, documentation, labelnames=(), namespace="", + subsystem="", labelvalues=None, + ): + """ + Metric initialization. + + :param name: Metric name. + :param documentation: Metric HELP string. + :param labelnames: Metric label list. + :param namespace: Metric namespace (will be added as prefix). + :param subsystem: Metric susbsystem (will be added as prefix). + :param labelvalues: Metric label values. + :type name: str + :type documentation: str + :type labelnames: list + :type namespace: str + :type subsystem: str + :type labelvalues: list + """ + self._name = self.validate_name(name, namespace, subsystem) + self._labelnames = self.validate_labelnames(labelnames) + self._labelvalues = tuple(labelvalues or ()) + self._documentation = documentation + + if self._is_parent(): + self._lock = Lock() + self._metrics = {} + + if self._is_observable(): + self._metric_init() + + @staticmethod + def validate_name(name, namespace, subsystem): + """ + Construct metric full name and validate naming convention. + + :param name: Metric name. + :param namespace: Metric namespace (will be added as prefix). + :param subsystem: Metric susbsystem (will be added as prefix). + :type name: str + :type namespace: str + :type subsystem: str + :returns: Metric full name. + :rtype: str + :rasies ValueError: If name does not conform with naming conventions. + """ + full_name = u"" + full_name += f"{namespace}_" if namespace else u"" + full_name += f"{subsystem}_" if subsystem else u"" + full_name += name + + if not re.compile(r"^[a-zA-Z_:][a-zA-Z0-9_:]*$").match(full_name): + raise ValueError( + f"Invalid metric name: {full_name}!" + ) + return full_name + + @staticmethod + def validate_labelnames(labelnames): + """ + Create label tuple and validate naming convention. + + :param labelnames: Metric label list. + :type labelnames: list + :returns: Label names. + :rtype: tuple + :rasies ValueError: If name does not conform with naming conventions. + """ + labelnames = tuple(labelnames) + for label in labelnames: + if not re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$").match(label): + raise ValueError(f"Invalid label metric name: {label}!") + if re.compile(r"^__.*$").match(label): + raise ValueError(f"Reserved label metric name: {label}!") + return labelnames + + def _is_observable(self): + """ + Check whether this metric is observable, i.e. + * a metric without label names and values, or + * the child of a labelled metric. + + :return: Observable + :rtype: bool + """ + return not self._labelnames or (self._labelnames and self._labelvalues) + + def _is_parent(self): + """ + Check whether metric is parent, i.e. + * a metric with label names but not its values. + + :return: Parent + :rtype: bool + """ + return self._labelnames and not self._labelvalues + + def _get_metric(self): + """ + Returns metric that will handle samples. + + :returns: Metric object. + :rtype: Metric + """ + return Metric(self._name, self._documentation, self._type) + + def describe(self): + """ + Returns metric that will handle samples. + + :returns: List of metric objects. + :rtype: list + """ + return [self._get_metric()] + + def collect(self): + """ + Returns metric with samples. + + :returns: List with metric object. + :rtype: list + """ + metric = self._get_metric() + for suffix, labels, value, timestamp in self.samples(): + metric.add_sample(self._name + suffix, labels, value, timestamp) + return [metric] + + def labels(self, *labelvalues, **labelkwargs): + """ + Return the child for the given labelset. + + :param labelvalues: Label values. + :param labelkwargs: Dictionary with label names and values. + :type labelvalues: list + :type labelkwargs: dict + :returns: Metric with labels and values. + :rtype: Metric + :raises ValueError: If labels were not initialized. + :raises ValueError: If labels are already set (chaining). + :raises ValueError: If both parameters are passed. + :raises ValueError: If label values are not matching label names. + """ + if not self._labelnames: + raise ValueError( + f"No label names were set when constructing {self}!" + ) + + if self._labelvalues: + raise ValueError( + f"{self} already has labels set; can not chain .labels() calls!" + ) + + if labelvalues and labelkwargs: + raise ValueError( + u"Can't pass both *args and **kwargs!" + ) + + if labelkwargs: + if sorted(labelkwargs) != sorted(self._labelnames): + raise ValueError(u"Incorrect label names!") + labelvalues = tuple(labelkwargs[l] for l in self._labelnames) + else: + if len(labelvalues) != len(self._labelnames): + raise ValueError(u"Incorrect label count!") + labelvalues = tuple(l for l in labelvalues) + with self._lock: + if labelvalues not in self._metrics: + self._metrics[labelvalues] = self.__class__( + self._name, + documentation=self._documentation, + labelnames=self._labelnames, + labelvalues=labelvalues + ) + return self._metrics[labelvalues] + + def samples(self): + """ + Returns samples wheter an object is parent or child. + + :returns: List of Metric objects with values. + :rtype: list + """ + if self._is_parent(): + return self._multi_samples() + return self._child_samples() + + def _multi_samples(self): + """ + Returns parent and its childs with its values. + + :returns: List of Metric objects with values. + :rtype: list + """ + with self._lock: + metrics = self._metrics.copy() + for labels, metric in metrics.items(): + series_labels = list(zip(self._labelnames, labels)) + for suffix, sample_labels, value, timestamp in metric.samples(): + yield ( + suffix, dict(series_labels + list(sample_labels.items())), + value, timestamp + ) + + def _child_samples(self): + """ + Returns child with its values. Should be implemented by child class. + + :raises NotImplementedError: If implementation in not in subclass. + """ + raise NotImplementedError( + f"_child_samples() must be implemented by {self}!" + ) + + def _metric_init(self): + """ + Initialize the metric object as a child. + + :raises NotImplementedError: If implementation in not in subclass. + """ + raise NotImplementedError( + f"_metric_init() must be implemented by {self}!" + ) + + def __str__(self): + """ + String for a debug print. + """ + return f"{self._type}:{self._name}" + + def __repr__(self): + """ + Represantation as a string for a debug print. + """ + metric_type = type(self) + return f"{metric_type.__module__}.{metric_type.__name__}({self._name})" + + +class Counter(MetricBase): + """ + A Counter tracks counts of events or running totals. + """ + _type = u"counter" + + def __init__(self, + name, + documentation, + labelnames=(), + namespace=u"", + subsystem=u"", + labelvalues=None + ): + """ + Initialize the Counter metric object. + + :param name: Metric name. + :param documentation: Metric HELP string. + :param labelnames: Metric label list. + :param namespace: Metric namespace (will be added as prefix). + :param subsystem: Metric susbsystem (will be added as prefix). + :param labelvalues: Metric label values. + :type name: str + :type documentation: str + :type labelnames: list + :type namespace: str + :type subsystem: str + :type labelvalues: list + """ + super(Counter, self).__init__( + name=name, + documentation=documentation, + labelnames=labelnames, + namespace=namespace, + subsystem=subsystem, + labelvalues=labelvalues, + ) + + def _metric_init(self): + """ + Initialize counter value. + """ + self._value = Value() + + def inc(self, amount=1): + """ + Increment counter by the given amount. + + :param amount: Amount to increment. + :type amount: int or float + :raises ValueError: If amout is not positive. + """ + if amount < 0: + raise ValueError( + u"Counters can only be incremented by non-negative amounts." + ) + self._value.inc(amount) + + def _child_samples(self): + """ + Returns list of child samples. + + :returns: List of child samples. + :rtype: tuple + """ + return ((u"", {}, self._value.get(), self._value.get_timestamp()),) + + +class Gauge(MetricBase): + """ + Gauge metric, to report instantaneous values. + """ + _type = u"gauge" + + def __init__(self, + name, + documentation, + labelnames=(), + namespace=u"", + subsystem=u"", + labelvalues=None + ): + """ + Initialize the Gauge metric object. + + :param name: Metric name. + :param documentation: Metric HELP string. + :param labelnames: Metric label list. + :param namespace: Metric namespace (will be added as prefix). + :param subsystem: Metric susbsystem (will be added as prefix). + :param labelvalues: Metric label values. + :type name: str + :type documentation: str + :type labelnames: list + :type namespace: str + :type subsystem: str + :type labelvalues: list + """ + super(Gauge, self).__init__( + name=name, + documentation=documentation, + labelnames=labelnames, + namespace=namespace, + subsystem=subsystem, + labelvalues=labelvalues, + ) + + def _metric_init(self): + """ + Initialize gauge value. + """ + self._value = Value() + + def inc(self, amount=1): + """ + Increment gauge by the given amount. + + :param amount: Amount to increment. + :type amount: int or float + """ + self._value.inc(amount) + + def dec(self, amount=1): + """ + Decrement gauge by the given amount. + + :param amount: Amount to decrement. + :type amount: int or float + """ + self._value.inc(-amount) + + def set(self, value): + """ + Set gauge to the given value. + + :param amount: Value to set. + :type amount: int or float + """ + self._value.set(float(value)) + + def _child_samples(self): + """ + Returns list of child samples. + + :returns: List of child samples. + :rtype: tuple + """ + return ((u"", {}, self._value.get(), self._value.get_timestamp()),) + + +class Info(MetricBase): + """ + Info metric, key-value pairs. + """ + _type = u"info" + + def __init__(self, + name, + documentation, + labelnames=(), + namespace=u"", + subsystem=u"", + labelvalues=None + ): + """ + Initialize the Info metric object. + + :param name: Metric name. + :param documentation: Metric HELP string. + :param labelnames: Metric label list. + :param namespace: Metric namespace (will be added as prefix). + :param subsystem: Metric susbsystem (will be added as prefix). + :param labelvalues: Metric label values. + :type name: str + :type documentation: str + :type labelnames: list + :type namespace: str + :type subsystem: str + :type labelvalues: list + """ + super(Info, self).__init__( + name=name, + documentation=documentation, + labelnames=labelnames, + namespace=namespace, + subsystem=subsystem, + labelvalues=labelvalues, + ) + + def _metric_init(self): + """ + Initialize gauge value and time it was created. + """ + self._labelname_set = set(self._labelnames) + self._lock = Lock() + self._value = {} + + def info(self, value): + """ + Set info to the given value. + + :param amount: Value to set. + :type amount: int or float + :raises ValueError: If lables are overlapping. + """ + if self._labelname_set.intersection(value.keys()): + raise ValueError( + u"Overlapping labels for Info metric, " + f"metric: {self._labelnames} child: {value}!" + ) + with self._lock: + self._value = dict(value) + + def _child_samples(self): + """ + Returns list of child samples. + + :returns: List of child samples. + :rtype: tuple + """ + with self._lock: + return ((u"_info", self._value, 1.0, monotonic()),) diff --git a/resources/tools/telemetry/parser.py b/resources/tools/telemetry/parser.py new file mode 100644 index 0000000000..c6cc167066 --- /dev/null +++ b/resources/tools/telemetry/parser.py @@ -0,0 +1,107 @@ +# Copyright (c) 2021 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Configuration parsing library.""" + +from logging import getLogger +from pathlib import Path +import sys + +from yaml import safe_load, YAMLError + + +class Parser: + """ + Parser class reponsible for loading configuration. + """ + def __init__(self, configuration_file): + """ + Config Parser init. + + :param configuration_file: Telemetry configuration file path. + :type configuration_file: str + """ + self.instance = None + self.config = None + self.suffix = Path(configuration_file).suffix[1:].capitalize() + + try: + self.instance = globals()[self.suffix+"Loader"](configuration_file) + except KeyError: + raise ParserError(u"Unsupported file format!") + + self.config = FileLoader(self.instance).load() + + +class FileLoader: + """ + Creates a File Loader object. This is the main object for interacting + with configuration file. + """ + def __init__(self, loader): + """ + File Loader class init. + + :param loader: Loader object responsible for handling file type. + :type loader: obj + """ + self.loader = loader + + def load(self): + """ + File format parser. + """ + return self.loader.load() + + +class YamlLoader: + """ + Creates a YAML Loader object. This is the main object for interacting + with YAML file. + """ + def __init__(self, configuration_file): + """ + YAML Loader class init. + + :param configuration_file: YAML configuration file path. + :type configuration_file: str + """ + self.configuration_file = configuration_file + + def load(self): + """ + YAML format parser. + """ + with open(self.configuration_file, u"r") as stream: + try: + return safe_load(stream) + except YAMLError as exc: + raise ParserError(str(exc)) + + +class ParserError(Exception): + """ + Creates a Parser Error Exception. This exception is supposed to handle + all the errors raised during processing. + """ + def __init__(self, message): + """ + Parser Error Excpetion init. + + :param message: Exception error message. + :type message: str + """ + super().__init__() + self.message = message + getLogger(__name__).error(self.message) + sys.exit(1) diff --git a/resources/tools/telemetry/serializer.py b/resources/tools/telemetry/serializer.py new file mode 100644 index 0000000000..e8315c832d --- /dev/null +++ b/resources/tools/telemetry/serializer.py @@ -0,0 +1,110 @@ +# Copyright (c) 2021 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Config executor library.""" + +from importlib import import_module +from logging import getLogger + + +class Serializer: + """ + Executor class reponsible for executing configuration. + """ + def __init__(self): + """ + Config Executor init.= + """ + self.metric_registry = dict() + + def create(self, metrics): + """ + Create metrics based on input configuration. + + :param metrics: Metric list to create. + :type metrics: list + """ + for metric_type, metric_list in metrics.items(): + for metric in metric_list: + module = import_module( + name=u"telemetry.metrics", package=metric_type.capitalize() + ) + self.metric_registry[metric[u"name"]] = getattr( + module, metric_type.capitalize() + )(**metric) + + def serialize(self, metric, labels, item): + """ + Serialize metric into destination format. + + :param metrics: Metric name. + :param labels: Metric labels. + :param item: Metric dict. + :type metrics: str + :type labels: dict + :type item: dict + """ + if type(self.metric_registry[metric]).__name__ == u"Counter": + self.metric_registry[metric].labels(**labels).inc( + float(item[u"value"]) + ) + if type(self.metric_registry[metric]).__name__ == u"Gauge": + self.metric_registry[metric].labels(**labels).set( + float(item[u"value"]) + ) + if type(self.metric_registry[metric]).__name__ == u"Info": + self.metric_registry[metric].labels(**labels).info( + item[u"value"] + ) + + def publish(self): + """ + Publish metric into logger. + """ + output = [] + for _, metric_list in self.metric_registry.items(): + for metric in metric_list.collect(): + mname = metric.name + mtype = metric.type + + # Adjust from OpenMetrics into Prometheus format. + mname = f"{mname}_total" if mtype == u"counter" else mname + mname = f"{mname}_info" if mtype == u"info" else mname + if mtype in (u"info", u"stateset"): + mtype = u"gauge" + if mtype in (u"gaugehistogram", u"histogram"): + mtype = u"histogram" + + mdocumentation = metric.documentation.replace(u"\\", r"\\") + mdocumentation = mdocumentation.replace(u"\n", r"\n") + output.append(f"# HELP {mname} {mdocumentation}\n") + output.append(f"# TYPE {mname} {mtype}\n") + + for line in metric.samples: + if line.labels: + llabel = [] + for k, value in sorted(line.labels.items()): + value = value.replace(u"\\", r"\\") + value = value.replace(u"\n", r"\n") + value = value.replace(u'"', r'\"') + llabel.append(f'{k}="{value}"') + labelstr = f"{{{','.join(llabel)}}}" + else: + labelstr = u"" + + timestamp = f" {int(float(line.timestamp) * 1000):d}" \ + if line.timestamp else u"" + output.append( + f"{line.name}{labelstr} {line.value}{timestamp}\n" + ) + getLogger(u"prom").info(u"".join(output)) |