From d255d2545ee6cdc871bc35314fad72c3c48b225b Mon Sep 17 00:00:00 2001 From: pmikus Date: Mon, 19 Apr 2021 12:22:20 +0000 Subject: Framework: Telemetry retake Signed-off-by: pmikus Change-Id: I2f019a083916aec9f7816266f6ad5b92dcc31fa0 --- resources/libraries/python/Constants.py | 20 +++- resources/libraries/python/ContainerUtils.py | 3 + resources/libraries/python/PerfUtil.py | 82 ------------- resources/libraries/python/TelemetryUtil.py | 130 +++++++++++++++++++++ .../robot/performance/performance_actions.robot | 81 ++++++++----- .../robot/performance/performance_utils.robot | 96 ++------------- 6 files changed, 210 insertions(+), 202 deletions(-) delete mode 100644 resources/libraries/python/PerfUtil.py create mode 100644 resources/libraries/python/TelemetryUtil.py (limited to 'resources/libraries') diff --git a/resources/libraries/python/Constants.py b/resources/libraries/python/Constants.py index be9fe34915..f6e4f58006 100644 --- a/resources/libraries/python/Constants.py +++ b/resources/libraries/python/Constants.py @@ -133,24 +133,30 @@ class Constants: # python scripts location RESOURCES_LIB_PY = u"resources/libraries/python" + # shell scripts location + RESOURCES_TOOLS = u"resources/tools" + # Python API provider location RESOURCES_PAPI_PROVIDER = u"resources/tools/papi/vpp_papi_provider.py" + # Templates location + RESOURCES_TPL = u"resources/templates" + # vat templates location RESOURCES_TPL_VAT = u"resources/templates/vat" # Kubernetes templates location RESOURCES_TPL_K8S = u"resources/templates/kubernetes" - # Templates location - RESOURCES_TPL = u"resources/templates" - # Container templates location RESOURCES_TPL_CONTAINER = u"resources/templates/container" # VPP Communications Library templates location RESOURCES_TPL_VCL = u"resources/templates/vcl" + # Telemetry templates location + RESOURCES_TPL_TELEMETRY = u"resources/templates/telemetry" + # VPP Communications Library LD_PRELOAD library VCL_LDPRELOAD_LIBRARY = 
u"/usr/lib/x86_64-linux-gnu/libvcl_ldpreload.so" @@ -202,8 +208,12 @@ class Constants: # TRex set number of RX/TX descriptors # Set to 0 to use default values - TREX_TX_DESCRIPTORS_COUNT = get_int_from_env(u"TREX_TX_DESCRIPTORS_COUNT", 0) - TREX_RX_DESCRIPTORS_COUNT = get_int_from_env(u"TREX_RX_DESCRIPTORS_COUNT", 0) + TREX_TX_DESCRIPTORS_COUNT = get_int_from_env( + u"TREX_TX_DESCRIPTORS_COUNT", 0 + ) + TREX_RX_DESCRIPTORS_COUNT = get_int_from_env( + u"TREX_RX_DESCRIPTORS_COUNT", 0 + ) # Trex force start regardless ports state TREX_SEND_FORCE = get_pessimistic_bool_from_env(u"TREX_SEND_FORCE") diff --git a/resources/libraries/python/ContainerUtils.py b/resources/libraries/python/ContainerUtils.py index f3b92a8b1f..75acf0027a 100644 --- a/resources/libraries/python/ContainerUtils.py +++ b/resources/libraries/python/ContainerUtils.py @@ -700,6 +700,7 @@ class ContainerEngine: vpp_config.add_buffers_per_numa(215040) vpp_config.add_plugin(u"disable", u"default") vpp_config.add_plugin(u"enable", u"memif_plugin.so") + vpp_config.add_plugin(u"enable", u"perfmon_plugin.so") vpp_config.add_main_heap_size(u"2G") vpp_config.add_main_heap_page_size(u"2M") vpp_config.add_statseg_size(u"2G") @@ -736,6 +737,7 @@ class ContainerEngine: vpp_config.add_dpdk_no_tx_checksum_offload() vpp_config.add_dpdk_dev_default_rxq(rxq) vpp_config.add_plugin(u"enable", u"dpdk_plugin.so") + vpp_config.add_plugin(u"enable", u"perfmon_plugin.so") # Apply configuration self.execute(u"mkdir -p /etc/vpp/") @@ -753,6 +755,7 @@ class ContainerEngine: vpp_config.add_plugin(u"enable", u"crypto_native_plugin.so") vpp_config.add_plugin(u"enable", u"crypto_ipsecmb_plugin.so") vpp_config.add_plugin(u"enable", u"crypto_openssl_plugin.so") + vpp_config.add_plugin(u"enable", u"perfmon_plugin.so") # Apply configuration self.execute(u"mkdir -p /etc/vpp/") diff --git a/resources/libraries/python/PerfUtil.py b/resources/libraries/python/PerfUtil.py deleted file mode 100644 index 6444cc595f..0000000000 --- 
a/resources/libraries/python/PerfUtil.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) 2020 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Linux perf utility.""" - -from resources.libraries.python.Constants import Constants -from resources.libraries.python.OptionString import OptionString -from resources.libraries.python.ssh import exec_cmd -from resources.libraries.python.topology import NodeType - -__all__ = [u"PerfUtil"] - - -class PerfUtil: - """Class contains methods for perf utility.""" - - @staticmethod - def perf_stat(node, cpu_list=None, duration=1): - """Get perf stat read for duration. - - :param node: Node in the topology. - :param cpu_list: CPU List as a string separated by comma. - :param duration: Measure time in seconds. 
- :type node: dict - :type cpu_list: str - :type duration: int - """ - if cpu_list: - cpu_list = list(dict.fromkeys(cpu_list.split(u","))) - cpu_list = ",".join(str(cpu) for cpu in cpu_list) - - cmd_opts = OptionString(prefix=u"--") - cmd_opts.add(u"no-aggr") - cmd_opts.add_with_value_if( - u"cpu", cpu_list, cpu_list - ) - cmd_opts.add_if( - u"all-cpus", not(cpu_list) - ) - cmd_opts.add_with_value_if( - u"event", f"'{{{Constants.PERF_STAT_EVENTS}}}'", - Constants.PERF_STAT_EVENTS - ) - cmd_opts.add_with_value( - u"interval-print", 1000 - ) - cmd_opts.add_with_value( - u"field-separator", u"';'" - ) - - cmd_base = OptionString() - cmd_base.add(f"perf stat") - cmd_base.extend(cmd_opts) - cmd_base.add(u"--") - cmd_base.add_with_value(u"sleep", int(duration)) - - exec_cmd(node, cmd_base, sudo=True) - - @staticmethod - def perf_stat_on_all_duts(nodes, cpu_list=None, duration=1): - """Get perf stat read for duration on all DUTs. - - :param nodes: Nodes in the topology. - :param cpu_list: CPU List. - :param duration: Measure time in seconds. - :type nodes: dict - :type cpu_list: str - :type duration: int - """ - for node in nodes.values(): - if node[u"type"] == NodeType.DUT: - PerfUtil.perf_stat(node, cpu_list=cpu_list, duration=duration) diff --git a/resources/libraries/python/TelemetryUtil.py b/resources/libraries/python/TelemetryUtil.py new file mode 100644 index 0000000000..3031647915 --- /dev/null +++ b/resources/libraries/python/TelemetryUtil.py @@ -0,0 +1,130 @@ +# Copyright (c) 2021 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Telemetry utility."""
+
+from resources.libraries.python.Constants import Constants
+from resources.libraries.python.OptionString import OptionString
+from resources.libraries.python.ssh import exec_cmd, exec_cmd_no_error
+from resources.libraries.python.topology import NodeType
+
+__all__ = [u"TelemetryUtil"]
+
+
+class TelemetryUtil:
+    """Class contains methods for telemetry utility."""
+
+    @staticmethod
+    def perf_stat(node, cpu_list=None, duration=1):
+        """Run "perf stat" on the node for the given duration.
+
+        The command is built as "perf stat <options> -- sleep <duration>"
+        and executed with sudo.
+
+        :param node: Node in the topology.
+        :param cpu_list: CPU List as a string separated by comma. Duplicate
+            entries are dropped while the original order is kept. When it is
+            empty or not set, the "all-cpus" option is requested instead.
+        :param duration: Measure time in seconds.
+        :type node: dict
+        :type cpu_list: str
+        :type duration: int
+        """
+        if cpu_list:
+            # dict.fromkeys() removes duplicates and preserves the order.
+            cpu_list = u",".join(dict.fromkeys(cpu_list.split(u",")))
+
+        cmd_opts = OptionString(prefix=u"--")
+        cmd_opts.add(u"no-aggr")
+        cmd_opts.add_with_value_if(u"cpu", cpu_list, cpu_list)
+        cmd_opts.add_if(u"all-cpus", not cpu_list)
+        cmd_opts.add_with_value_if(
+            u"event", f"'{{{Constants.PERF_STAT_EVENTS}}}'",
+            Constants.PERF_STAT_EVENTS
+        )
+        cmd_opts.add_with_value(u"interval-print", 1000)
+        cmd_opts.add_with_value(u"field-separator", u"';'")
+
+        cmd_base = OptionString()
+        cmd_base.add(u"perf stat")
+        cmd_base.extend(cmd_opts)
+        cmd_base.add(u"--")
+        cmd_base.add_with_value(u"sleep", int(duration))
+
+        exec_cmd(node, cmd_base, sudo=True)
+
+    @staticmethod
+    def perf_stat_on_all_duts(nodes, cpu_list=None, duration=1):
+        """Run "perf stat" for the given duration on all DUTs.
+
+        Nodes of any other type are skipped.
+
+        :param nodes: Nodes in the topology.
+        :param cpu_list: CPU List as a string separated by comma.
+        :param duration: Measure time in seconds.
+        :type nodes: dict
+        :type cpu_list: str
+        :type duration: int
+        """
+        for node in nodes.values():
+            if node[u"type"] == NodeType.DUT:
+                TelemetryUtil.perf_stat(
+                    node, cpu_list=cpu_list, duration=duration
+                )
+
+    @staticmethod
+    def run_telemetry(node, profile, hook=None):
+        """Run the telemetry module on the node and dump its metric file.
+
+        The module is started with "python3 -m telemetry" from the tools
+        directory of the remote framework copy. Afterwards the content of
+        /tmp/metric.prom is read back (presumably written by the telemetry
+        run - confirm against the telemetry tool).
+
+        :param node: Node in the topology.
+        :param profile: Telemetry configuration profile (file name inside
+            the telemetry templates directory).
+        :param hook: Process ID or socket path (optional). When not set,
+            no "--hook" argument is passed at all.
+        :type node: dict
+        :type profile: str
+        :type hook: str
+        """
+        config = (
+            f"{Constants.REMOTE_FW_DIR}/"
+            f"{Constants.RESOURCES_TPL_TELEMETRY}/"
+            f"{profile}"
+        )
+        tools_dir = f"{Constants.REMOTE_FW_DIR}/{Constants.RESOURCES_TOOLS}"
+
+        bin_cmd = f"python3 -m telemetry --config {config}"
+        # Formatting the default None would pass the literal text "None"
+        # as the hook, so the argument is added only for a real value.
+        # NOTE(review): assumes the telemetry tool accepts a missing
+        # --hook argument - confirm against its argument parser.
+        if hook is not None:
+            bin_cmd += f" --hook {hook}"
+
+        exec_cmd_no_error(
+            node, f"sh -c \"cd {tools_dir} && {bin_cmd}\"", sudo=True
+        )
+        exec_cmd_no_error(node, u"cat /tmp/metric.prom", sudo=True)
+
+    @staticmethod
+    def run_telemetry_on_all_duts(nodes, profile):
+        """Run telemetry on all DUTs, once per registered PAPI socket.
+
+        DUTs without any registered PAPI socket are skipped silently.
+
+        :param nodes: Nodes in the topology.
+        :param profile: Telemetry configuration profile.
+        :type nodes: dict
+        :type profile: str
+        """
+        for node in nodes.values():
+            if node[u"type"] != NodeType.DUT:
+                continue
+            # A missing dict key raises KeyError (not IndexError), so the
+            # lookup uses defaults instead of a try block; this also avoids
+            # hiding errors raised by the telemetry run itself.
+            sockets = node.get(u"sockets", {}).get(u"PAPI", {})
+            for socket in sockets.values():
+                TelemetryUtil.run_telemetry(
+                    node, profile=profile, hook=socket
+                )
diff --git a/resources/libraries/robot/performance/performance_actions.robot b/resources/libraries/robot/performance/performance_actions.robot
index c28b2ebdc8..bb6881ef7b 100644
--- a/resources/libraries/robot/performance/performance_actions.robot
+++ b/resources/libraries/robot/performance/performance_actions.robot
@@ -26,23 +26,66 @@
 | | Run Keyword If | ${extended_debug}==${True}
 | | ...
| Perf Stat On All DUTs | ${nodes} | cpu_list=${cpu_alloc_str} -| Additional Statistics Action For clear-show-runtime-with-traffic +| Additional Statistics Action For vpp-runtime | | [Documentation] | | ... | Additional Statistics Action for clear and show runtime counters with | | ... | running traffic. | | | | ... | See documentation of the called keyword for required test variables. | | -| | Clear and show runtime counters with running traffic +| | ${ppta} = | Get Packets Per Transaction Aggregated +| | ${ramp_up_duration} = | Get Ramp Up Duration +| | ${ramp_up_rate} = | Get Ramp Up Rate +| | ${runtime_duration} = | Get Runtime Duration +| | ${runtime_rate} = | Get Runtime Rate +| | ${traffic_directions} = | Get Traffic Directions +| | ${transaction_duration} = | Get Transaction Duration +| | ${transaction_scale} = | Get Transaction Scale +| | ${transaction_type} = | Get Transaction Type +| | ${use_latency} = | Get Use Latency +| | Send traffic on tg +| | ... | duration=${-1} +| | ... | rate=${runtime_rate} +| | ... | frame_size=${frame_size} +| | ... | traffic_profile=${traffic_profile} +| | ... | async_call=${True} +| | ... | ppta=${ppta} +| | ... | use_latency=${use_latency} +| | ... | traffic_directions=${traffic_directions} +| | ... | transaction_duration=${transaction_duration} +| | ... | transaction_scale=${transaction_scale} +| | ... | transaction_type=${transaction_type} +| | ... | duration_limit=${0.0} +| | ... | ramp_up_duration=${ramp_up_duration} +| | ... | ramp_up_rate=${ramp_up_rate} +| | Run Telemetry On All DUTs +| | ... | ${nodes} | profile=vpp_runtime.yaml +| | Stop traffic on tg -| Additional Statistics Action For clear-show-runtime-with-iperf3 +| Additional Statistics Action For vpp-runtime-iperf3 | | [Documentation] | | ... | Additional Statistics Action for clear and show runtime counters with | | ... | iPerf3 running traffic. | | | | ... | See documentation of the called keyword for required test variables. 
| | -| | Clear and show runtime counters with running iperf3 +| | ${runtime_duration} = | Get Runtime Duration +| | ${pids}= | iPerf Client Start Remote Exec +| | | ... | ${nodes['${iperf_client_node}']} +| | | ... | duration=${-1} +| | | ... | rate=${None} +| | | ... | frame_size=${None} +| | | ... | async_call=True +| | | ... | warmup_time=0 +| | | ... | traffic_directions=${1} +| | | ... | namespace=${iperf_client_namespace} +| | | ... | udp=${iperf_client_udp} +| | | ... | host=${iperf_server_bind} +| | | ... | bind=${iperf_client_bind} +| | | ... | affinity=${iperf_client_affinity} +| | Run Telemetry On All DUTs +| | ... | ${nodes} | profile=vpp_runtime.yaml +| | iPerf Client Stop Remote Exec | ${nodes['${iperf_client_node}']} | ${pids} | Additional Statistics Action For noop | | [Documentation] @@ -50,23 +93,12 @@ | | | | No operation -| Additional Statistics Action For vpp-clear-runtime -| | [Documentation] -| | ... | Additional Statistics Action for clear VPP runtime. -| | -| | VPP Clear Runtime On All DUTs | ${nodes} - | Additional Statistics Action For vpp-clear-stats | | [Documentation] | | ... | Additional Statistics Action for clear VPP statistics. | | -| | Clear Statistics On All DUTs | ${nodes} - -| Additional Statistics Action For vpp-enable-elog -| | [Documentation] -| | ... | Additional Statistics Action for enable VPP elog trace. -| | -| | VPP Enable Elog Traces On All DUTs | ${nodes} +| | Run Telemetry On All DUTs +| | ... | ${nodes} | profile=vpp_clear_stats.yaml | Additional Statistics Action For vpp-enable-packettrace | | [Documentation] @@ -75,12 +107,6 @@ | | Run Keyword If | ${extended_debug}==${True} | | ... | VPP Enable Traces On All DUTs | ${nodes} | fail_on_error=${False} -| Additional Statistics Action For vpp-show-elog -| | [Documentation] -| | ... | Additional Statistics Action for show VPP elog trace. 
-| | -| | Show Event Logger On All DUTs | ${nodes} - | Additional Statistics Action For vpp-show-packettrace | | [Documentation] | | ... | Additional Statistics Action for show VPP packet trace. @@ -88,14 +114,9 @@ | | Run Keyword If | ${extended_debug}==${True} | | ... | Show Packet Trace On All Duts | ${nodes} | maximum=${100} -| Additional Statistics Action For vpp-show-runtime -| | [Documentation] -| | ... | Additional Statistics Action for show VPP runtime. -| | -| | VPP Show Runtime On All DUTs | ${nodes} - | Additional Statistics Action For vpp-show-stats | | [Documentation] | | ... | Additional Statistics Action for show VPP statistics. | | -| | Show Statistics On All DUTs | ${nodes} +| | Run Telemetry On All DUTs +| | ... | ${nodes} | profile=vpp_show_stats.yaml diff --git a/resources/libraries/robot/performance/performance_utils.robot b/resources/libraries/robot/performance/performance_utils.robot index ef268ff663..8f29d975a1 100644 --- a/resources/libraries/robot/performance/performance_utils.robot +++ b/resources/libraries/robot/performance/performance_utils.robot @@ -15,9 +15,9 @@ | Library | Collections | Library | resources.libraries.python.topology.Topology | Library | resources.libraries.python.NodePath -| Library | resources.libraries.python.PerfUtil | Library | resources.libraries.python.InterfaceUtil | Library | resources.libraries.python.Iperf3 +| Library | resources.libraries.python.TelemetryUtil | Library | resources.libraries.python.TrafficGenerator | Library | resources.libraries.python.TrafficGenerator.OptimizedSearch | Library | resources.libraries.python.TrafficGenerator.TGDropRateSearchImpl @@ -38,53 +38,6 @@ | ${heap_size_mult}= | ${1} *** Keywords *** -| Clear and show runtime counters with running traffic -| | [Documentation] -| | ... | Start traffic at specified rate then clear runtime counters on all -| | ... | DUTs. Wait for specified amount of time and capture runtime counters -| | ... | on all DUTs. Finally stop traffic. 
-| | -| | ... | TODO: Support resetter if this is not the first trial-ish action? -| | -| | ... | *Example:* -| | -| | ... | \| Clear and show runtime counters with running traffic \| -| | -| | ${ppta} = | Get Packets Per Transaction Aggregated -| | ${ramp_up_duration} = | Get Ramp Up Duration -| | ${ramp_up_rate} = | Get Ramp Up Rate -| | ${runtime_duration} = | Get Runtime Duration -| | ${runtime_rate} = | Get Runtime Rate -| | ${traffic_directions} = | Get Traffic Directions -| | ${transaction_duration} = | Get Transaction Duration -| | ${transaction_scale} = | Get Transaction Scale -| | ${transaction_type} = | Get Transaction Type -| | ${use_latency} = | Get Use Latency -| | # Duration of -1 means we will stop traffic manually. -| | Send traffic on tg -| | ... | duration=${-1} -| | ... | rate=${runtime_rate} -| | ... | frame_size=${frame_size} -| | ... | traffic_profile=${traffic_profile} -| | ... | async_call=${True} -| | ... | ppta=${ppta} -| | ... | use_latency=${use_latency} -| | ... | traffic_directions=${traffic_directions} -| | ... | transaction_duration=${transaction_duration} -| | ... | transaction_scale=${transaction_scale} -| | ... | transaction_type=${transaction_type} -| | ... | duration_limit=${0.0} -| | ... | ramp_up_duration=${ramp_up_duration} -| | ... | ramp_up_rate=${ramp_up_rate} -| | FOR | ${action} | IN | @{pre_run_stats} -| | | Run Keyword | Additional Statistics Action For ${action} -| | END -| | Sleep | ${runtime_duration} -| | FOR | ${action} | IN | @{post_run_stats} -| | | Run Keyword | Additional Statistics Action For ${action} -| | END -| | Stop traffic on tg - | Find critical load using PLRsearch | | [Documentation] | | ... 
| Find boundaries for throughput (of hardcoded target loss ratio) @@ -435,7 +388,10 @@ | | ${transaction_scale} = | Get Transaction Scale | | ${transaction_type} = | Get Transaction Type | | Set Test Variable | \${rate_for_teardown} | ${rate} -| | FOR | ${action} | IN | @{pre_stats} +| | FOR | ${action} | IN | @{stat_runtime} +| | | Run Keyword | Additional Statistics Action For ${action} +| | END +| | FOR | ${action} | IN | @{stat_pre_trial} | | | Run Keyword | Additional Statistics Action For ${action} | | END | | ${results} = | Create List @@ -460,44 +416,11 @@ | | | # the approximated receive rate is the best estimate we have. | | | Append To List | ${results} | ${result.approximated_receive_rate} | | END -| | FOR | ${action} | IN | @{post_stats} +| | FOR | ${action} | IN | @{stat_post_trial} | | | Run Keyword | Additional Statistics Action For ${action} | | END | | Return From Keyword | ${results} -| Clear and show runtime counters with running iperf3 -| | [Documentation] -| | ... | Start traffic at specified rate then clear runtime counters on all -| | ... | DUTs. Wait for specified amount of time and capture runtime counters -| | ... | on all DUTs. Finally stop traffic. -| | -| | ... | *Example:* -| | -| | ... | \| Clear and show runtime counters with running traffic \| -| | -| | ${runtime_duration} = | Get Runtime Duration -| | ${pids}= | iPerf Client Start Remote Exec -| | | ... | ${nodes['${iperf_client_node}']} -| | | ... | duration=${-1} -| | | ... | rate=${None} -| | | ... | frame_size=${None} -| | | ... | async_call=True -| | | ... | warmup_time=0 -| | | ... | traffic_directions=${1} -| | | ... | namespace=${iperf_client_namespace} -| | | ... | udp=${iperf_client_udp} -| | | ... | host=${iperf_server_bind} -| | | ... | bind=${iperf_client_bind} -| | | ... 
| affinity=${iperf_client_affinity} -| | FOR | ${action} | IN | @{pre_run_stats} -| | | Run Keyword | Additional Statistics Action For ${action} -| | END -| | Sleep | ${runtime_duration} -| | FOR | ${action} | IN | @{post_run_stats} -| | | Run Keyword | Additional Statistics Action For ${action} -| | END -| | iPerf Client Stop Remote Exec | ${nodes['${iperf_client_node}']} | ${pids} - | Traffic should pass with maximum rate on iPerf3 | | [Documentation] | | ... | Send traffic at maximum rate on iPerf3. @@ -592,7 +515,10 @@ | | ${pre_stats}= | Create List | | ... | clear-show-runtime-with-iperf3 | | ... | vpp-clear-stats | vpp-enable-packettrace | vpp-enable-elog -| | FOR | ${action} | IN | @{pre_stats} +| | FOR | ${action} | IN | @{stat_runtime} +| | | Run Keyword | Additional Statistics Action For ${action} +| | END +| | FOR | ${action} | IN | @{stat_pre_trial} | | | Run Keyword | Additional Statistics Action For ${action} | | END | | ${results} = | Create List @@ -616,7 +542,7 @@ | | | Append To List | | | ... | ${results} | ${conv} | | END -| | FOR | ${action} | IN | @{post_stats} +| | FOR | ${action} | IN | @{stat_post_trial} | | | Run Keyword | Additional Statistics Action For ${action} | | END | | Return From Keyword | ${results} -- cgit 1.2.3-korg