From 3ff919f8eb9fa7eb98887f029be7f817de7a1303 Mon Sep 17 00:00:00 2001 From: Peter Mikus Date: Tue, 19 Nov 2019 12:00:57 +0000 Subject: Telemetry: Add more operational data + Add both NDR and PDR telemetry capture + Speedup sockets + Adjust privileges Signed-off-by: Peter Mikus Change-Id: Ia6fd5d405e6fb410651d8b705c921653753aea10 --- resources/libraries/python/PapiExecutor.py | 16 ++- resources/libraries/python/VPPUtil.py | 16 ++- resources/libraries/python/VppCounters.py | 46 ++++--- .../robot/performance/performance_utils.robot | 139 +++++---------------- .../libraries/robot/shared/test_teardown.robot | 2 + resources/tools/presentation/input_data_parser.py | 3 +- 6 files changed, 84 insertions(+), 138 deletions(-) (limited to 'resources') diff --git a/resources/libraries/python/PapiExecutor.py b/resources/libraries/python/PapiExecutor.py index cbb3e28603..8308303b8a 100644 --- a/resources/libraries/python/PapiExecutor.py +++ b/resources/libraries/python/PapiExecutor.py @@ -89,7 +89,6 @@ class PapiSocketExecutor: The reconnection is logged at WARN level, so it is prominently shown in log.html, so we can see how frequently it happens. - TODO: Support sockets in NFs somehow. TODO: Support handling of retval!=0 without try/except in caller. Note: Use only with "with" statement, e.g.: @@ -222,6 +221,7 @@ class PapiSocketExecutor: :returns: self :rtype: PapiSocketExecutor """ + time_enter = time.time() # Parsing takes longer than connecting, prepare instance before tunnel. vpp_instance = self.vpp_instance node = self._node @@ -244,16 +244,11 @@ class PapiSocketExecutor: # Even if ssh can perhaps reuse this file, # we need to remove it for readiness detection to work correctly. run([u"rm", u"-rvf", self._local_vpp_socket]) - # On VIRL, the ssh user is not added to "vpp" group, - # so we need to change remote socket file access rights. - exec_cmd_no_error( - node, u"chmod o+rwx " + self._remote_vpp_socket, sudo=True - ) - # We use sleep command. 
The ssh command will exit in 10 second, + # We use sleep command. The ssh command will exit in 30 seconds, # unless a local socket connection is established, # in which case the ssh command will exit only when # the ssh connection is closed again (via control socket). - # The log level is to supress "Warning: Permanently added" messages. + # The log level is to suppress "Warning: Permanently added" messages. ssh_cmd = [ u"ssh", u"-S", ssh_socket, u"-M", u"-o", u"LogLevel=ERROR", u"-o", u"UserKnownHostsFile=/dev/null", @@ -261,7 +256,7 @@ class PapiSocketExecutor: u"-o", u"ExitOnForwardFailure=yes", u"-L", self._local_vpp_socket + u":" + self._remote_vpp_socket, u"-p", str(node[u"port"]), node[u"username"] + u"@" + node[u"host"], - u"sleep", u"10" + u"sleep", u"30" ] priv_key = node.get(u"priv_key") if priv_key: @@ -311,6 +306,9 @@ class PapiSocketExecutor: break else: raise RuntimeError(u"Failed to connect to VPP over a socket.") + logger.trace( + f"Establishing socket connection took {time.time()-time_enter}s" + ) return self def __exit__(self, exc_type, exc_val, exc_tb): diff --git a/resources/libraries/python/VPPUtil.py b/resources/libraries/python/VPPUtil.py index 7dabb4fc61..865775f995 100644 --- a/resources/libraries/python/VPPUtil.py +++ b/resources/libraries/python/VPPUtil.py @@ -116,6 +116,18 @@ class VPPUtil: cmd = u"command -v vpp" exec_cmd_no_error(node, cmd, message=u"VPP is not installed!") + @staticmethod + def adjust_privileges(node): + """Adjust privileges to control VPP without sudo. + + :param node: Topology node. + :type node: dict + """ + cmd = u"chmod -R o+rwx /run/vpp" + exec_cmd_no_error( + node, cmd, sudo=True, message=u"Failed to adjust privileges!", + retries=120) + @staticmethod def verify_vpp_started(node): """Verify that VPP is started on the specified topology node. @@ -137,7 +149,7 @@ class VPPUtil: @staticmethod def verify_vpp(node): """Verify that VPP is installed and started on the specified topology - node. + node.
Adjust privileges so user can connect without sudo. :param node: Topology node. :type node: dict @@ -147,6 +159,8 @@ class VPPUtil: try: # Verify responsiveness of vppctl. VPPUtil.verify_vpp_started(node) + # Adjust privileges. + VPPUtil.adjust_privileges(node) # Verify responsiveness of PAPI. VPPUtil.show_log(node) VPPUtil.vpp_show_version(node) diff --git a/resources/libraries/python/VppCounters.py b/resources/libraries/python/VppCounters.py index e6bb51ef4e..411302ae58 100644 --- a/resources/libraries/python/VppCounters.py +++ b/resources/libraries/python/VppCounters.py @@ -114,7 +114,7 @@ class VppCounters: ) @staticmethod - def vpp_show_runtime_counters_on_all_duts(nodes): + def vpp_show_runtime_on_all_duts(nodes): """Clear VPP runtime counters on all DUTs. :param nodes: VPP nodes. @@ -125,8 +125,8 @@ class VppCounters: VppCounters.vpp_show_runtime(node) @staticmethod - def vpp_show_hardware_verbose(node): - """Run "show hardware-interfaces verbose" debug CLI command. + def vpp_show_interface(node): + """Run "show interface" debug CLI command. :param node: Node to run command on. :type node: dict @@ -148,6 +148,17 @@ class VppCounters: node, u"show memory verbose api-segment stats-segment main-heap" ) + @staticmethod + def vpp_show_memory_on_all_duts(nodes): + """Run "show memory" on all DUTs. + + :param nodes: VPP nodes. + :type nodes: dict + """ + for node in nodes.values(): + if node[u"type"] == NodeType.DUT: + VppCounters.vpp_show_memory(node) + @staticmethod def vpp_clear_runtime(node): """Run "clear runtime" CLI command. @@ -160,7 +171,7 @@ class VppCounters: ) @staticmethod - def vpp_clear_runtime_counters_on_all_duts(nodes): + def vpp_clear_runtime_on_all_duts(nodes): """Run "clear runtime" CLI command on all DUTs. :param nodes: VPP nodes. @@ -171,8 +182,8 @@ class VppCounters: VppCounters.vpp_clear_runtime(node) @staticmethod - def vpp_clear_hardware_counters(node): - """Run "clear hardware" CLI command. 
+ def vpp_clear_interfaces(node): + """Run "clear interfaces" CLI command. :param node: Node to run command on. :type node: dict @@ -180,22 +191,22 @@ class VppCounters: :rtype: dict """ PapiSocketExecutor.run_cli_cmd_on_all_sockets( - node, u"clear hardware", log=False + node, u"clear interfaces", log=False ) @staticmethod - def vpp_clear_hardware_counters_on_all_duts(nodes): - """Clear hardware counters on all DUTs. + def vpp_clear_interfaces_on_all_duts(nodes): + """Clear interfaces on all DUTs. :param nodes: VPP nodes. :type nodes: dict """ for node in nodes.values(): if node[u"type"] == NodeType.DUT: - VppCounters.vpp_clear_hardware_counters(node) + VppCounters.vpp_clear_interfaces(node) @staticmethod - def vpp_clear_errors_counters(node): + def vpp_clear_errors(node): """Run "clear errors" CLI command. :param node: Node to run command on. @@ -206,7 +217,7 @@ class VppCounters: ) @staticmethod - def vpp_clear_error_counters_on_all_duts(nodes): + def vpp_clear_errors_on_all_duts(nodes): """Clear VPP errors counters on all DUTs. :param nodes: VPP nodes. @@ -214,7 +225,7 @@ class VppCounters: """ for node in nodes.values(): if node[u"type"] == NodeType.DUT: - VppCounters.vpp_clear_errors_counters(node) + VppCounters.vpp_clear_errors(node) @staticmethod def show_vpp_statistics(node): @@ -224,9 +235,7 @@ class VppCounters: :type node: dict """ VppCounters.vpp_show_errors(node) - VppCounters.vpp_show_hardware_verbose(node) - VppCounters.vpp_show_runtime(node) - VppCounters.vpp_show_memory(node) + VppCounters.vpp_show_interface(node) @staticmethod def show_statistics_on_all_duts(nodes): @@ -246,9 +255,8 @@ class VppCounters: :param node: VPP node. 
:type node: dict """ - VppCounters.vpp_clear_errors_counters(node) - VppCounters.vpp_clear_hardware_counters(node) - VppCounters.vpp_clear_runtime(node) + VppCounters.vpp_clear_errors(node) + VppCounters.vpp_clear_interfaces(node) @staticmethod def clear_statistics_on_all_duts(nodes): diff --git a/resources/libraries/robot/performance/performance_utils.robot b/resources/libraries/robot/performance/performance_utils.robot index eac7fe0075..f5e5913fa3 100644 --- a/resources/libraries/robot/performance/performance_utils.robot +++ b/resources/libraries/robot/performance/performance_utils.robot @@ -15,10 +15,7 @@ | Library | Collections | Library | resources.libraries.python.topology.Topology | Library | resources.libraries.python.NodePath -| Library | resources.libraries.python.DpdkUtil | Library | resources.libraries.python.InterfaceUtil -| Library | resources.libraries.python.KubernetesUtils -| Library | resources.libraries.python.VhostUser | Library | resources.libraries.python.TrafficGenerator | Library | resources.libraries.python.TrafficGenerator.OptimizedSearch | Library | resources.libraries.python.TrafficGenerator.TGDropRateSearchImpl @@ -39,11 +36,6 @@ | | ... | reported result contains aggregate rates. | | ... | Currently, the min_rate value is hardcoded to match test teardowns. | | -| | ... | TODO: Should the trial duration of the additional -| | ... | measurements be configurable? -| | -| | ... | Some inputs are read from variables to streamline suites. -| | | | ... | *Test (or broader scope) variables read:* | | ... | - traffic_profile - Name of module defining traffc for measurements. | | ... | Type: string @@ -84,27 +76,16 @@ | | Check NDRPDR interval validity | ${result.pdr_interval} | | ... | ${packet_loss_ratio} | | Check NDRPDR interval validity | ${result.ndr_interval} -| | Perform additional measurements based on NDRPDR result -| | ... | ${result} | ${frame_size} | ${traffic_profile} - -| Display Reconfig Test Message -| | [Documentation] -| | ... 
| Display the number of packets lost (bidirectionally) -| | ... | due to reconfiguration under traffic. -| | -| | ... | *Arguments:* -| | ... | - result - Result of bidirectional measurtement. -| | ... | Type: ReceiveRateMeasurement -| | -| | ... | *Example:* -| | -| | ... | \| Display Reconfig Test Message \| \${result} \| -| | -| | [Arguments] | ${result} -| | -| | Set Test Message | Packets lost due to reconfig: ${result.loss_count} -| | ${time_lost} = | Evaluate | ${result.loss_count} / ${result.target_tr} -| | Set Test Message | ${\n}Implied time lost: ${time_lost} | append=yes +| | ${rate_sum}= | Set Variable | ${result.ndr_interval.measured_low.target_tr} +| | ${rate_per_stream}= | Evaluate | ${rate_sum} / float(${traffic_directions}) +| | Send traffic at specified rate +| | ... | ${2.0} | ${rate_per_stream}pps | ${framesize} | ${traffic_profile} +| | ... | traffic_directions=${traffic_directions} +| | ${rate_sum}= | Set Variable | ${result.pdr_interval.measured_low.target_tr} +| | ${rate_per_stream}= | Evaluate | ${rate_sum} / float(${traffic_directions}) +| | Send traffic at specified rate +| | ... | ${2.0} | ${rate_per_stream}pps | ${framesize} | ${traffic_profile} +| | ... | traffic_directions=${traffic_directions} | Find Throughput Using MLRsearch | | [Documentation] @@ -113,11 +94,6 @@ | | ... | Input rates are understood as uni-directional. | | ... | Currently, the min_rate value is hardcoded to match test teardowns. | | -| | ... | TODO: Should the trial duration of the additional -| | ... | measurements be configurable? -| | -| | ... | Some inputs are read from variables to streamline suites. -| | | | ... | *Test (or broader scope) variables read:* | | ... | - traffic_profile - Name of module defining traffc for measurements. | | ... | Type: string @@ -168,7 +144,6 @@ | | ... | Input rates are understood as uni-directional, | | ... | reported result contains aggregate rates. | | ... 
| Currently, the min_rate value is hardcoded to match test teardowns. -| | ... | Some inputs are read from variables to streamline suites. | | | | ... | *Test (or broader scope) variables read:* | | ... | - traffic_profile - Name of module defining traffc for measurements. @@ -232,6 +207,25 @@ | | Set Test Message | ${\n}LATENCY [min/avg/max/hdrh] per stream: ${latency} | | ... | append=yes +| Display Reconfig Test Message +| | [Documentation] +| | ... | Display the number of packets lost (bidirectionally) +| | ... | due to reconfiguration under traffic. +| | ... +| | ... | *Arguments:* +| | ... | - result - Result of bidirectional measurtement. +| | ... | Type: ReceiveRateMeasurement +| | ... +| | ... | *Example:* +| | ... +| | ... | \| Display Reconfig Test Message \| \${result} \| +| | ... +| | [Arguments] | ${result} +| | ... +| | Set Test Message | Packets lost due to reconfig: ${result.loss_count} +| | ${time_lost} = | Evaluate | ${result.loss_count} / ${result.target_tr} +| | Set Test Message | ${\n}Implied time lost: ${time_lost} | append=yes + | Display result of NDRPDR search | | [Documentation] | | ... | Display result of NDR+PDR search, both quantities, both bounds, @@ -275,8 +269,6 @@ | | ... | Sum of measured rates over streams | | ... | Bandwidth is calculated as: | | ... | (Throughput * (L2 Frame Size + IPG) * 8) -| | ... | TODO: Do we want to report some latency data, -| | ... | even if not measured at the reported bounds?. | | | | ... | *Test (or broader scope) variables read:* | | ... | - frame_size - L2 Frame Size [B] or IMIX string. Type: int or str @@ -330,69 +322,10 @@ | | ... | ${message}${\n}${message_zero} | ${message}${\n}${message_other} | | Fail | ${message} -| Perform additional measurements based on NDRPDR result -| | [Documentation] -| | ... | Perform any additional measurements which are not directly needed -| | ... | for determining NDR nor PDR, but which are needed for gathering -| | ... | additional data for debug purposes. 
-| | ... | Currently, just "Traffic should pass with no loss" is called. -| | ... | TODO: Move latency measurements from optimized search here. -| | -| | ... | *Arguments:* -| | ... | - result - Measured result data per stream [pps]. Type: NdrPdrResult -| | ... | - frame_size - L2 Frame Size [B] or IMIX string. Type: int or str -| | ... | - traffic_profile - Topology profile. Type: string -| | ... | - traffic_directions - Bi- (2) or uni- (1) directional traffic. -| | ... | Type: int -| | -| | ... | *Example:* -| | ... | \| Perform additional measurements based on NDRPDR result \ -| | ... | \| \${result} \| \${64} \| 3-node-IPv4 \| \${2} \| -| | -| | [Arguments] | ${result} | ${framesize} | ${traffic_profile} -| | ... | ${traffic_directions}=${2} -| | -| | ${duration}= | Set Variable | ${2.0} -| | ${rate_sum}= | Set Variable | ${result.ndr_interval.measured_low.target_tr} -| | ${rate_per_stream}= | Evaluate | ${rate_sum} / float(${traffic_directions}) -| | Traffic should pass with no loss | ${duration} | ${rate_per_stream}pps -| | ... | ${framesize} | ${traffic_profile} | fail_on_loss=${False} -| | ... | traffic_directions=${traffic_directions} - -| Traffic should pass with no loss -| | [Documentation] -| | ... | Send traffic at specified rate. No packet loss is accepted at loss -| | ... | evaluation. -| | -| | ... | *Arguments:* -| | ... | - duration - Duration of traffic run [s]. Type: integer -| | ... | - rate - Rate for sending packets. Type: string -| | ... | - frame_size - L2 Frame Size [B] or IMIX_v4_1. Type: integer/string -| | ... | - traffic_profile - Name of module defining traffc for measurements. -| | ... | Type: string -| | ... | - fail_on_loss - If True, the keyword fails if loss occurred. -| | ... | Type: boolean -| | ... | - traffic_directions - Bi- (2) or uni- (1) directional traffic. -| | ... | Type: int -| | -| | ... | *Example:* -| | -| | ... | \| Traffic should pass with no loss \| \${10} \| 4.0mpps \| \${64} \ -| | ... 
| \| 3-node-IPv4 \| \${2} \| -| | -| | [Arguments] | ${duration} | ${rate} | ${frame_size} | ${traffic_profile} -| | ... | ${fail_on_loss}=${True} | ${traffic_directions}=${2} -| | -| | Send traffic at specified rate | ${duration} | ${rate} | ${frame_size} -| | ... | ${traffic_profile} | traffic_directions=${traffic_directions} -| | Run Keyword If | ${fail_on_loss} | No traffic loss occurred - | Traffic should pass with maximum rate | | [Documentation] | | ... | Send traffic at maximum rate. | | -| | ... | Some inputs are read from variables to streamline suites. -| | | | ... | *Test (or broader scope) variables read:* | | ... | - traffic_profile - Name of module defining traffc for measurements. | | ... | Type: string @@ -424,8 +357,6 @@ | | Set Test Message | ${\n}Maximum Receive Rate trial results | | Set Test Message | in packets per second: ${results} | | ... | append=yes -| | # TODO: Should we also report the percentage relative to transmit rate, -| | # so that people looking at console can decide how close to 100% it is? | | Run Keyword If | ${fail_no_traffic} | Fail if no traffic forwarded | Send traffic at specified rate @@ -516,21 +447,17 @@ | | ... | traffic_directions=${traffic_directions} | tx_port=${tx_port} | | ... | rx_port=${rx_port} | | Run Keyword If | ${dut_stats}==${True} -| | ... | VPP clear runtime counters on all DUTs | ${nodes} +| | ... | VPP clear runtime on all DUTs | ${nodes} | | Sleep | ${duration} | | Run Keyword If | ${dut_stats}==${True} -| | ... | VPP show runtime counters on all DUTs | ${nodes} +| | ... | VPP show runtime on all DUTs | ${nodes} | | Stop traffic on tg | Start Traffic on Background | | [Documentation] | | ... | Start traffic at specified rate then return control to Robot. -| | | | ... | This keyword is useful if the test needs to do something | | ... | while traffic is running. -| | ... | Just a wrapper around L1 keyword. -| | ... | -| | ... | TODO: How to make sure the traffic is stopped on any failure? | | | | ... 
| *Test (or broader scope) variables read:* | | ... | - traffic_profile - Name of module defining traffc for measurements. @@ -562,10 +489,6 @@ | | ... | Stop the running traffic, return measurement result. | | ... | For bidirectional traffic, the reported values are bi-directional. | | -| | ... | Just a wrapper around L1 keyword. -| | ... | -| | ... | TODO: Tolerate if traffic was not started. -| | | | ... | *Returns:* | | ... | - Measurement result. Type: ReceiveRateMeasurement | | diff --git a/resources/libraries/robot/shared/test_teardown.robot b/resources/libraries/robot/shared/test_teardown.robot index 96d3cd825b..fe0dda2f19 100644 --- a/resources/libraries/robot/shared/test_teardown.robot +++ b/resources/libraries/robot/shared/test_teardown.robot @@ -37,6 +37,8 @@ | | ... | Get Core Files on All Nodes | ${nodes} | | Run Keyword If Test Failed | | ... | Verify VPP PID in Teardown +| | Run Keyword If Test Failed +| | ... | VPP Show Memory On All DUTs | ${nodes} | | FOR | ${action} | IN | @{actions} | | | Run Keyword | Additional Test Tear Down Action For ${action} | | END diff --git a/resources/tools/presentation/input_data_parser.py b/resources/tools/presentation/input_data_parser.py index 2f126f4c8b..aabb0c5d1b 100644 --- a/resources/tools/presentation/input_data_parser.py +++ b/resources/tools/presentation/input_data_parser.py @@ -922,7 +922,8 @@ class ExecutionChecker(ResultVisitor): :type test_kw: Keyword :returns: Nothing. """ - if test_kw.name.count(u"Show Runtime Counters On All Duts"): + if (test_kw.name.count(u"Show Runtime On All Duts") or + test_kw.name.count(u"Show Runtime Counters On All Duts")): self._lookup_kw_nr += 1 self._show_run_lookup_nr = 0 self._msg_type = u"test-show-runtime" -- cgit 1.2.3-korg