diff options
author | Peter Mikus <pmikus@cisco.com> | 2019-02-06 16:15:58 +0000 |
---|---|---|
committer | Peter Mikus <pmikus@cisco.com> | 2019-02-08 14:55:39 +0000 |
commit | 390cb24b3945442fba9f84dc292b6a4138d4835e (patch) | |
tree | 406343768fd1e82097cd880c2752026e32b129c8 /resources/libraries | |
parent | 7348bd7bf8eee50dbc2316ee9bae3ad0403522e9 (diff) |
CSIT-845 Capture VPP core-dump from vpp crash on DUTs
Change-Id: I987dcd5092d8527c9aefbe093e1ed7ae144d191b
Signed-off-by: Peter Mikus <pmikus@cisco.com>
Diffstat (limited to 'resources/libraries')
-rw-r--r-- | resources/libraries/python/CoreDumpUtil.py | 159 | ||||
-rw-r--r-- | resources/libraries/python/LimitUtil.py | 60 | ||||
-rw-r--r-- | resources/libraries/python/SysctlUtil.py | 57 | ||||
-rw-r--r-- | resources/libraries/python/constants.py | 8 | ||||
-rw-r--r-- | resources/libraries/python/ssh.py | 32 | ||||
-rw-r--r-- | resources/libraries/robot/performance/performance_setup.robot | 104 | ||||
-rw-r--r-- | resources/libraries/robot/shared/default.robot | 2 |
7 files changed, 336 insertions, 86 deletions
diff --git a/resources/libraries/python/CoreDumpUtil.py b/resources/libraries/python/CoreDumpUtil.py new file mode 100644 index 0000000000..7843a59c09 --- /dev/null +++ b/resources/libraries/python/CoreDumpUtil.py @@ -0,0 +1,159 @@ +# Copyright (c) 2019 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Core dump library.""" + +from time import time + +from resources.libraries.python.constants import Constants +from resources.libraries.python.DUTSetup import DUTSetup +from resources.libraries.python.LimitUtil import LimitUtil +from resources.libraries.python.SysctlUtil import SysctlUtil +from resources.libraries.python.ssh import exec_cmd_no_error, scp_node +from resources.libraries.python.topology import NodeType + +__all__ = ["CoreDumpUtil"] + + +class CoreDumpUtil(object): + """Class contains methods for processing core dumps.""" + + # Use one instance of class for all tests. If the functionality should + # be enabled per suite or per test case, change the scope to "TEST SUITE" or + # "TEST CASE" respectively. + ROBOT_LIBRARY_SCOPE = 'GLOBAL' + + def __init__(self): + """Initialize CoreDumpUtil class.""" + # Corekeeper is configured. + self._corekeeper_configured = False + # Enable setting core limit for process. This can be used to prevent + # library to further set the core limit for unwanted behavior. 
+ self._core_limit_enabled = True + + def set_core_limit_enabled(self): + """Enable setting of core limit for PID.""" + self._core_limit_enabled = True + + def set_core_limit_disabled(self): + """Disable setting of core limit for PID.""" + self._core_limit_enabled = False + + def is_core_limit_enabled(self): + """Check if core limit is set for process. + + :returns: True if core limit is set for process. + :rtype: bool + """ + return self._corekeeper_configured and self._core_limit_enabled + + def setup_corekeeper_on_all_nodes(self, nodes): + """Setup core dumps system wide on all nodes. + + :param nodes: Nodes in the topology. + :type nodes: dict + """ + for node in nodes.values(): + # Any binary which normally would not be dumped is dumped anyway, + # but only if the "core_pattern" kernel sysctl is set to either a + # pipe handler or a fully qualified path. (For more details on this + # limitation, see CVE-2006-2451.) This mode is appropriate when + # administrators are attempting to debug problems in a normal + # environment, and either have a core dump pipe handler that knows + # to treat privileged core dumps with care, or specific directory + # defined for catching core dumps. If a core dump happens without a + # pipe handler or fully qualified path, a message will be emitted to + # syslog warning about the lack of a correct setting. + SysctlUtil.set_sysctl_value(node, 'fs.suid_dumpable', 2) + + # Specify a core dumpfile pattern name (for the output filename). + # %p pid + # %u uid (in initial user namespace) + # %g gid (in initial user namespace) + # %s signal number + # %t UNIX time of dump + # %h hostname + # %e executable filename (may be shortened) + SysctlUtil.set_sysctl_value(node, 'kernel.core_pattern', + Constants.KERNEL_CORE_PATTERN) + + self._corekeeper_configured = True + + @staticmethod + def enable_coredump_limit(node, pid): + """Enable coredump for PID(s) by setting no core limits. + + :param node: Node in the topology. 
+ :param pid: Process ID(s) to set core dump limit to unlimited. + :type node: dict + :type pid: list or int + """ + if isinstance(pid, list): + for item in pid: + LimitUtil.set_pid_limit(node, item, 'core', 'unlimited') + LimitUtil.get_pid_limit(node, item) + else: + LimitUtil.set_pid_limit(node, pid, 'core', 'unlimited') + LimitUtil.get_pid_limit(node, pid) + + def enable_coredump_limit_vpp_on_all_duts(self, nodes): + """Enable coredump for all VPP PIDs by setting no core limits on all + DUTs if setting of core limit by this library is enabled. + + :param nodes: Nodes in the topology. + :type nodes: dict + """ + for node in nodes.values(): + if node['type'] == NodeType.DUT and self.is_core_limit_enabled(): + vpp_pid = DUTSetup.get_vpp_pid(node) + self.enable_coredump_limit(node, vpp_pid) + + def get_core_files_on_all_nodes(self, nodes, disable_on_success=True): + """Compress all core files into single file and remove the original + core files on all nodes. + + :param nodes: Nodes in the topology. + :param disable_on_success: If True, disable setting of core limit by + this instance of library. Default: True + :type nodes: dict + :type disable_on_success: bool + """ + for node in nodes.values(): + uuid = str(time()).replace('.', '') + name = '{uuid}.tar.lzo.lrz.xz'.format(uuid=uuid) + + command = ('[ -e {dir}/*.core ] && sudo tar c {dir}/*.core | ' + 'lzop -1 | ' + 'lrzip -n -T -p 1 -w 5 | ' + 'xz -9e > {dir}/{name} && ' + 'sudo rm -f {dir}/*.core' + .format(dir=Constants.CORE_DUMP_DIR, name=name)) + try: + exec_cmd_no_error(node, command, timeout=3600) + if disable_on_success: + self.set_core_limit_disabled() + except RuntimeError: + # If compress was not successful, ignore error and skip further + # processing. 
+ continue + + local_path = 'archive/{name}'.format(name=name) + remote_path = '{dir}/{name}'.format(dir=Constants.CORE_DUMP_DIR, + name=name) + try: + scp_node(node, local_path, remote_path, get=True, timeout=3600) + command = 'rm -f {dir}/{name}'\ + .format(dir=Constants.CORE_DUMP_DIR, name=name) + exec_cmd_no_error(node, command, sudo=True) + except RuntimeError: + pass diff --git a/resources/libraries/python/LimitUtil.py b/resources/libraries/python/LimitUtil.py new file mode 100644 index 0000000000..45aeaff5a0 --- /dev/null +++ b/resources/libraries/python/LimitUtil.py @@ -0,0 +1,60 @@ +# Copyright (c) 2019 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Linux limit library.""" + +from resources.libraries.python.ssh import exec_cmd_no_error + +__all__ = ["LimitUtil"] + + +class LimitUtil(object): + """Class contains methods for getting or setting process resource limits.""" + + @staticmethod + def get_pid_limit(node, pid): + """Get process resource limits. + + :param node: Node in the topology. + :param pid: Process ID. + :type node: dict + :type pid: int + """ + command = 'prlimit --noheadings --pid={pid}'.format(pid=pid) + + message = 'Node {host} failed to run: {command}'.\ + format(host=node['host'], command=command) + + exec_cmd_no_error(node, command, sudo=True, message=message) + + @staticmethod + def set_pid_limit(node, pid, resource, limit): + """Set process resource limits. + + :param node: Node in the topology. 
+ :param pid: Process ID. + :param resource: Resource to set limits. + :param limit: Limit value. + :type node: dict + :type pid: int + :type resource: str + :type limit: str + """ + command = 'prlimit --{resource}={limit} --pid={pid}'.format( + resource=resource, limit=limit, pid=pid) + + message = 'Node {host} failed to run: {command}'.\ + format(host=node['host'], command=command) + + exec_cmd_no_error(node, command, sudo=True, message=message) + diff --git a/resources/libraries/python/SysctlUtil.py b/resources/libraries/python/SysctlUtil.py new file mode 100644 index 0000000000..0db7e2c5e5 --- /dev/null +++ b/resources/libraries/python/SysctlUtil.py @@ -0,0 +1,57 @@ +# Copyright (c) 2019 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Linux sysctl library.""" + +from resources.libraries.python.ssh import exec_cmd_no_error + +__all__ = ["SysctlUtil"] + + +class SysctlUtil(object): + """Class contains methods for getting or setting sysctl settings.""" + + @staticmethod + def get_sysctl_value(node, key): + """Get sysctl key. + + :param node: Node in the topology. + :param key: Key that will be read. 
+ :type node: dict + :type key: str + """ + command = 'sysctl {key}'.format(key=key) + + message = 'Node {host} failed to run: {command}'.\ + format(host=node['host'], command=command) + + exec_cmd_no_error(node, command, sudo=True, message=message) + + @staticmethod + def set_sysctl_value(node, key, value): + """Set sysctl key to specific value. + + :param node: Node in the topology. + :param key: Key that will be set. + :param value: Value to set. + :type node: dict + :type key: str + :type value: str + """ + command = 'sysctl -w {key}={value}'.format(key=key, value=value) + + message = 'Node {host} failed to run: {command}'.\ + format(host=node['host'], command=command) + + exec_cmd_no_error(node, command, sudo=True, message=message) + diff --git a/resources/libraries/python/constants.py b/resources/libraries/python/constants.py index 43fbf1a76d..b4a96694b1 100644 --- a/resources/libraries/python/constants.py +++ b/resources/libraries/python/constants.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. +# Copyright (c) 2019 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: @@ -64,3 +64,9 @@ class Constants(object): # ODL Client Restconf listener port ODL_PORT = 8181 + + # Sysctl kernel.core_pattern + KERNEL_CORE_PATTERN = '/tmp/%p-%u-%g-%s-%t-%h-%e.core' + + # Core dump directory + CORE_DUMP_DIR = '/tmp' diff --git a/resources/libraries/python/ssh.py b/resources/libraries/python/ssh.py index a23d163768..60f62561be 100644 --- a/resources/libraries/python/ssh.py +++ b/resources/libraries/python/ssh.py @@ -22,7 +22,7 @@ from time import time, sleep from paramiko import RSAKey, SSHClient, AutoAddPolicy from paramiko.ssh_exception import SSHException, NoValidConnectionsError from robot.api import logger -from scp import SCPClient +from scp import SCPClient, SCPException __all__ = ["exec_cmd", "exec_cmd_no_error"] @@ -467,3 +467,33 @@ def exec_cmd_no_error(node, cmd, timeout=600, sudo=False, message=None): raise RuntimeError(msg) return stdout, stderr + +def scp_node(node, local_path, remote_path, get=False, timeout=30): + """Copy files from local_path to remote_path or vice versa. + + :param node: SUT node. + :param local_path: Path to local file that should be uploaded; or + path where to save remote file. + :param remote_path: Remote path where to place uploaded file; or + path to remote file which should be downloaded. + :param get: scp operation to perform. Default is put. + :param timeout: Timeout value in seconds. + :type node: dict + :type local_path: str + :type remote_path: str + :type get: bool + :type timeout: int + :raises RuntimeError: If SSH connection failed or SCP transfer failed. + """ + ssh = SSH() + + try: + ssh.connect(node) + except SSHException: + raise RuntimeError('Failed to connect to {host}!' + .format(host=node['host'])) + try: + ssh.scp(local_path, remote_path, get, timeout) + except SCPException: + raise RuntimeError('SCP execution failed on {host}!' 
+ .format(host=node['host'])) diff --git a/resources/libraries/robot/performance/performance_setup.robot b/resources/libraries/robot/performance/performance_setup.robot index ed6f4744db..1264d440ed 100644 --- a/resources/libraries/robot/performance/performance_setup.robot +++ b/resources/libraries/robot/performance/performance_setup.robot @@ -1,4 +1,4 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. +# Copyright (c) 2019 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -666,9 +666,16 @@ # Tests teardowns +| Tear down performance test +| | [Documentation] | Common test teardown for performance tests. +| | ... +| | Remove All Added Ports On All DUTs From Topology | ${nodes} +| | Show VAT History On All DUTs | ${nodes} +| | Get Core Files on All Nodes | ${nodes} +| | Set Test Variable | ${pkt_trace} | ${True} + | Tear down performance discovery test -| | [Documentation] | Common test teardown for ndrdisc and pdrdisc performance \ -| | ... | tests. +| | [Documentation] | Common test teardown for ndrpdr performance tests. | | ... | | ... | *Arguments:* | | ... | - rate - Rate for sending packets. Type: string @@ -682,10 +689,8 @@ | | ... | | [Arguments] | ${rate} | ${framesize} | ${topology_type} | | ... -| | Remove All Added Ports On All DUTs From Topology | ${nodes} -| | Show VAT History On All DUTs | ${nodes} -| | Run Keyword If Test Failed -| | ... | Set Test Variable | ${pkt_trace} | ${True} +| | Tear down performance test +| | Show statistics on all DUTs | ${nodes} | | Run Keyword If Test Failed | | ... | Traffic should pass with no loss | ${perf_trial_duration} | ${rate} | | ... | ${framesize} | ${topology_type} | fail_on_loss=${False} @@ -694,8 +699,7 @@ | | [Documentation] | Common test teardown for max-received-rate performance | | ... | tests. | | ... 
-| | Remove All Added Ports On All DUTs From Topology | ${nodes} -| | Show VAT History On All DUTs | ${nodes} +| | Tear down performance test | Tear down performance test with wrk | | [Documentation] | Common test teardown for ndrdisc and pdrdisc performance \ @@ -705,9 +709,7 @@ | | ... | | ... | \| Tear down performance test with wrk \| | | ... -| | Remove All Added Ports On All DUTs From Topology | ${nodes} -| | Show VAT History On All DUTs | ${nodes} -| | Show statistics on all DUTs | ${nodes} +| | Tear down performance test | Tear down performance test with container | | [Documentation] @@ -739,19 +741,15 @@ | | ... | ${dut1_node}=${None} | ${dut1_vm_refs}=${None} | | ... | ${dut2_node}=${None} | ${dut2_vm_refs}=${None} | | ... -| | Remove All Added Ports On All DUTs From Topology | ${nodes} -| | Show VAT History On All DUTs | ${nodes} +| | Tear down performance discovery test | ${rate} | ${framesize} +| | ... | ${topology_type} | | Show VPP vhost on all DUTs | ${nodes} -| | Show statistics on all DUTs | ${nodes} -| | Run Keyword If Test Failed -| | ... | Traffic should pass with no loss | ${perf_trial_duration} | ${rate} -| | ... | ${framesize} | ${topology_type} | fail_on_loss=${False} | | Run keyword unless | ${dut1_node}==${None} | | ... | Tear down guest VM with dpdk-testpmd | ${dut1} | ${dut1_vm_refs} | | Run keyword unless | ${dut2_node}==${None} | | ... | Tear down guest VM with dpdk-testpmd | ${dut2} | ${dut2_vm_refs} -| Tear down mrr test with vhost and VM with dpdk-testpmd +| Tear down performance mrr test with vhost and VM with dpdk-testpmd | | [Documentation] | Common test teardown for mrr tests which use | | ... | vhost(s) and VM(s) with dpdk-testpmd. | | ... @@ -770,10 +768,8 @@ | | [Arguments] | ${dut1_node}=${None} | ${dut1_vm_refs}=${None} | | ... | ${dut2_node}=${None} | ${dut2_vm_refs}=${None} | | ... 
-| | Remove All Added Ports On All DUTs From Topology | ${nodes} -| | Show VAT History On All DUTs | ${nodes} +| | Tear down performance mrr test | | Show VPP vhost on all DUTs | ${nodes} -| | Show statistics on all DUTs | ${nodes} | | Run keyword unless | ${dut1_node}==${None} | | ... | Tear down guest VM with dpdk-testpmd | ${dut1} | ${dut1_vm_refs} | | Run keyword unless | ${dut2_node}==${None} @@ -817,72 +813,13 @@ | | [Arguments] | ${dut1_node}=${None} | ${dut1_vm_refs}=${None} | | ... | ${dut2_node}=${None} | ${dut2_vm_refs}=${None} | | ... -| | Tear down mrr test with vhost and VM with dpdk-testpmd +| | Tear down performance mrr test with vhost and VM with dpdk-testpmd | | ... | ${dut1_node} | ${dut1_vm_refs} | | ... | ${dut2_node} | ${dut2_vm_refs} | | Run Keyword If Test Failed | Vpp Log Plugin Acl Settings | ${dut1} | | Run Keyword If Test Failed | Run Keyword And Ignore Error | | ... | Vpp Log Plugin Acl Interface Assignment | ${dut1} -| Tear down performance pdrchk test with vhost and VM with dpdk-testpmd -| | [Documentation] | Common test teardown for performance pdrchk tests which \ -| | ... | use vhost(s) and VM(s) with dpdk-testpmd. -| | ... -| | ... | *Arguments:* -| | ... | - rate - Rate for sending packets. Type: string -| | ... | - framesize - L2 Frame Size [B]. Type: integer -| | ... | - topology_type - Topology type. Type: string -| | ... | - dut1_node - Node where to clean qemu. Type: dictionary -| | ... | - dut1_vm_refs - VM references on node. Type: dictionary -| | ... | - dut2_node - Node where to clean qemu. Type: dictionary -| | ... | - dut2_vm_refs - VM references on node. Type: dictionary -| | ... -| | ... | *Example:* -| | ... -| | ... | \| Tear down performance pdrchk test with vhost and VM with \ -| | ... | dpdk-testpmd \| 4.0mpps \| 64 \| 3-node-IPv4 \| ${node['DUT1']} \ -| | ... | \| ${dut_vm_refs} \| ${node['DUT2']} \| ${dut_vm_refs} \| -| | ... -| | [Arguments] | ${rate} | ${framesize} | ${topology_type} -| | ... 
| ${dut1_node}=${None} | ${dut1_vm_refs}=${None} -| | ... | ${dut2_node}=${None} | ${dut2_vm_refs}=${None} -| | ... -| | Remove All Added Ports On All DUTs From Topology | ${nodes} -| | Show VAT History On All DUTs | ${nodes} -| | Show VPP vhost on all DUTs | ${nodes} -| | Show statistics on all DUTs | ${nodes} -| | Run keyword unless | ${dut1_node}==${None} -| | ... | Tear down guest VM with dpdk-testpmd | ${dut1} | ${dut1_vm_refs} -| | Run keyword unless | ${dut2_node}==${None} -| | ... | Tear down guest VM with dpdk-testpmd | ${dut2} | ${dut2_vm_refs} - -| Tear down performance mrr test with vhost and VM with dpdk-testpmd -| | [Documentation] | Common test teardown for performance mrr tests which \ -| | ... | use vhost(s) and VM(s) with dpdk-testpmd. -| | ... -| | ... | *Arguments:* -| | ... | - dut1_node - Node where to clean qemu. Type: dictionary -| | ... | - dut1_vm_refs - VM references on node. Type: dictionary -| | ... | - dut2_node - Node where to clean qemu. Type: dictionary -| | ... | - dut2_vm_refs - VM references on node. Type: dictionary -| | ... -| | ... | *Example:* -| | ... -| | ... | \| Tear down performance mrr test with vhost and VM with \ -| | ... | dpdk-testpmd \| ${node['DUT1']} \| ${dut_vm_refs} \| ${node['DUT2']} \ -| | ... | \| ${dut_vm_refs} \| -| | ... -| | [Arguments] | ${dut1_node}=${None} | ${dut1_vm_refs}=${None} -| | ... | ${dut2_node}=${None} | ${dut2_vm_refs}=${None} -| | ... -| | Remove All Added Ports On All DUTs From Topology | ${nodes} -| | Show VAT History On All DUTs | ${nodes} -| | Show VPP vhost on all DUTs | ${nodes} -| | Run keyword unless | ${dut1_node}==${None} -| | ... | Tear down guest VM with dpdk-testpmd | ${dut1} | ${dut1_vm_refs} -| | Run keyword unless | ${dut2_node}==${None} -| | ... | Tear down guest VM with dpdk-testpmd | ${dut2} | ${dut2_vm_refs} - | Tear down DPDK 2-node performance topology | | [Documentation] | | ... | Suite teardown phase with traffic generator teardown. @@ -1044,8 +981,7 @@ | | ... 
| | ... | \| Tear down mrr test with SRv6 with encapsulation \| | | ... -| | Remove All Added Ports On All DUTs From Topology | ${nodes} -| | Show VAT History On All DUTs | ${nodes} +| | Tear down performance mrr test | | Run Keyword If Test Failed | Show SR Policies on all DUTs | ${nodes} | | Run Keyword If Test Failed | | ... | Show SR Steering Policies on all DUTs | ${nodes} diff --git a/resources/libraries/robot/shared/default.robot b/resources/libraries/robot/shared/default.robot index 981c329f63..7fdb14ed58 100644 --- a/resources/libraries/robot/shared/default.robot +++ b/resources/libraries/robot/shared/default.robot @@ -19,6 +19,7 @@ | Library | OperatingSystem | Library | String | ... +| Library | resources.libraries.python.CoreDumpUtil | Library | resources.libraries.python.CpuUtils | Library | resources.libraries.python.DUTSetup | Library | resources.libraries.python.L2Util @@ -431,6 +432,7 @@ | | ${duts}= | Get Matches | ${nodes} | DUT* | | :FOR | ${dut} | IN | @{duts} | | | Run keyword | ${dut}.Apply Config | restart_vpp=${restart_vpp} +| | Enable Coredump Limit VPP on All DUTs | ${nodes} | | Update All Interface Data On All Nodes | ${nodes} | skip_tg=${True} | Save VPP PIDs |