diff options
author | selias <samelias@cisco.com> | 2017-09-20 15:47:39 +0200 |
---|---|---|
committer | Peter Mikus <pmikus@cisco.com> | 2017-09-28 09:51:37 +0000 |
commit | 0513ce5642dcf58b21f9b77d6b50e4e9a7a94f04 (patch) | |
tree | 440b531fe1a999a88d42a7c47341327a8e02ef3d /resources/libraries/python/honeycomb/HoneycombSetup.py | |
parent | 9ca5e946c7ea4b5740c257c71fe6d76eaa4206b4 (diff) |
HC Test: Fix intermittent failures of HC startup
- rework "check Honeycomb startup state" keyword
- increase timeout value for startup
- add separators to Honeycomb log file between test suites
- other minor fixes:
- remove usused import from NAT suite
- fix loop in ICMPv6 ND proxy traffic script
- add teardown to LISP cases
- update prerequisites for sub-interface SPAN test
Change-Id: I2f714ffc1d2e8435d3abe690d8bd15099e071c9a
Signed-off-by: selias <samelias@cisco.com>
Diffstat (limited to 'resources/libraries/python/honeycomb/HoneycombSetup.py')
-rw-r--r-- | resources/libraries/python/honeycomb/HoneycombSetup.py | 175 |
1 files changed, 87 insertions, 88 deletions
diff --git a/resources/libraries/python/honeycomb/HoneycombSetup.py b/resources/libraries/python/honeycomb/HoneycombSetup.py index aa6f26d856..9ac217909e 100644 --- a/resources/libraries/python/honeycomb/HoneycombSetup.py +++ b/resources/libraries/python/honeycomb/HoneycombSetup.py @@ -14,6 +14,7 @@ """Implementation of keywords for Honeycomb setup.""" from json import loads +from time import time, sleep from ipaddress import IPv6Address, AddressValueError @@ -136,106 +137,104 @@ class HoneycombSetup(object): node['host'])) @staticmethod - def check_honeycomb_startup_state(*nodes): - """Check state of Honeycomb service during startup on specified nodes. + def check_honeycomb_startup_state(node, timeout=360, retries=20, + interval=15): + """Repeatedly check the status of Honeycomb startup until it is fully + started or until timeout or max retries is reached. - Reads html path from template file oper_vpp_version.url. - - Honeycomb nodes reply with connection refused or the following status - codes depending on startup progress: codes 200, 401, 403, 404, 500, 503 - - :param nodes: List of DUT nodes starting Honeycomb. - :type nodes: list - :return: True if all GETs returned code 200(OK). - :rtype bool - """ - path = HcUtil.read_path_from_url_file("oper_vpp_version") - expected_status_codes = (HTTPCodes.UNAUTHORIZED, - HTTPCodes.FORBIDDEN, - HTTPCodes.NOT_FOUND, - HTTPCodes.SERVICE_UNAVAILABLE, - HTTPCodes.INTERNAL_SERVER_ERROR) + :param node: Honeycomb node. + :param timeout: Timeout value in seconds. + :param retries: Max number of retries. + :param interval: Interval between checks, in seconds. + :type node: dict + :type timeout: int + :type retries: int + :type interval: int + :raises HoneycombError: If the Honeycomb process IP cannot be found, + or if timeout or number of retries is exceeded.""" - for node in nodes: - if node['type'] == NodeType.DUT: - HoneycombSetup.print_ports(node) - try: - status_code, _ = HTTPRequest.get(node, path, - enable_logging=False) - except HTTPRequestError: - ssh = SSH() - ssh.connect(node) - ret_code, _, _ = ssh.exec_command_sudo( - "tail -n 100 /var/log/syslog") - if ret_code != 0: - # It's probably Centos - ssh.exec_command_sudo("tail -n 100 /var/log/messages") - raise - if status_code == HTTPCodes.OK: - logger.info("Honeycomb on node {0} is up and running". - format(node['host'])) - elif status_code in expected_status_codes: - if status_code == HTTPCodes.UNAUTHORIZED: - logger.info('Unauthorized. If this triggers keyword ' - 'timeout, verify Honeycomb username and ' - 'password.') - raise HoneycombError('Honeycomb on node {0} running but ' - 'not yet ready.'.format(node['host']), - enable_logging=False) - else: - raise HoneycombError('Unexpected return code: {0}.'. - format(status_code)) - - status_code, _ = HcUtil.get_honeycomb_data( - node, "config_vpp_interfaces") - if status_code != HTTPCodes.OK: - raise HoneycombError('Honeycomb on node {0} running but ' - 'not yet ready.'.format(node['host']), - enable_logging=False) - return True + ssh = SSH() + ssh.connect(node) + ret_code, pid, _ = ssh.exec_command("pgrep honeycomb") + if ret_code != 0: + raise HoneycombError("No process named 'honeycomb' found.") + + pid = int(pid) + count = 0 + start = time() + while time() - start < timeout and count < retries: + count += 1 + ret_code, _, _ = ssh.exec_command( + " | ".join([ + "sudo tail -n 1000 /var/log/syslog", + "grep {pid}".format(pid=pid), + "grep 'Honeycomb started successfully!'"]) + ) + if ret_code != 0: + logger.debug( + "Attempt #{count} failed on log check.".format( + count=count)) + sleep(interval) + continue + status_code_version, _ = HcUtil.get_honeycomb_data( + node, "oper_vpp_version") + status_code_if_cfg, _ = HcUtil.get_honeycomb_data( + node, "config_vpp_interfaces") + status_code_if_oper, _ = HcUtil.get_honeycomb_data( + node, "oper_vpp_interfaces") + if status_code_if_cfg == HTTPCodes.OK\ + and status_code_if_cfg == HTTPCodes.OK\ + and status_code_if_oper == HTTPCodes.OK: + logger.info("Check successful, Honeycomb is up and running.") + break + else: + logger.debug( + "Attempt ${count} failed on Restconf check. Status codes:\n" + "Version: {version}\n" + "Interface config: {if_cfg}\n" + "Interface operational: {if_oper}".format( + count=count, + version=status_code_version, + if_cfg=status_code_if_cfg, + if_oper=status_code_if_oper)) + sleep(interval) + continue + else: + _, vpp_status, _ = ssh.exec_command("service vpp status") + ret_code, hc_log, _ = ssh.exec_command( + " | ".join([ + "sudo tail -n 1000 /var/log/syslog", + "grep {pid}".format(pid=pid)])) + raise HoneycombError( + "Timeout or max retries exceeded. Status of VPP:\n" + "{vpp_status}\n" + "Syslog entries filtered by Honeycomb's pid:\n" + "{hc_log}".format(vpp_status=vpp_status, hc_log=hc_log)) @staticmethod - def check_honeycomb_shutdown_state(*nodes): + def check_honeycomb_shutdown_state(node): """Check state of Honeycomb service during shutdown on specified nodes. Honeycomb nodes reply with connection refused or the following status codes depending on shutdown progress: codes 200, 404. - :param nodes: List of DUT nodes stopping Honeycomb. - :type nodes: list + :param node: List of DUT nodes stopping Honeycomb. + :type node: dict :return: True if all GETs fail to connect. :rtype bool """ - cmd = "ps -ef | grep -v grep | grep honeycomb" - for node in nodes: - if node['type'] == NodeType.DUT: - try: - status_code, _ = HTTPRequest.get(node, '/index.html', - enable_logging=False) - if status_code == HTTPCodes.OK: - raise HoneycombError('Honeycomb on node {0} is still ' - 'running.'.format(node['host']), - enable_logging=False) - elif status_code == HTTPCodes.NOT_FOUND: - raise HoneycombError('Honeycomb on node {0} is shutting' - ' down.'.format(node['host']), - enable_logging=False) - else: - raise HoneycombError('Unexpected return code: {0}.'. - format(status_code)) - except HTTPRequestError: - logger.debug('Connection refused, checking the process ' - 'state ...') - ssh = SSH() - ssh.connect(node) - (ret_code, _, _) = ssh.exec_command_sudo(cmd) - if ret_code == 0: - raise HoneycombError('Honeycomb on node {0} is still ' - 'running.'.format(node['host']), - enable_logging=False) - else: - logger.info("Honeycomb on node {0} has stopped". - format(node['host'])) + cmd = "pgrep honeycomb" + + ssh = SSH() + ssh.connect(node) + (ret_code, _, _) = ssh.exec_command_sudo(cmd) + if ret_code == 0: + raise HoneycombError('Honeycomb on node {0} is still ' + 'running.'.format(node['host']), + enable_logging=False) + else: + logger.info("Honeycomb on node {0} has stopped". + format(node['host'])) return True @staticmethod |