From 05742265d422e2c3b73aa62f3e0e460918655c77 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Wed, 14 Mar 2018 18:14:49 +0100 Subject: make test: early core detection, code cleanup Change-Id: I503d69f902f1c8d3107fb3f2b9f31b0083ebb747 Signed-off-by: Klement Sekera --- test/framework.py | 13 +++++-------- test/hook.py | 2 ++ test/run_tests.py | 26 ++++++++++++++++++++++++-- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/test/framework.py b/test/framework.py index 78aebf61e83..423feaf7545 100644 --- a/test/framework.py +++ b/test/framework.py @@ -8,7 +8,6 @@ import select import unittest import tempfile import time -import resource import faulthandler import random import copy @@ -23,9 +22,9 @@ from vpp_pg_interface import VppPGInterface from vpp_sub_interface import VppSubInterface from vpp_lo_interface import VppLoInterface from vpp_papi_provider import VppPapiProvider -from log import * +from log import RED, GREEN, YELLOW, double_line_delim, single_line_delim, \ + getLogger, colorize from vpp_object import VppObjectRegistry -from vpp_punt_socket import vpp_uds_socket_name if os.name == 'posix' and sys.version_info[0] < 3: # using subprocess32 is recommended by python official documentation # @ https://docs.python.org/2/library/subprocess.html @@ -263,8 +262,7 @@ class VppTestCase(unittest.TestCase): coredump_size, "}", "api-trace", "{", "on", "}", "api-segment", "{", "prefix", cls.shm_prefix, "}", "plugins", "{", "plugin", "dpdk_plugin.so", "{", - "disable", "}", "}", - "punt", "{", "socket", cls.punt_socket_path, "}"] + "disable", "}", "}", ] if plugin_path is not None: cls.vpp_cmdline.extend(["plugin_path", plugin_path]) cls.logger.info("vpp_cmdline: %s" % cls.vpp_cmdline) @@ -337,7 +335,6 @@ class VppTestCase(unittest.TestCase): cls.file_handler.setLevel(DEBUG) cls.logger.addHandler(cls.file_handler) cls.shm_prefix = cls.tempdir.split("/")[-1] - cls.punt_socket_path = '%s/%s' % (cls.tempdir, vpp_uds_socket_name) os.chdir(cls.tempdir) cls.logger.info("Temporary dir is %s, shm prefix is %s", cls.tempdir, cls.shm_prefix) @@ -391,7 +388,7 @@ class VppTestCase(unittest.TestCase): cls.quit() except: pass - raise t, v, tb + raise (t, v, tb) @classmethod def quit(cls): @@ -1086,7 +1083,7 @@ class VppTestRunner(unittest.TextTestRunner): for t in tests: if isinstance(t, unittest.suite.TestSuite): # this is a bunch of tests, recursively filter... - x = filter_tests(t, filter_cb) + x = VppTestRunner.filter_tests(t, filter_cb) if x.countTestCases() > 0: result.addTest(x) elif isinstance(t, unittest.TestCase): diff --git a/test/hook.py b/test/hook.py index 0ef1377ba99..44b432e5c25 100644 --- a/test/hook.py +++ b/test/hook.py @@ -62,6 +62,8 @@ class PollHook(Hook): def on_crash(self, core_path): if self.testcase.debug_core: + # notify parent process that we're handling a core file + open('%s/_core_handled' % self.testcase.tempdir, 'a').close() spawn_gdb(self.testcase.vpp_bin, core_path, self.logger) else: self.logger.critical("Core file present, debug with: gdb %s %s" % diff --git a/test/run_tests.py b/test/run_tests.py index a3d533633dc..419b8c679cb 100644 --- a/test/run_tests.py +++ b/test/run_tests.py @@ -13,6 +13,12 @@ from debug import spawn_gdb from log import global_logger from discover_tests import discover_tests +# timeout which controls how long the child has to finish after seeing +# a core dump in test temporary directory. If this is exceeded, parent assumes +# that child process is stuck (e.g. waiting for shm mutex, which will never +# get unlocked) and kill the child +core_timeout = 3 + def test_runner_wrapper(suite, keep_alive_pipe, result_pipe, failed_pipe): result = not VppTestRunner( @@ -62,6 +68,8 @@ def run_forked(suite): result = None failed = set() last_heard = time.time() + core_detected_at = None + debug_core = os.getenv("DEBUG", "").lower() == "core" while True: readable = select.select([keep_alive_parent_end.fileno(), result_parent_end.fileno(), @@ -82,7 +90,8 @@ def run_forked(suite): failed.add(failed_test.__name__) last_heard = time.time() fail = False - if last_heard + test_timeout < time.time(): + if last_heard + test_timeout < time.time() and \ + not os.path.isfile("%s/_core_handled" % last_test_temp_dir): fail = True global_logger.critical("Timeout while waiting for child test " "runner process (last test running was " @@ -93,6 +102,19 @@ def run_forked(suite): global_logger.critical("Child process unexpectedly died (last " "test running was `%s' in `%s')!" % (last_test, last_test_temp_dir)) + elif last_test_temp_dir and last_test_vpp_binary: + core_path = "%s/core" % last_test_temp_dir + if os.path.isfile(core_path): + if core_detected_at is None: + core_detected_at = time.time() + elif core_detected_at + core_timeout < time.time(): + if not os.path.isfile( + "%s/_core_handled" % last_test_temp_dir): + global_logger.critical( + "Child unresponsive and core-file exists in test " + "temporary directory!") + fail = True + if fail: failed_dir = os.getenv('VPP_TEST_FAILED_DIR') lttd = last_test_temp_dir.split("/")[-1] @@ -113,7 +135,7 @@ def run_forked(suite): if os.path.isfile(core_path): global_logger.error("Core-file exists in test temporary " "directory: %s!" % core_path) - if d and d.lower() == "core": + if debug_core: spawn_gdb(last_test_vpp_binary, core_path, global_logger) child.terminate() -- cgit 1.2.3-korg