From 909a6a1eb92ceacb983bcff088fb512352929c46 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Tue, 8 Aug 2017 04:33:53 +0200 Subject: make test: detect hung tests Run tests in a forked process with a set of pipes to communicate keep-alives and overall result. This allows us to detect when e.g. vpp dies mid-API call causing the test to hang waiting for response (which will never come since vpp died). Support setting a (per test case) TIMEOUT make test option to set timeout, with a default timeout of 120 seconds. Example - fail the test suite if any test-case fails to finish within 300s: make TIMEOUT=300 test Change-Id: I0d04f26a7232968f4bf043adf5d5b508f5018717 Signed-off-by: Klement Sekera --- test/Makefile | 1 + test/debug.py | 23 ++++++++++++++++ test/framework.py | 42 +++++++++++++++++++++++++++-- test/hook.py | 29 +++++--------------- test/run_tests.py | 72 +++++++++++++++++++++++++++++++++++++++++++++++--- test/sanity_run_vpp.py | 10 +++++-- 6 files changed, 148 insertions(+), 29 deletions(-) create mode 100644 test/debug.py diff --git a/test/Makefile b/test/Makefile index 7416afc2..d48a6b5a 100644 --- a/test/Makefile +++ b/test/Makefile @@ -184,6 +184,7 @@ help: @echo "Arguments controlling test runs:" @echo " V=[0|1|2] - set test verbosity level" @echo " FAILFAST=[0|1] - fail fast if 1, complete all tests if 0" + @echo " TIMEOUT= - fail test suite if any single test takes longer than to finish" @echo " DEBUG= - set VPP debugging kind" @echo " DEBUG=core - detect coredump and load it in gdb on crash" @echo " DEBUG=gdb - allow easy debugging by printing VPP PID " diff --git a/test/debug.py b/test/debug.py new file mode 100644 index 00000000..4516b8c1 --- /dev/null +++ b/test/debug.py @@ -0,0 +1,23 @@ +""" debug utilities """ + +import os +import pexpect + +gdb_path = '/usr/bin/gdb' + + +def spawn_gdb(binary_path, core_path, logger): + if os.path.isfile(gdb_path) and os.access(gdb_path, os.X_OK): + # automatically attach gdb + gdb_cmdline = "%s %s %s" % (gdb_path, binary_path, core_path) + gdb = pexpect.spawn(gdb_cmdline) + gdb.interact() + try: + gdb.terminate(True) + except: + pass + if gdb.isalive(): + raise Exception("GDB refused to die...") + else: + logger.error("Debugger '%s' does not exist or is not an " + "executable.." % gdb_path) diff --git a/test/framework.py b/test/framework.py index fd493db3..58b76bbe 100644 --- a/test/framework.py +++ b/test/framework.py @@ -12,7 +12,7 @@ import resource import faulthandler from collections import deque from threading import Thread, Event -from inspect import getdoc +from inspect import getdoc, isclass from traceback import format_exception from logging import FileHandler, DEBUG, Formatter from scapy.packet import Raw @@ -92,6 +92,39 @@ def running_extended_tests(): return False +class KeepAliveReporter(object): + """ + Singleton object which reports test start to parent process + """ + _shared_state = {} + + def __init__(self): + self.__dict__ = self._shared_state + + @property + def pipe(self): + return self._pipe + + @pipe.setter + def pipe(self, pipe): + if hasattr(self, '_pipe'): + raise Exception("Internal error - pipe should only be set once.") + self._pipe = pipe + + def send_keep_alive(self, test): + """ + Write current test tmpdir & desc to keep-alive pipe to signal liveness + """ + if isclass(test): + desc = test.__name__ + else: + desc = test.shortDescription() + if not desc: + desc = str(test) + + self.pipe.send((desc, test.vpp_bin, test.tempdir)) + + class VppTestCase(unittest.TestCase): """This subclass is a base class for VPP test cases that are implemented as classes. It provides methods to create and run test case. @@ -257,6 +290,8 @@ class VppTestCase(unittest.TestCase): cls.vpp_dead = False cls.registry = VppObjectRegistry() cls.vpp_startup_failed = False + cls.reporter = KeepAliveReporter() + cls.reporter.send_keep_alive(cls) # need to catch exceptions here because if we raise, then the cleanup # doesn't get called and we might end with a zombie vpp try: @@ -394,6 +429,7 @@ class VppTestCase(unittest.TestCase): def setUp(self): """ Clear trace before running each test""" + self.reporter.send_keep_alive(self) self.logger.debug("--- setUp() for %s.%s(%s) called ---" % (self.__class__.__name__, self._testMethodName, self._testMethodDoc)) @@ -865,13 +901,15 @@ class VppTestRunner(unittest.TextTestRunner): """Class maintaining the results of the tests""" return VppTestResult - def __init__(self, stream=sys.stderr, descriptions=True, verbosity=1, + def __init__(self, pipe, stream=sys.stderr, descriptions=True, verbosity=1, failfast=False, buffer=False, resultclass=None): # ignore stream setting here, use hard-coded stdout to be in sync # with prints from VppTestCase methods ... super(VppTestRunner, self).__init__(sys.stdout, descriptions, verbosity, failfast, buffer, resultclass) + reporter = KeepAliveReporter() + reporter.pipe = pipe test_option = "TEST" diff --git a/test/hook.py b/test/hook.py index 247704ec..f34e0c5b 100644 --- a/test/hook.py +++ b/test/hook.py @@ -1,8 +1,8 @@ import signal import os -import pexpect import traceback -from log import * +from log import RED, single_line_delim, double_line_delim +from debug import spawn_gdb, gdb_path class Hook(object): @@ -60,31 +60,16 @@ class PollHook(Hook): self.testcase = testcase self.logger = testcase.logger - def spawn_gdb(self, gdb_path, core_path): - gdb_cmdline = gdb_path + ' ' + self.testcase.vpp_bin + ' ' + core_path - gdb = pexpect.spawn(gdb_cmdline) - gdb.interact() - try: - gdb.terminate(True) - except: - pass - if gdb.isalive(): - raise Exception("GDB refused to die...") - def on_crash(self, core_path): if self.testcase.debug_core: - gdb_path = '/usr/bin/gdb' - if os.path.isfile(gdb_path) and os.access(gdb_path, os.X_OK): - # automatically attach gdb - self.spawn_gdb(gdb_path, core_path) - return - else: + if not spawn_gdb(self.testcase.vpp_bin, core_path): self.logger.error( "Debugger '%s' does not exist or is not an executable.." % gdb_path) - - self.logger.critical('core file present, debug with: gdb ' + - self.testcase.vpp_bin + ' ' + core_path) + else: + return + self.logger.critical("Core file present, debug with: gdb %s %s" % + (self.testcase.vpp_bin, core_path)) def poll_vpp(self): """ diff --git a/test/run_tests.py b/test/run_tests.py index 1b9c677d..6d477d88 100644 --- a/test/run_tests.py +++ b/test/run_tests.py @@ -2,10 +2,14 @@ import sys import os +import select import unittest import argparse import importlib +from multiprocessing import Process, Pipe from framework import VppTestRunner +from debug import spawn_gdb +from log import global_logger def add_from_dir(suite, directory): @@ -39,12 +43,38 @@ def add_from_dir(suite, directory): if method.startswith("test_"): suite.addTest(cls(method)) + +def test_runner_wrapper(keep_alive_pipe, result_pipe): + result = not VppTestRunner( + pipe=keep_alive_pipe, + verbosity=verbose, + failfast=failfast).run(suite).wasSuccessful() + result_pipe.send(result) + result_pipe.close() + keep_alive_pipe.close() + + +def handle_core(vpp_binary, core_path): + try: + d = os.getenv("DEBUG") + except: + d = None + if d and d.lower() == "core": + spawn_gdb(vpp_binary, core_path, global_logger) + + if __name__ == '__main__': try: verbose = int(os.getenv("V", 0)) except: verbose = 0 + default_test_timeout = 120 + try: + test_timeout = int(os.getenv("TIMEOUT", default_test_timeout)) + except: + test_timeout = default_test_timeout + parser = argparse.ArgumentParser(description="VPP unit tests") parser.add_argument("-f", "--failfast", action='count', help="fast failure flag") @@ -56,7 +86,43 @@ if __name__ == '__main__': suite = unittest.TestSuite() for d in args.dir: - print("Adding tests from directory tree %s" % d) + global_logger.info("Adding tests from directory tree %s" % d) add_from_dir(suite, d) - sys.exit(not VppTestRunner(verbosity=verbose, - failfast=failfast).run(suite).wasSuccessful()) + keep_alive_parent_end, keep_alive_child_end = Pipe(duplex=False) + result_parent_end, result_child_end = Pipe(duplex=False) + + p = Process(target=test_runner_wrapper, + args=(keep_alive_child_end, + result_child_end)) + p.start() + last_test_temp_dir = None + last_test_vpp_binary = None + last_test = None + result = None + while result is None: + readable = select.select([keep_alive_parent_end.fileno(), + result_parent_end.fileno(), + ], + [], [], test_timeout)[0] + if result_parent_end.fileno() in readable: + result = result_parent_end.recv() + elif keep_alive_parent_end.fileno() in readable: + while keep_alive_parent_end.poll(): + last_test, last_test_vpp_binary, last_test_temp_dir =\ + keep_alive_parent_end.recv() + else: + global_logger.critical("Timeout while waiting for child test " + "runner process (last test running was " + "`%s' in `%s')!" % + (last_test, last_test_temp_dir)) + if last_test_temp_dir and last_test_vpp_binary: + core_path = "%s/core" % last_test_temp_dir + if os.path.isfile(core_path): + global_logger.error("Core-file exists in test temporary " + "directory: %s!" % core_path) + handle_core(last_test_vpp_binary, core_path) + p.terminate() + result = -1 + keep_alive_parent_end.close() + result_parent_end.close() + sys.exit(result) diff --git a/test/sanity_run_vpp.py b/test/sanity_run_vpp.py index 527b618f..156608c1 100644 --- a/test/sanity_run_vpp.py +++ b/test/sanity_run_vpp.py @@ -1,9 +1,10 @@ #!/usr/bin/env python from __future__ import print_function -from framework import VppTestCase -from hook import VppDiedError +from multiprocessing import Pipe from sys import exit +from hook import VppDiedError +from framework import VppTestCase, KeepAliveReporter class SanityTestCase(VppTestCase): @@ -13,6 +14,9 @@ class SanityTestCase(VppTestCase): if __name__ == '__main__': rc = 0 tc = SanityTestCase + x, y = Pipe() + reporter = KeepAliveReporter() + reporter.pipe = y try: tc.setUpClass() except VppDiedError: @@ -22,5 +26,7 @@ if __name__ == '__main__': tc.tearDownClass() except: pass + x.close() + y.close() exit(rc) -- cgit 1.2.3-korg