make test: detect hung tests 59/7959/4
authorKlement Sekera <ksekera@cisco.com>
Tue, 8 Aug 2017 02:33:53 +0000 (04:33 +0200)
committerDave Wallace <dwallacelf@gmail.com>
Thu, 10 Aug 2017 00:14:59 +0000 (00:14 +0000)
Run tests in a forked process with a set of pipes to communicate
keep-alives and overall result. This allows us to detect when e.g.
vpp dies mid-API call causing the test to hang waiting for response
(which will never come since vpp died).

Support setting a (per test case) TIMEOUT make test option to set timeout,
with a default timeout of 120 seconds.

Example - fail the test suite if any test-case fails to finish within
300s:

make TIMEOUT=300 test

Change-Id: I0d04f26a7232968f4bf043adf5d5b508f5018717
Signed-off-by: Klement Sekera <ksekera@cisco.com>
test/Makefile
test/debug.py [new file with mode: 0644]
test/framework.py
test/hook.py
test/run_tests.py
test/sanity_run_vpp.py

index 7416afc..d48a6b5 100644 (file)
@@ -184,6 +184,7 @@ help:
        @echo "Arguments controlling test runs:"
        @echo " V=[0|1|2]            - set test verbosity level"
        @echo " FAILFAST=[0|1]       - fail fast if 1, complete all tests if 0"
+       @echo " TIMEOUT=<timeout>    - fail test suite if any single test takes longer than <timeout> to finish"
        @echo " DEBUG=<type>         - set VPP debugging kind"
        @echo "    DEBUG=core        - detect coredump and load it in gdb on crash"
        @echo "    DEBUG=gdb         - allow easy debugging by printing VPP PID "
diff --git a/test/debug.py b/test/debug.py
new file mode 100644 (file)
index 0000000..4516b8c
--- /dev/null
@@ -0,0 +1,23 @@
+""" debug utilities """
+
+import os
+import pexpect
+
+gdb_path = '/usr/bin/gdb'
+
+
+def spawn_gdb(binary_path, core_path, logger):
+    if os.path.isfile(gdb_path) and os.access(gdb_path, os.X_OK):
+        # automatically attach gdb
+        gdb_cmdline = "%s %s %s" % (gdb_path, binary_path, core_path)
+        gdb = pexpect.spawn(gdb_cmdline)
+        gdb.interact()
+        try:
+            gdb.terminate(True)
+        except:
+            pass
+        if gdb.isalive():
+            raise Exception("GDB refused to die...")
+    else:
+        logger.error("Debugger '%s' does not exist or is not an "
+                     "executable.." % gdb_path)
index fd493db..58b76bb 100644 (file)
@@ -12,7 +12,7 @@ import resource
 import faulthandler
 from collections import deque
 from threading import Thread, Event
-from inspect import getdoc
+from inspect import getdoc, isclass
 from traceback import format_exception
 from logging import FileHandler, DEBUG, Formatter
 from scapy.packet import Raw
@@ -92,6 +92,39 @@ def running_extended_tests():
     return False
 
 
+class KeepAliveReporter(object):
+    """
+    Singleton object which reports test start to parent process
+    """
+    _shared_state = {}
+
+    def __init__(self):
+        self.__dict__ = self._shared_state
+
+    @property
+    def pipe(self):
+        return self._pipe
+
+    @pipe.setter
+    def pipe(self, pipe):
+        if hasattr(self, '_pipe'):
+            raise Exception("Internal error - pipe should only be set once.")
+        self._pipe = pipe
+
+    def send_keep_alive(self, test):
+        """
+        Write current test tmpdir & desc to keep-alive pipe to signal liveness
+        """
+        if isclass(test):
+            desc = test.__name__
+        else:
+            desc = test.shortDescription()
+            if not desc:
+                desc = str(test)
+
+        self.pipe.send((desc, test.vpp_bin, test.tempdir))
+
+
 class VppTestCase(unittest.TestCase):
     """This subclass is a base class for VPP test cases that are implemented as
     classes. It provides methods to create and run test case.
@@ -257,6 +290,8 @@ class VppTestCase(unittest.TestCase):
         cls.vpp_dead = False
         cls.registry = VppObjectRegistry()
         cls.vpp_startup_failed = False
+        cls.reporter = KeepAliveReporter()
+        cls.reporter.send_keep_alive(cls)
         # need to catch exceptions here because if we raise, then the cleanup
         # doesn't get called and we might end with a zombie vpp
         try:
@@ -394,6 +429,7 @@ class VppTestCase(unittest.TestCase):
 
     def setUp(self):
         """ Clear trace before running each test"""
+        self.reporter.send_keep_alive(self)
         self.logger.debug("--- setUp() for %s.%s(%s) called ---" %
                           (self.__class__.__name__, self._testMethodName,
                            self._testMethodDoc))
@@ -865,13 +901,15 @@ class VppTestRunner(unittest.TextTestRunner):
         """Class maintaining the results of the tests"""
         return VppTestResult
 
-    def __init__(self, stream=sys.stderr, descriptions=True, verbosity=1,
+    def __init__(self, pipe, stream=sys.stderr, descriptions=True, verbosity=1,
                  failfast=False, buffer=False, resultclass=None):
         # ignore stream setting here, use hard-coded stdout to be in sync
         # with prints from VppTestCase methods ...
         super(VppTestRunner, self).__init__(sys.stdout, descriptions,
                                             verbosity, failfast, buffer,
                                             resultclass)
+        reporter = KeepAliveReporter()
+        reporter.pipe = pipe
 
     test_option = "TEST"
 
index 247704e..f34e0c5 100644 (file)
@@ -1,8 +1,8 @@
 import signal
 import os
-import pexpect
 import traceback
-from log import *
+from log import RED, single_line_delim, double_line_delim
+from debug import spawn_gdb, gdb_path
 
 
 class Hook(object):
@@ -60,31 +60,16 @@ class PollHook(Hook):
         self.testcase = testcase
         self.logger = testcase.logger
 
-    def spawn_gdb(self, gdb_path, core_path):
-        gdb_cmdline = gdb_path + ' ' + self.testcase.vpp_bin + ' ' + core_path
-        gdb = pexpect.spawn(gdb_cmdline)
-        gdb.interact()
-        try:
-            gdb.terminate(True)
-        except:
-            pass
-        if gdb.isalive():
-            raise Exception("GDB refused to die...")
-
     def on_crash(self, core_path):
         if self.testcase.debug_core:
-            gdb_path = '/usr/bin/gdb'
-            if os.path.isfile(gdb_path) and os.access(gdb_path, os.X_OK):
-                # automatically attach gdb
-                self.spawn_gdb(gdb_path, core_path)
-                return
-            else:
+            if not spawn_gdb(self.testcase.vpp_bin, core_path):
                 self.logger.error(
                     "Debugger '%s' does not exist or is not an executable.." %
                     gdb_path)
-
-        self.logger.critical('core file present, debug with: gdb ' +
-                             self.testcase.vpp_bin + ' ' + core_path)
+            else:
+                return
+        self.logger.critical("Core file present, debug with: gdb %s %s" %
+                             (self.testcase.vpp_bin, core_path))
 
     def poll_vpp(self):
         """
index 1b9c677..6d477d8 100644 (file)
@@ -2,10 +2,14 @@
 
 import sys
 import os
+import select
 import unittest
 import argparse
 import importlib
+from multiprocessing import Process, Pipe
 from framework import VppTestRunner
+from debug import spawn_gdb
+from log import global_logger
 
 
 def add_from_dir(suite, directory):
@@ -39,12 +43,38 @@ def add_from_dir(suite, directory):
                 if method.startswith("test_"):
                     suite.addTest(cls(method))
 
+
+def test_runner_wrapper(keep_alive_pipe, result_pipe):
+    result = not VppTestRunner(
+        pipe=keep_alive_pipe,
+        verbosity=verbose,
+        failfast=failfast).run(suite).wasSuccessful()
+    result_pipe.send(result)
+    result_pipe.close()
+    keep_alive_pipe.close()
+
+
+def handle_core(vpp_binary, core_path):
+    try:
+        d = os.getenv("DEBUG")
+    except:
+        d = None
+    if d and d.lower() == "core":
+        spawn_gdb(vpp_binary, core_path, global_logger)
+
+
 if __name__ == '__main__':
     try:
         verbose = int(os.getenv("V", 0))
     except:
         verbose = 0
 
+    default_test_timeout = 120
+    try:
+        test_timeout = int(os.getenv("TIMEOUT", default_test_timeout))
+    except:
+        test_timeout = default_test_timeout
+
     parser = argparse.ArgumentParser(description="VPP unit tests")
     parser.add_argument("-f", "--failfast", action='count',
                         help="fast failure flag")
@@ -56,7 +86,43 @@ if __name__ == '__main__':
 
     suite = unittest.TestSuite()
     for d in args.dir:
-        print("Adding tests from directory tree %s" % d)
+        global_logger.info("Adding tests from directory tree %s" % d)
         add_from_dir(suite, d)
-    sys.exit(not VppTestRunner(verbosity=verbose,
-                               failfast=failfast).run(suite).wasSuccessful())
+    keep_alive_parent_end, keep_alive_child_end = Pipe(duplex=False)
+    result_parent_end, result_child_end = Pipe(duplex=False)
+
+    p = Process(target=test_runner_wrapper,
+                args=(keep_alive_child_end,
+                      result_child_end))
+    p.start()
+    last_test_temp_dir = None
+    last_test_vpp_binary = None
+    last_test = None
+    result = None
+    while result is None:
+        readable = select.select([keep_alive_parent_end.fileno(),
+                                  result_parent_end.fileno(),
+                                  ],
+                                 [], [], test_timeout)[0]
+        if result_parent_end.fileno() in readable:
+            result = result_parent_end.recv()
+        elif keep_alive_parent_end.fileno() in readable:
+            while keep_alive_parent_end.poll():
+                last_test, last_test_vpp_binary, last_test_temp_dir =\
+                    keep_alive_parent_end.recv()
+        else:
+            global_logger.critical("Timeout while waiting for child test "
+                                   "runner process (last test running was "
+                                   "`%s' in `%s')!" %
+                                   (last_test, last_test_temp_dir))
+            if last_test_temp_dir and last_test_vpp_binary:
+                core_path = "%s/core" % last_test_temp_dir
+                if os.path.isfile(core_path):
+                    global_logger.error("Core-file exists in test temporary "
+                                        "directory: %s!" % core_path)
+                    handle_core(last_test_vpp_binary, core_path)
+            p.terminate()
+            result = -1
+    keep_alive_parent_end.close()
+    result_parent_end.close()
+    sys.exit(result)
index 527b618..156608c 100644 (file)
@@ -1,9 +1,10 @@
 #!/usr/bin/env python
 
 from __future__ import print_function
-from framework import VppTestCase
-from hook import VppDiedError
+from multiprocessing import Pipe
 from sys import exit
+from hook import VppDiedError
+from framework import VppTestCase, KeepAliveReporter
 
 
 class SanityTestCase(VppTestCase):
@@ -13,6 +14,9 @@ class SanityTestCase(VppTestCase):
 if __name__ == '__main__':
     rc = 0
     tc = SanityTestCase
+    x, y = Pipe()
+    reporter = KeepAliveReporter()
+    reporter.pipe = y
     try:
         tc.setUpClass()
     except VppDiedError:
@@ -22,5 +26,7 @@ if __name__ == '__main__':
             tc.tearDownClass()
         except:
             pass
+    x.close()
+    y.close()
 
     exit(rc)