X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=test%2Frun_tests.py;h=5d091ad253fb3ceb9b65465dcbd360e551c6cd37;hb=51d56bab707965399d524c350eaaa33d20b55244;hp=aac28d13ed949739e09b94e9a66ee837b2882981;hpb=184870ac5a266c37987e4a4d97ab4d4efefacb1f;p=vpp.git diff --git a/test/run_tests.py b/test/run_tests.py index aac28d13ed9..5d091ad253f 100644 --- a/test/run_tests.py +++ b/test/run_tests.py @@ -1,35 +1,35 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import sys import shutil import os -import select +import fnmatch import unittest import argparse import time import threading import signal -import psutil -from multiprocessing import Process, Pipe, cpu_count +import re +from multiprocessing import Process, Pipe, get_context from multiprocessing.queues import Queue from multiprocessing.managers import BaseManager +import framework from framework import VppTestRunner, running_extended_tests, VppTestCase, \ - get_testcase_doc_name, get_test_description -from debug import spawn_gdb + get_testcase_doc_name, get_test_description, PASS, FAIL, ERROR, SKIP, \ + TEST_RUN, SKIP_CPU_SHORTAGE +from debug import spawn_gdb, start_vpp_in_gdb from log import get_parallel_logger, double_line_delim, RED, YELLOW, GREEN, \ - colorize + colorize, single_line_delim from discover_tests import discover_tests from subprocess import check_output, CalledProcessError -from util import check_core_path +from util import check_core_path, get_core_path, is_core_present +from cpu_config import num_cpus, max_vpp_cpus, available_cpus # timeout which controls how long the child has to finish after seeing # a core dump in test temporary directory. If this is exceeded, parent assumes -# that child process is stuck (e.g. waiting for shm mutex, which will never -# get unlocked) and kill the child +# that child process is stuck (e.g. waiting for event from vpp) and kill +# the child core_timeout = 3 -min_req_shm = 536870912 # min 512MB shm required -# 128MB per extra process -shm_per_process = 134217728 class StreamQueue(Queue): @@ -48,20 +48,90 @@ class StreamQueueManager(BaseManager): pass -StreamQueueManager.register('Queue', StreamQueue) +StreamQueueManager.register('StreamQueue', StreamQueue) -def test_runner_wrapper(suite, keep_alive_pipe, result_pipe, stdouterr_queue, - logger): +class TestResult(dict): + def __init__(self, testcase_suite, testcases_by_id=None): + super(TestResult, self).__init__() + self[PASS] = [] + self[FAIL] = [] + self[ERROR] = [] + self[SKIP] = [] + self[SKIP_CPU_SHORTAGE] = [] + self[TEST_RUN] = [] + self.crashed = False + self.testcase_suite = testcase_suite + self.testcases = [testcase for testcase in testcase_suite] + self.testcases_by_id = testcases_by_id + + def was_successful(self): + return 0 == len(self[FAIL]) == len(self[ERROR]) \ + and len(self[PASS] + self[SKIP] + self[SKIP_CPU_SHORTAGE]) \ + == self.testcase_suite.countTestCases() + + def no_tests_run(self): + return 0 == len(self[TEST_RUN]) + + def process_result(self, test_id, result): + self[result].append(test_id) + + def suite_from_failed(self): + rerun_ids = set([]) + for testcase in self.testcase_suite: + tc_id = testcase.id() + if tc_id not in self[PASS] + self[SKIP] + self[SKIP_CPU_SHORTAGE]: + rerun_ids.add(tc_id) + if rerun_ids: + return suite_from_failed(self.testcase_suite, rerun_ids) + + def get_testcase_names(self, test_id): + # could be tearDownClass (test_ipsec_esp.TestIpsecEsp1) + setup_teardown_match = re.match( + r'((tearDownClass)|(setUpClass)) \((.+\..+)\)', test_id) + if setup_teardown_match: + test_name, _, _, testcase_name = setup_teardown_match.groups() + if len(testcase_name.split('.')) == 2: + for key in self.testcases_by_id.keys(): + if key.startswith(testcase_name): + testcase_name = key + break + testcase_name = self._get_testcase_doc_name(testcase_name) + else: + test_name = self._get_test_description(test_id) + testcase_name = self._get_testcase_doc_name(test_id) + + return testcase_name, test_name + + def _get_test_description(self, test_id): + if test_id in self.testcases_by_id: + desc = get_test_description(descriptions, + self.testcases_by_id[test_id]) + else: + desc = test_id + return desc + + def _get_testcase_doc_name(self, test_id): + if test_id in self.testcases_by_id: + doc_name = get_testcase_doc_name(self.testcases_by_id[test_id]) + else: + doc_name = test_id + return doc_name + + +def test_runner_wrapper(suite, keep_alive_pipe, stdouterr_queue, + finished_pipe, result_pipe, logger): sys.stdout = stdouterr_queue sys.stderr = stdouterr_queue - VppTestCase.logger = logger + VppTestCase.parallel_handler = logger.handlers[0] result = VppTestRunner(keep_alive_pipe=keep_alive_pipe, descriptions=descriptions, verbosity=verbose, - failfast=failfast).run(suite) - result_pipe.send(result) - result_pipe.close() + result_pipe=result_pipe, + failfast=failfast, + print_summary=False).run(suite) + finished_pipe.send(result.wasSuccessful()) + finished_pipe.close() keep_alive_pipe.close() @@ -69,42 +139,93 @@ class TestCaseWrapper(object): def __init__(self, testcase_suite, manager): self.keep_alive_parent_end, self.keep_alive_child_end = Pipe( duplex=False) + self.finished_parent_end, self.finished_child_end = Pipe(duplex=False) self.result_parent_end, self.result_child_end = Pipe(duplex=False) self.testcase_suite = testcase_suite - self.stdouterr_queue = manager.Queue() + self.stdouterr_queue = manager.StreamQueue(ctx=get_context()) self.logger = get_parallel_logger(self.stdouterr_queue) self.child = Process(target=test_runner_wrapper, - args=(testcase_suite, self.keep_alive_child_end, - self.result_child_end, self.stdouterr_queue, + args=(testcase_suite, + self.keep_alive_child_end, + self.stdouterr_queue, + self.finished_child_end, + self.result_child_end, self.logger) ) self.child.start() - self.pid = self.child.pid self.last_test_temp_dir = None self.last_test_vpp_binary = None - self.last_test = None - self.result = None + self._last_test = None + self.last_test_id = None + self.vpp_pid = None self.last_heard = time.time() self.core_detected_at = None - self.failed_tests = [] + self.testcases_by_id = {} + self.testclasess_with_core = {} + for testcase in self.testcase_suite: + self.testcases_by_id[testcase.id()] = testcase + self.result = TestResult(testcase_suite, self.testcases_by_id) + + @property + def last_test(self): + return self._last_test + + @last_test.setter + def last_test(self, test_id): + self.last_test_id = test_id + if test_id in self.testcases_by_id: + testcase = self.testcases_by_id[test_id] + self._last_test = testcase.shortDescription() + if not self._last_test: + self._last_test = str(testcase) + else: + self._last_test = test_id + + def add_testclass_with_core(self): + if self.last_test_id in self.testcases_by_id: + test = self.testcases_by_id[self.last_test_id] + class_name = unittest.util.strclass(test.__class__) + test_name = "'{}' ({})".format(get_test_description(descriptions, + test), + self.last_test_id) + else: + test_name = self.last_test_id + class_name = re.match(r'((tearDownClass)|(setUpClass)) ' + r'\((.+\..+)\)', test_name).groups()[3] + if class_name not in self.testclasess_with_core: + self.testclasess_with_core[class_name] = ( + test_name, + self.last_test_vpp_binary, + self.last_test_temp_dir) def close_pipes(self): self.keep_alive_child_end.close() + self.finished_child_end.close() self.result_child_end.close() self.keep_alive_parent_end.close() + self.finished_parent_end.close() self.result_parent_end.close() + def was_successful(self): + return self.result.was_successful() + + @property + def cpus_used(self): + return self.testcase_suite.cpus_used + + def get_assigned_cpus(self): + return self.testcase_suite.get_assigned_cpus() + def stdouterr_reader_wrapper(unread_testcases, finished_unread_testcases, read_testcases): read_testcase = None - while read_testcases.is_set() or len(unread_testcases) > 0: - if not read_testcase: - if len(finished_unread_testcases) > 0: - read_testcase = finished_unread_testcases.pop() - unread_testcases.remove(read_testcase) - elif len(unread_testcases) > 0: - read_testcase = unread_testcases.pop() + while read_testcases.is_set() or unread_testcases: + if finished_unread_testcases: + read_testcase = finished_unread_testcases.pop() + unread_testcases.remove(read_testcase) + elif unread_testcases: + read_testcase = unread_testcases.pop() if read_testcase: data = '' while data is not None: @@ -116,25 +237,151 @@ def stdouterr_reader_wrapper(unread_testcases, finished_unread_testcases, read_testcase = None -def run_forked(testcases): +def handle_failed_suite(logger, last_test_temp_dir, vpp_pid): + if last_test_temp_dir: + # Need to create link in case of a timeout or core dump without failure + lttd = os.path.basename(last_test_temp_dir) + failed_dir = os.getenv('FAILED_DIR') + link_path = '%s%s-FAILED' % (failed_dir, lttd) + if not os.path.exists(link_path): + os.symlink(last_test_temp_dir, link_path) + logger.error("Symlink to failed testcase directory: %s -> %s" + % (link_path, lttd)) + + # Report core existence + core_path = get_core_path(last_test_temp_dir) + if os.path.exists(core_path): + logger.error( + "Core-file exists in test temporary directory: %s!" % + core_path) + check_core_path(logger, core_path) + logger.debug("Running 'file %s':" % core_path) + try: + info = check_output(["file", core_path]) + logger.debug(info) + except CalledProcessError as e: + logger.error("Subprocess returned with return code " + "while running `file' utility on core-file " + "returned: " + "rc=%s", e.returncode) + except OSError as e: + logger.error("Subprocess returned with OS error while " + "running 'file' utility " + "on core-file: " + "(%s) %s", e.errno, e.strerror) + except Exception as e: + logger.exception("Unexpected error running `file' utility " + "on core-file") + logger.error("gdb %s %s" % + (os.getenv('VPP_BIN', 'vpp'), core_path)) + + if vpp_pid: + # Copy api post mortem + api_post_mortem_path = "/tmp/api_post_mortem.%d" % vpp_pid + if os.path.isfile(api_post_mortem_path): + logger.error("Copying api_post_mortem.%d to %s" % + (vpp_pid, last_test_temp_dir)) + shutil.copy2(api_post_mortem_path, last_test_temp_dir) + + +def check_and_handle_core(vpp_binary, tempdir, core_crash_test): + if is_core_present(tempdir): + if debug_core: + print('VPP core detected in %s. Last test running was %s' % + (tempdir, core_crash_test)) + print(single_line_delim) + spawn_gdb(vpp_binary, get_core_path(tempdir)) + print(single_line_delim) + elif compress_core: + print("Compressing core-file in test directory `%s'" % tempdir) + os.system("gzip %s" % get_core_path(tempdir)) + + +def handle_cores(failed_testcases): + for failed_testcase in failed_testcases: + tcs_with_core = failed_testcase.testclasess_with_core + if tcs_with_core: + for test, vpp_binary, tempdir in tcs_with_core.values(): + check_and_handle_core(vpp_binary, tempdir, test) + + +def process_finished_testsuite(wrapped_testcase_suite, + finished_testcase_suites, + failed_wrapped_testcases, + results): + results.append(wrapped_testcase_suite.result) + finished_testcase_suites.add(wrapped_testcase_suite) + stop_run = False + if failfast and not wrapped_testcase_suite.was_successful(): + stop_run = True + + if not wrapped_testcase_suite.was_successful(): + failed_wrapped_testcases.add(wrapped_testcase_suite) + handle_failed_suite(wrapped_testcase_suite.logger, + wrapped_testcase_suite.last_test_temp_dir, + wrapped_testcase_suite.vpp_pid) + + return stop_run + + +def run_forked(testcase_suites): wrapped_testcase_suites = set() + solo_testcase_suites = [] # suites are unhashable, need to use list results = [] - debug_core = os.getenv("DEBUG", "").lower() == "core" unread_testcases = set() finished_unread_testcases = set() manager = StreamQueueManager() manager.start() - for i in range(concurrent_tests): - if len(testcases) > 0: - wrapped_testcase_suite = TestCaseWrapper(testcases.pop(0), manager) - wrapped_testcase_suites.add(wrapped_testcase_suite) - unread_testcases.add(wrapped_testcase_suite) - # time.sleep(1) + tests_running = 0 + free_cpus = list(available_cpus) + + def on_suite_start(tc): + nonlocal tests_running + nonlocal free_cpus + tests_running = tests_running + 1 + + def on_suite_finish(tc): + nonlocal tests_running + nonlocal free_cpus + tests_running = tests_running - 1 + assert tests_running >= 0 + free_cpus.extend(tc.get_assigned_cpus()) + + def run_suite(suite): + nonlocal manager + nonlocal wrapped_testcase_suites + nonlocal unread_testcases + nonlocal free_cpus + suite.assign_cpus(free_cpus[:suite.cpus_used]) + free_cpus = free_cpus[suite.cpus_used:] + wrapper = TestCaseWrapper(suite, manager) + wrapped_testcase_suites.add(wrapper) + unread_testcases.add(wrapper) + on_suite_start(suite) + + def can_run_suite(suite): + return (tests_running < max_concurrent_tests and + (suite.cpus_used <= len(free_cpus) or + suite.cpus_used > max_vpp_cpus)) + + while free_cpus and testcase_suites: + a_suite = testcase_suites[0] + if a_suite.is_tagged_run_solo: + a_suite = testcase_suites.pop(0) + solo_testcase_suites.append(a_suite) + continue + if can_run_suite(a_suite): + a_suite = testcase_suites.pop(0) + run_suite(a_suite) else: break + if tests_running == 0 and solo_testcase_suites: + a_suite = solo_testcase_suites.pop(0) + run_suite(a_suite) + read_from_testcases = threading.Event() read_from_testcases.set() stdouterr_thread = threading.Thread(target=stdouterr_reader_wrapper, @@ -143,153 +390,177 @@ def run_forked(testcases): read_from_testcases)) stdouterr_thread.start() - while len(wrapped_testcase_suites) > 0: - finished_testcase_suites = set() - for wrapped_testcase_suite in wrapped_testcase_suites: - readable = select.select( - [wrapped_testcase_suite.keep_alive_parent_end.fileno(), - wrapped_testcase_suite.result_parent_end.fileno()], - [], [], 1)[0] - if wrapped_testcase_suite.result_parent_end.fileno() in readable: - results.append( - (wrapped_testcase_suite.testcase_suite, - wrapped_testcase_suite.result_parent_end.recv())) - finished_testcase_suites.add(wrapped_testcase_suite) - continue - - if wrapped_testcase_suite.keep_alive_parent_end.fileno() \ - in readable: + failed_wrapped_testcases = set() + stop_run = False + + try: + while wrapped_testcase_suites: + finished_testcase_suites = set() + for wrapped_testcase_suite in wrapped_testcase_suites: + while wrapped_testcase_suite.result_parent_end.poll(): + wrapped_testcase_suite.result.process_result( + *wrapped_testcase_suite.result_parent_end.recv()) + wrapped_testcase_suite.last_heard = time.time() + while wrapped_testcase_suite.keep_alive_parent_end.poll(): wrapped_testcase_suite.last_test, \ wrapped_testcase_suite.last_test_vpp_binary, \ wrapped_testcase_suite.last_test_temp_dir, \ wrapped_testcase_suite.vpp_pid = \ wrapped_testcase_suite.keep_alive_parent_end.recv() - wrapped_testcase_suite.last_heard = time.time() - - fail = False - if wrapped_testcase_suite.last_heard + test_timeout < time.time() \ - and not os.path.isfile( - "%s/_core_handled" % - wrapped_testcase_suite.last_test_temp_dir): - fail = True - wrapped_testcase_suite.logger.critical( - "Timeout while waiting for child test " - "runner process (last test running was " - "`%s' in `%s')!" % - (wrapped_testcase_suite.last_test, - wrapped_testcase_suite.last_test_temp_dir)) - elif not wrapped_testcase_suite.child.is_alive(): - fail = True - wrapped_testcase_suite.logger.critical( - "Child python process unexpectedly died " - "(last test running was `%s' in `%s')!" % - (wrapped_testcase_suite.last_test, - wrapped_testcase_suite.last_test_temp_dir)) - elif wrapped_testcase_suite.last_test_temp_dir and \ - wrapped_testcase_suite.last_test_vpp_binary: - core_path = "%s/core" % \ - wrapped_testcase_suite.last_test_temp_dir - if os.path.isfile(core_path): - if wrapped_testcase_suite.core_detected_at is None: - wrapped_testcase_suite.core_detected_at = time.time() - elif wrapped_testcase_suite.core_detected_at + \ - core_timeout < time.time(): - if not os.path.isfile( - "%s/_core_handled" % - wrapped_testcase_suite. - last_test_temp_dir): - wrapped_testcase_suite.logger.critical( - "Child python process unresponsive and core-" - "file exists in test temporary directory!") - fail = True + wrapped_testcase_suite.last_heard = time.time() + + if wrapped_testcase_suite.finished_parent_end.poll(): + wrapped_testcase_suite.finished_parent_end.recv() + wrapped_testcase_suite.last_heard = time.time() + stop_run = process_finished_testsuite( + wrapped_testcase_suite, + finished_testcase_suites, + failed_wrapped_testcases, + results) or stop_run + continue - if fail: - failed_dir = os.getenv('VPP_TEST_FAILED_DIR') - lttd = os.path.basename( - wrapped_testcase_suite.last_test_temp_dir) - link_path = '%s%s-FAILED' % (failed_dir, lttd) - wrapped_testcase_suite.logger.error( - "Creating a link to the failed test: %s -> %s" % - (link_path, lttd)) - if not os.path.exists(link_path): - os.symlink(wrapped_testcase_suite.last_test_temp_dir, - link_path) - api_post_mortem_path = "/tmp/api_post_mortem.%d" % \ - wrapped_testcase_suite.vpp_pid - if os.path.isfile(api_post_mortem_path): - wrapped_testcase_suite.logger.error( - "Copying api_post_mortem.%d to %s" % - (wrapped_testcase_suite.vpp_pid, + fail = False + if wrapped_testcase_suite.last_heard + test_timeout < \ + time.time(): + fail = True + wrapped_testcase_suite.logger.critical( + "Child test runner process timed out " + "(last test running was `%s' in `%s')!" % + (wrapped_testcase_suite.last_test, + wrapped_testcase_suite.last_test_temp_dir)) + elif not wrapped_testcase_suite.child.is_alive(): + fail = True + wrapped_testcase_suite.logger.critical( + "Child test runner process unexpectedly died " + "(last test running was `%s' in `%s')!" % + (wrapped_testcase_suite.last_test, wrapped_testcase_suite.last_test_temp_dir)) - shutil.copy2(api_post_mortem_path, - wrapped_testcase_suite.last_test_temp_dir) - if wrapped_testcase_suite.last_test_temp_dir and \ + elif wrapped_testcase_suite.last_test_temp_dir and \ wrapped_testcase_suite.last_test_vpp_binary: - core_path = "%s/core" % \ - wrapped_testcase_suite.last_test_temp_dir - if os.path.isfile(core_path): - wrapped_testcase_suite.logger.error( - "Core-file exists in test temporary directory: %s!" - % core_path) - check_core_path(wrapped_testcase_suite.logger, - core_path) - wrapped_testcase_suite.logger.debug( - "Running `file %s':" % core_path) - try: - info = check_output(["file", core_path]) - wrapped_testcase_suite.logger.debug(info) - except CalledProcessError as e: - wrapped_testcase_suite.logger.error( - "Could not run `file' utility on core-file, " - "rc=%s" % e.returncode) - pass - if debug_core: - spawn_gdb( - wrapped_testcase_suite.last_test_vpp_binary, - core_path, wrapped_testcase_suite.logger) - wrapped_testcase_suite.child.terminate() - try: - # terminating the child process tends to leave orphan - # VPP process around - os.kill(wrapped_testcase_suite.vpp_pid, signal.SIGTERM) - except OSError: - # already dead - pass - results.append((wrapped_testcase_suite.testcase_suite, None)) - finished_testcase_suites.add(wrapped_testcase_suite) - - for finished_testcase in finished_testcase_suites: - finished_testcase.child.join() - finished_testcase.close_pipes() - wrapped_testcase_suites.remove(finished_testcase) - finished_unread_testcases.add(finished_testcase) - finished_testcase.stdouterr_queue.put(None) - if len(testcases) > 0: - new_testcase = TestCaseWrapper(testcases.pop(0), manager) - wrapped_testcase_suites.add(new_testcase) - unread_testcases.add(new_testcase) - - read_from_testcases.clear() - stdouterr_thread.join(test_timeout) - manager.shutdown() + if is_core_present( + wrapped_testcase_suite.last_test_temp_dir): + wrapped_testcase_suite.add_testclass_with_core() + if wrapped_testcase_suite.core_detected_at is None: + wrapped_testcase_suite.core_detected_at = \ + time.time() + elif wrapped_testcase_suite.core_detected_at + \ + core_timeout < time.time(): + wrapped_testcase_suite.logger.critical( + "Child test runner process unresponsive and " + "core-file exists in test temporary directory " + "(last test running was `%s' in `%s')!" % + (wrapped_testcase_suite.last_test, + wrapped_testcase_suite.last_test_temp_dir)) + fail = True + + if fail: + wrapped_testcase_suite.child.terminate() + try: + # terminating the child process tends to leave orphan + # VPP process around + if wrapped_testcase_suite.vpp_pid: + os.kill(wrapped_testcase_suite.vpp_pid, + signal.SIGTERM) + except OSError: + # already dead + pass + wrapped_testcase_suite.result.crashed = True + wrapped_testcase_suite.result.process_result( + wrapped_testcase_suite.last_test_id, ERROR) + stop_run = process_finished_testsuite( + wrapped_testcase_suite, + finished_testcase_suites, + failed_wrapped_testcases, + results) or stop_run + + for finished_testcase in finished_testcase_suites: + # Somewhat surprisingly, the join below may + # timeout, even if client signaled that + # it finished - so we note it just in case. + join_start = time.time() + finished_testcase.child.join(test_finished_join_timeout) + join_end = time.time() + if join_end - join_start >= test_finished_join_timeout: + finished_testcase.logger.error( + "Timeout joining finished test: %s (pid %d)" % + (finished_testcase.last_test, + finished_testcase.child.pid)) + finished_testcase.close_pipes() + wrapped_testcase_suites.remove(finished_testcase) + finished_unread_testcases.add(finished_testcase) + finished_testcase.stdouterr_queue.put(None) + on_suite_finish(finished_testcase) + if stop_run: + while testcase_suites: + results.append(TestResult(testcase_suites.pop(0))) + elif testcase_suites: + a_suite = testcase_suites.pop(0) + while a_suite and a_suite.is_tagged_run_solo: + solo_testcase_suites.append(a_suite) + if testcase_suites: + a_suite = testcase_suites.pop(0) + else: + a_suite = None + if a_suite and can_run_suite(a_suite): + run_suite(a_suite) + if solo_testcase_suites and tests_running == 0: + a_suite = solo_testcase_suites.pop(0) + run_suite(a_suite) + time.sleep(0.1) + except Exception: + for wrapped_testcase_suite in wrapped_testcase_suites: + wrapped_testcase_suite.child.terminate() + wrapped_testcase_suite.stdouterr_queue.put(None) + raise + finally: + read_from_testcases.clear() + stdouterr_thread.join(test_timeout) + manager.shutdown() + + handle_cores(failed_wrapped_testcases) return results +class TestSuiteWrapper(unittest.TestSuite): + cpus_used = 0 + + def __init__(self): + return super().__init__() + + def addTest(self, test): + self.cpus_used = max(self.cpus_used, test.get_cpus_required()) + super().addTest(test) + + def assign_cpus(self, cpus): + self.cpus = cpus + + def _handleClassSetUp(self, test, result): + if not test.__class__.skipped_due_to_cpu_lack: + test.assign_cpus(self.cpus) + super()._handleClassSetUp(test, result) + + def get_assigned_cpus(self): + return self.cpus + + class SplitToSuitesCallback: def __init__(self, filter_callback): self.suites = {} self.suite_name = 'default' self.filter_callback = filter_callback - self.filtered = unittest.TestSuite() + self.filtered = TestSuiteWrapper() def __call__(self, file_name, cls, method): test_method = cls(method) if self.filter_callback(file_name, cls.__name__, method): self.suite_name = file_name + cls.__name__ if self.suite_name not in self.suites: - self.suites[self.suite_name] = unittest.TestSuite() + self.suites[self.suite_name] = TestSuiteWrapper() + self.suites[self.suite_name].is_tagged_run_solo = False self.suites[self.suite_name].addTest(test_method) + if test_method.is_tagged_run_solo(): + self.suites[self.suite_name].is_tagged_run_solo = True else: self.filtered.addTest(test_method) @@ -330,7 +601,7 @@ def parse_test_option(): def filter_tests(tests, filter_cb): - result = unittest.suite.TestSuite() + result = TestSuiteWrapper() for t in tests: if isinstance(t, unittest.suite.TestSuite): # this is a bunch of tests, recursively filter... @@ -360,8 +631,10 @@ class FilterByTestOption: self.filter_func_name = filter_func_name def __call__(self, file_name, class_name, func_name): - if self.filter_file_name and file_name != self.filter_file_name: - return False + if self.filter_file_name: + fn_match = fnmatch.fnmatch(file_name, self.filter_file_name) + if not fn_match: + return False if self.filter_class_name and class_name != self.filter_class_name: return False if self.filter_func_name and func_name != self.filter_func_name: @@ -370,103 +643,56 @@ class FilterByTestOption: class FilterByClassList: - def __init__(self, class_list): - self.class_list = class_list + def __init__(self, classes_with_filenames): + self.classes_with_filenames = classes_with_filenames def __call__(self, file_name, class_name, func_name): - return class_name in self.class_list + return '.'.join([file_name, class_name]) in self.classes_with_filenames def suite_from_failed(suite, failed): + failed = {x.rsplit('.', 1)[0] for x in failed} filter_cb = FilterByClassList(failed) suite = filter_tests(suite, filter_cb) return suite -class NonPassedResults(dict): +class AllResults(dict): def __init__(self): - super(NonPassedResults, self).__init__() + super(AllResults, self).__init__() self.all_testcases = 0 - self.results_per_suite = {} - self.failures_id = 'failures' - self.errors_id = 'errors' - self.crashes_id = 'crashes' - self.skipped_id = 'skipped' - self.expectedFailures_id = 'expectedFailures' - self.unexpectedSuccesses_id = 'unexpectedSuccesses' + self.results_per_suite = [] + self[PASS] = 0 + self[FAIL] = 0 + self[ERROR] = 0 + self[SKIP] = 0 + self[SKIP_CPU_SHORTAGE] = 0 + self[TEST_RUN] = 0 self.rerun = [] - self[self.failures_id] = 0 - self[self.errors_id] = 0 - self[self.crashes_id] = 0 - self[self.skipped_id] = 0 - self[self.expectedFailures_id] = 0 - self[self.unexpectedSuccesses_id] = 0 - - def _add_result(self, test, result_id): - if isinstance(test, VppTestCase): - parts = test.id().split('.') - if len(parts) == 3: - tc_class = get_testcase_doc_name(test) - if tc_class not in self.results_per_suite: - # failed, errored, skipped, expectedly failed, - # unexpectedly passed - self.results_per_suite[tc_class] = \ - {self.failures_id: [], - self.errors_id: [], - self.crashes_id: [], - self.skipped_id: [], - self.expectedFailures_id: [], - self.unexpectedSuccesses_id: []} - self.results_per_suite[tc_class][result_id].append(test) - return True - return False - - def add_results(self, testcases, testcase_result, - duplicates=None): - for failed_testcase, _ in testcases: - if self._add_result(failed_testcase, testcase_result): - if duplicates: - if failed_testcase not in duplicates: - self[testcase_result] += 1 - else: - self[testcase_result] += 1 + self.testsuites_no_tests_run = [] - def add_result(self, testcase_suite, result): - retval = 0 - self.all_testcases += testcase_suite.countTestCases() - if result: - # suite finished properly - if not result.wasSuccessful(): - retval = 1 + def add_results(self, result): + self.results_per_suite.append(result) + result_types = [PASS, FAIL, ERROR, SKIP, TEST_RUN, SKIP_CPU_SHORTAGE] + for result_type in result_types: + self[result_type] += len(result[result_type]) - self.add_results(result.failures, self.failures_id) - self.add_results(result.errors, self.errors_id, - result.failures + result.errors) - self.add_results(result.skipped, self.skipped_id) - self.add_results(result.expectedFailures, - self.expectedFailures_id) - self.add_results(result.unexpectedSuccesses, - self.unexpectedSuccesses_id) + def add_result(self, result): + retval = 0 + self.all_testcases += result.testcase_suite.countTestCases() + self.add_results(result) - else: - # suite crashed - retval = -1 - self.add_results([(x, None) for x in testcase_suite], - self.crashes_id) + if result.no_tests_run(): + self.testsuites_no_tests_run.append(result.testcase_suite) + if result.crashed: + retval = -1 + else: + retval = 1 + elif not result.was_successful(): + retval = 1 if retval != 0: - if concurrent_tests == 1: - if result: - rerun_classes = {x[0].__class__.__name__ for - x in result.errors} - rerun_classes.update({x[0].__class__.__name__ for - x in result.failures}) - self.rerun.append(suite_from_failed(testcase_suite, - rerun_classes)) - else: - self.rerun.append(testcase_suite) - else: - self.rerun.append(testcase_suite) + self.rerun.append(result.testcase_suite) return retval @@ -474,89 +700,97 @@ class NonPassedResults(dict): print('') print(double_line_delim) print('TEST RESULTS:') - print(' Executed tests: {}'.format(self.all_testcases)) - print(' Passed tests: {}'.format( - colorize(str(self.all_testcases - - self.all_nonpassed), GREEN))) - if self[self.failures_id] > 0: - print(' Failed tests: {}'.format( - colorize(str(self[self.failures_id]), RED))) - if self[self.errors_id] > 0: - print(' Errored tests: {}'.format( - colorize(str(self[self.errors_id]), RED))) - if self[self.crashes_id] > 0: - print(' Crashed tests: {}'.format( - colorize(str(self[self.crashes_id]), RED))) - if self[self.skipped_id] > 0: - print(' Skipped tests: {}'.format( - colorize(str(self[self.skipped_id]), YELLOW))) - if self[self.expectedFailures_id] > 0: - print(' Expected failures: {}'.format( - colorize(str(self[self.expectedFailures_id]), GREEN))) - if self[self.unexpectedSuccesses_id] > 0: - print(' Unexpected successes: {}'.format( - colorize(str(self[self.unexpectedSuccesses_id]), YELLOW))) - if self.all_failed > 0: - print('FAILED TESTS:') - for testcase_class, suite_results in \ - self.results_per_suite.items(): - failed_testcases = suite_results[ - self.failures_id] - errored_testcases = suite_results[ - self.errors_id] - crashed_testcases = suite_results[ - self.crashes_id] - if len(failed_testcases) or len(errored_testcases) \ - or len(crashed_testcases): - print(' Testcase name: {}'.format( - colorize(testcase_class, RED))) - for failed_test in failed_testcases: - print(' FAILED: {}'.format( - colorize(get_test_description( - descriptions, failed_test), RED))) - for failed_test in errored_testcases: - print(' ERRORED: {}'.format( - colorize(get_test_description( - descriptions, failed_test), RED))) - for failed_test in crashed_testcases: - print(' CRASHED: {}'.format( - colorize(get_test_description( - descriptions, failed_test), RED))) + def indent_results(lines): + lines = list(filter(None, lines)) + maximum = max(lines, key=lambda x: x.index(":")) + maximum = 4 + maximum.index(":") + for l in lines: + padding = " " * (maximum - l.index(":")) + print(f"{padding}{l}") + + indent_results([ + f'Scheduled tests: {self.all_testcases}', + f'Executed tests: {self[TEST_RUN]}', + f'Passed tests: {colorize(self[PASS], GREEN)}', + f'Skipped tests: {colorize(self[SKIP], YELLOW)}' + if self[SKIP] else None, + f'Not Executed tests: {colorize(self.not_executed, RED)}' + if self.not_executed else None, + f'Failures: {colorize(self[FAIL], RED)}' if self[FAIL] else None, + f'Errors: {colorize(self[ERROR], RED)}' if self[ERROR] else None, + 'Tests skipped due to lack of CPUS: ' + f'{colorize(self[SKIP_CPU_SHORTAGE], YELLOW)}' + if self[SKIP_CPU_SHORTAGE] else None + ]) + if self.all_failed > 0: + print('FAILURES AND ERRORS IN TESTS:') + for result in self.results_per_suite: + failed_testcase_ids = result[FAIL] + errored_testcase_ids = result[ERROR] + old_testcase_name = None + if failed_testcase_ids: + for failed_test_id in failed_testcase_ids: + new_testcase_name, test_name = \ + result.get_testcase_names(failed_test_id) + if new_testcase_name != old_testcase_name: + print(' Testcase name: {}'.format( + colorize(new_testcase_name, RED))) + old_testcase_name = new_testcase_name + print(' FAILURE: {} [{}]'.format( + colorize(test_name, RED), failed_test_id)) + if errored_testcase_ids: + for errored_test_id in errored_testcase_ids: + new_testcase_name, test_name = \ + result.get_testcase_names(errored_test_id) + if new_testcase_name != old_testcase_name: + print(' Testcase name: {}'.format( + colorize(new_testcase_name, RED))) + old_testcase_name = new_testcase_name + print(' ERROR: {} [{}]'.format( + colorize(test_name, RED), errored_test_id)) + if self.testsuites_no_tests_run: + print('TESTCASES WHERE NO TESTS WERE SUCCESSFULLY EXECUTED:') + tc_classes = set() + for testsuite in self.testsuites_no_tests_run: + for testcase in testsuite: + tc_classes.add(get_testcase_doc_name(testcase)) + for tc_class in tc_classes: + print(' {}'.format(colorize(tc_class, RED))) + + if self[SKIP_CPU_SHORTAGE]: + print() + print(colorize(' SOME TESTS WERE SKIPPED BECAUSE THERE ARE NOT' + ' ENOUGH CPUS AVAILABLE', YELLOW)) print(double_line_delim) print('') @property - def all_nonpassed(self): - return self[self.failures_id] + self[self.errors_id] + \ - self[self.crashes_id] + self[self.skipped_id] + \ - self[self.expectedFailures_id] + \ - self[self.unexpectedSuccesses_id] + def not_executed(self): + return self.all_testcases - self[TEST_RUN] @property def all_failed(self): - return self[self.failures_id] + self[self.errors_id] + \ - self[self.crashes_id] + return self[FAIL] + self[ERROR] def parse_results(results): """ - Prints the number of executed, passed, failed, errored, skipped, - expectedly failed and unexpectedly passed tests and details about - failed, errored, expectedly failed and unexpectedly passed tests. + Prints the number of scheduled, executed, not executed, passed, failed, + errored and skipped tests and details about failed and errored tests. - Also returns any suites where any test failed. + Also returns all suites where any test failed. :param results: :return: """ - results_per_suite = NonPassedResults() + results_per_suite = AllResults() crashed = False failed = False - for testcase_suite, result in results: - result_code = results_per_suite.add_result(testcase_suite, result) + for result in results: + result_code = results_per_suite.add_result(result) if result_code == 1: failed = True elif result_code == -1: @@ -591,44 +825,55 @@ if __name__ == '__main__': test_timeout = parse_digit_env("TIMEOUT", 600) # default = 10 minutes + test_finished_join_timeout = 15 + retries = parse_digit_env("RETRIES", 0) - debug = os.getenv("DEBUG", "n").lower() in ["gdb", "gdbserver"] + debug = os.getenv("DEBUG", "n").lower() in ["gdb", "gdbserver", "attach"] + + debug_core = os.getenv("DEBUG", "").lower() == "core" + compress_core = framework.BoolEnvironmentVariable("CORE_COMPRESS") - step = os.getenv("STEP", "n").lower() in ("y", "yes", "1") + if os.getenv("VPP_IN_GDB", "n").lower() in ["1", "y", "yes"]: + start_vpp_in_gdb() + exit() - force_foreground = \ - os.getenv("FORCE_FOREGROUND", "n").lower() in ("y", "yes", "1") + step = framework.BoolEnvironmentVariable("STEP") + force_foreground = framework.BoolEnvironmentVariable("FORCE_FOREGROUND") run_interactive = debug or step or force_foreground + max_concurrent_tests = 0 + print(f"OS reports {num_cpus} available cpu(s).") + test_jobs = os.getenv("TEST_JOBS", "1").lower() # default = 1 process if test_jobs == 'auto': if run_interactive: - concurrent_tests = 1 - print('Interactive mode required, running on one core') + max_concurrent_tests = 1 + print('Interactive mode required, running tests consecutively.') else: - shm_free = psutil.disk_usage('/dev/shm').free - shm_max_processes = 1 - if shm_free < min_req_shm: - raise Exception('Not enough free space in /dev/shm. Required ' - 'free space is at least %sM.' - % (min_req_shm >> 20)) - else: - extra_shm = shm_free - min_req_shm - shm_max_processes += extra_shm / shm_per_process - concurrent_tests = max(cpu_count(), shm_max_processes) - print('Found enough resources to run tests with %s cores' - % concurrent_tests) - elif test_jobs.isdigit(): - concurrent_tests = int(test_jobs) + max_concurrent_tests = num_cpus + print(f"Running at most {max_concurrent_tests} python test " + "processes concurrently.") else: - concurrent_tests = 1 - - if run_interactive and concurrent_tests > 1: + try: + test_jobs = int(test_jobs) + except ValueError as e: + raise ValueError("Invalid TEST_JOBS value specified, valid " + "values are a positive integer or 'auto'") from e + if test_jobs <= 0: + raise ValueError("Invalid TEST_JOBS value specified, valid " + "values are a positive integer or 'auto'") + max_concurrent_tests = int(test_jobs) + print(f"Running at most {max_concurrent_tests} python test processes " + "concurrently as set by 'TEST_JOBS'.") + + print(f"Using at most {max_vpp_cpus} cpus for VPP threads.") + + if run_interactive and max_concurrent_tests > 1: raise NotImplementedError( - 'Running tests interactively (DEBUG, STEP or FORCE_FOREGROUND is ' - 'set) in parallel (TEST_JOBS is more than 1) is not ' + 'Running tests interactively (DEBUG is gdb[server] or ATTACH or ' + 'STEP is set) in parallel (TEST_JOBS is more than 1) is not ' 'supported') parser = argparse.ArgumentParser(description="VPP unit tests") @@ -641,7 +886,7 @@ if __name__ == '__main__': failfast = args.failfast descriptions = True - print("Running tests using custom test runner") # debug message + print("Running tests using custom test runner.") filter_file, filter_class, filter_func = parse_test_option() print("Active filters: file=%s, class=%s, function=%s" % ( @@ -649,44 +894,83 @@ if __name__ == '__main__': filter_cb = FilterByTestOption(filter_file, filter_class, filter_func) + ignore_path = os.getenv("VENV_PATH", None) cb = SplitToSuitesCallback(filter_cb) for d in args.dir: print("Adding tests from directory tree %s" % d) - discover_tests(d, cb) + discover_tests(d, cb, ignore_path) # suites are not hashable, need to use list suites = [] tests_amount = 0 for testcase_suite in cb.suites.values(): tests_amount += testcase_suite.countTestCases() + if testcase_suite.cpus_used > max_vpp_cpus: + # here we replace test functions with lambdas to just skip them + # but we also replace setUp/tearDown functions to do nothing + # so that the test can be "started" and "stopped", so that we can + # still keep those prints (test description - SKIP), which are done + # in stopTest() (for that to trigger, test function must run) + for t in testcase_suite: + for m in dir(t): + if m.startswith('test_'): + setattr(t, m, lambda: t.skipTest("not enough cpus")) + setattr(t.__class__, 'setUpClass', lambda: None) + setattr(t.__class__, 'tearDownClass', lambda: None) + setattr(t, 'setUp', lambda: None) + setattr(t, 'tearDown', lambda: None) + t.__class__.skipped_due_to_cpu_lack = True suites.append(testcase_suite) - if concurrent_tests == 1: - new_suite = unittest.TestSuite() - for suite in suites: - new_suite.addTest(suite) - - suites = [new_suite] - print("%s out of %s tests match specified filters" % ( tests_amount, tests_amount + cb.filtered.countTestCases())) - if not running_extended_tests(): + if not running_extended_tests: print("Not running extended tests (some tests will be skipped)") attempts = retries + 1 if attempts > 1: print("Perform %s attempts to pass the suite..." % attempts) - if run_interactive: + if run_interactive and suites: # don't fork if requiring interactive terminal - sys.exit(not VppTestRunner( - verbosity=verbose, failfast=failfast) - .run(suites[0]).wasSuccessful()) + print('Running tests in foreground in the current process') + full_suite = unittest.TestSuite() + free_cpus = list(available_cpus) + cpu_shortage = False + for suite in suites: + if suite.cpus_used <= max_vpp_cpus: + suite.assign_cpus(free_cpus[:suite.cpus_used]) + else: + suite.assign_cpus([]) + cpu_shortage = True + full_suite.addTests(suites) + result = VppTestRunner(verbosity=verbose, + failfast=failfast, + print_summary=True).run(full_suite) + was_successful = result.wasSuccessful() + if not was_successful: + for test_case_info in result.failed_test_cases_info: + handle_failed_suite(test_case_info.logger, + test_case_info.tempdir, + test_case_info.vpp_pid) + if test_case_info in result.core_crash_test_cases_info: + check_and_handle_core(test_case_info.vpp_bin_path, + test_case_info.tempdir, + test_case_info.core_crash_test) + + if cpu_shortage: + print() + print(colorize('SOME TESTS WERE SKIPPED BECAUSE THERE ARE NOT' + ' ENOUGH CPUS AVAILABLE', YELLOW)) + print() + sys.exit(not was_successful) else: + print('Running each VPPTestCase in a separate background process' + f' with at most {max_concurrent_tests} parallel python test ' + 'process(es)') exit_code = 0 - while len(suites) > 0 and attempts > 0: - tests_amount = sum([x.countTestCases() for x in suites]) + while suites and attempts > 0: results = run_forked(suites) exit_code, suites = parse_results(results) attempts -= 1