X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=test%2Frun_tests.py;h=09b68a217a527ec6d0d87b20c1f4465e99dd0a20;hb=ead1e536d66d83b546528c32e2112085a97c8e13;hp=d72a74c2ee46cec1ead9ced69a35b7e46e0df814;hpb=abec0129b17797170d59521e0ad8b5f6d5853643;p=vpp.git diff --git a/test/run_tests.py b/test/run_tests.py index d72a74c2ee4..09b68a217a5 100644 --- a/test/run_tests.py +++ b/test/run_tests.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import sys import shutil @@ -11,9 +11,11 @@ import threading import signal import psutil import re +import multiprocessing from multiprocessing import Process, Pipe, cpu_count from multiprocessing.queues import Queue from multiprocessing.managers import BaseManager +import framework from framework import VppTestRunner, running_extended_tests, VppTestCase, \ get_testcase_doc_name, get_test_description, PASS, FAIL, ERROR, SKIP, \ TEST_RUN @@ -68,8 +70,8 @@ class TestResult(dict): def was_successful(self): return 0 == len(self[FAIL]) == len(self[ERROR]) \ - and len(self[PASS] + self[SKIP]) \ - == self.testcase_suite.countTestCases() == len(self[TEST_RUN]) + and len(self[PASS] + self[SKIP]) \ + == self.testcase_suite.countTestCases() == len(self[TEST_RUN]) def no_tests_run(self): return 0 == len(self[TEST_RUN]) @@ -83,37 +85,41 @@ class TestResult(dict): tc_id = testcase.id() if tc_id not in self[PASS] and tc_id not in self[SKIP]: rerun_ids.add(tc_id) - if len(rerun_ids) > 0: + if rerun_ids: return suite_from_failed(self.testcase_suite, rerun_ids) def get_testcase_names(self, test_id): - if re.match(r'.+\..+\..+', test_id): + # could be tearDownClass (test_ipsec_esp.TestIpsecEsp1) + setup_teardown_match = re.match( + r'((tearDownClass)|(setUpClass)) \((.+\..+)\)', test_id) + if setup_teardown_match: + test_name, _, _, testcase_name = setup_teardown_match.groups() + if len(testcase_name.split('.')) == 2: + for key in self.testcases_by_id.keys(): + if key.startswith(testcase_name): + testcase_name = key + break + testcase_name = self._get_testcase_doc_name(testcase_name) + else: test_name = self._get_test_description(test_id) testcase_name = self._get_testcase_doc_name(test_id) - else: - # could be tearDownClass (test_ipsec_esp.TestIpsecEsp1) - setup_teardown_match = re.match( - r'((tearDownClass)|(setUpClass)) \((.+\..+)\)', test_id) - if setup_teardown_match: - test_name, _, _, testcase_name = setup_teardown_match.groups() - if len(testcase_name.split('.')) == 2: - for key in self.testcases_by_id.keys(): - if key.startswith(testcase_name): - testcase_name = key - break - testcase_name = self._get_testcase_doc_name(testcase_name) - else: - test_name = test_id - testcase_name = test_id return testcase_name, test_name def _get_test_description(self, test_id): - return get_test_description(descriptions, - self.testcases_by_id[test_id]) + if test_id in self.testcases_by_id: + desc = get_test_description(descriptions, + self.testcases_by_id[test_id]) + else: + desc = test_id + return desc def _get_testcase_doc_name(self, test_id): - return get_testcase_doc_name(self.testcases_by_id[test_id]) + if test_id in self.testcases_by_id: + doc_name = get_testcase_doc_name(self.testcases_by_id[test_id]) + else: + doc_name = test_id + return doc_name def test_runner_wrapper(suite, keep_alive_pipe, stdouterr_queue, @@ -139,7 +145,11 @@ class TestCaseWrapper(object): self.finished_parent_end, self.finished_child_end = Pipe(duplex=False) self.result_parent_end, self.result_child_end = Pipe(duplex=False) self.testcase_suite = testcase_suite - self.stdouterr_queue = manager.StreamQueue() + if sys.version[0] == '2': + self.stdouterr_queue = manager.StreamQueue() + else: + from multiprocessing import get_context + self.stdouterr_queue = manager.StreamQueue(ctx=get_context()) self.logger = get_parallel_logger(self.stdouterr_queue) self.child = Process(target=test_runner_wrapper, args=(testcase_suite, @@ -210,13 +220,12 @@ class TestCaseWrapper(object): def stdouterr_reader_wrapper(unread_testcases, finished_unread_testcases, read_testcases): read_testcase = None - while read_testcases.is_set() or len(unread_testcases) > 0: - if not read_testcase: - if len(finished_unread_testcases) > 0: - read_testcase = finished_unread_testcases.pop() - unread_testcases.remove(read_testcase) - elif len(unread_testcases) > 0: - read_testcase = unread_testcases.pop() + while read_testcases.is_set() or unread_testcases: + if finished_unread_testcases: + read_testcase = finished_unread_testcases.pop() + unread_testcases.remove(read_testcase) + elif unread_testcases: + read_testcase = unread_testcases.pop() if read_testcase: data = '' while data is not None: @@ -232,7 +241,7 @@ def handle_failed_suite(logger, last_test_temp_dir, vpp_pid): if last_test_temp_dir: # Need to create link in case of a timeout or core dump without failure lttd = os.path.basename(last_test_temp_dir) - failed_dir = os.getenv('VPP_TEST_FAILED_DIR') + failed_dir = os.getenv('FAILED_DIR') link_path = '%s%s-FAILED' % (failed_dir, lttd) if not os.path.exists(link_path): os.symlink(last_test_temp_dir, link_path) @@ -246,13 +255,23 @@ def handle_failed_suite(logger, last_test_temp_dir, vpp_pid): "Core-file exists in test temporary directory: %s!" % core_path) check_core_path(logger, core_path) - logger.debug("Running `file %s':" % core_path) + logger.debug("Running 'file %s':" % core_path) try: info = check_output(["file", core_path]) logger.debug(info) except CalledProcessError as e: - logger.error("Could not run `file' utility on core-file, " - "rc=%s" % e.returncode) + logger.error("Subprocess returned with return code " + "while running `file' utility on core-file " + "returned: " + "rc=%s", e.returncode) + except OSError as e: + logger.error("Subprocess returned with OS error while " + "running 'file' utility " + "on core-file: " + "(%s) %s", e.errno, e.strerror) + except Exception as e: + logger.exception("Unexpected error running `file' utility " + "on core-file") if vpp_pid: # Copy api post mortem @@ -265,20 +284,23 @@ def handle_failed_suite(logger, last_test_temp_dir, vpp_pid): def check_and_handle_core(vpp_binary, tempdir, core_crash_test): if is_core_present(tempdir): - print('VPP core detected in %s. Last test running was %s' % - (tempdir, core_crash_test)) - print(single_line_delim) - spawn_gdb(vpp_binary, get_core_path(tempdir)) - print(single_line_delim) + if debug_core: + print('VPP core detected in %s. Last test running was %s' % + (tempdir, core_crash_test)) + print(single_line_delim) + spawn_gdb(vpp_binary, get_core_path(tempdir)) + print(single_line_delim) + elif compress_core: + print("Compressing core-file in test directory `%s'" % tempdir) + os.system("gzip %s" % get_core_path(tempdir)) def handle_cores(failed_testcases): - if debug_core: - for failed_testcase in failed_testcases: - tcs_with_core = failed_testcase.testclasess_with_core - if len(tcs_with_core) > 0: - for test, vpp_binary, tempdir in tcs_with_core.values(): - check_and_handle_core(vpp_binary, tempdir, test) + for failed_testcase in failed_testcases: + tcs_with_core = failed_testcase.testclasess_with_core + if tcs_with_core: + for test, vpp_binary, tempdir in tcs_with_core.values(): + check_and_handle_core(vpp_binary, tempdir, test) def process_finished_testsuite(wrapped_testcase_suite, @@ -310,7 +332,7 @@ def run_forked(testcase_suites): manager = StreamQueueManager() manager.start() for i in range(concurrent_tests): - if len(testcase_suites) > 0: + if testcase_suites: wrapped_testcase_suite = TestCaseWrapper(testcase_suites.pop(0), manager) wrapped_testcase_suites.add(wrapped_testcase_suite) @@ -328,103 +350,123 @@ def run_forked(testcase_suites): failed_wrapped_testcases = set() stop_run = False - while len(wrapped_testcase_suites) > 0: - finished_testcase_suites = set() + + try: + while wrapped_testcase_suites: + finished_testcase_suites = set() + for wrapped_testcase_suite in wrapped_testcase_suites: + while wrapped_testcase_suite.result_parent_end.poll(): + wrapped_testcase_suite.result.process_result( + *wrapped_testcase_suite.result_parent_end.recv()) + wrapped_testcase_suite.last_heard = time.time() + + while wrapped_testcase_suite.keep_alive_parent_end.poll(): + wrapped_testcase_suite.last_test, \ + wrapped_testcase_suite.last_test_vpp_binary, \ + wrapped_testcase_suite.last_test_temp_dir, \ + wrapped_testcase_suite.vpp_pid = \ + wrapped_testcase_suite.keep_alive_parent_end.recv() + wrapped_testcase_suite.last_heard = time.time() + + if wrapped_testcase_suite.finished_parent_end.poll(): + wrapped_testcase_suite.finished_parent_end.recv() + wrapped_testcase_suite.last_heard = time.time() + stop_run = process_finished_testsuite( + wrapped_testcase_suite, + finished_testcase_suites, + failed_wrapped_testcases, + results) or stop_run + continue + + fail = False + if wrapped_testcase_suite.last_heard + test_timeout < \ + time.time(): + fail = True + wrapped_testcase_suite.logger.critical( + "Child test runner process timed out " + "(last test running was `%s' in `%s')!" % + (wrapped_testcase_suite.last_test, + wrapped_testcase_suite.last_test_temp_dir)) + elif not wrapped_testcase_suite.child.is_alive(): + fail = True + wrapped_testcase_suite.logger.critical( + "Child test runner process unexpectedly died " + "(last test running was `%s' in `%s')!" % + (wrapped_testcase_suite.last_test, + wrapped_testcase_suite.last_test_temp_dir)) + elif wrapped_testcase_suite.last_test_temp_dir and \ + wrapped_testcase_suite.last_test_vpp_binary: + if is_core_present( + wrapped_testcase_suite.last_test_temp_dir): + wrapped_testcase_suite.add_testclass_with_core() + if wrapped_testcase_suite.core_detected_at is None: + wrapped_testcase_suite.core_detected_at = \ + time.time() + elif wrapped_testcase_suite.core_detected_at + \ + core_timeout < time.time(): + wrapped_testcase_suite.logger.critical( + "Child test runner process unresponsive and " + "core-file exists in test temporary directory " + "(last test running was `%s' in `%s')!" % + (wrapped_testcase_suite.last_test, + wrapped_testcase_suite.last_test_temp_dir)) + fail = True + + if fail: + wrapped_testcase_suite.child.terminate() + try: + # terminating the child process tends to leave orphan + # VPP process around + if wrapped_testcase_suite.vpp_pid: + os.kill(wrapped_testcase_suite.vpp_pid, + signal.SIGTERM) + except OSError: + # already dead + pass + wrapped_testcase_suite.result.crashed = True + wrapped_testcase_suite.result.process_result( + wrapped_testcase_suite.last_test_id, ERROR) + stop_run = process_finished_testsuite( + wrapped_testcase_suite, + finished_testcase_suites, + failed_wrapped_testcases, + results) or stop_run + + for finished_testcase in finished_testcase_suites: + # Somewhat surprisingly, the join below may + # timeout, even if client signaled that + # it finished - so we note it just in case. + join_start = time.time() + finished_testcase.child.join(test_finished_join_timeout) + join_end = time.time() + if join_end - join_start >= test_finished_join_timeout: + finished_testcase.logger.error( + "Timeout joining finished test: %s (pid %d)" % + (finished_testcase.last_test, + finished_testcase.child.pid)) + finished_testcase.close_pipes() + wrapped_testcase_suites.remove(finished_testcase) + finished_unread_testcases.add(finished_testcase) + finished_testcase.stdouterr_queue.put(None) + if stop_run: + while testcase_suites: + results.append(TestResult(testcase_suites.pop(0))) + elif testcase_suites: + new_testcase = TestCaseWrapper(testcase_suites.pop(0), + manager) + wrapped_testcase_suites.add(new_testcase) + unread_testcases.add(new_testcase) + time.sleep(0.1) + except Exception: for wrapped_testcase_suite in wrapped_testcase_suites: - while wrapped_testcase_suite.result_parent_end.poll(): - wrapped_testcase_suite.result.process_result( - *wrapped_testcase_suite.result_parent_end.recv()) - wrapped_testcase_suite.last_heard = time.time() - - while wrapped_testcase_suite.keep_alive_parent_end.poll(): - wrapped_testcase_suite.last_test, \ - wrapped_testcase_suite.last_test_vpp_binary, \ - wrapped_testcase_suite.last_test_temp_dir, \ - wrapped_testcase_suite.vpp_pid = \ - wrapped_testcase_suite.keep_alive_parent_end.recv() - wrapped_testcase_suite.last_heard = time.time() - - if wrapped_testcase_suite.finished_parent_end.poll(): - wrapped_testcase_suite.finished_parent_end.recv() - wrapped_testcase_suite.last_heard = time.time() - stop_run = process_finished_testsuite( - wrapped_testcase_suite, - finished_testcase_suites, - failed_wrapped_testcases, - results) or stop_run - continue - - fail = False - if wrapped_testcase_suite.last_heard + test_timeout < time.time(): - fail = True - wrapped_testcase_suite.logger.critical( - "Child test runner process timed out " - "(last test running was `%s' in `%s')!" % - (wrapped_testcase_suite.last_test, - wrapped_testcase_suite.last_test_temp_dir)) - elif not wrapped_testcase_suite.child.is_alive(): - fail = True - wrapped_testcase_suite.logger.critical( - "Child test runner process unexpectedly died " - "(last test running was `%s' in `%s')!" % - (wrapped_testcase_suite.last_test, - wrapped_testcase_suite.last_test_temp_dir)) - elif wrapped_testcase_suite.last_test_temp_dir and \ - wrapped_testcase_suite.last_test_vpp_binary: - if is_core_present(wrapped_testcase_suite.last_test_temp_dir): - wrapped_testcase_suite.add_testclass_with_core() - if wrapped_testcase_suite.core_detected_at is None: - wrapped_testcase_suite.core_detected_at = time.time() - elif wrapped_testcase_suite.core_detected_at + \ - core_timeout < time.time(): - wrapped_testcase_suite.logger.critical( - "Child test runner process unresponsive and core-" - "file exists in test temporary directory " - "(last test running was `%s' in `%s')!" % - (wrapped_testcase_suite.last_test, - wrapped_testcase_suite.last_test_temp_dir)) - fail = True - - if fail: - wrapped_testcase_suite.child.terminate() - try: - # terminating the child process tends to leave orphan - # VPP process around - if wrapped_testcase_suite.vpp_pid: - os.kill(wrapped_testcase_suite.vpp_pid, signal.SIGTERM) - except OSError: - # already dead - pass - wrapped_testcase_suite.result.crashed = True - wrapped_testcase_suite.result.process_result( - wrapped_testcase_suite.last_test_id, ERROR) - stop_run = process_finished_testsuite( - wrapped_testcase_suite, - finished_testcase_suites, - failed_wrapped_testcases, - results) or stop_run - - for finished_testcase in finished_testcase_suites: - finished_testcase.child.join() - finished_testcase.close_pipes() - wrapped_testcase_suites.remove(finished_testcase) - finished_unread_testcases.add(finished_testcase) - finished_testcase.stdouterr_queue.put(None) - if stop_run: - while len(testcase_suites) > 0: - results.append(TestResult(testcase_suites.pop(0))) - elif len(testcase_suites) > 0: - new_testcase = TestCaseWrapper(testcase_suites.pop(0), manager) - wrapped_testcase_suites.add(new_testcase) - unread_testcases.add(new_testcase) - - while len(unread_testcases) > 0: - # wait for reader thread to read everything in all loggers - pass - - read_from_testcases.clear() - stdouterr_thread.join(test_timeout) - manager.shutdown() + wrapped_testcase_suite.child.terminate() + wrapped_testcase_suite.stdouterr_queue.put(None) + raise + finally: + read_from_testcases.clear() + stdouterr_thread.join(test_timeout) + manager.shutdown() + handle_cores(failed_wrapped_testcases) return results @@ -604,7 +646,7 @@ class AllResults(dict): failed_testcase_ids = result[FAIL] errored_testcase_ids = result[ERROR] old_testcase_name = None - if len(failed_testcase_ids) or len(errored_testcase_ids): + if failed_testcase_ids or errored_testcase_ids: for failed_test_id in failed_testcase_ids: new_testcase_name, test_name = \ result.get_testcase_names(failed_test_id) @@ -612,8 +654,8 @@ class AllResults(dict): print(' Testcase name: {}'.format( colorize(new_testcase_name, RED))) old_testcase_name = new_testcase_name - print(' FAILURE: {}'.format( - colorize(test_name, RED))) + print(' FAILURE: {} [{}]'.format( + colorize(test_name, RED), failed_test_id)) for failed_test_id in errored_testcase_ids: new_testcase_name, test_name = \ result.get_testcase_names(failed_test_id) @@ -621,9 +663,9 @@ class AllResults(dict): print(' Testcase name: {}'.format( colorize(new_testcase_name, RED))) old_testcase_name = new_testcase_name - print(' ERROR: {}'.format( - colorize(test_name, RED))) - if len(self.testsuites_no_tests_run) > 0: + print(' ERROR: {} [{}]'.format( + colorize(test_name, RED), failed_test_id)) + if self.testsuites_no_tests_run: print('TESTCASES WHERE NO TESTS WERE SUCCESSFULLY EXECUTED:') tc_classes = set() for testsuite in self.testsuites_no_tests_run: @@ -690,19 +732,37 @@ def parse_digit_env(env_var, default): if __name__ == '__main__': + if "RND_SEED" not in os.environ: + os.environ["RND_SEED"] = str(time.time()) + print("Setting RND_SEED=%s" % os.environ["RND_SEED"]) + else: + print("Using provided RND_SEED=%s" % os.environ["RND_SEED"]) verbose = parse_digit_env("V", 0) test_timeout = parse_digit_env("TIMEOUT", 600) # default = 10 minutes + test_finished_join_timeout = 15 + retries = parse_digit_env("RETRIES", 0) debug = os.getenv("DEBUG", "n").lower() in ["gdb", "gdbserver"] debug_core = os.getenv("DEBUG", "").lower() == "core" + compress_core = framework.BoolEnvironmentVariable("CORE_COMPRESS") + + step = framework.BoolEnvironmentVariable("STEP") + force_foreground = framework.BoolEnvironmentVariable("FORCE_FOREGROUND") + + run_interactive = debug or step or force_foreground - step = os.getenv("STEP", "n").lower() in ("y", "yes", "1") + try: + num_cpus = len(os.sched_getaffinity(0)) + except AttributeError: + num_cpus = multiprocessing.cpu_count() + shm_free = psutil.disk_usage('/dev/shm').free - run_interactive = debug or step + print('OS reports %s available cpu(s). Free shm: %s' % ( + num_cpus, "{:,}MB".format(shm_free / (1024 * 1024)))) test_jobs = os.getenv("TEST_JOBS", "1").lower() # default = 1 process if test_jobs == 'auto': @@ -710,7 +770,6 @@ if __name__ == '__main__': concurrent_tests = 1 print('Interactive mode required, running on one core') else: - shm_free = psutil.disk_usage('/dev/shm').free shm_max_processes = 1 if shm_free < min_req_shm: raise Exception('Not enough free space in /dev/shm. Required ' @@ -718,14 +777,17 @@ if __name__ == '__main__': % (min_req_shm >> 20)) else: extra_shm = shm_free - min_req_shm - shm_max_processes += extra_shm / shm_per_process + shm_max_processes += extra_shm // shm_per_process concurrent_tests = min(cpu_count(), shm_max_processes) print('Found enough resources to run tests with %s cores' % concurrent_tests) elif test_jobs.isdigit(): concurrent_tests = int(test_jobs) + print("Running on %s core(s) as set by 'TEST_JOBS'." % + concurrent_tests) else: concurrent_tests = 1 + print('Running on one core.') if run_interactive and concurrent_tests > 1: raise NotImplementedError( @@ -750,10 +812,11 @@ if __name__ == '__main__': filter_cb = FilterByTestOption(filter_file, filter_class, filter_func) + ignore_path = os.getenv("VENV_PATH", None) cb = SplitToSuitesCallback(filter_cb) for d in args.dir: print("Adding tests from directory tree %s" % d) - discover_tests(d, cb) + discover_tests(d, cb, ignore_path) # suites are not hashable, need to use list suites = [] @@ -765,34 +828,38 @@ if __name__ == '__main__': print("%s out of %s tests match specified filters" % ( tests_amount, tests_amount + cb.filtered.countTestCases())) - if not running_extended_tests(): + if not running_extended_tests: print("Not running extended tests (some tests will be skipped)") attempts = retries + 1 if attempts > 1: print("Perform %s attempts to pass the suite..." % attempts) - if run_interactive: + if run_interactive and suites: # don't fork if requiring interactive terminal + print('Running tests in foreground in the current process') + full_suite = unittest.TestSuite() + full_suite.addTests(suites) result = VppTestRunner(verbosity=verbose, failfast=failfast, - print_summary=True).run(suites[0]) + print_summary=True).run(full_suite) was_successful = result.wasSuccessful() if not was_successful: for test_case_info in result.failed_test_cases_info: handle_failed_suite(test_case_info.logger, test_case_info.tempdir, test_case_info.vpp_pid) - if debug_core and \ - test_case_info in result.core_crash_test_cases_info: + if test_case_info in result.core_crash_test_cases_info: check_and_handle_core(test_case_info.vpp_bin_path, test_case_info.tempdir, test_case_info.core_crash_test) sys.exit(not was_successful) else: + print('Running each VPPTestCase in a separate background process' + ' with {} parallel process(es)'.format(concurrent_tests)) exit_code = 0 - while len(suites) > 0 and attempts > 0: + while suites and attempts > 0: results = run_forked(suites) exit_code, suites = parse_results(results) attempts -= 1