X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=test%2Frun_tests.py;h=5d091ad253fb3ceb9b65465dcbd360e551c6cd37;hb=c99ab12cbb9252806db55f7a79e5d84b41df3525;hp=6836466373e988a4ee65d6869bc799ce7c017f49;hpb=d760f79cb8c81eab1dbbd85e1453cad122ecd345;p=vpp.git

diff --git a/test/run_tests.py b/test/run_tests.py
index 6836466373e..5d091ad253f 100644
--- a/test/run_tests.py
+++ b/test/run_tests.py
@@ -1,37 +1,35 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import sys
 import shutil
 import os
 import fnmatch
-import select
 import unittest
 import argparse
 import time
 import threading
 import signal
-import psutil
-from multiprocessing import Process, Pipe, cpu_count
+import re
+from multiprocessing import Process, Pipe, get_context
 from multiprocessing.queues import Queue
 from multiprocessing.managers import BaseManager
+import framework
 from framework import VppTestRunner, running_extended_tests, VppTestCase, \
     get_testcase_doc_name, get_test_description, PASS, FAIL, ERROR, SKIP, \
-    TEST_RUN
-from debug import spawn_gdb
+    TEST_RUN, SKIP_CPU_SHORTAGE
+from debug import spawn_gdb, start_vpp_in_gdb
 from log import get_parallel_logger, double_line_delim, RED, YELLOW, GREEN, \
-    colorize
+    colorize, single_line_delim
 from discover_tests import discover_tests
 from subprocess import check_output, CalledProcessError
-from util import check_core_path
+from util import check_core_path, get_core_path, is_core_present
+from cpu_config import num_cpus, max_vpp_cpus, available_cpus
 
 # timeout which controls how long the child has to finish after seeing
 # a core dump in test temporary directory. If this is exceeded, parent assumes
-# that child process is stuck (e.g. waiting for shm mutex, which will never
-# get unlocked) and kill the child
+# that child process is stuck (e.g. waiting for event from vpp) and kill
+# the child
 core_timeout = 3
-min_req_shm = 536870912  # min 512MB shm required
-# 128MB per extra process
-shm_per_process = 134217728
 
 
 class StreamQueue(Queue):
@@ -54,69 +52,84 @@ StreamQueueManager.register('StreamQueue', StreamQueue)
 
 
 class TestResult(dict):
-    def __init__(self, testcase_suite):
+    def __init__(self, testcase_suite, testcases_by_id=None):
         super(TestResult, self).__init__()
         self[PASS] = []
         self[FAIL] = []
         self[ERROR] = []
         self[SKIP] = []
+        self[SKIP_CPU_SHORTAGE] = []
         self[TEST_RUN] = []
+        self.crashed = False
         self.testcase_suite = testcase_suite
         self.testcases = [testcase for testcase in testcase_suite]
-        self.testcases_by_id = {}
+        self.testcases_by_id = testcases_by_id
 
     def was_successful(self):
-        return len(self[PASS] + self[SKIP]) \
-               == self.testcase_suite.countTestCases()
+        return 0 == len(self[FAIL]) == len(self[ERROR]) \
+            and len(self[PASS] + self[SKIP] + self[SKIP_CPU_SHORTAGE]) \
+            == self.testcase_suite.countTestCases()
 
     def no_tests_run(self):
         return 0 == len(self[TEST_RUN])
 
     def process_result(self, test_id, result):
         self[result].append(test_id)
-        for testcase in self.testcases:
-            if testcase.id() == test_id:
-                self.testcases_by_id[test_id] = testcase
-                self.testcases.remove(testcase)
-                break
 
     def suite_from_failed(self):
         rerun_ids = set([])
         for testcase in self.testcase_suite:
             tc_id = testcase.id()
-            if tc_id not in self[PASS] and tc_id not in self[SKIP]:
+            if tc_id not in self[PASS] + self[SKIP] + self[SKIP_CPU_SHORTAGE]:
                 rerun_ids.add(tc_id)
-        if len(rerun_ids) > 0:
+        if rerun_ids:
             return suite_from_failed(self.testcase_suite, rerun_ids)
 
     def get_testcase_names(self, test_id):
-        return self._get_testcase_class(test_id), \
-               self._get_test_description(test_id)
+        # could be tearDownClass (test_ipsec_esp.TestIpsecEsp1)
+        setup_teardown_match = re.match(
+            r'((tearDownClass)|(setUpClass)) \((.+\..+)\)', test_id)
+        if setup_teardown_match:
+            test_name, _, _, testcase_name = setup_teardown_match.groups()
+            if len(testcase_name.split('.')) == 2:
+                for key in self.testcases_by_id.keys():
+                    if key.startswith(testcase_name):
+                        testcase_name = key
+                        break
+            testcase_name = self._get_testcase_doc_name(testcase_name)
+        else:
+            test_name = self._get_test_description(test_id)
+            testcase_name = self._get_testcase_doc_name(test_id)
+
+        return testcase_name, test_name
 
     def _get_test_description(self, test_id):
         if test_id in self.testcases_by_id:
-            return get_test_description(descriptions,
+            desc = get_test_description(descriptions,
                                         self.testcases_by_id[test_id])
         else:
-            return test_id
+            desc = test_id
+        return desc
 
-    def _get_testcase_class(self, test_id):
+    def _get_testcase_doc_name(self, test_id):
         if test_id in self.testcases_by_id:
-            return get_testcase_doc_name(self.testcases_by_id[test_id])
+            doc_name = get_testcase_doc_name(self.testcases_by_id[test_id])
         else:
-            return test_id
+            doc_name = test_id
+        return doc_name
 
 
 def test_runner_wrapper(suite, keep_alive_pipe, stdouterr_queue,
                         finished_pipe, result_pipe, logger):
     sys.stdout = stdouterr_queue
     sys.stderr = stdouterr_queue
-    VppTestCase.logger = logger
+    VppTestCase.parallel_handler = logger.handlers[0]
     result = VppTestRunner(keep_alive_pipe=keep_alive_pipe,
                            descriptions=descriptions,
                            verbosity=verbose,
                            result_pipe=result_pipe,
-                           failfast=failfast).run(suite)
+                           failfast=failfast,
+                           print_summary=False).run(suite)
     finished_pipe.send(result.wasSuccessful())
     finished_pipe.close()
     keep_alive_pipe.close()
@@ -129,7 +142,7 @@ class TestCaseWrapper(object):
         self.finished_parent_end, self.finished_child_end = Pipe(duplex=False)
         self.result_parent_end, self.result_child_end = Pipe(duplex=False)
         self.testcase_suite = testcase_suite
-        self.stdouterr_queue = manager.StreamQueue()
+        self.stdouterr_queue = manager.StreamQueue(ctx=get_context())
         self.logger = get_parallel_logger(self.stdouterr_queue)
         self.child = Process(target=test_runner_wrapper,
                              args=(testcase_suite,
@@ -142,12 +155,48 @@ class TestCaseWrapper(object):
         self.child.start()
         self.last_test_temp_dir = None
         self.last_test_vpp_binary = None
-        self.last_test = None
-        self.result = None
+        self._last_test = None
+        self.last_test_id = None
         self.vpp_pid = None
         self.last_heard = time.time()
         self.core_detected_at = None
-        self.result = TestResult(testcase_suite)
+        self.testcases_by_id = {}
+        self.testclasess_with_core = {}
+        for testcase in self.testcase_suite:
+            self.testcases_by_id[testcase.id()] = testcase
+        self.result = TestResult(testcase_suite, self.testcases_by_id)
+
+    @property
+    def last_test(self):
+        return self._last_test
+
+    @last_test.setter
+    def last_test(self, test_id):
+        self.last_test_id = test_id
+        if test_id in self.testcases_by_id:
+            testcase = self.testcases_by_id[test_id]
+            self._last_test = testcase.shortDescription()
+            if not self._last_test:
+                self._last_test = str(testcase)
+        else:
+            self._last_test = test_id
+
+    def add_testclass_with_core(self):
+        if self.last_test_id in self.testcases_by_id:
+            test = self.testcases_by_id[self.last_test_id]
+            class_name = unittest.util.strclass(test.__class__)
+            test_name = "'{}' ({})".format(get_test_description(descriptions,
+                                                                test),
+                                           self.last_test_id)
+        else:
+            test_name = self.last_test_id
+            class_name = re.match(r'((tearDownClass)|(setUpClass)) '
+                                  r'\((.+\..+)\)', test_name).groups()[3]
+        if class_name not in self.testclasess_with_core:
+            self.testclasess_with_core[class_name] = (
+                test_name,
+                self.last_test_vpp_binary,
+                self.last_test_temp_dir)
 
     def close_pipes(self):
         self.keep_alive_child_end.close()
@@ -157,17 +206,26 @@ class TestCaseWrapper(object):
         self.finished_parent_end.close()
         self.result_parent_end.close()
 
+    def was_successful(self):
+        return self.result.was_successful()
+
+    @property
+    def cpus_used(self):
+        return self.testcase_suite.cpus_used
+
+    def get_assigned_cpus(self):
+        return self.testcase_suite.get_assigned_cpus()
+
 
 def stdouterr_reader_wrapper(unread_testcases, finished_unread_testcases,
                              read_testcases):
     read_testcase = None
-    while read_testcases.is_set() or len(unread_testcases) > 0:
-        if not read_testcase:
-            if len(finished_unread_testcases) > 0:
-                read_testcase = finished_unread_testcases.pop()
-                unread_testcases.remove(read_testcase)
-            elif len(unread_testcases) > 0:
-                read_testcase = unread_testcases.pop()
+    while read_testcases.is_set() or unread_testcases:
+        if finished_unread_testcases:
+            read_testcase = finished_unread_testcases.pop()
+            unread_testcases.remove(read_testcase)
+        elif unread_testcases:
+            read_testcase = unread_testcases.pop()
         if read_testcase:
             data = ''
             while data is not None:
@@ -179,26 +237,151 @@ def stdouterr_reader_wrapper(unread_testcases, finished_unread_testcases,
             read_testcase = None
 
 
+def handle_failed_suite(logger, last_test_temp_dir, vpp_pid):
+    if last_test_temp_dir:
+        # Need to create link in case of a timeout or core dump without failure
+        lttd = os.path.basename(last_test_temp_dir)
+        failed_dir = os.getenv('FAILED_DIR')
+        link_path = '%s%s-FAILED' % (failed_dir, lttd)
+        if not os.path.exists(link_path):
+            os.symlink(last_test_temp_dir, link_path)
+        logger.error("Symlink to failed testcase directory: %s -> %s"
+                     % (link_path, lttd))
+
+        # Report core existence
+        core_path = get_core_path(last_test_temp_dir)
+        if os.path.exists(core_path):
+            logger.error(
+                "Core-file exists in test temporary directory: %s!" %
+                core_path)
+            check_core_path(logger, core_path)
+            logger.debug("Running 'file %s':" % core_path)
+            try:
+                info = check_output(["file", core_path])
+                logger.debug(info)
+            except CalledProcessError as e:
+                logger.error("Subprocess returned with return code "
+                             "while running `file' utility on core-file "
+                             "returned: "
+                             "rc=%s", e.returncode)
+            except OSError as e:
+                logger.error("Subprocess returned with OS error while "
+                             "running 'file' utility "
+                             "on core-file: "
+                             "(%s) %s", e.errno, e.strerror)
+            except Exception as e:
+                logger.exception("Unexpected error running `file' utility "
+                                 "on core-file")
+            logger.error("gdb %s %s" %
+                         (os.getenv('VPP_BIN', 'vpp'), core_path))
+
+    if vpp_pid:
+        # Copy api post mortem
+        api_post_mortem_path = "/tmp/api_post_mortem.%d" % vpp_pid
+        if os.path.isfile(api_post_mortem_path):
+            logger.error("Copying api_post_mortem.%d to %s" %
+                         (vpp_pid, last_test_temp_dir))
+            shutil.copy2(api_post_mortem_path, last_test_temp_dir)
+
+
+def check_and_handle_core(vpp_binary, tempdir, core_crash_test):
+    if is_core_present(tempdir):
+        if debug_core:
+            print('VPP core detected in %s. Last test running was %s' %
+                  (tempdir, core_crash_test))
+            print(single_line_delim)
+            spawn_gdb(vpp_binary, get_core_path(tempdir))
+            print(single_line_delim)
+        elif compress_core:
+            print("Compressing core-file in test directory `%s'" % tempdir)
+            os.system("gzip %s" % get_core_path(tempdir))
+
+
+def handle_cores(failed_testcases):
+    for failed_testcase in failed_testcases:
+        tcs_with_core = failed_testcase.testclasess_with_core
+        if tcs_with_core:
+            for test, vpp_binary, tempdir in tcs_with_core.values():
+                check_and_handle_core(vpp_binary, tempdir, test)
+
+
+def process_finished_testsuite(wrapped_testcase_suite,
+                               finished_testcase_suites,
+                               failed_wrapped_testcases,
+                               results):
+    results.append(wrapped_testcase_suite.result)
+    finished_testcase_suites.add(wrapped_testcase_suite)
+    stop_run = False
+    if failfast and not wrapped_testcase_suite.was_successful():
+        stop_run = True
+
+    if not wrapped_testcase_suite.was_successful():
+        failed_wrapped_testcases.add(wrapped_testcase_suite)
+        handle_failed_suite(wrapped_testcase_suite.logger,
+                            wrapped_testcase_suite.last_test_temp_dir,
+                            wrapped_testcase_suite.vpp_pid)
+
+    return stop_run
+
+
 def run_forked(testcase_suites):
     wrapped_testcase_suites = set()
+    solo_testcase_suites = []
 
     # suites are unhashable, need to use list
     results = []
-    debug_core = os.getenv("DEBUG", "").lower() == "core"
     unread_testcases = set()
     finished_unread_testcases = set()
     manager = StreamQueueManager()
     manager.start()
-    for i in range(concurrent_tests):
-        if len(testcase_suites) > 0:
-            wrapped_testcase_suite = TestCaseWrapper(testcase_suites.pop(0),
-                                                     manager)
-            wrapped_testcase_suites.add(wrapped_testcase_suite)
-            unread_testcases.add(wrapped_testcase_suite)
-            # time.sleep(1)
+    tests_running = 0
+    free_cpus = list(available_cpus)
+
+    def on_suite_start(tc):
+        nonlocal tests_running
+        nonlocal free_cpus
+        tests_running = tests_running + 1
+
+    def on_suite_finish(tc):
+        nonlocal tests_running
+        nonlocal free_cpus
+        tests_running = tests_running - 1
+        assert tests_running >= 0
+        free_cpus.extend(tc.get_assigned_cpus())
+
+    def run_suite(suite):
+        nonlocal manager
+        nonlocal wrapped_testcase_suites
+        nonlocal unread_testcases
+        nonlocal free_cpus
+        suite.assign_cpus(free_cpus[:suite.cpus_used])
+        free_cpus = free_cpus[suite.cpus_used:]
+        wrapper = TestCaseWrapper(suite, manager)
+        wrapped_testcase_suites.add(wrapper)
+        unread_testcases.add(wrapper)
+        on_suite_start(suite)
+
+    def can_run_suite(suite):
+        return (tests_running < max_concurrent_tests and
+                (suite.cpus_used <= len(free_cpus) or
+                 suite.cpus_used > max_vpp_cpus))
+
+    while free_cpus and testcase_suites:
+        a_suite = testcase_suites[0]
+        if a_suite.is_tagged_run_solo:
+            a_suite = testcase_suites.pop(0)
+            solo_testcase_suites.append(a_suite)
+            continue
+        if can_run_suite(a_suite):
+            a_suite = testcase_suites.pop(0)
+            run_suite(a_suite)
         else:
             break
 
+    if tests_running == 0 and solo_testcase_suites:
+        a_suite = solo_testcase_suites.pop(0)
+        run_suite(a_suite)
+
     read_from_testcases = threading.Event()
     read_from_testcases.set()
     stdouterr_thread = threading.Thread(target=stdouterr_reader_wrapper,
@@ -207,155 +390,177 @@ def run_forked(testcase_suites):
                                               read_from_testcases))
     stdouterr_thread.start()
 
-    while len(wrapped_testcase_suites) > 0:
-        finished_testcase_suites = set()
-        for wrapped_testcase_suite in wrapped_testcase_suites:
-            while wrapped_testcase_suite.result_parent_end.poll():
-                wrapped_testcase_suite.result.process_result(
-                    *wrapped_testcase_suite.result_parent_end.recv())
-                wrapped_testcase_suite.last_heard = time.time()
-
-            if wrapped_testcase_suite.finished_parent_end.poll():
-                wrapped_testcase_suite.finished_parent_end.recv()
-                results.append(wrapped_testcase_suite.result)
-                finished_testcase_suites.add(wrapped_testcase_suite)
-                continue
-
-            while wrapped_testcase_suite.keep_alive_parent_end.poll():
-                wrapped_testcase_suite.last_test, \
-                    wrapped_testcase_suite.last_test_vpp_binary, \
-                    wrapped_testcase_suite.last_test_temp_dir, \
-                    wrapped_testcase_suite.vpp_pid = \
-                    wrapped_testcase_suite.keep_alive_parent_end.recv()
-                wrapped_testcase_suite.last_heard = time.time()
-
-            fail = False
-            if wrapped_testcase_suite.last_heard + test_timeout < time.time() \
-                    and not os.path.isfile(
-                                "%s/_core_handled" %
-                                wrapped_testcase_suite.last_test_temp_dir):
-                fail = True
-                wrapped_testcase_suite.logger.critical(
-                    "Timeout while waiting for child test "
-                    "runner process (last test running was "
-                    "`%s' in `%s')!" %
-                    (wrapped_testcase_suite.last_test,
-                     wrapped_testcase_suite.last_test_temp_dir))
-            elif not wrapped_testcase_suite.child.is_alive():
-                fail = True
-                wrapped_testcase_suite.logger.critical(
-                    "Child python process unexpectedly died "
-                    "(last test running was `%s' in `%s')!" %
-                    (wrapped_testcase_suite.last_test,
-                     wrapped_testcase_suite.last_test_temp_dir))
-            elif wrapped_testcase_suite.last_test_temp_dir and \
-                    wrapped_testcase_suite.last_test_vpp_binary:
-                core_path = "%s/core" % \
-                            wrapped_testcase_suite.last_test_temp_dir
-                if os.path.isfile(core_path):
-                    if wrapped_testcase_suite.core_detected_at is None:
-                        wrapped_testcase_suite.core_detected_at = time.time()
-                    elif wrapped_testcase_suite.core_detected_at + \
-                            core_timeout < time.time():
-                        if not os.path.isfile(
-                                        "%s/_core_handled" %
-                                        wrapped_testcase_suite.
-                                        last_test_temp_dir):
-                            wrapped_testcase_suite.logger.critical(
-                                "Child python process unresponsive and core-"
-                                "file exists in test temporary directory!")
-                            fail = True
+    failed_wrapped_testcases = set()
+    stop_run = False
+
+    try:
+        while wrapped_testcase_suites:
+            finished_testcase_suites = set()
+            for wrapped_testcase_suite in wrapped_testcase_suites:
+                while wrapped_testcase_suite.result_parent_end.poll():
+                    wrapped_testcase_suite.result.process_result(
+                        *wrapped_testcase_suite.result_parent_end.recv())
+                    wrapped_testcase_suite.last_heard = time.time()
+
+                while wrapped_testcase_suite.keep_alive_parent_end.poll():
+                    wrapped_testcase_suite.last_test, \
+                        wrapped_testcase_suite.last_test_vpp_binary, \
+                        wrapped_testcase_suite.last_test_temp_dir, \
+                        wrapped_testcase_suite.vpp_pid = \
+                        wrapped_testcase_suite.keep_alive_parent_end.recv()
+                    wrapped_testcase_suite.last_heard = time.time()
+
+                if wrapped_testcase_suite.finished_parent_end.poll():
+                    wrapped_testcase_suite.finished_parent_end.recv()
+                    wrapped_testcase_suite.last_heard = time.time()
+                    stop_run = process_finished_testsuite(
+                        wrapped_testcase_suite,
+                        finished_testcase_suites,
+                        failed_wrapped_testcases,
+                        results) or stop_run
+                    continue
 
-            if fail:
-                failed_dir = os.getenv('VPP_TEST_FAILED_DIR')
-                if wrapped_testcase_suite.last_test_temp_dir:
-                    lttd = os.path.basename(
-                        wrapped_testcase_suite.last_test_temp_dir)
-                else:
-                    lttd = None
-                link_path = '%s%s-FAILED' % (failed_dir, lttd)
-                wrapped_testcase_suite.logger.error(
-                    "Creating a link to the failed test: %s -> %s" %
-                    (link_path, lttd))
-                if not os.path.exists(link_path) \
-                        and wrapped_testcase_suite.last_test_temp_dir:
-                    os.symlink(wrapped_testcase_suite.last_test_temp_dir,
-                               link_path)
-                api_post_mortem_path = "/tmp/api_post_mortem.%d" % \
-                                       wrapped_testcase_suite.vpp_pid
-                if os.path.isfile(api_post_mortem_path):
-                    wrapped_testcase_suite.logger.error(
-                        "Copying api_post_mortem.%d to %s" %
-                        (wrapped_testcase_suite.vpp_pid,
+                fail = False
+                if wrapped_testcase_suite.last_heard + test_timeout < \
+                        time.time():
+                    fail = True
+                    wrapped_testcase_suite.logger.critical(
+                        "Child test runner process timed out "
+                        "(last test running was `%s' in `%s')!" %
+                        (wrapped_testcase_suite.last_test,
+                         wrapped_testcase_suite.last_test_temp_dir))
+                elif not wrapped_testcase_suite.child.is_alive():
+                    fail = True
+                    wrapped_testcase_suite.logger.critical(
+                        "Child test runner process unexpectedly died "
+                        "(last test running was `%s' in `%s')!" %
+                        (wrapped_testcase_suite.last_test,
                          wrapped_testcase_suite.last_test_temp_dir))
-                    shutil.copy2(api_post_mortem_path,
-                                 wrapped_testcase_suite.last_test_temp_dir)
-                if wrapped_testcase_suite.last_test_temp_dir and \
+                elif wrapped_testcase_suite.last_test_temp_dir and \
                         wrapped_testcase_suite.last_test_vpp_binary:
-                    core_path = "%s/core" % \
-                                wrapped_testcase_suite.last_test_temp_dir
-                    if os.path.isfile(core_path):
-                        wrapped_testcase_suite.logger.error(
-                            "Core-file exists in test temporary directory: %s!"
-                            % core_path)
-                        check_core_path(wrapped_testcase_suite.logger,
-                                        core_path)
-                        wrapped_testcase_suite.logger.debug(
-                            "Running `file %s':" % core_path)
-                        try:
-                            info = check_output(["file", core_path])
-                            wrapped_testcase_suite.logger.debug(info)
-                        except CalledProcessError as e:
-                            wrapped_testcase_suite.logger.error(
-                                "Could not run `file' utility on core-file, "
-                                "rc=%s" % e.returncode)
-                            pass
-                        if debug_core:
-                            spawn_gdb(
-                                wrapped_testcase_suite.last_test_vpp_binary,
-                                core_path, wrapped_testcase_suite.logger)
-                wrapped_testcase_suite.child.terminate()
-                try:
-                    # terminating the child process tends to leave orphan
-                    # VPP process around
-                    os.kill(wrapped_testcase_suite.vpp_pid, signal.SIGTERM)
-                except OSError:
-                    # already dead
-                    pass
-                results.append(wrapped_testcase_suite.result)
-                finished_testcase_suites.add(wrapped_testcase_suite)
-
-        for finished_testcase in finished_testcase_suites:
-            finished_testcase.child.join()
-            finished_testcase.close_pipes()
-            wrapped_testcase_suites.remove(finished_testcase)
-            finished_unread_testcases.add(finished_testcase)
-            finished_testcase.stdouterr_queue.put(None)
-            if len(testcase_suites) > 0:
-                new_testcase = TestCaseWrapper(testcase_suites.pop(0), manager)
-                wrapped_testcase_suites.add(new_testcase)
-                unread_testcases.add(new_testcase)
-
-    read_from_testcases.clear()
-    stdouterr_thread.join(test_timeout)
-    manager.shutdown()
+                    if is_core_present(
+                            wrapped_testcase_suite.last_test_temp_dir):
+                        wrapped_testcase_suite.add_testclass_with_core()
+                        if wrapped_testcase_suite.core_detected_at is None:
+                            wrapped_testcase_suite.core_detected_at = \
+                                time.time()
+                        elif wrapped_testcase_suite.core_detected_at + \
+                                core_timeout < time.time():
+                            wrapped_testcase_suite.logger.critical(
+                                "Child test runner process unresponsive and "
+                                "core-file exists in test temporary directory "
+                                "(last test running was `%s' in `%s')!" %
+                                (wrapped_testcase_suite.last_test,
+                                 wrapped_testcase_suite.last_test_temp_dir))
+                            fail = True
+
+                if fail:
+                    wrapped_testcase_suite.child.terminate()
+                    try:
+                        # terminating the child process tends to leave orphan
+                        # VPP process around
+                        if wrapped_testcase_suite.vpp_pid:
+                            os.kill(wrapped_testcase_suite.vpp_pid,
+                                    signal.SIGTERM)
+                    except OSError:
+                        # already dead
+                        pass
+                    wrapped_testcase_suite.result.crashed = True
+                    wrapped_testcase_suite.result.process_result(
+                        wrapped_testcase_suite.last_test_id, ERROR)
+                    stop_run = process_finished_testsuite(
+                        wrapped_testcase_suite,
+                        finished_testcase_suites,
+                        failed_wrapped_testcases,
+                        results) or stop_run
+
+            for finished_testcase in finished_testcase_suites:
+                # Somewhat surprisingly, the join below may
+                # timeout, even if client signaled that
+                # it finished - so we note it just in case.
+                join_start = time.time()
+                finished_testcase.child.join(test_finished_join_timeout)
+                join_end = time.time()
+                if join_end - join_start >= test_finished_join_timeout:
+                    finished_testcase.logger.error(
+                        "Timeout joining finished test: %s (pid %d)" %
+                        (finished_testcase.last_test,
+                         finished_testcase.child.pid))
+                finished_testcase.close_pipes()
+                wrapped_testcase_suites.remove(finished_testcase)
+                finished_unread_testcases.add(finished_testcase)
+                finished_testcase.stdouterr_queue.put(None)
+                on_suite_finish(finished_testcase)
+                if stop_run:
+                    while testcase_suites:
+                        results.append(TestResult(testcase_suites.pop(0)))
+                elif testcase_suites:
+                    a_suite = testcase_suites.pop(0)
+                    while a_suite and a_suite.is_tagged_run_solo:
+                        solo_testcase_suites.append(a_suite)
+                        if testcase_suites:
+                            a_suite = testcase_suites.pop(0)
+                        else:
+                            a_suite = None
+                    if a_suite and can_run_suite(a_suite):
+                        run_suite(a_suite)
+                if solo_testcase_suites and tests_running == 0:
+                    a_suite = solo_testcase_suites.pop(0)
+                    run_suite(a_suite)
+            time.sleep(0.1)
+    except Exception:
+        for wrapped_testcase_suite in wrapped_testcase_suites:
+            wrapped_testcase_suite.child.terminate()
+            wrapped_testcase_suite.stdouterr_queue.put(None)
+        raise
+    finally:
+        read_from_testcases.clear()
+        stdouterr_thread.join(test_timeout)
+        manager.shutdown()
+
+    handle_cores(failed_wrapped_testcases)
     return results
 
 
+class TestSuiteWrapper(unittest.TestSuite):
+    cpus_used = 0
+
+    def __init__(self):
+        return super().__init__()
+
+    def addTest(self, test):
+        self.cpus_used = max(self.cpus_used, test.get_cpus_required())
+        super().addTest(test)
+
+    def assign_cpus(self, cpus):
+        self.cpus = cpus
+
+    def _handleClassSetUp(self, test, result):
+        if not test.__class__.skipped_due_to_cpu_lack:
+            test.assign_cpus(self.cpus)
+        super()._handleClassSetUp(test, result)
+
+    def get_assigned_cpus(self):
+        return self.cpus
+
+
 class SplitToSuitesCallback:
     def __init__(self, filter_callback):
         self.suites = {}
         self.suite_name = 'default'
         self.filter_callback = filter_callback
-        self.filtered = unittest.TestSuite()
+        self.filtered = TestSuiteWrapper()
 
     def __call__(self, file_name, cls, method):
         test_method = cls(method)
         if self.filter_callback(file_name, cls.__name__, method):
             self.suite_name = file_name + cls.__name__
             if self.suite_name not in self.suites:
-                self.suites[self.suite_name] = unittest.TestSuite()
+                self.suites[self.suite_name] = TestSuiteWrapper()
+                self.suites[self.suite_name].is_tagged_run_solo = False
             self.suites[self.suite_name].addTest(test_method)
+            if test_method.is_tagged_run_solo():
+                self.suites[self.suite_name].is_tagged_run_solo = True
 
         else:
             self.filtered.addTest(test_method)
@@ -396,7 +601,7 @@ def parse_test_option():
 
 
 def filter_tests(tests, filter_cb):
-    result = unittest.suite.TestSuite()
+    result = TestSuiteWrapper()
     for t in tests:
         if isinstance(t, unittest.suite.TestSuite):
             # this is a bunch of tests, recursively filter...
@@ -461,36 +666,33 @@ class AllResults(dict):
         self[FAIL] = 0
         self[ERROR] = 0
         self[SKIP] = 0
+        self[SKIP_CPU_SHORTAGE] = 0
         self[TEST_RUN] = 0
         self.rerun = []
         self.testsuites_no_tests_run = []
 
     def add_results(self, result):
         self.results_per_suite.append(result)
-        result_types = [PASS, FAIL, ERROR, SKIP, TEST_RUN]
+        result_types = [PASS, FAIL, ERROR, SKIP, TEST_RUN, SKIP_CPU_SHORTAGE]
         for result_type in result_types:
             self[result_type] += len(result[result_type])
 
     def add_result(self, result):
         retval = 0
         self.all_testcases += result.testcase_suite.countTestCases()
-        if not result.no_tests_run():
-            if not result.was_successful():
-                retval = 1
+        self.add_results(result)
 
-            self.add_results(result)
-        else:
+        if result.no_tests_run():
             self.testsuites_no_tests_run.append(result.testcase_suite)
-            retval = -1
+            if result.crashed:
+                retval = -1
+            else:
+                retval = 1
+        elif not result.was_successful():
+            retval = 1
 
         if retval != 0:
-            if concurrent_tests == 1:
-                if not result.no_tests_run():
-                    self.rerun.append(result.suite_from_failed())
-                else:
-                    self.rerun.append(result.testcase_suite)
-            else:
-                self.rerun.append(result.testcase_suite)
+            self.rerun.append(result.testcase_suite)
 
         return retval
 
@@ -498,30 +700,37 @@ class AllResults(dict):
         print('')
         print(double_line_delim)
         print('TEST RESULTS:')
-        print('     Scheduled tests: {}'.format(self.all_testcases))
-        print('      Executed tests: {}'.format(self[TEST_RUN]))
-        print('        Passed tests: {}'.format(
-            colorize(str(self[PASS]), GREEN)))
-        if self[SKIP] > 0:
-            print('       Skipped tests: {}'.format(
-                colorize(str(self[SKIP]), YELLOW)))
-        if self.not_executed > 0:
-            print('  Not Executed tests: {}'.format(
-                colorize(str(self.not_executed), RED)))
-        if self[FAIL] > 0:
-            print('            Failures: {}'.format(
-                colorize(str(self[FAIL]), RED)))
-        if self[ERROR] > 0:
-            print('              Errors: {}'.format(
-                colorize(str(self[ERROR]), RED)))
+
+        def indent_results(lines):
+            lines = list(filter(None, lines))
+            maximum = max(lines, key=lambda x: x.index(":"))
+            maximum = 4 + maximum.index(":")
+            for l in lines:
+                padding = " " * (maximum - l.index(":"))
+                print(f"{padding}{l}")
+
+        indent_results([
+            f'Scheduled tests: {self.all_testcases}',
+            f'Executed tests: {self[TEST_RUN]}',
+            f'Passed tests: {colorize(self[PASS], GREEN)}',
+            f'Skipped tests: {colorize(self[SKIP], YELLOW)}'
+            if self[SKIP] else None,
+            f'Not Executed tests: {colorize(self.not_executed, RED)}'
+            if self.not_executed else None,
+            f'Failures: {colorize(self[FAIL], RED)}' if self[FAIL] else None,
+            f'Errors: {colorize(self[ERROR], RED)}' if self[ERROR] else None,
+            'Tests skipped due to lack of CPUS: '
+            f'{colorize(self[SKIP_CPU_SHORTAGE], YELLOW)}'
+            if self[SKIP_CPU_SHORTAGE] else None
+        ])
 
         if self.all_failed > 0:
-            print('FAILED TESTS:')
+            print('FAILURES AND ERRORS IN TESTS:')
             for result in self.results_per_suite:
                 failed_testcase_ids = result[FAIL]
                 errored_testcase_ids = result[ERROR]
                 old_testcase_name = None
-                if len(failed_testcase_ids) or len(errored_testcase_ids):
+                if failed_testcase_ids:
                     for failed_test_id in failed_testcase_ids:
                         new_testcase_name, test_name = \
                             result.get_testcase_names(failed_test_id)
@@ -529,26 +738,31 @@ class AllResults(dict):
                             print('  Testcase name: {}'.format(
                                 colorize(new_testcase_name, RED)))
                             old_testcase_name = new_testcase_name
-                        print('     FAILED: {}'.format(
-                            colorize(test_name, RED)))
-                    for failed_test_id in errored_testcase_ids:
+                        print('    FAILURE: {} [{}]'.format(
+                            colorize(test_name, RED), failed_test_id))
+                if errored_testcase_ids:
+                    for errored_test_id in errored_testcase_ids:
                         new_testcase_name, test_name = \
-                            result.get_testcase_names(failed_test_id)
+                            result.get_testcase_names(errored_test_id)
                         if new_testcase_name != old_testcase_name:
                             print('  Testcase name: {}'.format(
                                 colorize(new_testcase_name, RED)))
                             old_testcase_name = new_testcase_name
-                        print('    ERRORED: {}'.format(
-                            colorize(test_name, RED)))
-        if len(self.testsuites_no_tests_run) > 0:
+                        print('      ERROR: {} [{}]'.format(
+                            colorize(test_name, RED), errored_test_id))
+        if self.testsuites_no_tests_run:
             print('TESTCASES WHERE NO TESTS WERE SUCCESSFULLY EXECUTED:')
-            tc_classes = set([])
+            tc_classes = set()
             for testsuite in self.testsuites_no_tests_run:
                 for testcase in testsuite:
                     tc_classes.add(get_testcase_doc_name(testcase))
             for tc_class in tc_classes:
                 print('  {}'.format(colorize(tc_class, RED)))
 
+        if self[SKIP_CPU_SHORTAGE]:
+            print()
+            print(colorize('     SOME TESTS WERE SKIPPED BECAUSE THERE ARE NOT'
+                           ' ENOUGH CPUS AVAILABLE', YELLOW))
         print(double_line_delim)
         print('')
 
@@ -611,44 +825,55 @@ if __name__ == '__main__':
 
     test_timeout = parse_digit_env("TIMEOUT", 600)  # default = 10 minutes
 
+    test_finished_join_timeout = 15
+
     retries = parse_digit_env("RETRIES", 0)
 
-    debug = os.getenv("DEBUG", "n").lower() in ["gdb", "gdbserver"]
+    debug = os.getenv("DEBUG", "n").lower() in ["gdb", "gdbserver", "attach"]
 
-    step = os.getenv("STEP", "n").lower() in ("y", "yes", "1")
+    debug_core = os.getenv("DEBUG", "").lower() == "core"
+    compress_core = framework.BoolEnvironmentVariable("CORE_COMPRESS")
+
+    if os.getenv("VPP_IN_GDB", "n").lower() in ["1", "y", "yes"]:
+        start_vpp_in_gdb()
+        exit()
 
-    force_foreground = \
-        os.getenv("FORCE_FOREGROUND", "n").lower() in ("y", "yes", "1")
+    step = framework.BoolEnvironmentVariable("STEP")
+    force_foreground = framework.BoolEnvironmentVariable("FORCE_FOREGROUND")
 
     run_interactive = debug or step or force_foreground
 
+    max_concurrent_tests = 0
+    print(f"OS reports {num_cpus} available cpu(s).")
+
     test_jobs = os.getenv("TEST_JOBS", "1").lower()  # default = 1 process
     if test_jobs == 'auto':
         if run_interactive:
-            concurrent_tests = 1
-            print('Interactive mode required, running on one core')
+            max_concurrent_tests = 1
+            print('Interactive mode required, running tests consecutively.')
         else:
-            shm_free = psutil.disk_usage('/dev/shm').free
-            shm_max_processes = 1
-            if shm_free < min_req_shm:
-                raise Exception('Not enough free space in /dev/shm. Required '
-                                'free space is at least %sM.'
-                                % (min_req_shm >> 20))
-            else:
-                extra_shm = shm_free - min_req_shm
-                shm_max_processes += extra_shm / shm_per_process
-            concurrent_tests = min(cpu_count(), shm_max_processes)
-            print('Found enough resources to run tests with %s cores'
-                  % concurrent_tests)
-    elif test_jobs.isdigit():
-        concurrent_tests = int(test_jobs)
+            max_concurrent_tests = num_cpus
+            print(f"Running at most {max_concurrent_tests} python test "
+                  "processes concurrently.")
     else:
-        concurrent_tests = 1
-
-    if run_interactive and concurrent_tests > 1:
+        try:
+            test_jobs = int(test_jobs)
+        except ValueError as e:
+            raise ValueError("Invalid TEST_JOBS value specified, valid "
+                             "values are a positive integer or 'auto'") from e
+        if test_jobs <= 0:
+            raise ValueError("Invalid TEST_JOBS value specified, valid "
+                             "values are a positive integer or 'auto'")
+        max_concurrent_tests = int(test_jobs)
+        print(f"Running at most {max_concurrent_tests} python test processes "
+              "concurrently as set by 'TEST_JOBS'.")
+
+    print(f"Using at most {max_vpp_cpus} cpus for VPP threads.")
+
+    if run_interactive and max_concurrent_tests > 1:
         raise NotImplementedError(
-            'Running tests interactively (DEBUG, STEP or FORCE_FOREGROUND is '
-            'set) in parallel (TEST_JOBS is more than 1) is not '
+            'Running tests interactively (DEBUG is gdb[server] or ATTACH or '
+            'STEP is set) in parallel (TEST_JOBS is more than 1) is not '
             'supported')
 
     parser = argparse.ArgumentParser(description="VPP unit tests")
@@ -661,7 +886,7 @@ if __name__ == '__main__':
     failfast = args.failfast
     descriptions = True
 
-    print("Running tests using custom test runner")  # debug message
+    print("Running tests using custom test runner.")
     filter_file, filter_class, filter_func = parse_test_option()
 
     print("Active filters: file=%s, class=%s, function=%s" % (
@@ -669,44 +894,83 @@ if __name__ == '__main__':
 
     filter_cb = FilterByTestOption(filter_file, filter_class, filter_func)
 
+    ignore_path = os.getenv("VENV_PATH", None)
     cb = SplitToSuitesCallback(filter_cb)
     for d in args.dir:
         print("Adding tests from directory tree %s" % d)
-        discover_tests(d, cb)
+        discover_tests(d, cb, ignore_path)
 
     # suites are not hashable, need to use list
     suites = []
     tests_amount = 0
     for testcase_suite in cb.suites.values():
         tests_amount += testcase_suite.countTestCases()
+        if testcase_suite.cpus_used > max_vpp_cpus:
+            # here we replace test functions with lambdas to just skip them
+            # but we also replace setUp/tearDown functions to do nothing
+            # so that the test can be "started" and "stopped", so that we can
+            # still keep those prints (test description - SKIP), which are done
+            # in stopTest() (for that to trigger, test function must run)
+            for t in testcase_suite:
+                for m in dir(t):
+                    if m.startswith('test_'):
+                        setattr(t, m, lambda: t.skipTest("not enough cpus"))
+                setattr(t.__class__, 'setUpClass', lambda: None)
+                setattr(t.__class__, 'tearDownClass', lambda: None)
+                setattr(t, 'setUp', lambda: None)
+                setattr(t, 'tearDown', lambda: None)
+                t.__class__.skipped_due_to_cpu_lack = True
         suites.append(testcase_suite)
 
-    if concurrent_tests == 1:
-        new_suite = unittest.TestSuite()
-        for suite in suites:
-            new_suite.addTests(suite)
-
-        suites = [new_suite]
-
     print("%s out of %s tests match specified filters" % (
         tests_amount, tests_amount + cb.filtered.countTestCases()))
 
-    if not running_extended_tests():
+    if not running_extended_tests:
         print("Not running extended tests (some tests will be skipped)")
 
     attempts = retries + 1
     if attempts > 1:
         print("Perform %s attempts to pass the suite..." % attempts)
 
-    if run_interactive:
+    if run_interactive and suites:
         # don't fork if requiring interactive terminal
-        sys.exit(not VppTestRunner(
-            verbosity=verbose, failfast=failfast)
-                 .run(suites[0]).wasSuccessful())
+        print('Running tests in foreground in the current process')
+        full_suite = unittest.TestSuite()
+        free_cpus = list(available_cpus)
+        cpu_shortage = False
+        for suite in suites:
+            if suite.cpus_used <= max_vpp_cpus:
+                suite.assign_cpus(free_cpus[:suite.cpus_used])
+            else:
+                suite.assign_cpus([])
+                cpu_shortage = True
+        full_suite.addTests(suites)
+        result = VppTestRunner(verbosity=verbose,
+                               failfast=failfast,
+                               print_summary=True).run(full_suite)
+        was_successful = result.wasSuccessful()
+        if not was_successful:
+            for test_case_info in result.failed_test_cases_info:
+                handle_failed_suite(test_case_info.logger,
+                                    test_case_info.tempdir,
+                                    test_case_info.vpp_pid)
+                if test_case_info in result.core_crash_test_cases_info:
+                    check_and_handle_core(test_case_info.vpp_bin_path,
+                                          test_case_info.tempdir,
+                                          test_case_info.core_crash_test)
+
+        if cpu_shortage:
+            print()
+            print(colorize('SOME TESTS WERE SKIPPED BECAUSE THERE ARE NOT'
+                           ' ENOUGH CPUS AVAILABLE', YELLOW))
+            print()
+        sys.exit(not was_successful)
     else:
+        print('Running each VPPTestCase in a separate background process'
+              f' with at most {max_concurrent_tests} parallel python test '
+              'process(es)')
         exit_code = 0
-        while len(suites) > 0 and attempts > 0:
-            tests_amount = sum([x.countTestCases() for x in suites])
+        while suites and attempts > 0:
             results = run_forked(suites)
             exit_code, suites = parse_results(results)
             attempts -= 1