X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=test%2Fframework.py;h=2afec7a4f0f86bd5e67bfa309e34ff8b66eb84ae;hb=6197cb730;hp=47de2c4d967109ea78c65d0e0b406c89add5f1ae;hpb=7d31ab2a5cc2124ddd973ac5dfac2219619f345a;p=vpp.git diff --git a/test/framework.py b/test/framework.py old mode 100644 new mode 100755 index 47de2c4d967..2afec7a4f0f --- a/test/framework.py +++ b/test/framework.py @@ -1,10 +1,13 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 from __future__ import print_function import gc +import logging import sys import os import select +import signal +import subprocess import unittest import tempfile import time @@ -18,16 +21,21 @@ from threading import Thread, Event from inspect import getdoc, isclass from traceback import format_exception from logging import FileHandler, DEBUG, Formatter +from enum import Enum +from abc import ABC, abstractmethod import scapy.compat from scapy.packet import Raw -from hook import StepHook, PollHook, VppDiedError +import hook as hookmodule from vpp_pg_interface import VppPGInterface from vpp_sub_interface import VppSubInterface from vpp_lo_interface import VppLoInterface from vpp_bvi_interface import VppBviInterface from vpp_papi_provider import VppPapiProvider +from vpp_papi import VppEnum +import vpp_papi from vpp_papi.vpp_stats import VPPStats +from vpp_papi.vpp_transport_socket import VppTransportSocketIOError from log import RED, GREEN, YELLOW, double_line_delim, single_line_delim, \ get_logger, colorize from vpp_object import VppObjectRegistry @@ -36,28 +44,43 @@ from scapy.layers.inet import IPerror, TCPerror, UDPerror, ICMPerror from scapy.layers.inet6 import ICMPv6DestUnreach, ICMPv6EchoRequest from scapy.layers.inet6 import ICMPv6EchoReply -if os.name == 'posix' and sys.version_info[0] < 3: - # using subprocess32 is recommended by python official documentation - # @ https://docs.python.org/2/library/subprocess.html - import subprocess32 as subprocess -else: - import subprocess - -# Python2/3 compatible -try: - input = raw_input -except NameError: - pass +from cpu_config import available_cpus, num_cpus, max_vpp_cpus + +logger = logging.getLogger(__name__) + +# Set up an empty logger for the testcase that can be overridden as necessary +null_logger = logging.getLogger('VppTestCase') +null_logger.addHandler(logging.NullHandler()) PASS = 0 FAIL = 1 ERROR = 2 SKIP = 3 TEST_RUN = 4 +SKIP_CPU_SHORTAGE = 5 + + +class BoolEnvironmentVariable(object): + + def __init__(self, env_var_name, default='n', true_values=None): + self.name = env_var_name + self.default = default + self.true_values = true_values if true_values is not None else \ + ("y", "yes", "1") + + def __bool__(self): + return os.getenv(self.name, self.default).lower() in self.true_values + + if sys.version_info[0] == 2: + __nonzero__ = __bool__ + + def __repr__(self): + return 'BoolEnvironmentVariable(%r, default=%r, true_values=%r)' % \ + (self.name, self.default, self.true_values) + -debug_framework = False -if os.getenv('TEST_DEBUG', "0") == "1": - debug_framework = True +debug_framework = BoolEnvironmentVariable('TEST_DEBUG') +if debug_framework: import debug_internal """ @@ -68,6 +91,39 @@ if os.getenv('TEST_DEBUG', "0") == "1": """ +class VppDiedError(Exception): + """ exception for reporting that the subprocess has died.""" + + signals_by_value = {v: k for k, v in signal.__dict__.items() if + k.startswith('SIG') and not k.startswith('SIG_')} + + def __init__(self, rv=None, testcase=None, method_name=None): + self.rv = rv + self.signal_name = None + self.testcase = testcase + self.method_name = method_name + + try: + self.signal_name = VppDiedError.signals_by_value[-rv] + except (KeyError, TypeError): + pass + + if testcase is None and method_name is None: + in_msg = '' + else: + in_msg = ' while running %s.%s' % (testcase, method_name) + + if self.rv: + msg = "VPP subprocess died unexpectedly%s with return code: %d%s."\ + % (in_msg, self.rv, ' [%s]' % + (self.signal_name if + self.signal_name is not None else '')) + else: + msg = "VPP subprocess died unexpectedly%s." % in_msg + + super(VppDiedError, self).__init__(msg) + + class _PacketInfo(object): """Private class to create packet info object. @@ -108,7 +164,8 @@ def pump_output(testclass): if testclass.vpp.stdout.fileno() in readable: read = os.read(testclass.vpp.stdout.fileno(), 102400) if len(read) > 0: - split = read.splitlines(True) + split = read.decode('ascii', + errors='backslashreplace').splitlines(True) if len(stdout_fragment) > 0: split[0] = "%s%s" % (stdout_fragment, split[0]) if len(split) > 0 and split[-1].endswith("\n"): @@ -119,30 +176,33 @@ def pump_output(testclass): testclass.vpp_stdout_deque.extend(split[:limit]) if not testclass.cache_vpp_output: for line in split[:limit]: - testclass.logger.debug( + testclass.logger.info( "VPP STDOUT: %s" % line.rstrip("\n")) if testclass.vpp.stderr.fileno() in readable: read = os.read(testclass.vpp.stderr.fileno(), 102400) if len(read) > 0: - split = read.splitlines(True) + split = read.decode('ascii', + errors='backslashreplace').splitlines(True) if len(stderr_fragment) > 0: split[0] = "%s%s" % (stderr_fragment, split[0]) - if len(split) > 0 and split[-1].endswith(b"\n"): + if len(split) > 0 and split[-1].endswith("\n"): limit = None else: limit = -1 stderr_fragment = split[-1] + testclass.vpp_stderr_deque.extend(split[:limit]) if not testclass.cache_vpp_output: for line in split[:limit]: - testclass.logger.debug( + testclass.logger.error( "VPP STDERR: %s" % line.rstrip("\n")) # ignoring the dummy pipe here intentionally - the # flag will take care of properly terminating the loop def _is_skip_aarch64_set(): - return os.getenv('SKIP_AARCH64', 'n').lower() in ('yes', 'y', '1') + return BoolEnvironmentVariable('SKIP_AARCH64') + is_skip_aarch64_set = _is_skip_aarch64_set() @@ -150,21 +210,37 @@ is_skip_aarch64_set = _is_skip_aarch64_set() def _is_platform_aarch64(): return platform.machine() == 'aarch64' + is_platform_aarch64 = _is_platform_aarch64() def _running_extended_tests(): - s = os.getenv("EXTENDED_TESTS", "n") - return True if s.lower() in ("y", "yes", "1") else False + return BoolEnvironmentVariable("EXTENDED_TESTS") + running_extended_tests = _running_extended_tests() -def _running_on_centos(): - os_id = os.getenv("OS_ID", "") - return True if "centos" in os_id.lower() else False +def _running_gcov_tests(): + return BoolEnvironmentVariable("GCOV_TESTS") + + +running_gcov_tests = _running_gcov_tests() -running_on_centos = _running_on_centos + +def get_environ_vpp_worker_count(): + worker_config = os.getenv("VPP_WORKER_CONFIG", None) + if worker_config: + elems = worker_config.split(" ") + if elems[0] != "workers" or len(elems) != 2: + raise ValueError("Wrong VPP_WORKER_CONFIG == '%s' value." % + worker_config) + return int(elems[1]) + else: + return 0 + + +environ_vpp_worker_count = get_environ_vpp_worker_count() class KeepAliveReporter(object): @@ -203,13 +279,63 @@ class KeepAliveReporter(object): self.pipe.send((desc, test.vpp_bin, test.tempdir, test.vpp.pid)) -class VppTestCase(unittest.TestCase): +class TestCaseTag(Enum): + # marks the suites that must run at the end + # using only a single test runner + RUN_SOLO = 1 + # marks the suites broken on VPP multi-worker + FIXME_VPP_WORKERS = 2 + + +def create_tag_decorator(e): + def decorator(cls): + try: + cls.test_tags.append(e) + except AttributeError: + cls.test_tags = [e] + return cls + return decorator + + +tag_run_solo = create_tag_decorator(TestCaseTag.RUN_SOLO) +tag_fixme_vpp_workers = create_tag_decorator(TestCaseTag.FIXME_VPP_WORKERS) + + +class DummyVpp: + returncode = None + pid = 0xcafebafe + + def poll(self): + pass + + def terminate(self): + pass + + +class CPUInterface(ABC): + cpus = [] + skipped_due_to_cpu_lack = False + + @classmethod + @abstractmethod + def get_cpus_required(cls): + pass + + @classmethod + def assign_cpus(cls, cpus): + cls.cpus = cpus + + +class VppTestCase(CPUInterface, unittest.TestCase): """This subclass is a base class for VPP test cases that are implemented as classes. It provides methods to create and run test case. """ + extra_vpp_statseg_config = "" extra_vpp_punt_config = [] extra_vpp_plugin_config = [] + logger = null_logger + vapi_response_timeout = 5 @property def packet_infos(self): @@ -224,6 +350,20 @@ class VppTestCase(unittest.TestCase): else: return 0 + @classmethod + def has_tag(cls, tag): + """ if the test case has a given tag - return true """ + try: + return tag in cls.test_tags + except AttributeError: + pass + return False + + @classmethod + def is_tagged_run_solo(cls): + """ if the test case class is timing-sensitive - return true """ + return cls.has_tag(TestCaseTag.RUN_SOLO) + @classmethod def instance(cls): """Return the instance of this testcase""" @@ -231,59 +371,48 @@ class VppTestCase(unittest.TestCase): @classmethod def set_debug_flags(cls, d): + cls.gdbserver_port = 7777 cls.debug_core = False cls.debug_gdb = False cls.debug_gdbserver = False + cls.debug_all = False + cls.debug_attach = False if d is None: return dl = d.lower() if dl == "core": cls.debug_core = True - elif dl == "gdb": + elif dl == "gdb" or dl == "gdb-all": cls.debug_gdb = True - elif dl == "gdbserver": + elif dl == "gdbserver" or dl == "gdbserver-all": cls.debug_gdbserver = True + elif dl == "attach": + cls.debug_attach = True else: raise Exception("Unrecognized DEBUG option: '%s'" % d) + if dl == "gdb-all" or dl == "gdbserver-all": + cls.debug_all = True - @staticmethod - def get_least_used_cpu(): - cpu_usage_list = [set(range(psutil.cpu_count()))] - vpp_processes = [p for p in psutil.process_iter(attrs=['pid', 'name']) - if 'vpp_main' == p.info['name']] - for vpp_process in vpp_processes: - for cpu_usage_set in cpu_usage_list: - try: - cpu_num = vpp_process.cpu_num() - if cpu_num in cpu_usage_set: - cpu_usage_set_index = cpu_usage_list.index( - cpu_usage_set) - if cpu_usage_set_index == len(cpu_usage_list) - 1: - cpu_usage_list.append({cpu_num}) - else: - cpu_usage_list[cpu_usage_set_index + 1].add( - cpu_num) - cpu_usage_set.remove(cpu_num) - break - except psutil.NoSuchProcess: - pass - - for cpu_usage_set in cpu_usage_list: - if len(cpu_usage_set) > 0: - min_usage_set = cpu_usage_set - break + @classmethod + def get_vpp_worker_count(cls): + if not hasattr(cls, "vpp_worker_count"): + if cls.has_tag(TestCaseTag.FIXME_VPP_WORKERS): + cls.vpp_worker_count = 0 + else: + cls.vpp_worker_count = environ_vpp_worker_count + return cls.vpp_worker_count - return random.choice(tuple(min_usage_set)) + @classmethod + def get_cpus_required(cls): + return 1 + cls.get_vpp_worker_count() @classmethod def setUpConstants(cls): """ Set-up the test case class based on environment variables """ - s = os.getenv("STEP", "n") - cls.step = True if s.lower() in ("y", "yes", "1") else False - d = os.getenv("DEBUG", None) + cls.step = BoolEnvironmentVariable('STEP') + # inverted case to handle '' == True c = os.getenv("CACHE_OUTPUT", "1") cls.cache_vpp_output = False if c.lower() in ("n", "no", "0") else True - cls.set_debug_flags(d) cls.vpp_bin = os.getenv('VPP_BIN', "vpp") cls.plugin_path = os.getenv('VPP_PLUGIN_PATH') cls.test_plugin_path = os.getenv('VPP_TEST_PLUGIN_PATH') @@ -307,21 +436,40 @@ class VppTestCase(unittest.TestCase): if coredump_size is None: coredump_size = "coredump-size unlimited" - cpu_core_number = cls.get_least_used_cpu() - - cls.vpp_cmdline = [cls.vpp_bin, "unix", - "{", "nodaemon", debug_cli, "full-coredump", - coredump_size, "runtime-dir", cls.tempdir, "}", - "api-trace", "{", "on", "}", "api-segment", "{", - "prefix", cls.shm_prefix, "}", "cpu", "{", - "main-core", str(cpu_core_number), "}", - "statseg", "{", "socket-name", cls.stats_sock, "}", - "socksvr", "{", "socket-name", cls.api_sock, "}", - "plugins", - "{", "plugin", "dpdk_plugin.so", "{", "disable", - "}", "plugin", "rdma_plugin.so", "{", "disable", - "}", "plugin", "unittest_plugin.so", "{", "enable", - "}"] + cls.extra_vpp_plugin_config + ["}", ] + default_variant = os.getenv("VARIANT") + if default_variant is not None: + default_variant = "defaults { %s 100 }" % default_variant + else: + default_variant = "" + + api_fuzzing = os.getenv("API_FUZZ") + if api_fuzzing is None: + api_fuzzing = 'off' + + cls.vpp_cmdline = [ + cls.vpp_bin, + "unix", "{", "nodaemon", debug_cli, "full-coredump", + coredump_size, "runtime-dir", cls.tempdir, "}", + "api-trace", "{", "on", "}", + "api-segment", "{", "prefix", cls.get_api_segment_prefix(), "}", + "cpu", "{", "main-core", str(cls.cpus[0]), ] + if cls.get_vpp_worker_count(): + cls.vpp_cmdline.extend([ + "corelist-workers", ",".join([str(x) for x in cls.cpus[1:]])]) + cls.vpp_cmdline.extend([ + "}", + "physmem", "{", "max-size", "32m", "}", + "statseg", "{", "socket-name", cls.get_stats_sock_path(), + cls.extra_vpp_statseg_config, "}", + "socksvr", "{", "socket-name", cls.get_api_sock_path(), "}", + "node { ", default_variant, "}", + "api-fuzz {", api_fuzzing, "}", + "plugins", "{", "plugin", "dpdk_plugin.so", "{", "disable", "}", + "plugin", "rdma_plugin.so", "{", "disable", "}", + "plugin", "lisp_unittest_plugin.so", "{", "enable", "}", + "plugin", "unittest_plugin.so", "{", "enable", "}" + ] + cls.extra_vpp_plugin_config + ["}", ]) + if cls.extra_vpp_punt_config is not None: cls.vpp_cmdline.extend(cls.extra_vpp_punt_config) if plugin_path is not None: @@ -329,8 +477,9 @@ class VppTestCase(unittest.TestCase): if cls.test_plugin_path is not None: cls.vpp_cmdline.extend(["test_plugin_path", cls.test_plugin_path]) - cls.logger.info("vpp_cmdline args: %s" % cls.vpp_cmdline) - cls.logger.info("vpp_cmdline: %s" % " ".join(cls.vpp_cmdline)) + if not cls.debug_attach: + cls.logger.info("vpp_cmdline args: %s" % cls.vpp_cmdline) + cls.logger.info("vpp_cmdline: %s" % " ".join(cls.vpp_cmdline)) @classmethod def wait_for_enter(cls): @@ -344,38 +493,47 @@ class VppTestCase(unittest.TestCase): cls.logger.debug("Spawned VPP with PID: %d" % cls.vpp.pid) return print(single_line_delim) - print("You can debug the VPP using e.g.:") + print("You can debug VPP using:") if cls.debug_gdbserver: - print("gdb " + cls.vpp_bin + " -ex 'target remote localhost:7777'") - print("Now is the time to attach a gdb by running the above " - "command, set up breakpoints etc. and then resume VPP from " + print("sudo gdb " + cls.vpp_bin + + " -ex 'target remote localhost:{port}'" + .format(port=cls.gdbserver_port)) + print("Now is the time to attach gdb by running the above " + "command, set up breakpoints etc., then resume VPP from " "within gdb by issuing the 'continue' command") + cls.gdbserver_port += 1 elif cls.debug_gdb: - print("gdb " + cls.vpp_bin + " -ex 'attach %s'" % cls.vpp.pid) - print("Now is the time to attach a gdb by running the above " - "command and set up breakpoints etc.") + print("sudo gdb " + cls.vpp_bin + " -ex 'attach %s'" % cls.vpp.pid) + print("Now is the time to attach gdb by running the above " + "command and set up breakpoints etc., then resume VPP from" + " within gdb by issuing the 'continue' command") print(single_line_delim) input("Press ENTER to continue running the testcase...") + @classmethod + def attach_vpp(cls): + cls.vpp = DummyVpp() + @classmethod def run_vpp(cls): + cls.logger.debug(f"Assigned cpus: {cls.cpus}") cmdline = cls.vpp_cmdline if cls.debug_gdbserver: gdbserver = '/usr/bin/gdbserver' - if not os.path.isfile(gdbserver) or \ + if not os.path.isfile(gdbserver) or\ not os.access(gdbserver, os.X_OK): raise Exception("gdbserver binary '%s' does not exist or is " "not executable" % gdbserver) - cmdline = [gdbserver, 'localhost:7777'] + cls.vpp_cmdline + cmdline = [gdbserver, 'localhost:{port}' + .format(port=cls.gdbserver_port)] + cls.vpp_cmdline cls.logger.info("Gdbserver cmdline is %s", " ".join(cmdline)) try: cls.vpp = subprocess.Popen(cmdline, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - bufsize=1) + stderr=subprocess.PIPE) except subprocess.CalledProcessError as e: cls.logger.critical("Subprocess returned with non-0 return code: (" "%s)", e.returncode) @@ -391,19 +549,6 @@ class VppTestCase(unittest.TestCase): cls.wait_for_enter() - @classmethod - def wait_for_stats_socket(cls): - deadline = time.time() + 3 - ok = False - while time.time() < deadline or \ - cls.debug_gdb or cls.debug_gdbserver: - if os.path.exists(cls.stats_sock): - ok = True - break - cls.sleep(0.8) - if not ok: - cls.logger.critical("Couldn't stat : {}".format(cls.stats_sock)) - @classmethod def wait_for_coredump(cls): corefile = cls.tempdir + "/core" @@ -426,6 +571,26 @@ class VppTestCase(unittest.TestCase): cls.logger.error("Coredump complete: %s, size %d", corefile, curr_size) + @classmethod + def get_stats_sock_path(cls): + return "%s/stats.sock" % cls.tempdir + + @classmethod + def get_api_sock_path(cls): + return "%s/api.sock" % cls.tempdir + + @classmethod + def get_api_segment_prefix(cls): + return os.path.basename(cls.tempdir) # Only used for VAPI + + @classmethod + def get_tempdir(cls): + if cls.debug_attach: + return os.getenv("VPP_IN_GDB_TMP_DIR", + "/tmp/unittest-attach-gdb") + else: + return tempfile.mkdtemp(prefix='vpp-unittest-%s-' % cls.__name__) + @classmethod def setUpClass(cls): """ @@ -433,33 +598,30 @@ class VppTestCase(unittest.TestCase): Remove shared memory files, start vpp and connect the vpp-api """ super(VppTestCase, cls).setUpClass() - gc.collect() # run garbage collection first - random.seed() cls.logger = get_logger(cls.__name__) + seed = os.environ["RND_SEED"] + random.seed(seed) if hasattr(cls, 'parallel_handler'): cls.logger.addHandler(cls.parallel_handler) cls.logger.propagate = False - - cls.tempdir = tempfile.mkdtemp( - prefix='vpp-unittest-%s-' % cls.__name__) - cls.stats_sock = "%s/stats.sock" % cls.tempdir - cls.api_sock = "%s/api.sock" % cls.tempdir + d = os.getenv("DEBUG", None) + cls.set_debug_flags(d) + cls.tempdir = cls.get_tempdir() cls.file_handler = FileHandler("%s/log.txt" % cls.tempdir) cls.file_handler.setFormatter( Formatter(fmt='%(asctime)s,%(msecs)03d %(message)s', datefmt="%H:%M:%S")) cls.file_handler.setLevel(DEBUG) cls.logger.addHandler(cls.file_handler) - cls.logger.debug("--- setUpClass() for %s called ---" % - cls.__name__) - cls.shm_prefix = os.path.basename(cls.tempdir) + cls.logger.debug("--- setUpClass() for %s called ---" % cls.__name__) os.chdir(cls.tempdir) - cls.logger.info("Temporary dir is %s, shm prefix is %s", - cls.tempdir, cls.shm_prefix) + cls.logger.info("Temporary dir is %s, api socket is %s", + cls.tempdir, cls.get_api_sock_path()) + cls.logger.debug("Random seed is %s", seed) cls.setUpConstants() cls.reset_packet_infos() - cls._captures = [] - cls._zombie_captures = [] + cls._pcaps = [] + cls._old_pcaps = [] cls.verbose = 0 cls.vpp_dead = False cls.registry = VppObjectRegistry() @@ -468,30 +630,31 @@ class VppTestCase(unittest.TestCase): # need to catch exceptions here because if we raise, then the cleanup # doesn't get called and we might end with a zombie vpp try: - cls.run_vpp() + if cls.debug_attach: + cls.attach_vpp() + else: + cls.run_vpp() cls.reporter.send_keep_alive(cls, 'setUpClass') VppTestResult.current_test_case_info = TestCaseInfo( cls.logger, cls.tempdir, cls.vpp.pid, cls.vpp_bin) cls.vpp_stdout_deque = deque() cls.vpp_stderr_deque = deque() - cls.pump_thread_stop_flag = Event() - cls.pump_thread_wakeup_pipe = os.pipe() - cls.pump_thread = Thread(target=pump_output, args=(cls,)) - cls.pump_thread.daemon = True - cls.pump_thread.start() - if cls.debug_gdb or cls.debug_gdbserver: - read_timeout = 0 - else: - read_timeout = 5 - cls.vapi = VppPapiProvider(cls.shm_prefix, cls.shm_prefix, cls, - read_timeout) + if not cls.debug_attach: + cls.pump_thread_stop_flag = Event() + cls.pump_thread_wakeup_pipe = os.pipe() + cls.pump_thread = Thread(target=pump_output, args=(cls,)) + cls.pump_thread.daemon = True + cls.pump_thread.start() + if cls.debug_gdb or cls.debug_gdbserver or cls.debug_attach: + cls.vapi_response_timeout = 0 + cls.vapi = VppPapiProvider(cls.__name__, cls, + cls.vapi_response_timeout) if cls.step: - hook = StepHook(cls) + hook = hookmodule.StepHook(cls) else: - hook = PollHook(cls) + hook = hookmodule.PollHook(cls) cls.vapi.register_hook(hook) - cls.wait_for_stats_socket() - cls.statistics = VPPStats(socketname=cls.stats_sock) + cls.statistics = VPPStats(socketname=cls.get_stats_sock_path()) try: hook.poll_vpp() except VppDiedError: @@ -502,36 +665,50 @@ class VppTestCase(unittest.TestCase): raise try: cls.vapi.connect() - except Exception: - try: - cls.vapi.disconnect() - except Exception: - pass + except (vpp_papi.VPPIOError, Exception) as e: + cls.logger.debug("Exception connecting to vapi: %s" % e) + cls.vapi.disconnect() + if cls.debug_gdbserver: print(colorize("You're running VPP inside gdbserver but " "VPP-API connection failed, did you forget " "to 'continue' VPP from within gdb?", RED)) - raise - except Exception: + raise e + if cls.debug_attach: + last_line = cls.vapi.cli("show thread").split("\n")[-2] + cls.vpp_worker_count = int(last_line.split(" ")[0]) + print("Detected VPP with %s workers." % cls.vpp_worker_count) + except vpp_papi.VPPRuntimeError as e: + cls.logger.debug("%s" % e) + cls.quit() + raise e + except Exception as e: + cls.logger.debug("Exception connecting to VPP: %s" % e) + cls.quit() + raise e + + @classmethod + def _debug_quit(cls): + if (cls.debug_gdbserver or cls.debug_gdb): try: - cls.quit() - except Exception: + cls.vpp.poll() + + if cls.vpp.returncode is None: + print() + print(double_line_delim) + print("VPP or GDB server is still running") + print(single_line_delim) + input("When done debugging, press ENTER to kill the " + "process and finish running the testcase...") + except AttributeError: pass - raise @classmethod def quit(cls): """ Disconnect vpp-api, kill vpp and cleanup shared memory files """ - if (cls.debug_gdbserver or cls.debug_gdb) and hasattr(cls, 'vpp'): - cls.vpp.poll() - if cls.vpp.returncode is None: - print(double_line_delim) - print("VPP or GDB server is still running") - print(single_line_delim) - input("When done debugging, press ENTER to kill the " - "process and finish running the testcase...") + cls._debug_quit() # first signal that we want to stop the pump thread, then wake it up if hasattr(cls, 'pump_thread_stop_flag'): @@ -542,11 +719,12 @@ class VppTestCase(unittest.TestCase): cls.logger.debug("Waiting for pump thread to stop") cls.pump_thread.join() if hasattr(cls, 'vpp_stderr_reader_thread'): - cls.logger.debug("Waiting for stdderr pump to stop") + cls.logger.debug("Waiting for stderr pump to stop") cls.vpp_stderr_reader_thread.join() if hasattr(cls, 'vpp'): if hasattr(cls, 'vapi'): + cls.logger.debug(cls.vapi.vpp.get_stats()) cls.logger.debug("Disconnecting class vapi client on %s", cls.__name__) cls.vapi.disconnect() @@ -554,14 +732,21 @@ class VppTestCase(unittest.TestCase): cls.__name__) del cls.vapi cls.vpp.poll() - if cls.vpp.returncode is None: + if not cls.debug_attach and cls.vpp.returncode is None: cls.wait_for_coredump() cls.logger.debug("Sending TERM to vpp") cls.vpp.terminate() cls.logger.debug("Waiting for vpp to die") - cls.vpp.communicate() + try: + outs, errs = cls.vpp.communicate(timeout=5) + except subprocess.TimeoutExpired: + cls.vpp.kill() + outs, errs = cls.vpp.communicate() cls.logger.debug("Deleting class vpp attribute on %s", cls.__name__) + if not cls.debug_attach: + cls.vpp.stdout.close() + cls.vpp.stderr.close() del cls.vpp if cls.vpp_startup_failed: @@ -612,20 +797,22 @@ class VppTestCase(unittest.TestCase): self.logger.debug("--- tearDown() for %s.%s(%s) called ---" % (self.__class__.__name__, self._testMethodName, self._testMethodDoc)) - if not self.vpp_dead: - self.logger.info( - "--- Logging show commands common to all testcases. ---") - self.logger.debug(self.vapi.cli("show trace max 1000")) - self.logger.info(self.vapi.ppcli("show interface")) - self.logger.info(self.vapi.ppcli("show hardware")) - self.logger.info(self.statistics.set_errors_str()) - self.logger.info(self.vapi.ppcli("show run")) - self.logger.info(self.vapi.ppcli("show log")) - self.logger.info("Logging testcase specific show commands.") - self.show_commands_at_teardown() - self.registry.remove_vpp_config(self.logger) + + try: + if not self.vpp_dead: + self.logger.debug(self.vapi.cli("show trace max 1000")) + self.logger.info(self.vapi.ppcli("show interface")) + self.logger.info(self.vapi.ppcli("show hardware")) + self.logger.info(self.statistics.set_errors_str()) + self.logger.info(self.vapi.ppcli("show run")) + self.logger.info(self.vapi.ppcli("show log")) + self.logger.info(self.vapi.ppcli("show bihash")) + self.logger.info("Logging testcase specific show commands.") + self.show_commands_at_teardown() + self.registry.remove_vpp_config(self.logger) # Save/Dump VPP api trace log - api_trace = "vpp_api_trace.%s.log" % self._testMethodName + m = self._testMethodName + api_trace = "vpp_api_trace.%s.%d.log" % (m, self.vpp.pid) tmp_api_trace = "/tmp/%s" % api_trace vpp_api_trace_log = "%s/%s" % (self.tempdir, api_trace) self.logger.info(self.vapi.ppcli("api trace save %s" % api_trace)) @@ -634,6 +821,10 @@ class VppTestCase(unittest.TestCase): os.rename(tmp_api_trace, vpp_api_trace_log) self.logger.info(self.vapi.ppcli("api trace custom-dump %s" % vpp_api_trace_log)) + except VppTransportSocketIOError: + self.logger.debug("VppTransportSocketIOError: Vpp dead. " + "Cannot log show commands.") + self.vpp_dead = True else: self.registry.unregister_all(self.logger) @@ -642,7 +833,8 @@ class VppTestCase(unittest.TestCase): super(VppTestCase, self).setUp() self.reporter.send_keep_alive(self) if self.vpp_dead: - raise Exception("VPP is dead when setting up the test") + raise VppDiedError(rv=None, testcase=self.__class__.__name__, + method_name=self._testMethodName) self.sleep(.1, "during setUp") self.vpp_stdout_deque.append( "--- test setUp() for %s.%s(%s) starts here ---\n" % @@ -672,37 +864,60 @@ class VppTestCase(unittest.TestCase): i.enable_capture() @classmethod - def register_capture(cls, cap_name): - """ Register a capture in the testclass """ + def register_pcap(cls, intf, worker): + """ Register a pcap in the testclass """ # add to the list of captures with current timestamp - cls._captures.append((time.time(), cap_name)) - # filter out from zombies - cls._zombie_captures = [(stamp, name) - for (stamp, name) in cls._zombie_captures - if name != cap_name] + cls._pcaps.append((intf, worker)) + + @classmethod + def get_vpp_time(cls): + # processes e.g. "Time now 2.190522, Wed, 11 Mar 2020 17:29:54 GMT" + # returns float("2.190522") + timestr = cls.vapi.cli('show clock') + head, sep, tail = timestr.partition(',') + head, sep, tail = head.partition('Time now') + return float(tail) @classmethod - def pg_start(cls): - """ Remove any zombie captures and enable the packet generator """ - # how long before capture is allowed to be deleted - otherwise vpp - # crashes - 100ms seems enough (this shouldn't be needed at all) - capture_ttl = 0.1 - now = time.time() - for stamp, cap_name in cls._zombie_captures: - wait = stamp + capture_ttl - now - if wait > 0: - cls.sleep(wait, "before deleting capture %s" % cap_name) - now = time.time() - cls.logger.debug("Removing zombie capture %s" % cap_name) - cls.vapi.cli('packet-generator delete %s' % cap_name) - - cls.vapi.cli("trace add pg-input 1000") + def sleep_on_vpp_time(cls, sec): + """ Sleep according to time in VPP world """ + # On a busy system with many processes + # we might end up with VPP time being slower than real world + # So take that into account when waiting for VPP to do something + start_time = cls.get_vpp_time() + while cls.get_vpp_time() - start_time < sec: + cls.sleep(0.1) + + @classmethod + def pg_start(cls, trace=True): + """ Enable the PG, wait till it is done, then clean up """ + for (intf, worker) in cls._old_pcaps: + intf.rename_old_pcap_file(intf.get_in_path(worker), + intf.in_history_counter) + cls._old_pcaps = [] + if trace: + cls.vapi.cli("clear trace") + cls.vapi.cli("trace add pg-input 1000") cls.vapi.cli('packet-generator enable') - cls._zombie_captures = cls._captures - cls._captures = [] + # PG, when starts, runs to completion - + # so let's avoid a race condition, + # and wait a little till it's done. + # Then clean it up - and then be gone. + deadline = time.time() + 300 + while cls.vapi.cli('show packet-generator').find("Yes") != -1: + cls.sleep(0.01) # yield + if time.time() > deadline: + cls.logger.error("Timeout waiting for pg to stop") + break + for intf, worker in cls._pcaps: + cls.vapi.cli('packet-generator delete %s' % + intf.get_cap_name(worker)) + cls._old_pcaps = cls._pcaps + cls._pcaps = [] @classmethod - def create_pg_interfaces(cls, interfaces): + def create_pg_interfaces_internal(cls, interfaces, gso=0, gso_size=0, + mode=None): """ Create packet-generator interfaces. @@ -712,12 +927,36 @@ class VppTestCase(unittest.TestCase): """ result = [] for i in interfaces: - intf = VppPGInterface(cls, i) + intf = VppPGInterface(cls, i, gso, gso_size, mode) setattr(cls, intf.name, intf) result.append(intf) cls.pg_interfaces = result return result + @classmethod + def create_pg_ip4_interfaces(cls, interfaces, gso=0, gso_size=0): + pgmode = VppEnum.vl_api_pg_interface_mode_t + return cls.create_pg_interfaces_internal(interfaces, gso, gso_size, + pgmode.PG_API_MODE_IP4) + + @classmethod + def create_pg_ip6_interfaces(cls, interfaces, gso=0, gso_size=0): + pgmode = VppEnum.vl_api_pg_interface_mode_t + return cls.create_pg_interfaces_internal(interfaces, gso, gso_size, + pgmode.PG_API_MODE_IP6) + + @classmethod + def create_pg_interfaces(cls, interfaces, gso=0, gso_size=0): + pgmode = VppEnum.vl_api_pg_interface_mode_t + return cls.create_pg_interfaces_internal(interfaces, gso, gso_size, + pgmode.PG_API_MODE_ETHERNET) + + @classmethod + def create_pg_ethernet_interfaces(cls, interfaces, gso=0, gso_size=0): + pgmode = VppEnum.vl_api_pg_interface_mode_t + return cls.create_pg_interfaces_internal(interfaces, gso, gso_size, + pgmode.PG_API_MODE_ETHERNET) + @classmethod def create_loopback_interfaces(cls, count): """ @@ -913,8 +1152,6 @@ class VppTestCase(unittest.TestCase): def assert_packet_checksums_valid(self, packet, ignore_zero_udp_checksums=True): received = packet.__class__(scapy.compat.raw(packet)) - self.logger.debug( - ppp("Verifying packet checksums for packet:", received)) udp_layers = ['UDP', 'UDPerror'] checksum_fields = ['cksum', 'chksum'] checksums = [] @@ -923,13 +1160,15 @@ class VppTestCase(unittest.TestCase): while True: layer = temp.getlayer(counter) if layer: + layer = layer.copy() + layer.remove_payload() for cf in checksum_fields: if hasattr(layer, cf): if ignore_zero_udp_checksums and \ - 0 == getattr(layer, cf) and \ - layer.name in udp_layers: + 0 == getattr(layer, cf) and \ + layer.name in udp_layers: continue - delattr(layer, cf) + delattr(temp.getlayer(counter), cf) checksums.append((counter, cf)) else: break @@ -1000,19 +1239,28 @@ class VppTestCase(unittest.TestCase): if pkt.haslayer(ICMPv6EchoReply): self.assert_checksum_valid(pkt, 'ICMPv6EchoReply', 'cksum') - def assert_packet_counter_equal(self, counter, expected_value): + def get_packet_counter(self, counter): if counter.startswith("/"): counter_value = self.statistics.get_counter(counter) - self.assert_equal(counter_value, expected_value, - "packet counter `%s'" % counter) else: counters = self.vapi.cli("sh errors").split('\n') - counter_value = -1 + counter_value = 0 for i in range(1, len(counters) - 1): results = counters[i].split() if results[1] == counter: counter_value = int(results[0]) break + return counter_value + + def assert_packet_counter_equal(self, counter, expected_value): + counter_value = self.get_packet_counter(counter) + self.assert_equal(counter_value, expected_value, + "packet counter `%s'" % counter) + + def assert_error_counter_equal(self, counter, expected_value): + counter_value = self.statistics[counter].sum() + self.assert_equal(counter_value, expected_value, + "error counter `%s'" % counter) @classmethod def sleep(cls, timeout, remark=None): @@ -1029,25 +1277,23 @@ class VppTestCase(unittest.TestCase): time.sleep(0) return - if hasattr(cls, 'logger'): - cls.logger.debug("Starting sleep for %es (%s)", timeout, remark) + cls.logger.debug("Starting sleep for %es (%s)", timeout, remark) before = time.time() time.sleep(timeout) after = time.time() - if hasattr(cls, 'logger') and after - before > 2 * timeout: + if after - before > 2 * timeout: cls.logger.error("unexpected self.sleep() result - " "slept for %es instead of ~%es!", after - before, timeout) - if hasattr(cls, 'logger'): - cls.logger.debug( - "Finished sleep (%s) - slept %es (wanted %es)", - remark, after - before, timeout) - def pg_send(self, intf, pkts): - self.vapi.cli("clear trace") - intf.add_stream(pkts) + cls.logger.debug( + "Finished sleep (%s) - slept %es (wanted %es)", + remark, after - before, timeout) + + def pg_send(self, intf, pkts, worker=None, trace=True): + intf.add_stream(pkts, worker=worker) self.pg_enable_capture(self.pg_interfaces) - self.pg_start() + self.pg_start(trace=trace) def send_and_assert_no_replies(self, intf, pkts, remark="", timeout=None): self.pg_send(intf, pkts) @@ -1058,10 +1304,11 @@ class VppTestCase(unittest.TestCase): i.assert_nothing_captured(remark=remark) timeout = 0.1 - def send_and_expect(self, intf, pkts, output, n_rx=None): + def send_and_expect(self, intf, pkts, output, n_rx=None, worker=None, + trace=True): if not n_rx: n_rx = len(pkts) - self.pg_send(intf, pkts) + self.pg_send(intf, pkts, worker=worker, trace=trace) rx = output.get_capture(n_rx) return rx @@ -1079,11 +1326,6 @@ class VppTestCase(unittest.TestCase): return rx - def runTest(self): - """ unittest calls runTest when TestCase is instantiated without a - test case. Use case: Writing unittests against VppTestCase""" - pass - def get_testcase_doc_name(test): return getdoc(test.__class__).splitlines()[0] @@ -1140,6 +1382,7 @@ class VppTestResult(unittest.TestResult): self.verbosity = verbosity self.result_string = None self.runner = runner + self.printed = [] def addSuccess(self, test): """ @@ -1174,7 +1417,10 @@ class VppTestResult(unittest.TestResult): unittest.TestResult.addSkip(self, test, reason) self.result_string = colorize("SKIP", YELLOW) - self.send_result_through_pipe(test, SKIP) + if reason == "not enough cpus": + self.send_result_through_pipe(test, SKIP_CPU_SHORTAGE) + else: + self.send_result_through_pipe(test, SKIP) def symlink_failed(self): if self.current_test_case_info: @@ -1184,22 +1430,20 @@ class VppTestResult(unittest.TestResult): failed_dir, '%s-FAILED' % os.path.basename(self.current_test_case_info.tempdir)) - if self.current_test_case_info.logger: - self.current_test_case_info.logger.debug( - "creating a link to the failed test") - self.current_test_case_info.logger.debug( - "os.symlink(%s, %s)" % - (self.current_test_case_info.tempdir, link_path)) + + self.current_test_case_info.logger.debug( + "creating a link to the failed test") + self.current_test_case_info.logger.debug( + "os.symlink(%s, %s)" % + (self.current_test_case_info.tempdir, link_path)) if os.path.exists(link_path): - if self.current_test_case_info.logger: - self.current_test_case_info.logger.debug( - 'symlink already exists') + self.current_test_case_info.logger.debug( + 'symlink already exists') else: os.symlink(self.current_test_case_info.tempdir, link_path) except Exception as e: - if self.current_test_case_info.logger: - self.current_test_case_info.logger.error(e) + self.current_test_case_info.logger.error(e) def send_result_through_pipe(self, test, result): if hasattr(self, 'test_framework_result_pipe'): @@ -1294,14 +1538,45 @@ class VppTestResult(unittest.TestResult): """ def print_header(test): - if not hasattr(test.__class__, '_header_printed'): - print(double_line_delim) - print(colorize(getdoc(test).splitlines()[0], GREEN)) - print(double_line_delim) - test.__class__._header_printed = True + if test.__class__ in self.printed: + return + + test_doc = getdoc(test) + if not test_doc: + raise Exception("No doc string for test '%s'" % test.id()) + + test_title = test_doc.splitlines()[0].rstrip() + test_title = colorize(test_title, GREEN) + if test.is_tagged_run_solo(): + test_title = colorize(f"SOLO RUN: {test_title}", YELLOW) + + # This block may overwrite the colorized title above, + # but we want this to stand out and be fixed + if test.has_tag(TestCaseTag.FIXME_VPP_WORKERS): + test_title = colorize( + f"FIXME with VPP workers: {test_title}", RED) + + if hasattr(test, 'vpp_worker_count'): + if test.vpp_worker_count == 0: + test_title += " [main thread only]" + elif test.vpp_worker_count == 1: + test_title += " [1 worker thread]" + else: + test_title += f" [{test.vpp_worker_count} worker threads]" - print_header(test) + if test.__class__.skipped_due_to_cpu_lack: + test_title = colorize( + f"{test_title} [skipped - not enough cpus, " + f"required={test.__class__.get_cpus_required()}, " + f"available={max_vpp_cpus}]", YELLOW) + print(double_line_delim) + print(test_title) + print(double_line_delim) + self.printed.append(test.__class__) + + print_header(test) + self.start_test = time.time() unittest.TestResult.startTest(self, test) if self.verbosity > 0: self.stream.writeln( @@ -1316,14 +1591,17 @@ class VppTestResult(unittest.TestResult): """ unittest.TestResult.stopTest(self, test) + if self.verbosity > 0: self.stream.writeln(single_line_delim) self.stream.writeln("%-73s%s" % (self.getDescription(test), self.result_string)) self.stream.writeln(single_line_delim) else: - self.stream.writeln("%-73s%s" % (self.getDescription(test), - self.result_string)) + self.stream.writeln("%-68s %4.2f %s" % + (self.getDescription(test), + time.time() - self.start_test, + self.result_string)) self.send_result_through_pipe(test, TEST_RUN) @@ -1407,35 +1685,95 @@ class VppTestRunner(unittest.TextTestRunner): class Worker(Thread): - def __init__(self, args, logger, env={}): + def __init__(self, executable_args, logger, env=None, *args, **kwargs): + super(Worker, self).__init__(*args, **kwargs) self.logger = logger - self.args = args + self.args = executable_args + if hasattr(self, 'testcase') and self.testcase.debug_all: + if self.testcase.debug_gdbserver: + self.args = ['/usr/bin/gdbserver', 'localhost:{port}' + .format(port=self.testcase.gdbserver_port)] + args + elif self.testcase.debug_gdb and hasattr(self, 'wait_for_gdb'): + self.args.append(self.wait_for_gdb) + self.app_bin = executable_args[0] + self.app_name = os.path.basename(self.app_bin) + if hasattr(self, 'role'): + self.app_name += ' {role}'.format(role=self.role) + self.process = None self.result = None + env = {} if env is None else env self.env = copy.deepcopy(env) - super(Worker, self).__init__() + + def wait_for_enter(self): + if not hasattr(self, 'testcase'): + return + if self.testcase.debug_all and self.testcase.debug_gdbserver: + print() + print(double_line_delim) + print("Spawned GDB Server for '{app}' with PID: {pid}" + .format(app=self.app_name, pid=self.process.pid)) + elif self.testcase.debug_all and self.testcase.debug_gdb: + print() + print(double_line_delim) + print("Spawned '{app}' with PID: {pid}" + .format(app=self.app_name, pid=self.process.pid)) + else: + return + print(single_line_delim) + print("You can debug '{app}' using:".format(app=self.app_name)) + if self.testcase.debug_gdbserver: + print("sudo gdb " + self.app_bin + + " -ex 'target remote localhost:{port}'" + .format(port=self.testcase.gdbserver_port)) + print("Now is the time to attach gdb by running the above " + "command, set up breakpoints etc., then resume from " + "within gdb by issuing the 'continue' command") + self.testcase.gdbserver_port += 1 + elif self.testcase.debug_gdb: + print("sudo gdb " + self.app_bin + + " -ex 'attach {pid}'".format(pid=self.process.pid)) + print("Now is the time to attach gdb by running the above " + "command and set up breakpoints etc., then resume from" + " within gdb by issuing the 'continue' command") + print(single_line_delim) + input("Press ENTER to continue running the testcase...") def run(self): executable = self.args[0] - self.logger.debug("Running executable w/args `%s'" % self.args) + if not os.path.exists(executable) or not os.access( + executable, os.F_OK | os.X_OK): + # Exit code that means some system file did not exist, + # could not be opened, or had some other kind of error. + self.result = os.EX_OSFILE + raise EnvironmentError( + "executable '%s' is not found or executable." % executable) + self.logger.debug("Running executable '{app}': '{cmd}'" + .format(app=self.app_name, + cmd=' '.join(self.args))) env = os.environ.copy() env.update(self.env) env["CK_LOG_FILE_NAME"] = "-" self.process = subprocess.Popen( - self.args, shell=False, env=env, preexec_fn=os.setpgrp, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ['stdbuf', '-o0', '-e0'] + self.args, shell=False, env=env, + preexec_fn=os.setpgrp, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + self.wait_for_enter() out, err = self.process.communicate() - self.logger.debug("Finished running `%s'" % executable) + self.logger.debug("Finished running `{app}'".format(app=self.app_name)) self.logger.info("Return code is `%s'" % self.process.returncode) self.logger.info(single_line_delim) - self.logger.info("Executable `%s' wrote to stdout:" % executable) + self.logger.info("Executable `{app}' wrote to stdout:" + .format(app=self.app_name)) self.logger.info(single_line_delim) - self.logger.info(out) + self.logger.info(out.decode('utf-8')) self.logger.info(single_line_delim) - self.logger.info("Executable `%s' wrote to stderr:" % executable) + self.logger.info("Executable `{app}' wrote to stderr:" + .format(app=self.app_name)) self.logger.info(single_line_delim) - self.logger.info(err) + self.logger.info(err.decode('utf-8')) self.logger.info(single_line_delim) self.result = self.process.returncode + if __name__ == '__main__': pass