X-Git-Url: https://gerrit.fd.io/r/gitweb?p=csit.git;a=blobdiff_plain;f=resources%2Ftools%2Fpresentation%2Finput_data_parser.py;h=ad3d32762ee717927186b0d73051ba86f4a6b028;hp=e12e2fb8df18c645bbe462cf5f8d4e2910b2fb42;hb=e3bed8e21e3c8b60734ce95eb2bb2a2d371a9907;hpb=5646509aea6b43ef1efb282aad908289cc005b26 diff --git a/resources/tools/presentation/input_data_parser.py b/resources/tools/presentation/input_data_parser.py index e12e2fb8df..ad3d32762e 100644 --- a/resources/tools/presentation/input_data_parser.py +++ b/resources/tools/presentation/input_data_parser.py @@ -16,20 +16,24 @@ - extract data from output.xml files generated by Jenkins jobs and store in pandas' Series, - provide access to the data. +- filter the data using tags, """ +import multiprocessing +import os import re import pandas as pd import logging -import xml.etree.ElementTree as ET from robot.api import ExecutionResult, ResultVisitor from robot import errors from collections import OrderedDict from string import replace from os import remove +from jumpavg.AvgStdevMetadataFactory import AvgStdevMetadataFactory from input_data_files import download_and_unzip_data_file +from utils import Worker class ExecutionChecker(ResultVisitor): @@ -40,18 +44,21 @@ class ExecutionChecker(ResultVisitor): Performance tests: { - "metadata": { # Optional - "version": "VPP version", + "metadata": { + "generated": "Timestamp", + "version": "SUT version", "job": "Jenkins job name", "build": "Information about the build" }, "suites": { - "Suite name 1": { + "Suite long name 1": { + "name": Suite name, "doc": "Suite 1 documentation", "parent": "Suite 1 parent", "level": "Level of the suite in the suite hierarchy" } - "Suite name N": { + "Suite long name N": { + "name": Suite name, "doc": "Suite N documentation", "parent": "Suite 2 parent", "level": "Level of the suite in the suite hierarchy" @@ -64,12 +71,12 @@ class ExecutionChecker(ResultVisitor): "doc": "Test documentation" "msg": "Test message" "tags": ["tag 1", "tag 2", "tag n"], - "type": "PDR" | "NDR", - "throughput": { + "type": "PDR" | "NDR" | "TCP" | "MRR" | "BMRR", + "throughput": { # Only type: "PDR" | "NDR" "value": int, "unit": "pps" | "bps" | "percentage" }, - "latency": { + "latency": { # Only type: "PDR" | "NDR" "direction1": { "100": { "min": int, @@ -105,9 +112,15 @@ class ExecutionChecker(ResultVisitor): } } }, - "lossTolerance": "lossTolerance", # Only for PDR - "vat-history": "DUT1 and DUT2 VAT History" + "result": { # Only type: "TCP" + "value": int, + "unit": "cps" | "rps" + }, + "result": { # Only type: "MRR" | "BMRR" + "receive-rate": AvgStdevMetadata, }, + "lossTolerance": "lossTolerance", # Only type: "PDR" + "vat-history": "DUT1 and DUT2 VAT History" "show-run": "Show Run" }, "ID" { @@ -116,8 +129,8 @@ class ExecutionChecker(ResultVisitor): } } - Functional tests: + Functional tests: { "metadata": { # Optional @@ -160,7 +173,7 @@ class ExecutionChecker(ResultVisitor): REGEX_RATE = re.compile(r'^[\D\d]*FINAL_RATE:\s(\d+\.\d+)\s(\w+)') REGEX_LAT_NDR = re.compile(r'^[\D\d]*' - r'LAT_\d+%NDR:\s\[\'(-?\d+\/-?\d+/-?\d+)\',' + r'LAT_\d+%NDR:\s\[\'(-?\d+/-?\d+/-?\d+)\',' r'\s\'(-?\d+/-?\d+/-?\d+)\'\]\s\n' r'LAT_\d+%NDR:\s\[\'(-?\d+/-?\d+/-?\d+)\',' r'\s\'(-?\d+/-?\d+/-?\d+)\'\]\s\n' @@ -174,13 +187,25 @@ class ExecutionChecker(ResultVisitor): REGEX_TOLERANCE = re.compile(r'^[\D\d]*LOSS_ACCEPTANCE:\s(\d*\.\d*)\s' r'[\D\d]*') - REGEX_VERSION = re.compile(r"(return STDOUT Version:\s*)(.*)") + REGEX_VERSION_VPP = re.compile(r"(return STDOUT Version:\s*)(.*)") + + REGEX_VERSION_DPDK = re.compile(r"(return STDOUT testpmd)([\d\D\n]*)" + r"(RTE Version: 'DPDK )(.*)(')") REGEX_TCP = re.compile(r'Total\s(rps|cps|throughput):\s([0-9]*).*$') REGEX_MRR = re.compile(r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s' r'tx\s(\d*),\srx\s(\d*)') + REGEX_BMRR = re.compile(r'Maximum Receive Rate trial results' + r' in packets per second: \[(.*)\]') + + REGEX_TC_TAG = re.compile(r'\d+[tT]\d+[cC]') + + REGEX_TC_NAME_OLD = re.compile(r'-\d+[tT]\d+[cC]-') + + REGEX_TC_NAME_NEW = re.compile(r'-\d+[cC]-') + def __init__(self, metadata): """Initialisation. @@ -195,6 +220,9 @@ class ExecutionChecker(ResultVisitor): # VPP version self._version = None + # Timestamp + self._timestamp = None + # Number of VAT History messages found: # 0 - no message # 1 - VAT History of DUT1 @@ -225,7 +253,9 @@ class ExecutionChecker(ResultVisitor): # Dictionary defining the methods used to parse different types of # messages self.parse_msg = { - "setup-version": self._get_version, + "timestamp": self._get_timestamp, + "vpp-version": self._get_vpp_version, + "dpdk-version": self._get_dpdk_version, "teardown-vat-history": self._get_vat_history, "test-show-runtime": self._get_show_run } @@ -239,7 +269,7 @@ class ExecutionChecker(ResultVisitor): """ return self._data - def _get_version(self, msg): + def _get_vpp_version(self, msg): """Called when extraction of VPP version is required. :param msg: Message to process. @@ -248,11 +278,41 @@ class ExecutionChecker(ResultVisitor): """ if msg.message.count("return STDOUT Version:"): - self._version = str(re.search(self.REGEX_VERSION, msg.message). + self._version = str(re.search(self.REGEX_VERSION_VPP, msg.message). group(2)) self._data["metadata"]["version"] = self._version self._msg_type = None + def _get_dpdk_version(self, msg): + """Called when extraction of DPDK version is required. + + :param msg: Message to process. + :type msg: Message + :returns: Nothing. + """ + + if msg.message.count("return STDOUT testpmd"): + try: + self._version = str(re.search( + self.REGEX_VERSION_DPDK, msg.message). group(4)) + self._data["metadata"]["version"] = self._version + except IndexError: + pass + finally: + self._msg_type = None + + def _get_timestamp(self, msg): + """Called when extraction of timestamp is required. + + :param msg: Message to process. + :type msg: Message + :returns: Nothing. + """ + + self._timestamp = msg.timestamp[:14] + self._data["metadata"]["generated"] = self._timestamp + self._msg_type = None + def _get_vat_history(self, msg): """Called when extraction of VAT command history is required. @@ -424,9 +484,12 @@ class ExecutionChecker(ResultVisitor): test_result["doc"] = replace(doc_str, ' |br| [', '[', maxreplace=1) test_result["msg"] = test.message.replace('\n', ' |br| '). \ replace('\r', '').replace('"', "'") + test_result["status"] = test.status + self._test_ID = test.longname.lower() if test.status == "PASS" and ("NDRPDRDISC" in tags or "TCP" in tags or - "MRR" in tags): + "MRR" in tags or + "BMRR" in tags): if "NDRDISC" in tags: test_type = "NDR" elif "PDRDISC" in tags: @@ -435,11 +498,40 @@ class ExecutionChecker(ResultVisitor): test_type = "TCP" elif "MRR" in tags: test_type = "MRR" + elif "FRMOBL" in tags or "BMRR" in tags: + test_type = "BMRR" else: return test_result["type"] = test_type + # Replace info about cores (e.g. -1c-) with the info about threads + # and cores (e.g. -1t1c-) in the long test case names and in the + # test case names if necessary. + groups = re.search(self.REGEX_TC_NAME_OLD, self._test_ID) + if not groups: + tag_count = 0 + for tag in test_result["tags"]: + groups = re.search(self.REGEX_TC_TAG, tag) + if groups: + tag_count += 1 + tag_tc = tag + + if tag_count == 1: + self._test_ID = re.sub(self.REGEX_TC_NAME_NEW, + "-{0}-".format(tag_tc.lower()), + self._test_ID, + count=1) + test_result["name"] = re.sub(self.REGEX_TC_NAME_NEW, + "-{0}-".format(tag_tc.lower()), + test_result["name"], + count=1) + else: + test_result["status"] = "FAIL" + logging.error("The test '{0}' has no or more than one " + "multi-threading tags.".format(self._test_ID)) + return + if test_type in ("NDR", "PDR"): try: rate_value = str(re.search( @@ -467,19 +559,22 @@ class ExecutionChecker(ResultVisitor): test_result["result"] = dict() test_result["result"]["value"] = int(groups.group(2)) test_result["result"]["unit"] = groups.group(1) - elif test_type in ("MRR", ): - groups = re.search(self.REGEX_MRR, test.message) + + elif test_type in ("MRR", "BMRR"): test_result["result"] = dict() - test_result["result"]["duration"] = int(groups.group(1)) - test_result["result"]["tx"] = int(groups.group(2)) - test_result["result"]["rx"] = int(groups.group(3)) - test_result["result"]["throughput"] = int( - test_result["result"]["rx"] / - test_result["result"]["duration"]) - else: - test_result["status"] = test.status + groups = re.search(self.REGEX_BMRR, test.message) + if groups is not None: + items_str = groups.group(1) + items_float = [float(item.strip()) for item + in items_str.split(",")] + test_result["result"]["receive-rate"] = \ + AvgStdevMetadataFactory.from_data(items_float) + else: + groups = re.search(self.REGEX_MRR, test.message) + test_result["result"]["receive-rate"] = \ + AvgStdevMetadataFactory.from_data([ + float(groups.group(3)) / float(groups.group(1)), ]) - self._test_ID = test.longname.lower() self._data["tests"][self._test_ID] = test_result def end_test(self, test): @@ -554,7 +649,11 @@ class ExecutionChecker(ResultVisitor): self._lookup_kw_nr += 1 self._show_run_lookup_nr = 0 self._msg_type = "test-show-runtime" - test_kw.messages.visit(self) + elif test_kw.name.count("Start The L2fwd Test") and not self._version: + self._msg_type = "dpdk-version" + else: + return + test_kw.messages.visit(self) def end_test_kw(self, test_kw): """Called when keyword ends. Default implementation does nothing. @@ -588,8 +687,14 @@ class ExecutionChecker(ResultVisitor): """ if setup_kw.name.count("Show Vpp Version On All Duts") \ and not self._version: - self._msg_type = "setup-version" - setup_kw.messages.visit(self) + self._msg_type = "vpp-version" + + elif setup_kw.name.count("Setup performance global Variables") \ + and not self._timestamp: + self._msg_type = "timestamp" + else: + return + setup_kw.messages.visit(self) def end_setup_kw(self, setup_kw): """Called when keyword ends. Default implementation does nothing. @@ -678,12 +783,11 @@ class InputData(object): - job name - build number - metadata - - job - - build - - vpp version + (as described in ExecutionChecker documentation) - suites + (as described in ExecutionChecker documentation) - tests - - ID: test data (as described in ExecutionChecker documentation) + (as described in ExecutionChecker documentation) """ def __init__(self, spec): @@ -697,7 +801,7 @@ class InputData(object): self._cfg = spec # Data store: - self._input_data = None + self._input_data = pd.Series() @property def data(self): @@ -748,17 +852,16 @@ class InputData(object): return self.data[job][build]["tests"] @staticmethod - def _parse_tests(job, build, get_timestamp=False): + def _parse_tests(job, build, log): """Process data from robot output.xml file and return JSON structured data. :param job: The name of job which build output data will be processed. :param build: The build which output data will be processed. - :param get_timestamp: If True, timestamp is read form the xml source - file. + :param log: List of log messages. :type job: str :type build: dict - :type get_timestamp: bool + :type log: list of tuples (severity, msg) :returns: JSON data structure. :rtype: dict """ @@ -767,68 +870,132 @@ class InputData(object): "job": job, "build": build } - if get_timestamp: - tree = ET.parse(build["file-name"]) - root = tree.getroot() - metadata["generated"] = root.attrib["generated"] with open(build["file-name"], 'r') as data_file: try: result = ExecutionResult(data_file) except errors.DataError as err: - logging.error("Error occurred while parsing output.xml: {0}". - format(err)) + log.append(("ERROR", "Error occurred while parsing output.xml: " + "{0}".format(err))) return None checker = ExecutionChecker(metadata) result.visit(checker) return checker.data - def download_and_parse_data(self, get_timestamp=False): + def _download_and_parse_build(self, pid, data_queue, job, build, repeat): + """Download and parse the input data file. + + :param pid: PID of the process executing this method. + :param data_queue: Shared memory between processes. Queue which keeps + the result data. This data is then read by the main process and used + in further processing. + :param job: Name of the Jenkins job which generated the processed input + file. + :param build: Information about the Jenkins build which generated the + processed input file. + :param repeat: Repeat the download specified number of times if not + successful. + :type pid: int + :type data_queue: multiprocessing.Manager().Queue() + :type job: str + :type build: dict + :type repeat: int + """ + + logs = list() + + logging.info(" Processing the job/build: {0}: {1}". + format(job, build["build"])) + + logs.append(("INFO", " Processing the job/build: {0}: {1}". + format(job, build["build"]))) + + state = "failed" + success = False + data = None + do_repeat = repeat + while do_repeat: + success = download_and_unzip_data_file(self._cfg, job, build, pid, + logs) + if success: + break + do_repeat -= 1 + if not success: + logs.append(("ERROR", "It is not possible to download the input " + "data file from the job '{job}', build " + "'{build}', or it is damaged. Skipped.". + format(job=job, build=build["build"]))) + if success: + logs.append(("INFO", " Processing data from the build '{0}' ...". + format(build["build"]))) + data = InputData._parse_tests(job, build, logs) + if data is None: + logs.append(("ERROR", "Input data file from the job '{job}', " + "build '{build}' is damaged. Skipped.". + format(job=job, build=build["build"]))) + else: + state = "processed" + + try: + remove(build["file-name"]) + except OSError as err: + logs.append(("ERROR", "Cannot remove the file '{0}': {1}". + format(build["file-name"], err))) + logs.append(("INFO", " Done.")) + + result = { + "data": data, + "state": state, + "job": job, + "build": build, + "logs": logs + } + data_queue.put(result) + + def download_and_parse_data(self, repeat=1): """Download the input data files, parse input data from input files and store in pandas' Series. - :param get_timestamp: If True, timestamp is read form the xml source - file. - :type get_timestamp: bool + :param repeat: Repeat the download specified number of times if not + successful. + :type repeat: int """ logging.info("Downloading and parsing input files ...") - job_data = dict() + work_queue = multiprocessing.JoinableQueue() + manager = multiprocessing.Manager() + data_queue = manager.Queue() + cpus = multiprocessing.cpu_count() + + workers = list() + for cpu in range(cpus): + worker = Worker(work_queue, + data_queue, + self._download_and_parse_build) + worker.daemon = True + worker.start() + workers.append(worker) + os.system("taskset -p -c {0} {1} > /dev/null 2>&1". + format(cpu, worker.pid)) + for job, builds in self._cfg.builds.items(): - logging.info(" Processing data from the job '{0}' ...'". - format(job)) - builds_data = dict() for build in builds: - logging.info(" Processing the build '{0}'". - format(build["build"])) - self._cfg.set_input_state(job, build["build"], "failed") - if not download_and_unzip_data_file(self._cfg, job, build): - logging.error("It is not possible to download the input " - "data file from the job '{job}', build " - "'{build}', or it is damaged. Skipped.". - format(job=job, build=build["build"])) - continue + work_queue.put((job, build, repeat)) - logging.info(" Processing data from the build '{0}' ...". - format(build["build"])) - data = InputData._parse_tests(job, build, - get_timestamp=get_timestamp) - if data is None: - logging.error("Input data file from the job '{job}', build " - "'{build}' is damaged. Skipped.". - format(job=job, build=build["build"])) - continue + work_queue.join() - self._cfg.set_input_state(job, build["build"], "processed") + logging.info("Done.") - try: - remove(build["file-name"]) - except OSError as err: - logging.error("Cannot remove the file '{0}': {1}". - format(build["file-name"], err)) + while not data_queue.empty(): + result = data_queue.get() + + job = result["job"] + build_nr = result["build"]["build"] + if result["data"]: + data = result["data"] build_data = pd.Series({ "metadata": pd.Series(data["metadata"].values(), index=data["metadata"].keys()), @@ -836,15 +1003,35 @@ class InputData(object): index=data["suites"].keys()), "tests": pd.Series(data["tests"].values(), index=data["tests"].keys())}) - builds_data[str(build["build"])] = build_data - build["status"] = "processed" - logging.info(" Done.") - job_data[job] = pd.Series(builds_data.values(), - index=builds_data.keys()) - logging.info(" Done.") + if self._input_data.get(job, None) is None: + self._input_data[job] = pd.Series() + self._input_data[job][str(build_nr)] = build_data + + self._cfg.set_input_file_name(job, build_nr, + result["build"]["file-name"]) + + self._cfg.set_input_state(job, build_nr, result["state"]) + + for item in result["logs"]: + if item[0] == "INFO": + logging.info(item[1]) + elif item[0] == "ERROR": + logging.error(item[1]) + elif item[0] == "DEBUG": + logging.debug(item[1]) + elif item[0] == "CRITICAL": + logging.critical(item[1]) + elif item[0] == "WARNING": + logging.warning(item[1]) + + del data_queue + + # Terminate all workers + for worker in workers: + worker.terminate() + worker.join() - self._input_data = pd.Series(job_data.values(), index=job_data.keys()) logging.info("Done.") @staticmethod @@ -893,13 +1080,13 @@ class InputData(object): - job 1 - build 1 - - test (suite) 1 ID: + - test (or suite) 1 ID: - param 1 - param 2 ... - param n ... - - test (suite) n ID: + - test (or suite) n ID: ... ... - build n @@ -921,9 +1108,6 @@ class InputData(object): :rtype pandas.Series """ - logging.info(" Creating the data set for the {0} '{1}'.". - format(element.get("type", ""), element.get("title", ""))) - try: if element["filter"] in ("all", "template"): cond = "True"