diff --git a/resources/tools/presentation/input_data_parser.py b/resources/tools/presentation/input_data_parser.py
index 203a3bfd25..9428b2cdc9 100644
--- a/resources/tools/presentation/input_data_parser.py
+++ b/resources/tools/presentation/input_data_parser.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2017 Cisco and/or its affiliates.
+# Copyright (c) 2018 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -18,13 +18,19 @@
 - provide access to the data.
 """
 
+import multiprocessing
+import os
 import re
 import pandas as pd
 import logging
 
 from robot.api import ExecutionResult, ResultVisitor
+from robot import errors
 from collections import OrderedDict
 from string import replace
+from os import remove
+
+from input_data_files import download_and_unzip_data_file
 
 
 class ExecutionChecker(ResultVisitor):
@@ -169,11 +175,14 @@ class ExecutionChecker(ResultVisitor):
     REGEX_TOLERANCE = re.compile(r'^[\D\d]*LOSS_ACCEPTANCE:\s(\d*\.\d*)\s'
                                  r'[\D\d]*')
 
-    REGEX_VERSION = re.compile(r"(stdout: 'vat# vat# Version:)(\s*)(.*)")
+    REGEX_VERSION = re.compile(r"(return STDOUT Version:\s*)(.*)")
 
     REGEX_TCP = re.compile(r'Total\s(rps|cps|throughput):\s([0-9]*).*$')
 
-    def __init__(self, **metadata):
+    REGEX_MRR = re.compile(r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s'
+                           r'tx\s(\d*),\srx\s(\d*)')
+
+    def __init__(self, metadata):
         """Initialisation.
 
         :param metadata: Key-value pairs to be included in "metadata" part of
@@ -219,7 +228,7 @@ class ExecutionChecker(ResultVisitor):
         self.parse_msg = {
             "setup-version": self._get_version,
             "teardown-vat-history": self._get_vat_history,
-            "teardown-show-runtime": self._get_show_run
+            "test-show-runtime": self._get_show_run
         }
 
     @property
@@ -239,14 +248,13 @@ class ExecutionChecker(ResultVisitor):
         :returns: Nothing.
         """
 
-        if msg.message.count("stdout: 'vat# vat# Version:"):
+        if msg.message.count("return STDOUT Version:"):
             self._version = str(re.search(self.REGEX_VERSION, msg.message).
-                                group(3))
+                                group(2))
             self._data["metadata"]["version"] = self._version
+            self._data["metadata"]["generated"] = msg.timestamp
             self._msg_type = None
 
-            logging.debug("    VPP version: {0}".format(self._version))
-
     def _get_vat_history(self, msg):
         """Called when extraction of VAT command history is required.
 
@@ -372,11 +380,11 @@ class ExecutionChecker(ResultVisitor):
 
         self._data["suites"][suite.longname.lower().replace('"', "'").
             replace(" ", "_")] = {
-                "name": suite.name.lower(),
-                "doc": doc_str,
-                "parent": parent_name,
-                "level": len(suite.longname.split("."))
-            }
+            "name": suite.name.lower(),
+            "doc": doc_str,
+            "parent": parent_name,
+            "level": len(suite.longname.split("."))
+        }
 
         suite.keywords.visit(self)
 
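The REGEX_MRR pattern added above can be sanity-checked in isolation. A minimal sketch; the message text here is a made-up example of the shape the parser expects, not output from a real run:

    import re

    REGEX_MRR = re.compile(r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s'
                           r'tx\s(\d*),\srx\s(\d*)')

    # Made-up message of the expected shape:
    msg = "MaxReceivedRate_Results [pkts/10sec]: tx 12345678, rx 12340000"

    groups = re.search(REGEX_MRR, msg)
    duration = int(groups.group(1))  # 10
    tx = int(groups.group(2))        # 12345678
    rx = int(groups.group(3))        # 12340000
    throughput = int(rx / duration)  # 1234000, same computation as below
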
@@ -415,17 +423,20 @@ class ExecutionChecker(ResultVisitor):
             test_result["tags"] = tags
             doc_str = test.doc.replace('"', "'").replace('\n', ' '). \
                 replace('\r', '').replace('[', ' |br| [')
-            test_result["doc"] = replace(doc_str, ' |br| [', '[', maxreplace=1)
+            test_result["doc"] = replace(doc_str, ' |br| [', '[', maxreplace=1)
             test_result["msg"] = test.message.replace('\n', ' |br| '). \
                 replace('\r', '').replace('"', "'")
-            if test.status == "PASS" and ("NDRPDRDISC" in tags or "TCP" in tags):
-
+            if test.status == "PASS" and ("NDRPDRDISC" in tags or
+                                          "TCP" in tags or
+                                          "MRR" in tags):
                 if "NDRDISC" in tags:
                     test_type = "NDR"
                 elif "PDRDISC" in tags:
                     test_type = "PDR"
-                elif "TCP" in tags:  # Change to wrk?
+                elif "TCP" in tags:
                     test_type = "TCP"
+                elif "MRR" in tags:
+                    test_type = "MRR"
                 else:
                     return
 
@@ -458,6 +469,15 @@ class ExecutionChecker(ResultVisitor):
                 test_result["result"] = dict()
                 test_result["result"]["value"] = int(groups.group(2))
                 test_result["result"]["unit"] = groups.group(1)
+            elif test_type in ("MRR", ):
+                groups = re.search(self.REGEX_MRR, test.message)
+                test_result["result"] = dict()
+                test_result["result"]["duration"] = int(groups.group(1))
+                test_result["result"]["tx"] = int(groups.group(2))
+                test_result["result"]["rx"] = int(groups.group(3))
+                test_result["result"]["throughput"] = int(
+                    test_result["result"]["rx"] /
+                    test_result["result"]["duration"])
         else:
             test_result["status"] = test.status
 
@@ -496,6 +516,9 @@ class ExecutionChecker(ResultVisitor):
             elif keyword.type == "teardown":
                 self._lookup_kw_nr = 0
                 self.visit_teardown_kw(keyword)
+            else:
+                self._lookup_kw_nr = 0
+                self.visit_test_kw(keyword)
         except AttributeError:
             pass
 
@@ -508,6 +531,42 @@ class ExecutionChecker(ResultVisitor):
         """
         pass
 
+    def visit_test_kw(self, test_kw):
+        """Implements traversing through the test keyword and its child
+        keywords.
+
+        :param test_kw: Keyword to process.
+        :type test_kw: Keyword
+        :returns: Nothing.
+        """
+        for keyword in test_kw.keywords:
+            if self.start_test_kw(keyword) is not False:
+                self.visit_test_kw(keyword)
+                self.end_test_kw(keyword)
+
+    def start_test_kw(self, test_kw):
+        """Called when test keyword starts. Default implementation does
+        nothing.
+
+        :param test_kw: Keyword to process.
+        :type test_kw: Keyword
+        :returns: Nothing.
+        """
+        if test_kw.name.count("Show Runtime Counters On All Duts"):
+            self._lookup_kw_nr += 1
+            self._show_run_lookup_nr = 0
+            self._msg_type = "test-show-runtime"
+            test_kw.messages.visit(self)
+
+    def end_test_kw(self, test_kw):
+        """Called when keyword ends. Default implementation does nothing.
+
+        :param test_kw: Keyword to process.
+        :type test_kw: Keyword
+        :returns: Nothing.
+        """
+        pass
+
     def visit_setup_kw(self, setup_kw):
         """Implements traversing through the teardown keyword and its child
         keywords.
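The visit_test_kw/start_test_kw/end_test_kw triple added above follows Robot Framework's ResultVisitor convention: a start_* hook returning False prunes that subtree from the traversal. A self-contained illustration of the same pattern; the output.xml path is hypothetical:

    from robot.api import ExecutionResult, ResultVisitor

    class KeywordCounter(ResultVisitor):
        """Count visited keywords, skipping teardown subtrees."""
        def __init__(self):
            self.count = 0

        def start_keyword(self, keyword):
            if keyword.type == "teardown":
                return False  # prune: this keyword's children are not visited
            self.count += 1

    result = ExecutionResult("output.xml")  # hypothetical file
    counter = KeywordCounter()
    result.visit(counter)
    print(counter.count)
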
""" - if setup_kw.name.count("Vpp Show Version Verbose") \ + if setup_kw.name.count("Show Vpp Version On All Duts") \ and not self._version: self._msg_type = "setup-version" setup_kw.messages.visit(self) @@ -568,12 +627,6 @@ class ExecutionChecker(ResultVisitor): if teardown_kw.name.count("Show Vat History On All Duts"): self._vat_history_lookup_nr = 0 self._msg_type = "teardown-vat-history" - elif teardown_kw.name.count("Show Statistics On All Duts"): - self._lookup_kw_nr += 1 - self._show_run_lookup_nr = 0 - self._msg_type = "teardown-show-runtime" - - if self._msg_type: teardown_kw.messages.visit(self) def end_teardown_kw(self, teardown_kw): @@ -646,7 +699,7 @@ class InputData(object): self._cfg = spec # Data store: - self._input_data = None + self._input_data = pd.Series() @property def data(self): @@ -697,62 +750,188 @@ class InputData(object): return self.data[job][build]["tests"] @staticmethod - def _parse_tests(job, build): + def _parse_tests(job, build, log): """Process data from robot output.xml file and return JSON structured data. :param job: The name of job which build output data will be processed. :param build: The build which output data will be processed. + :param log: List of log messages. :type job: str :type build: dict + :type log: list of tuples (severity, msg) :returns: JSON data structure. :rtype: dict """ + metadata = { + "job": job, + "build": build + } + with open(build["file-name"], 'r') as data_file: - result = ExecutionResult(data_file) - checker = ExecutionChecker(job=job, build=build) + try: + result = ExecutionResult(data_file) + except errors.DataError as err: + log.append(("ERROR", "Error occurred while parsing output.xml: " + "{0}".format(err))) + return None + checker = ExecutionChecker(metadata) result.visit(checker) return checker.data - def read_data(self): - """Parse input data from input files and store in pandas' Series. + def _download_and_parse_build(self, pid, data_queue, job, build, repeat): + """Download and parse the input data file. + + :param pid: PID of the process executing this method. + :param data_queue: Shared memory between processes. Queue which keeps + the result data. This data is then read by the main process and used + in further processing. + :param job: Name of the Jenkins job which generated the processed input + file. + :param build: Information about the Jenkins build which generated the + processed input file. + :param repeat: Repeat the download specified number of times if not + successful. + :type pid: int + :type data_queue: multiprocessing.Manager().Queue() + :type job: str + :type build: dict + :type repeat: int """ - logging.info("Parsing input files ...") + logs = list() + + logging.info(" Processing the job/build: {0}: {1}". + format(job, build["build"])) + + logs.append(("INFO", " Processing the job/build: {0}: {1}". + format(job, build["build"]))) + + state = "failed" + success = False + data = None + do_repeat = repeat + while do_repeat: + success = download_and_unzip_data_file(self._cfg, job, build, pid, + logs) + if success: + break + do_repeat -= 1 + if not success: + logs.append(("ERROR", "It is not possible to download the input " + "data file from the job '{job}', build " + "'{build}', or it is damaged. Skipped.". + format(job=job, build=build["build"]))) + if success: + logs.append(("INFO", " Processing data from the build '{0}' ...". 
@@ -697,62 +750,188 @@ class InputData(object):
         return self.data[job][build]["tests"]
 
     @staticmethod
-    def _parse_tests(job, build):
+    def _parse_tests(job, build, log):
         """Process data from robot output.xml file and return JSON structured
         data.
 
         :param job: The name of job which build output data will be processed.
         :param build: The build which output data will be processed.
+        :param log: List of log messages.
         :type job: str
         :type build: dict
+        :type log: list of tuples (severity, msg)
         :returns: JSON data structure.
         :rtype: dict
         """
 
+        metadata = {
+            "job": job,
+            "build": build
+        }
+
         with open(build["file-name"], 'r') as data_file:
-            result = ExecutionResult(data_file)
-        checker = ExecutionChecker(job=job, build=build)
+            try:
+                result = ExecutionResult(data_file)
+            except errors.DataError as err:
+                log.append(("ERROR", "Error occurred while parsing output.xml: "
+                                     "{0}".format(err)))
+                return None
+        checker = ExecutionChecker(metadata)
         result.visit(checker)
 
         return checker.data
 
-    def read_data(self):
-        """Parse input data from input files and store in pandas' Series.
+    def _download_and_parse_build(self, pid, data_queue, job, build, repeat):
+        """Download and parse the input data file.
+
+        :param pid: PID of the process executing this method.
+        :param data_queue: Shared memory between processes. Queue which keeps
+            the result data. This data is then read by the main process and
+            used in further processing.
+        :param job: Name of the Jenkins job which generated the processed
+            input file.
+        :param build: Information about the Jenkins build which generated the
+            processed input file.
+        :param repeat: Repeat the download the specified number of times if
+            not successful.
+        :type pid: int
+        :type data_queue: multiprocessing.Manager().Queue()
+        :type job: str
+        :type build: dict
+        :type repeat: int
         """
 
-        logging.info("Parsing input files ...")
+        logs = list()
+
+        logging.info("  Processing the job/build: {0}: {1}".
+                     format(job, build["build"]))
+
+        logs.append(("INFO", "  Processing the job/build: {0}: {1}".
+                     format(job, build["build"])))
+
+        state = "failed"
+        success = False
+        data = None
+        do_repeat = repeat
+        while do_repeat:
+            success = download_and_unzip_data_file(self._cfg, job, build, pid,
+                                                   logs)
+            if success:
+                break
+            do_repeat -= 1
+        if not success:
+            logs.append(("ERROR", "It is not possible to download the input "
+                                  "data file from the job '{job}', build "
+                                  "'{build}', or it is damaged. Skipped.".
+                         format(job=job, build=build["build"])))
+        if success:
+            logs.append(("INFO", "  Processing data from the build '{0}' ...".
+                         format(build["build"])))
+            data = InputData._parse_tests(job, build, logs)
+            if data is None:
+                logs.append(("ERROR", "Input data file from the job '{job}', "
+                                      "build '{build}' is damaged. Skipped.".
+                             format(job=job, build=build["build"])))
+            else:
+                state = "processed"
+
+            try:
+                remove(build["file-name"])
+            except OSError as err:
+                logs.append(("ERROR", "Cannot remove the file '{0}': {1}".
+                             format(build["file-name"], err)))
+
+        logs.append(("INFO", "  Done."))
+
+        result = {
+            "data": data,
+            "state": state,
+            "job": job,
+            "build": build,
+            "logs": logs
+        }
+        data_queue.put(result)
+
+    def download_and_parse_data(self, repeat=1):
+        """Download the input data files, parse input data from input files
+        and store in pandas' Series.
+
+        :param repeat: Repeat the download the specified number of times if
+            not successful.
+        :type repeat: int
+        """
+
+        logging.info("Downloading and parsing input files ...")
+
+        work_queue = multiprocessing.JoinableQueue()
+
+        manager = multiprocessing.Manager()
+
+        data_queue = manager.Queue()
+
+        cpus = multiprocessing.cpu_count()
+        workers = list()
+        for cpu in range(cpus):
+            worker = Worker(work_queue,
+                            data_queue,
+                            self._download_and_parse_build)
+            worker.daemon = True
+            worker.start()
+            workers.append(worker)
+            os.system("taskset -p -c {0} {1} > /dev/null 2>&1".
+                      format(cpu, worker.pid))
 
-        job_data = dict()
         for job, builds in self._cfg.builds.items():
-            logging.info("  Extracting data from the job '{0}' ...'".
-                         format(job))
-            builds_data = dict()
             for build in builds:
-                if build["status"] == "failed" \
-                        or build["status"] == "not found":
-                    continue
-                logging.info("  Extracting data from the build '{0}'".
-                             format(build["build"]))
-                logging.info("    Processing the file '{0}'".
-                             format(build["file-name"]))
-                data = InputData._parse_tests(job, build)
+                work_queue.put((job, build, repeat))
+
+        work_queue.join()
+
+        logging.info("Done.")
 
+        while not data_queue.empty():
+            result = data_queue.get()
+
+            job = result["job"]
+            build_nr = result["build"]["build"]
+
+            if result["data"]:
+                data = result["data"]
                 build_data = pd.Series({
                     "metadata": pd.Series(data["metadata"].values(),
                                           index=data["metadata"].keys()),
                     "suites": pd.Series(data["suites"].values(),
                                         index=data["suites"].keys()),
                     "tests": pd.Series(data["tests"].values(),
-                                       index=data["tests"].keys()),
-                    })
-                builds_data[str(build["build"])] = build_data
-                logging.info("    Done.")
+                                       index=data["tests"].keys())})
+
+                if self._input_data.get(job, None) is None:
+                    self._input_data[job] = pd.Series()
+                self._input_data[job][str(build_nr)] = build_data
+
+                self._cfg.set_input_file_name(job, build_nr,
+                                              result["build"]["file-name"])
 
-            job_data[job] = pd.Series(builds_data.values(),
-                                      index=builds_data.keys())
-            logging.info("  Done.")
+            self._cfg.set_input_state(job, build_nr, result["state"])
+
+            for item in result["logs"]:
+                if item[0] == "INFO":
+                    logging.info(item[1])
+                elif item[0] == "ERROR":
+                    logging.error(item[1])
+                elif item[0] == "DEBUG":
+                    logging.debug(item[1])
+                elif item[0] == "CRITICAL":
+                    logging.critical(item[1])
+                elif item[0] == "WARNING":
+                    logging.warning(item[1])
+
+        del data_queue
+
+        # Terminate all workers
+        for worker in workers:
+            worker.terminate()
+            worker.join()
 
-        self._input_data = pd.Series(job_data.values(), index=job_data.keys())
         logging.info("Done.")
 
     @staticmethod
@@ -793,7 +972,8 @@ class InputData(object):
             index += 1
             tag_filter = tag_filter[:index] + " in tags" + tag_filter[index:]
 
-    def filter_data(self, element, params=None, data_set="tests"):
+    def filter_data(self, element, params=None, data_set="tests",
+                    continue_on_error=False):
         """Filter required data from the given jobs and builds.
 
         The output data structure is:
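For context on the body of filter_data() below: the filter string taken from the specification is eval()-ed once per test against that test's tag list, so a filter is simply a Python boolean expression over "tags". Roughly, with an invented filter string:

    cond = "('NDRPDRDISC' in tags) and ('1T1C' in tags)"  # invented filter
    tags = ["NDRPDRDISC", "1T1C", "64B", "IP4FWD"]

    if eval(cond, {"tags": tags}):
        print("test selected")
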
@@ -818,15 +998,18 @@ class InputData(object):
             all parameters are included.
         :param data_set: The set of data to be filtered: tests, suites,
             metadata.
+        :param continue_on_error: Continue if there is an error while reading
+            the data. The item will be empty then.
         :type element: pandas.Series
         :type params: list
         :type data_set: str
+        :type continue_on_error: bool
         :returns: Filtered data.
         :rtype pandas.Series
         """
 
         logging.info("    Creating the data set for the {0} '{1}'.".
-                     format(element["type"], element.get("title", "")))
+                     format(element.get("type", ""), element.get("title", "")))
 
         try:
             if element["filter"] in ("all", "template"):
@@ -847,8 +1030,15 @@ class InputData(object):
                 data[job] = pd.Series()
                 for build in builds:
                     data[job][str(build)] = pd.Series()
-                    for test_ID, test_data in \
-                            self.data[job][str(build)][data_set].iteritems():
+                    try:
+                        data_iter = self.data[job][str(build)][data_set].\
+                            iteritems()
+                    except KeyError:
+                        if continue_on_error:
+                            continue
+                        else:
+                            return None
+                    for test_ID, test_data in data_iter:
                         if eval(cond, {"tags": test_data.get("tags", "")}):
                             data[job][str(build)][test_ID] = pd.Series()
                             if params is None:
@@ -905,3 +1095,44 @@ class InputData(object):
             merged_data[ID] = item_data
 
         return merged_data
+
+
+class Worker(multiprocessing.Process):
+    """Worker class used to download and process input files in separate
+    parallel processes.
+    """
+
+    def __init__(self, work_queue, data_queue, func):
+        """Initialization.
+
+        :param work_queue: Queue with items to process.
+        :param data_queue: Shared memory between processes. Queue which keeps
+            the result data. This data is then read by the main process and
+            used in further processing.
+        :param func: Function which is executed by the worker.
+        :type work_queue: multiprocessing.JoinableQueue
+        :type data_queue: multiprocessing.Manager().Queue()
+        :type func: Callable object
+        """
+        super(Worker, self).__init__()
+        self._work_queue = work_queue
+        self._data_queue = data_queue
+        self._func = func
+
+    def run(self):
+        """Method representing the process's activity.
+        """
+
+        while True:
+            try:
+                self.process(self._work_queue.get())
+            finally:
+                self._work_queue.task_done()
+
+    def process(self, item_to_process):
+        """Method executed by the runner.
+
+        :param item_to_process: Data to be processed by the function.
+        :type item_to_process: tuple
+        """
+        self._func(self.pid, self._data_queue, *item_to_process)
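Taken together, download_and_parse_data() and the Worker class above implement a standard JoinableQueue fan-out: tasks go into a joinable work queue, results come back through a managed queue, and the daemon workers are terminated once the work queue joins. A stripped-down, runnable sketch of the same wiring; the doubling work() function is only a stand-in for _download_and_parse_build():

    import multiprocessing

    def work(pid, data_queue, item):
        # Stand-in for _download_and_parse_build().
        data_queue.put({"pid": pid, "result": item * 2})

    class Worker(multiprocessing.Process):
        def __init__(self, work_queue, data_queue, func):
            super(Worker, self).__init__()
            self._work_queue = work_queue
            self._data_queue = data_queue
            self._func = func

        def run(self):
            while True:
                try:
                    self._func(self.pid, self._data_queue,
                               self._work_queue.get())
                finally:
                    self._work_queue.task_done()

    if __name__ == "__main__":
        work_queue = multiprocessing.JoinableQueue()
        data_queue = multiprocessing.Manager().Queue()
        workers = [Worker(work_queue, data_queue, work) for _ in range(2)]
        for worker in workers:
            worker.daemon = True
            worker.start()
        for item in range(5):
            work_queue.put(item)
        work_queue.join()       # blocks until task_done() ran for every item
        while not data_queue.empty():
            print(data_queue.get())
        for worker in workers:  # daemon workers never exit on their own
            worker.terminate()
            worker.join()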