CSIT-1116: Add dpdk mrr tests to trending

[csit.git] / resources / tools / presentation / input_data_parser.py
diff --git a/resources/tools/presentation/input_data_parser.py b/resources/tools/presentation/input_data_parser.py

index e12e2fb..0bb2b6c 100644 (file)
--- a/resources/tools/presentation/input_data_parser.py
+++ b/resources/tools/presentation/input_data_parser.py
@@ -18,10 +18,11 @@
  - provide access to the data.
  """
  
+import multiprocessing
+import os
  import re
  import pandas as pd
  import logging
-import xml.etree.ElementTree as ET
  
  from robot.api import ExecutionResult, ResultVisitor
  from robot import errors
@@ -30,6 +31,7 @@ from string import replace
  from os import remove
  
  from input_data_files import download_and_unzip_data_file
+from utils import Worker
  
  
  class ExecutionChecker(ResultVisitor):
@@ -174,7 +176,10 @@ class ExecutionChecker(ResultVisitor):
      REGEX_TOLERANCE = re.compile(r'^[\D\d]*LOSS_ACCEPTANCE:\s(\d*\.\d*)\s'
                                   r'[\D\d]*')
  
-    REGEX_VERSION = re.compile(r"(return STDOUT Version:\s*)(.*)")
+    REGEX_VERSION_VPP = re.compile(r"(return STDOUT Version:\s*)(.*)")
+
+    REGEX_VERSION_DPDK = re.compile(r"(return STDOUT testpmd)([\d\D\n]*)"
+                                    r"(RTE Version: 'DPDK )(.*)(')")
  
      REGEX_TCP = re.compile(r'Total\s(rps|cps|throughput):\s([0-9]*).*$')
  
@@ -195,6 +200,9 @@ class ExecutionChecker(ResultVisitor):
          # VPP version
          self._version = None
  
+        # Timestamp
+        self._timestamp = None
+
          # Number of VAT History messages found:
          # 0 - no message
          # 1 - VAT History of DUT1
@@ -225,7 +233,9 @@ class ExecutionChecker(ResultVisitor):
          # Dictionary defining the methods used to parse different types of
          # messages
          self.parse_msg = {
-            "setup-version": self._get_version,
+            "timestamp": self._get_timestamp,
+            "vpp-version": self._get_vpp_version,
+            "dpdk-version": self._get_dpdk_version,
              "teardown-vat-history": self._get_vat_history,
              "test-show-runtime": self._get_show_run
          }
@@ -239,7 +249,7 @@ class ExecutionChecker(ResultVisitor):
          """
          return self._data
  
-    def _get_version(self, msg):
+    def _get_vpp_version(self, msg):
          """Called when extraction of VPP version is required.
  
          :param msg: Message to process.
@@ -248,11 +258,41 @@ class ExecutionChecker(ResultVisitor):
          """
  
          if msg.message.count("return STDOUT Version:"):
-            self._version = str(re.search(self.REGEX_VERSION, msg.message).
+            self._version = str(re.search(self.REGEX_VERSION_VPP, msg.message).
                                  group(2))
              self._data["metadata"]["version"] = self._version
              self._msg_type = None
  
+    def _get_dpdk_version(self, msg):
+        """Called when extraction of DPDK version is required.
+
+        Only messages containing "return STDOUT testpmd" are processed. The
+        version is the text captured by REGEX_VERSION_DPDK between
+        "RTE Version: 'DPDK " and the closing apostrophe; it is stored in
+        self._version and in the "version" item of the metadata. The message
+        type is reset once such a message was seen, no matter if the version
+        was found in it or not.
+
+        :param msg: Message to process.
+        :type msg: Message
+        :returns: Nothing.
+        """
+
+        if not msg.message.count("return STDOUT testpmd"):
+            return
+
+        try:
+            # re.search() returns None (it does not raise IndexError) when the
+            # message carries no "RTE Version" line, so test the match object
+            # instead of calling group() on it blindly.
+            match = re.search(self.REGEX_VERSION_DPDK, msg.message)
+            if match is not None:
+                self._version = str(match.group(4))
+                self._data["metadata"]["version"] = self._version
+        finally:
+            self._msg_type = None
+
+    def _get_timestamp(self, msg):
+        """Store the timestamp of the processed message as the time when the
+        data was generated.
+
+        Only the first 14 characters of msg.timestamp are kept. The value is
+        saved in self._timestamp and in the "generated" item of the metadata,
+        then the message type is reset.
+
+        :param msg: Message to process.
+        :type msg: Message
+        :returns: Nothing.
+        """
+
+        stamp = msg.timestamp[:14]
+        self._timestamp = stamp
+        self._data["metadata"]["generated"] = stamp
+        self._msg_type = None
+
      def _get_vat_history(self, msg):
          """Called when extraction of VAT command history is required.
  
@@ -554,7 +594,11 @@ class ExecutionChecker(ResultVisitor):
              self._lookup_kw_nr += 1
              self._show_run_lookup_nr = 0
              self._msg_type = "test-show-runtime"
-            test_kw.messages.visit(self)
+        elif test_kw.name.count("Start The L2fwd Test") and not self._version:
+            self._msg_type = "dpdk-version"
+        else:
+            return
+        test_kw.messages.visit(self)
  
      def end_test_kw(self, test_kw):
          """Called when keyword ends. Default implementation does nothing.
@@ -588,8 +632,14 @@ class ExecutionChecker(ResultVisitor):
          """
          if setup_kw.name.count("Show Vpp Version On All Duts") \
                  and not self._version:
-            self._msg_type = "setup-version"
-            setup_kw.messages.visit(self)
+            self._msg_type = "vpp-version"
+
+        elif setup_kw.name.count("Setup performance global Variables") \
+                and not self._timestamp:
+            self._msg_type = "timestamp"
+        else:
+            return
+        setup_kw.messages.visit(self)
  
      def end_setup_kw(self, setup_kw):
          """Called when keyword ends. Default implementation does nothing.
@@ -697,7 +747,7 @@ class InputData(object):
          self._cfg = spec
  
          # Data store:
-        self._input_data = None
+        self._input_data = pd.Series()
  
      @property
      def data(self):
@@ -748,17 +798,16 @@ class InputData(object):
          return self.data[job][build]["tests"]
  
      @staticmethod
-    def _parse_tests(job, build, get_timestamp=False):
+    def _parse_tests(job, build, log):
          """Process data from robot output.xml file and return JSON structured
          data.
  
          :param job: The name of job which build output data will be processed.
          :param build: The build which output data will be processed.
-        :param get_timestamp: If True, timestamp is read form the xml source
-            file.
+        :param log: List of log messages.
          :type job: str
          :type build: dict
-        :type get_timestamp: bool
+        :type log: list of tuples (severity, msg)
          :returns: JSON data structure.
          :rtype: dict
          """
@@ -767,68 +816,132 @@ class InputData(object):
              "job": job,
              "build": build
          }
-        if get_timestamp:
-            tree = ET.parse(build["file-name"])
-            root = tree.getroot()
-            metadata["generated"] = root.attrib["generated"]
  
          with open(build["file-name"], 'r') as data_file:
              try:
                  result = ExecutionResult(data_file)
              except errors.DataError as err:
-                logging.error("Error occurred while parsing output.xml: {0}".
-                              format(err))
+                log.append(("ERROR", "Error occurred while parsing output.xml: "
+                                     "{0}".format(err)))
                  return None
          checker = ExecutionChecker(metadata)
          result.visit(checker)
  
          return checker.data
  
-    def download_and_parse_data(self, get_timestamp=False):
+    def _download_and_parse_build(self, pid, data_queue, job, build, repeat):
+        """Download one input data file, parse it and put the outcome into the
+        data queue.
+
+        Log messages are collected as (severity, message) tuples and handed
+        over in the result so the reader of the queue can log them. The item
+        put into the queue is a dictionary with the keys "data", "state", "job",
+        "build" and "logs"; "state" is "processed" only if the file was
+        downloaded and parsed, otherwise it is "failed" and "data" is None.
+
+        :param pid: PID of the process executing this method.
+        :param data_queue: Shared memory between processes. Queue which keeps
+            the result data. This data is then read by the main process and used
+            in further processing.
+        :param job: Name of the Jenkins job which generated the processed input
+            file.
+        :param build: Information about the Jenkins build which generated the
+            processed input file.
+        :param repeat: Repeat the download specified number of times if not
+            successful.
+        :type pid: int
+        :type data_queue: multiprocessing.Manager().Queue()
+        :type job: str
+        :type build: dict
+        :type repeat: int
+        """
+
+        start_msg = "  Processing the job/build: {0}: {1}".format(
+            job, build["build"])
+        logging.info(start_msg)
+        logs = [("INFO", start_msg), ]
+
+        state = "failed"
+        data = None
+
+        # Try to download the file until it succeeds or there is no attempt
+        # left.
+        downloaded = False
+        attempts = repeat
+        while attempts:
+            downloaded = download_and_unzip_data_file(
+                self._cfg, job, build, pid, logs)
+            if downloaded:
+                break
+            attempts -= 1
+
+        if downloaded:
+            logs.append((
+                "INFO",
+                "  Processing data from the build '{0}' ...".format(
+                    build["build"])))
+            data = InputData._parse_tests(job, build, logs)
+            if data is None:
+                logs.append((
+                    "ERROR",
+                    "Input data file from the job '{job}', build '{build}' "
+                    "is damaged. Skipped.".format(
+                        job=job, build=build["build"])))
+            else:
+                state = "processed"
+
+            # The downloaded file is removed no matter if it was parsed or not.
+            try:
+                remove(build["file-name"])
+            except OSError as err:
+                logs.append((
+                    "ERROR",
+                    "Cannot remove the file '{0}': {1}".format(
+                        build["file-name"], err)))
+        else:
+            logs.append((
+                "ERROR",
+                "It is not possible to download the input data file from the "
+                "job '{job}', build '{build}', or it is damaged. Skipped.".
+                format(job=job, build=build["build"])))
+        logs.append(("INFO", "  Done."))
+
+        data_queue.put({
+            "data": data,
+            "state": state,
+            "job": job,
+            "build": build,
+            "logs": logs
+        })
+
+    def download_and_parse_data(self, repeat=1):
          """Download the input data files, parse input data from input files and
          store in pandas' Series.
  
-        :param get_timestamp: If True, timestamp is read form the xml source
-            file.
-        :type get_timestamp: bool
+        :param repeat: Repeat the download specified number of times if not
+            successful.
+        :type repeat: int
          """
  
          logging.info("Downloading and parsing input files ...")
  
-        job_data = dict()
+        work_queue = multiprocessing.JoinableQueue()
+        manager = multiprocessing.Manager()
+        data_queue = manager.Queue()
+        cpus = multiprocessing.cpu_count()
+
+        workers = list()
+        for cpu in range(cpus):
+            worker = Worker(work_queue,
+                            data_queue,
+                            self._download_and_parse_build)
+            worker.daemon = True
+            worker.start()
+            workers.append(worker)
+            os.system("taskset -p -c {0} {1} > /dev/null 2>&1".
+                      format(cpu, worker.pid))
+
          for job, builds in self._cfg.builds.items():
-            logging.info("  Processing data from the job '{0}' ...'".
-                         format(job))
-            builds_data = dict()
              for build in builds:
-                logging.info("    Processing the build '{0}'".
-                             format(build["build"]))
-                self._cfg.set_input_state(job, build["build"], "failed")
-                if not download_and_unzip_data_file(self._cfg, job, build):
-                    logging.error("It is not possible to download the input "
-                                  "data file from the job '{job}', build "
-                                  "'{build}', or it is damaged. Skipped.".
-                                  format(job=job, build=build["build"]))
-                    continue
+                work_queue.put((job, build, repeat))
  
-                logging.info("      Processing data from the build '{0}' ...".
-                             format(build["build"]))
-                data = InputData._parse_tests(job, build,
-                                              get_timestamp=get_timestamp)
-                if data is None:
-                    logging.error("Input data file from the job '{job}', build "
-                                  "'{build}' is damaged. Skipped.".
-                                  format(job=job, build=build["build"]))
-                    continue
+        work_queue.join()
  
-                self._cfg.set_input_state(job, build["build"], "processed")
+        logging.info("Done.")
  
-                try:
-                    remove(build["file-name"])
-                except OSError as err:
-                    logging.error("Cannot remove the file '{0}': {1}".
-                                  format(build["file-name"], err))
+        while not data_queue.empty():
+            result = data_queue.get()
+
+            job = result["job"]
+            build_nr = result["build"]["build"]
  
+            if result["data"]:
+                data = result["data"]
                  build_data = pd.Series({
                      "metadata": pd.Series(data["metadata"].values(),
                                            index=data["metadata"].keys()),
@@ -836,15 +949,35 @@ class InputData(object):
                                          index=data["suites"].keys()),
                      "tests": pd.Series(data["tests"].values(),
                                         index=data["tests"].keys())})
-                builds_data[str(build["build"])] = build_data
-                build["status"] = "processed"
-                logging.info("    Done.")
  
-            job_data[job] = pd.Series(builds_data.values(),
-                                      index=builds_data.keys())
-            logging.info("  Done.")
+                if self._input_data.get(job, None) is None:
+                    self._input_data[job] = pd.Series()
+                self._input_data[job][str(build_nr)] = build_data
+
+                self._cfg.set_input_file_name(job, build_nr,
+                                              result["build"]["file-name"])
+
+            self._cfg.set_input_state(job, build_nr, result["state"])
+
+            for item in result["logs"]:
+                if item[0] == "INFO":
+                    logging.info(item[1])
+                elif item[0] == "ERROR":
+                    logging.error(item[1])
+                elif item[0] == "DEBUG":
+                    logging.debug(item[1])
+                elif item[0] == "CRITICAL":
+                    logging.critical(item[1])
+                elif item[0] == "WARNING":
+                    logging.warning(item[1])
+
+        del data_queue
+
+        # Terminate all workers
+        for worker in workers:
+            worker.terminate()
+            worker.join()
  
-        self._input_data = pd.Series(job_data.values(), index=job_data.keys())
          logging.info("Done.")
  
      @staticmethod
@@ -921,9 +1054,6 @@ class InputData(object):
          :rtype pandas.Series
          """
  
-        logging.info("    Creating the data set for the {0} '{1}'.".
-                     format(element.get("type", ""), element.get("title", "")))
-
          try:
              if element["filter"] in ("all", "template"):
                  cond = "True"