CSIT-1078: Optimize input data files download and processing

[csit.git] / resources / tools / presentation / input_data_parser.py
diff --git a/resources/tools/presentation/input_data_parser.py b/resources/tools/presentation/input_data_parser.py

index 87d822f..e12e2fb 100644 (file)
--- a/resources/tools/presentation/input_data_parser.py
+++ b/resources/tools/presentation/input_data_parser.py
@@ -21,11 +21,15 @@
  import re
  import pandas as pd
  import logging
  import re
  import pandas as pd
  import logging
+import xml.etree.ElementTree as ET
  
  from robot.api import ExecutionResult, ResultVisitor
  from robot import errors
  from collections import OrderedDict
  from string import replace
  
  from robot.api import ExecutionResult, ResultVisitor
  from robot import errors
  from collections import OrderedDict
  from string import replace
+from os import remove
+
+from input_data_files import download_and_unzip_data_file
  
  
  class ExecutionChecker(ResultVisitor):
  
  
  class ExecutionChecker(ResultVisitor):
@@ -170,14 +174,14 @@ class ExecutionChecker(ResultVisitor):
      REGEX_TOLERANCE = re.compile(r'^[\D\d]*LOSS_ACCEPTANCE:\s(\d*\.\d*)\s'
                                   r'[\D\d]*')
  
      REGEX_TOLERANCE = re.compile(r'^[\D\d]*LOSS_ACCEPTANCE:\s(\d*\.\d*)\s'
                                   r'[\D\d]*')
  
-    REGEX_VERSION = re.compile(r"(stdout: 'vat# vat# Version:)(\s*)(.*)")
+    REGEX_VERSION = re.compile(r"(return STDOUT Version:\s*)(.*)")
  
      REGEX_TCP = re.compile(r'Total\s(rps|cps|throughput):\s([0-9]*).*$')
  
      REGEX_MRR = re.compile(r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s'
                             r'tx\s(\d*),\srx\s(\d*)')
  
  
      REGEX_TCP = re.compile(r'Total\s(rps|cps|throughput):\s([0-9]*).*$')
  
      REGEX_MRR = re.compile(r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s'
                             r'tx\s(\d*),\srx\s(\d*)')
  
-    def __init__(self, **metadata):
+    def __init__(self, metadata):
          """Initialisation.
  
          :param metadata: Key-value pairs to be included in "metadata" part of
          """Initialisation.
  
          :param metadata: Key-value pairs to be included in "metadata" part of
@@ -243,14 +247,12 @@ class ExecutionChecker(ResultVisitor):
          :returns: Nothing.
          """
  
          :returns: Nothing.
          """
  
-        if msg.message.count("stdout: 'vat# vat# Version:"):
+        if msg.message.count("return STDOUT Version:"):
              self._version = str(re.search(self.REGEX_VERSION, msg.message).
              self._version = str(re.search(self.REGEX_VERSION, msg.message).
-                                group(3))
+                                group(2))
              self._data["metadata"]["version"] = self._version
              self._msg_type = None
  
              self._data["metadata"]["version"] = self._version
              self._msg_type = None
  
-            logging.debug("    VPP version: {0}".format(self._version))
-
      def _get_vat_history(self, msg):
          """Called when extraction of VAT command history is required.
  
      def _get_vat_history(self, msg):
          """Called when extraction of VAT command history is required.
  
@@ -584,7 +586,7 @@ class ExecutionChecker(ResultVisitor):
          :type setup_kw: Keyword
          :returns: Nothing.
          """
          :type setup_kw: Keyword
          :returns: Nothing.
          """
-        if setup_kw.name.count("Vpp Show Version Verbose") \
+        if setup_kw.name.count("Show Vpp Version On All Duts") \
                  and not self._version:
              self._msg_type = "setup-version"
              setup_kw.messages.visit(self)
                  and not self._version:
              self._msg_type = "setup-version"
              setup_kw.messages.visit(self)
@@ -746,18 +748,30 @@ class InputData(object):
          return self.data[job][build]["tests"]
  
      @staticmethod
          return self.data[job][build]["tests"]
  
      @staticmethod
-    def _parse_tests(job, build):
+    def _parse_tests(job, build, get_timestamp=False):
          """Process data from robot output.xml file and return JSON structured
          data.
  
          :param job: The name of job which build output data will be processed.
          :param build: The build which output data will be processed.
          """Process data from robot output.xml file and return JSON structured
          data.
  
          :param job: The name of job which build output data will be processed.
          :param build: The build which output data will be processed.
+        :param get_timestamp: If True, timestamp is read from the XML source
+            file.
          :type job: str
          :type build: dict
          :type job: str
          :type build: dict
+        :type get_timestamp: bool
          :returns: JSON data structure.
          :rtype: dict
          """
  
          :returns: JSON data structure.
          :rtype: dict
          """
  
+        metadata = {
+            "job": job,
+            "build": build
+        }
+        if get_timestamp:
+            tree = ET.parse(build["file-name"])
+            root = tree.getroot()
+            metadata["generated"] = root.attrib["generated"]
+
          with open(build["file-name"], 'r') as data_file:
              try:
                  result = ExecutionResult(data_file)
          with open(build["file-name"], 'r') as data_file:
              try:
                  result = ExecutionResult(data_file)
@@ -765,46 +779,65 @@ class InputData(object):
                  logging.error("Error occurred while parsing output.xml: {0}".
                                format(err))
                  return None
                  logging.error("Error occurred while parsing output.xml: {0}".
                                format(err))
                  return None
-        checker = ExecutionChecker(job=job, build=build)
+        checker = ExecutionChecker(metadata)
          result.visit(checker)
  
          return checker.data
  
          result.visit(checker)
  
          return checker.data
  
-    def read_data(self):
-        """Parse input data from input files and store in pandas' Series.
+    def download_and_parse_data(self, get_timestamp=False):
+        """Download the input data files, parse input data from input files and
+        store in pandas' Series.
+
+        :param get_timestamp: If True, timestamp is read from the XML source
+            file.
+        :type get_timestamp: bool
          """
  
          """
  
-        logging.info("Parsing input files ...")
+        logging.info("Downloading and parsing input files ...")
  
          job_data = dict()
          for job, builds in self._cfg.builds.items():
  
          job_data = dict()
          for job, builds in self._cfg.builds.items():
-            logging.info("  Extracting data from the job '{0}' ...'".
+            logging.info("  Processing data from the job '{0}' ...'".
                           format(job))
              builds_data = dict()
              for build in builds:
                           format(job))
              builds_data = dict()
              for build in builds:
-                if build["status"] == "failed" \
-                        or build["status"] == "not found":
+                logging.info("    Processing the build '{0}'".
+                             format(build["build"]))
+                self._cfg.set_input_state(job, build["build"], "failed")
+                if not download_and_unzip_data_file(self._cfg, job, build):
+                    logging.error("It is not possible to download the input "
+                                  "data file from the job '{job}', build "
+                                  "'{build}', or it is damaged. Skipped.".
+                                  format(job=job, build=build["build"]))
                      continue
                      continue
-                logging.info("    Extracting data from the build '{0}'".
+
+                logging.info("      Processing data from the build '{0}' ...".
                               format(build["build"]))
                               format(build["build"]))
-                logging.info("    Processing the file '{0}'".
-                             format(build["file-name"]))
-                data = InputData._parse_tests(job, build)
+                data = InputData._parse_tests(job, build,
+                                              get_timestamp=get_timestamp)
                  if data is None:
                      logging.error("Input data file from the job '{job}', build "
                                    "'{build}' is damaged. Skipped.".
                                    format(job=job, build=build["build"]))
                      continue
  
                  if data is None:
                      logging.error("Input data file from the job '{job}', build "
                                    "'{build}' is damaged. Skipped.".
                                    format(job=job, build=build["build"]))
                      continue
  
+                self._cfg.set_input_state(job, build["build"], "processed")
+
+                try:
+                    remove(build["file-name"])
+                except OSError as err:
+                    logging.error("Cannot remove the file '{0}': {1}".
+                                  format(build["file-name"], err))
+
                  build_data = pd.Series({
                      "metadata": pd.Series(data["metadata"].values(),
                                            index=data["metadata"].keys()),
                      "suites": pd.Series(data["suites"].values(),
                                          index=data["suites"].keys()),
                      "tests": pd.Series(data["tests"].values(),
                  build_data = pd.Series({
                      "metadata": pd.Series(data["metadata"].values(),
                                            index=data["metadata"].keys()),
                      "suites": pd.Series(data["suites"].values(),
                                          index=data["suites"].keys()),
                      "tests": pd.Series(data["tests"].values(),
-                                       index=data["tests"].keys()),
-                    })
+                                       index=data["tests"].keys())})
                  builds_data[str(build["build"])] = build_data
                  builds_data[str(build["build"])] = build_data
+                build["status"] = "processed"
                  logging.info("    Done.")
  
              job_data[job] = pd.Series(builds_data.values(),
                  logging.info("    Done.")
  
              job_data[job] = pd.Series(builds_data.values(),