CSIT-1078: Optimize input data files download and processing
[csit.git] / resources / tools / presentation / input_data_parser.py
index 87d822f..e12e2fb 100644 (file)
 import re
 import pandas as pd
 import logging
 import re
 import pandas as pd
 import logging
+import xml.etree.ElementTree as ET
 
 from robot.api import ExecutionResult, ResultVisitor
 from robot import errors
 from collections import OrderedDict
 from string import replace
 
 from robot.api import ExecutionResult, ResultVisitor
 from robot import errors
 from collections import OrderedDict
 from string import replace
+from os import remove
+
+from input_data_files import download_and_unzip_data_file
 
 
 class ExecutionChecker(ResultVisitor):
 
 
 class ExecutionChecker(ResultVisitor):
@@ -170,14 +174,14 @@ class ExecutionChecker(ResultVisitor):
     REGEX_TOLERANCE = re.compile(r'^[\D\d]*LOSS_ACCEPTANCE:\s(\d*\.\d*)\s'
                                  r'[\D\d]*')
 
     REGEX_TOLERANCE = re.compile(r'^[\D\d]*LOSS_ACCEPTANCE:\s(\d*\.\d*)\s'
                                  r'[\D\d]*')
 
-    REGEX_VERSION = re.compile(r"(stdout: 'vat# vat# Version:)(\s*)(.*)")
+    REGEX_VERSION = re.compile(r"(return STDOUT Version:\s*)(.*)")
 
     REGEX_TCP = re.compile(r'Total\s(rps|cps|throughput):\s([0-9]*).*$')
 
     REGEX_MRR = re.compile(r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s'
                            r'tx\s(\d*),\srx\s(\d*)')
 
 
     REGEX_TCP = re.compile(r'Total\s(rps|cps|throughput):\s([0-9]*).*$')
 
     REGEX_MRR = re.compile(r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s'
                            r'tx\s(\d*),\srx\s(\d*)')
 
-    def __init__(self, **metadata):
+    def __init__(self, metadata):
         """Initialisation.
 
         :param metadata: Key-value pairs to be included in "metadata" part of
         """Initialisation.
 
         :param metadata: Key-value pairs to be included in "metadata" part of
@@ -243,14 +247,12 @@ class ExecutionChecker(ResultVisitor):
         :returns: Nothing.
         """
 
         :returns: Nothing.
         """
 
-        if msg.message.count("stdout: 'vat# vat# Version:"):
+        if msg.message.count("return STDOUT Version:"):
             self._version = str(re.search(self.REGEX_VERSION, msg.message).
             self._version = str(re.search(self.REGEX_VERSION, msg.message).
-                                group(3))
+                                group(2))
             self._data["metadata"]["version"] = self._version
             self._msg_type = None
 
             self._data["metadata"]["version"] = self._version
             self._msg_type = None
 
-            logging.debug("    VPP version: {0}".format(self._version))
-
     def _get_vat_history(self, msg):
         """Called when extraction of VAT command history is required.
 
     def _get_vat_history(self, msg):
         """Called when extraction of VAT command history is required.
 
@@ -584,7 +586,7 @@ class ExecutionChecker(ResultVisitor):
         :type setup_kw: Keyword
         :returns: Nothing.
         """
         :type setup_kw: Keyword
         :returns: Nothing.
         """
-        if setup_kw.name.count("Vpp Show Version Verbose") \
+        if setup_kw.name.count("Show Vpp Version On All Duts") \
                 and not self._version:
             self._msg_type = "setup-version"
             setup_kw.messages.visit(self)
                 and not self._version:
             self._msg_type = "setup-version"
             setup_kw.messages.visit(self)
@@ -746,18 +748,30 @@ class InputData(object):
         return self.data[job][build]["tests"]
 
     @staticmethod
         return self.data[job][build]["tests"]
 
     @staticmethod
-    def _parse_tests(job, build):
+    def _parse_tests(job, build, get_timestamp=False):
         """Process data from robot output.xml file and return JSON structured
         data.
 
         :param job: The name of job which build output data will be processed.
         :param build: The build which output data will be processed.
         """Process data from robot output.xml file and return JSON structured
         data.
 
         :param job: The name of job which build output data will be processed.
         :param build: The build which output data will be processed.
+        :param get_timestamp: If True, timestamp is read from the xml source
+            file.
         :type job: str
         :type build: dict
         :type job: str
         :type build: dict
+        :type get_timestamp: bool
         :returns: JSON data structure.
         :rtype: dict
         """
 
         :returns: JSON data structure.
         :rtype: dict
         """
 
+        metadata = {
+            "job": job,
+            "build": build
+        }
+        if get_timestamp:
+            tree = ET.parse(build["file-name"])
+            root = tree.getroot()
+            metadata["generated"] = root.attrib["generated"]
+
         with open(build["file-name"], 'r') as data_file:
             try:
                 result = ExecutionResult(data_file)
         with open(build["file-name"], 'r') as data_file:
             try:
                 result = ExecutionResult(data_file)
@@ -765,46 +779,65 @@ class InputData(object):
                 logging.error("Error occurred while parsing output.xml: {0}".
                               format(err))
                 return None
                 logging.error("Error occurred while parsing output.xml: {0}".
                               format(err))
                 return None
-        checker = ExecutionChecker(job=job, build=build)
+        checker = ExecutionChecker(metadata)
         result.visit(checker)
 
         return checker.data
 
         result.visit(checker)
 
         return checker.data
 
-    def read_data(self):
-        """Parse input data from input files and store in pandas' Series.
+    def download_and_parse_data(self, get_timestamp=False):
+        """Download the input data files, parse input data from input files and
+        store in pandas' Series.
+
+        :param get_timestamp: If True, timestamp is read from the xml source
+            file.
+        :type get_timestamp: bool
         """
 
         """
 
-        logging.info("Parsing input files ...")
+        logging.info("Downloading and parsing input files ...")
 
         job_data = dict()
         for job, builds in self._cfg.builds.items():
 
         job_data = dict()
         for job, builds in self._cfg.builds.items():
-            logging.info("  Extracting data from the job '{0}' ...'".
+            logging.info("  Processing data from the job '{0}' ...'".
                          format(job))
             builds_data = dict()
             for build in builds:
                          format(job))
             builds_data = dict()
             for build in builds:
-                if build["status"] == "failed" \
-                        or build["status"] == "not found":
+                logging.info("    Processing the build '{0}'".
+                             format(build["build"]))
+                self._cfg.set_input_state(job, build["build"], "failed")
+                if not download_and_unzip_data_file(self._cfg, job, build):
+                    logging.error("It is not possible to download the input "
+                                  "data file from the job '{job}', build "
+                                  "'{build}', or it is damaged. Skipped.".
+                                  format(job=job, build=build["build"]))
                     continue
                     continue
-                logging.info("    Extracting data from the build '{0}'".
+
+                logging.info("      Processing data from the build '{0}' ...".
                              format(build["build"]))
                              format(build["build"]))
-                logging.info("    Processing the file '{0}'".
-                             format(build["file-name"]))
-                data = InputData._parse_tests(job, build)
+                data = InputData._parse_tests(job, build,
+                                              get_timestamp=get_timestamp)
                 if data is None:
                     logging.error("Input data file from the job '{job}', build "
                                   "'{build}' is damaged. Skipped.".
                                   format(job=job, build=build["build"]))
                     continue
 
                 if data is None:
                     logging.error("Input data file from the job '{job}', build "
                                   "'{build}' is damaged. Skipped.".
                                   format(job=job, build=build["build"]))
                     continue
 
+                self._cfg.set_input_state(job, build["build"], "processed")
+
+                try:
+                    remove(build["file-name"])
+                except OSError as err:
+                    logging.error("Cannot remove the file '{0}': {1}".
+                                  format(build["file-name"], err))
+
                 build_data = pd.Series({
                     "metadata": pd.Series(data["metadata"].values(),
                                           index=data["metadata"].keys()),
                     "suites": pd.Series(data["suites"].values(),
                                         index=data["suites"].keys()),
                     "tests": pd.Series(data["tests"].values(),
                 build_data = pd.Series({
                     "metadata": pd.Series(data["metadata"].values(),
                                           index=data["metadata"].keys()),
                     "suites": pd.Series(data["suites"].values(),
                                         index=data["suites"].keys()),
                     "tests": pd.Series(data["tests"].values(),
-                                       index=data["tests"].keys()),
-                    })
+                                       index=data["tests"].keys())})
                 builds_data[str(build["build"])] = build_data
                 builds_data[str(build["build"])] = build_data
+                build["status"] = "processed"
                 logging.info("    Done.")
 
             job_data[job] = pd.Series(builds_data.values(),
                 logging.info("    Done.")
 
             job_data[job] = pd.Series(builds_data.values(),