+ logs = list()
+
+ logs.append(("INFO", " Processing the job/build: {0}: {1}".
+ format(job, build["build"])))
+
+ state = "failed"
+ success = False
+ data = None
+ do_repeat = repeat
+ while do_repeat:
+ success = download_and_unzip_data_file(self._cfg, job, build, pid,
+ logs)
+ if success:
+ break
+ do_repeat -= 1
+ if not success:
+ logs.append(("ERROR", "It is not possible to download the input "
+ "data file from the job '{job}', build "
+ "'{build}', or it is damaged. Skipped.".
+ format(job=job, build=build["build"])))
+ if success:
+ logs.append(("INFO", " Processing data from the build '{0}' ...".
+ format(build["build"])))
+ data = self._parse_tests(job, build, logs)
+ if data is None:
+ logs.append(("ERROR", "Input data file from the job '{job}', "
+ "build '{build}' is damaged. Skipped.".
+ format(job=job, build=build["build"])))
+ else:
+ state = "processed"
+
+ try:
+ remove(build["file-name"])
+ except OSError as err:
+ logs.append(("ERROR", "Cannot remove the file '{0}': {1}".
+ format(build["file-name"], repr(err))))
+
+ # If the time-period is defined in the specification file, remove all
+ # files which are outside the time period.
+ timeperiod = self._cfg.input.get("time-period", None)
+ if timeperiod and data:
+ now = dt.utcnow()
+ timeperiod = timedelta(int(timeperiod))
+ metadata = data.get("metadata", None)
+ if metadata:
+ generated = metadata.get("generated", None)
+ if generated:
+ generated = dt.strptime(generated, "%Y%m%d %H:%M")
+ if (now - generated) > timeperiod:
+ # Remove the data and the file:
+ state = "removed"
+ data = None
+ logs.append(
+ ("INFO",
+ " The build {job}/{build} is outdated, will be "
+ "removed".format(job=job, build=build["build"])))
+ file_name = self._cfg.input["file-name"]
+ full_name = join(
+ self._cfg.environment["paths"]["DIR[WORKING,DATA]"],
+ "{job}{sep}{build}{sep}{name}".
+ format(job=job,
+ sep=SEPARATOR,
+ build=build["build"],
+ name=file_name))
+ try:
+ remove(full_name)
+ logs.append(("INFO",
+ " The file {name} has been removed".
+ format(name=full_name)))
+ except OSError as err:
+ logs.append(("ERROR",
+ "Cannot remove the file '{0}': {1}".
+ format(full_name, repr(err))))
+ logs.append(("INFO", " Done."))
+
+ for level, line in logs:
+ if level == "INFO":
+ logging.info(line)
+ elif level == "ERROR":
+ logging.error(line)
+ elif level == "DEBUG":
+ logging.debug(line)
+ elif level == "CRITICAL":
+ logging.critical(line)
+ elif level == "WARNING":
+ logging.warning(line)
+
+ result = {
+ "data": data,
+ "state": state,
+ "job": job,
+ "build": build
+ }
+ data_queue.put(result)
+
+ def download_and_parse_data(self, repeat=1):
+ """Download the input data files, parse input data from input files and
+ store in pandas' Series.
+
+ :param repeat: Repeat the download specified number of times if not
+ successful.
+ :type repeat: int
+ """
+
+ logging.info("Downloading and parsing input files ...")
+
+ work_queue = multiprocessing.JoinableQueue()
+ manager = multiprocessing.Manager()
+ data_queue = manager.Queue()
+ cpus = multiprocessing.cpu_count()
+
+ workers = list()
+ for cpu in range(cpus):
+ worker = Worker(work_queue,
+ data_queue,
+ self._download_and_parse_build)
+ worker.daemon = True
+ worker.start()
+ workers.append(worker)
+ os.system("taskset -p -c {0} {1} > /dev/null 2>&1".
+ format(cpu, worker.pid))