+ data_file = spec.input[u"extract"]
+
+ directory = spec.environment[u"paths"][u"DIR[WORKING,DATA]"]
+ tmp_dir = join(directory, str(pid))
+ try:
+ mkdir(tmp_dir)
+ except OSError:
+ pass
+ new_name = \
+ f"{file_name.rsplit(u'.')[-2]}{SEPARATOR}{data_file.split(u'/')[-1]}"
+
+ logging.info(f" Unzipping: {data_file} from {file_name}.")
+ try:
+ with ZipFile(file_name, u'r') as zip_file:
+ zip_file.extract(data_file, tmp_dir)
+ logging.info(
+ f" Renaming the file {join(tmp_dir, data_file)} to {new_name}"
+ )
+ rename(join(tmp_dir, data_file), new_name)
+ build[u"file-name"] = new_name
+ return True
+ except (BadZipfile, RuntimeError) as err:
+ logging.error(f"Failed to unzip the file {file_name}: {repr(err)}.")
+ return False
+ except OSError as err:
+ logging.error(f"Failed to rename the file {data_file}: {repr(err)}.")
+ return False
+
+
+def download_and_unzip_data_file(spec, job, build, pid):
+ """Download and unzip a source file.
+
+ :param spec: Specification read from the specification file.
+ :param job: Name of the Jenkins job.
+ :param build: Information about the build.
+ :param pid: PID of the process executing this method.
+ :type spec: Specification
+ :type job: str
+ :type build: dict
+ :type pid: int
+ :returns: True if the download was successful, otherwise False.
+ :rtype: bool
+ """
+
+ # Try to download .gz from s3_storage
+ file_name = spec.input[u"file-name"]
+ url = u"{0}/{1}".format(
+ spec.environment[u'urls'][u'URL[S3_STORAGE,LOG]'],
+ spec.input[u'download-path'].format(
+ job=job, build=build[u'build'], filename=file_name
+ )
+ )
+ new_name = join(
+ spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
+ f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
+ )
+
+ logging.info(f"Trying to download {url}")
+
+ arch = bool(spec.configuration.get(u"archive-inputs", True))
+ success, downloaded_name = _download_file(url, new_name, arch=arch)
+
+ if not success:
+ # Try to download .gz from logs.fd.io
+ file_name = spec.input[u"file-name"]
+ url = u"{0}/{1}".format(
+ spec.environment[u'urls'][u'URL[NEXUS,LOG]'],
+ spec.input[u'download-path'].format(
+ job=job, build=build[u'build'], filename=file_name
+ )
+ )
+ new_name = join(
+ spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
+ f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
+ )
+
+ logging.info(f"Trying to download {url}")