X-Git-Url: https://gerrit.fd.io/r/gitweb?p=csit.git;a=blobdiff_plain;f=resources%2Ftools%2Fpresentation%2Finput_data_files.py;h=0a723cedc70868cee40d74e006746d59bca7b857;hp=9e0cfa6931176abeb1c05ec518fc300bff3d04a7;hb=ac2c84d9561e2344057dc5d4173b0c7718015c4b;hpb=bd18bd034ac9b6d3e50eea6a489d9ea0c3252c58

diff --git a/resources/tools/presentation/input_data_files.py b/resources/tools/presentation/input_data_files.py
index 9e0cfa6931..0a723cedc7 100644
--- a/resources/tools/presentation/input_data_files.py
+++ b/resources/tools/presentation/input_data_files.py
@@ -16,18 +16,20 @@ Download all data.
 """

 import re
+import requests
 import logging

-from os import rename
+from os import rename, mkdir
 from os.path import join
-from shutil import move
 from zipfile import ZipFile, is_zipfile, BadZipfile
 from httplib import responses
-from requests import get, codes, RequestException, Timeout, TooManyRedirects, \
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.retry import Retry
+from requests import codes, RequestException, Timeout, TooManyRedirects, \
     HTTPError, ConnectionError

 from errors import PresentationError
-from utils import execute_command
+

 # Chunk size used for file download
 CHUNK_SIZE = 512
@@ -38,179 +40,296 @@ SEPARATOR = "__"

 REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)')


-def _download_file(url, file_name):
+def _download_file(url, file_name, log, arch=False):
     """Download a file with input data.

     :param url: URL to the file to download.
     :param file_name: Name of file to download.
+    :param log: List of log messages.
+    :param arch: If True, also .gz file is downloaded
     :type url: str
     :type file_name: str
+    :type log: list of tuples (severity, msg)
+    :type arch: bool
     :returns: True if the download was successful, otherwise False.
     :rtype: bool
     """

+    def requests_retry_session(retries=3,
+                               backoff_factor=0.3,
+                               status_forcelist=(500, 502, 504)):
+        """
+
+        :param retries: Total number of retries to allow.
+        :param backoff_factor: A backoff factor to apply between attempts after
+            the second try.
+        :param status_forcelist: A set of integer HTTP status codes that are
+            forced to retry.
+        :type retries: int
+        :type backoff_factor: float
+        :type status_forcelist: iterable
+        :returns: Session object.
+        :rtype: requests.Session
+        """
+
+        retry = Retry(
+            total=retries,
+            read=retries,
+            connect=retries,
+            backoff_factor=backoff_factor,
+            status_forcelist=status_forcelist,
+        )
+        adapter = HTTPAdapter(max_retries=retry)
+        session = requests.Session()
+        session.mount('http://', adapter)
+        session.mount('https://', adapter)
+        return session
+
     success = False
+    session = None
     try:
-        logging.info("    Connecting to '{0}' ...".format(url))
-
-        response = get(url, stream=True)
+        log.append(("INFO", "    Connecting to '{0}' ...".format(url)))
+        session = requests_retry_session()
+        response = session.get(url, stream=True)
         code = response.status_code
-
-        logging.info("    {0}: {1}".format(code, responses[code]))
+        log.append(("INFO", "    {0}: {1}".format(code, responses[code])))

         if code != codes["OK"]:
-            return False
+            if session:
+                session.close()
+            url = url.replace("_info", "")
+            log.append(("INFO", "    Connecting to '{0}' ...".format(url)))
+            session = requests_retry_session()
+            response = session.get(url, stream=True)
+            code = response.status_code
+            log.append(("INFO", "    {0}: {1}".format(code, responses[code])))
+            if code != codes["OK"]:
+                return False, file_name
+            file_name = file_name.replace("_info", "")

-        logging.info("    Downloading the file '{0}' to '{1}' ...".
-                     format(url, file_name))
+        dst_file_name = file_name.replace(".gz", "")
+        log.append(("INFO", "    Downloading the file '{0}' to '{1}' ...".
+                    format(url, dst_file_name)))
+        with open(dst_file_name, "wb") as file_handle:
+            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
+                if chunk:
+                    file_handle.write(chunk)
+
+        if arch and ".gz" in file_name:
+            if session:
+                session.close()
+            log.append(("INFO", "    Downloading the file '{0}' to '{1}' ...".
+                        format(url, file_name)))
+            session = requests_retry_session()
+            response = session.get(url, stream=True)
+            if response.status_code == codes["OK"]:
+                with open(file_name, "wb") as file_handle:
+                    file_handle.write(response.raw.read())
+            else:
+                log.append(("ERROR", "Not possible to download the file '{0}' "
+                                     "to '{1}' ...".format(url, file_name)))

-        file_handle = open(file_name, "wb")
-        for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
-            if chunk:
-                file_handle.write(chunk)
-        file_handle.close()
         success = True
     except ConnectionError as err:
-        logging.error("Not possible to connect to '{0}'.".format(url))
-        logging.debug(str(err))
+        log.append(("ERROR", "Not possible to connect to '{0}'.".format(url)))
+        log.append(("DEBUG", repr(err)))
     except HTTPError as err:
-        logging.error("Invalid HTTP response from '{0}'.".format(url))
-        logging.debug(str(err))
+        log.append(("ERROR", "Invalid HTTP response from '{0}'.".format(url)))
+        log.append(("DEBUG", repr(err)))
     except TooManyRedirects as err:
-        logging.error("Request exceeded the configured number "
-                      "of maximum re-directions.")
-        logging.debug(str(err))
+        log.append(("ERROR", "Request exceeded the configured number "
+                             "of maximum re-directions."))
+        log.append(("DEBUG", repr(err)))
     except Timeout as err:
-        logging.error("Request timed out.")
-        logging.debug(str(err))
+        log.append(("ERROR", "Request timed out."))
+        log.append(("DEBUG", repr(err)))
     except RequestException as err:
-        logging.error("Unexpected HTTP request exception.")
-        logging.debug(str(err))
+        log.append(("ERROR", "Unexpected HTTP request exception."))
+        log.append(("DEBUG", repr(err)))
     except (IOError, ValueError, KeyError) as err:
-        logging.error("Download failed.")
-        logging.debug(str(err))
+        log.append(("ERROR", "Download failed."))
+        log.append(("DEBUG", repr(err)))
+    finally:
+        if session:
+            session.close()

-    logging.info("    Download finished.")
-    return success
+    log.append(("INFO", "    Download finished."))
+    return success, file_name


-def _unzip_file(spec, job, build):
+def _unzip_file(spec, build, pid, log):
     """Unzip downloaded source file.

     :param spec: Specification read form the specification file.
-    :param job: Name of the Jenkins job.
     :param build: Information about the build.
+    :param log: List of log messages.
     :type spec: Specification
-    :type job: str
     :type build: dict
+    :type log: list of tuples (severity, msg)
     :returns: True if the download was successful, otherwise False.
     :rtype: bool
     """

-    data_file = spec.input["extract"]
     file_name = build["file-name"]
+    if ".zip" in file_name:
+        data_file = spec.input["zip-extract"]
+    else:
+        data_file = spec.input["extract"]
+
     directory = spec.environment["paths"]["DIR[WORKING,DATA]"]
+    tmp_dir = join(directory, str(pid))
+    try:
+        mkdir(tmp_dir)
+    except OSError:
+        pass
     new_name = "{0}{1}{2}".format(file_name.rsplit('.')[-2], SEPARATOR,
                                   data_file.split("/")[-1])
-    logging.info("    Unzipping: '{0}' from '{1}'.".
-                 format(data_file, file_name))
+
+    log.append(("INFO", "    Unzipping: '{0}' from '{1}'.".
+                format(data_file, file_name)))
     try:
         with ZipFile(file_name, 'r') as zip_file:
-            zip_file.extract(data_file, directory)
-        logging.info("    Moving {0} to {1} ...".
-                     format(join(directory, data_file), directory))
-        move(join(directory, data_file), directory)
-        logging.info("    Renaming the file '{0}' to '{1}'".
-                     format(join(directory, data_file.split("/")[-1]),
-                            new_name))
-        rename(join(directory, data_file.split("/")[-1]),
-               new_name)
-        spec.set_input_file_name(job, build["build"],
-                                 new_name)
+            zip_file.extract(data_file, tmp_dir)
+        log.append(("INFO", "    Renaming the file '{0}' to '{1}'".
+                    format(join(tmp_dir, data_file), new_name)))
+        rename(join(tmp_dir, data_file), new_name)
+        build["file-name"] = new_name
         return True
     except (BadZipfile, RuntimeError) as err:
-        logging.error("Failed to unzip the file '{0}': {1}.".
-                      format(file_name, str(err)))
+        log.append(("ERROR", "Failed to unzip the file '{0}': {1}.".
+                    format(file_name, str(err))))
         return False
     except OSError as err:
-        logging.error("Failed to rename the file '{0}': {1}.".
-                      format(data_file, str(err)))
+        log.append(("ERROR", "Failed to rename the file '{0}': {1}.".
+                    format(data_file, str(err))))
         return False


-def download_and_unzip_data_file(spec, job, build):
+def download_and_unzip_data_file(spec, job, build, pid, log):
     """Download and unzip a source file.

     :param spec: Specification read form the specification file.
     :param job: Name of the Jenkins job.
     :param build: Information about the build.
+    :param pid: PID of the process executing this method.
+    :param log: List of log messages.
     :type spec: Specification
     :type job: str
     :type build: dict
+    :type pid: int
+    :type log: list of tuples (severity, msg)
     :returns: True if the download was successful, otherwise False.
     :rtype: bool
     """

-    if job.startswith("csit-"):
-        if spec.input["file-name"].endswith(".zip"):
-            url = spec.environment["urls"]["URL[JENKINS,CSIT]"]
-        elif spec.input["file-name"].endswith(".gz"):
-            url = spec.environment["urls"]["URL[NEXUS,LOG]"]
-        else:
-            logging.error("Not supported file format.")
-            return False
-    elif job.startswith("hc2vpp-"):
-        url = spec.environment["urls"]["URL[JENKINS,HC]"]
-    else:
-        raise PresentationError("No url defined for the job '{}'.".
-                                format(job))
+    # Try to download .gz from logs.fd.io
+
     file_name = spec.input["file-name"]
-    full_name = spec.input["download-path"]. \
-        format(job=job, build=build["build"], filename=file_name)
-    url = "{0}/{1}".format(url, full_name)
+    url = "{0}/{1}".format(
+        spec.environment["urls"]["URL[NEXUS,LOG]"],
+        spec.input["download-path"].format(
+            job=job, build=build["build"], filename=file_name))
     new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
-                    "{job}{sep}{build}{sep}{name}".
-                    format(job=job, sep=SEPARATOR, build=build["build"],
-                           name=file_name))
-    # Download the file from the defined source (Jenkins, logs.fd.io):
-    success = _download_file(url, new_name)
+                    "{job}{sep}{build}{sep}{name}".format(
+                        job=job, sep=SEPARATOR, build=build["build"],
+                        name=file_name))
+
+    logging.info("Trying to download {0}".format(url))
+
+    arch = True if spec.configuration.get("archive-inputs", True) else False
+    success, downloaded_name = _download_file(url, new_name, log, arch=arch)
+
+    if not success:
+
+        # Try to download .gz from docs.fd.io
+
+        file_name = spec.input["file-name"]
+        url = "{0}/{1}".format(
+            spec.environment["urls"]["URL[NEXUS,DOC]"],
+            spec.input["download-path"].format(
+                job=job, build=build["build"], filename=file_name))
+        new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
+                        "{job}{sep}{build}{sep}{name}".format(
+                            job=job, sep=SEPARATOR, build=build["build"],
+                            name=file_name))
+
+        logging.info("Downloading {0}".format(url))
+
+        if spec.configuration.get("archive-inputs", True):
+            arch = True
+        success, downloaded_name = _download_file(url, new_name, log, arch=arch)

-    # If not successful, download from docs.fd.io:
     if not success:
-        logging.info("    Trying to download from https://docs.fd.io:")
+
+        # Try to download .zip from docs.fd.io
+
+        file_name = spec.input["zip-file-name"]
+        new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
+                        "{job}{sep}{build}{sep}{name}".format(
+                            job=job, sep=SEPARATOR, build=build["build"],
+                            name=file_name))
         release = re.search(REGEX_RELEASE, job).group(2)
-        nexus_file_name = "{job}{sep}{build}{sep}{name}". \
-            format(job=job, sep=SEPARATOR, build=build["build"], name=file_name)
-        try:
-            release = "rls{0}".format(int(release))
-        except ValueError:
-            pass
-        url = "{url}/{release}/{dir}/{file}". \
-            format(url=spec.environment["urls"]["URL[NEXUS]"],
-                   release=release,
-                   dir=spec.environment["urls"]["DIR[NEXUS]"],
-                   file=nexus_file_name)
-        success = _download_file(url, new_name)
+        for rls in (release, "master"):
+            nexus_file_name = "{job}{sep}{build}{sep}{name}". \
+                format(job=job, sep=SEPARATOR, build=build["build"],
+                       name=file_name)
+            try:
+                rls = "rls{0}".format(int(rls))
+            except ValueError:
+                # It is 'master'
+                pass
+            url = "{url}/{release}/{dir}/{file}". \
+                format(url=spec.environment["urls"]["URL[NEXUS,DOC]"],
+                       release=rls,
+                       dir=spec.environment["urls"]["DIR[NEXUS,DOC]"],
+                       file=nexus_file_name)

-    if success:
-        spec.set_input_file_name(job, build["build"], new_name)
-    else:
-        return False
+            logging.info("Downloading {0}".format(url))

-    if spec.input["file-name"].endswith(".gz"):
-        if "docs.fd.io" in url:
-            execute_command("gzip --decompress --keep --force {0}".
-                            format(new_name))
-        else:
-            rename(new_name, new_name[:-3])
-            execute_command("gzip --keep {0}".format(new_name[:-3]))
-        spec.set_input_file_name(job, build["build"], new_name[:-3])
+            success, downloaded_name = _download_file(url, new_name, log)
+            if success:
+                break
+
+    if not success:
+
+        # Try to download .zip from jenkins.fd.io

-    if new_name.endswith(".zip"):
-        if is_zipfile(new_name):
-            return _unzip_file(spec, job, build)
+        file_name = spec.input["zip-file-name"]
+        download_path = spec.input["zip-download-path"]
+        if job.startswith("csit-"):
+            url = spec.environment["urls"]["URL[JENKINS,CSIT]"]
+        elif job.startswith("hc2vpp-"):
+            url = spec.environment["urls"]["URL[JENKINS,HC]"]
         else:
-            return False
-    else:
-        return True
+            raise PresentationError(
+                "No url defined for the job '{}'.".format(job))
+
+        full_name = download_path.format(
+            job=job, build=build["build"], filename=file_name)
+        url = "{0}/{1}".format(url, full_name)
+        new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
+                        "{job}{sep}{build}{sep}{name}".
+                        format(job=job, sep=SEPARATOR, build=build["build"],
+                               name=file_name))
+
+        logging.info("Downloading {0}".format(url))
+
+        success, downloaded_name = _download_file(url, new_name, log)
+
+    if success and downloaded_name.endswith(".zip"):
+        if not is_zipfile(downloaded_name):
+            log.append(("ERROR",
+                        "Zip file '{0}' is corrupted.".format(new_name)))
+            success = False
+
+    if success:
+        build["file-name"] = downloaded_name
+
+        if file_name.endswith(".gz"):
+            build["file-name"] = downloaded_name[:-3]
+
+        if downloaded_name.endswith(".zip"):
+            success = _unzip_file(spec, build, pid, log)
+
+    return success
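Note: the retry behaviour introduced by this change is the standard requests/urllib3 mounted-adapter pattern (Retry + HTTPAdapter on a Session), as used by the new requests_retry_session helper above. A minimal standalone sketch of the same idea follows; the helper name make_retry_session and the example URL are illustrative only and are not part of the patch.

import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry


def make_retry_session(retries=3, backoff_factor=0.3,
                       status_forcelist=(500, 502, 504)):
    # Retry failed connects/reads and the listed HTTP status codes,
    # backing off between attempts, on both http and https.
    retry = Retry(total=retries, read=retries, connect=retries,
                  backoff_factor=backoff_factor,
                  status_forcelist=status_forcelist)
    adapter = HTTPAdapter(max_retries=retry)
    session = requests.Session()
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session


# Example use (placeholder URL, not a real job artifact):
session = make_retry_session()
response = session.get("https://example.org/output.xml.gz", stream=True)
print(response.status_code)
session.close()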