X-Git-Url: https://gerrit.fd.io/r/gitweb?p=csit.git;a=blobdiff_plain;f=resources%2Ftools%2Fpresentation%2Finput_data_files.py;h=441a240d79d8e7b95dc99c37b46f760e29de14c3;hp=d90f113edb757169ce37cd7364d6dfbe09e7fa0c;hb=9e476b274acf41b5ac358bc4dba3a0f1e16f04b2;hpb=b55e3999a15c04b77039e1df2946d8ba02088de4 diff --git a/resources/tools/presentation/input_data_files.py b/resources/tools/presentation/input_data_files.py index d90f113edb..441a240d79 100644 --- a/resources/tools/presentation/input_data_files.py +++ b/resources/tools/presentation/input_data_files.py @@ -16,17 +16,20 @@ Download all data. """ import re +import requests import logging from os import rename, mkdir from os.path import join from zipfile import ZipFile, is_zipfile, BadZipfile from httplib import responses -from requests import get, codes, RequestException, Timeout, TooManyRedirects, \ +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry +from requests import codes, RequestException, Timeout, TooManyRedirects, \ HTTPError, ConnectionError from errors import PresentationError -from utils import execute_command + # Chunk size used for file download CHUNK_SIZE = 512 @@ -37,62 +40,120 @@ SEPARATOR = "__" REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)') -def _download_file(url, file_name, log): +def _download_file(url, file_name, log, arch=False): """Download a file with input data. :param url: URL to the file to download. :param file_name: Name of file to download. :param log: List of log messages. + :param arch: If True, also .gz file is downloaded :type url: str :type file_name: str :type log: list of tuples (severity, msg) + :type arch: bool :returns: True if the download was successful, otherwise False. :rtype: bool """ + def requests_retry_session(retries=3, + backoff_factor=0.3, + status_forcelist=(500, 502, 504)): + """ + + :param retries: Total number of retries to allow. + :param backoff_factor: A backoff factor to apply between attempts after + the second try. + :param status_forcelist: A set of integer HTTP status codes that are + forced to retry. + :type retries: int + :type backoff_factor: float + :type status_forcelist: iterable + :returns: Session object. + :rtype: requests.Session + """ + + retry = Retry( + total=retries, + read=retries, + connect=retries, + backoff_factor=backoff_factor, + status_forcelist=status_forcelist, + ) + adapter = HTTPAdapter(max_retries=retry) + session = requests.Session() + session.mount('http://', adapter) + session.mount('https://', adapter) + return session + success = False try: log.append(("INFO", " Connecting to '{0}' ...".format(url))) - - response = get(url, stream=True) + session = requests_retry_session() + response = session.get(url, stream=True) code = response.status_code - log.append(("INFO", " {0}: {1}".format(code, responses[code]))) if code != codes["OK"]: - return False - + if session: + session.close() + url = url.replace("_info", "") + log.append(("INFO", " Connecting to '{0}' ...".format(url))) + session = requests_retry_session() + response = session.get(url, stream=True) + code = response.status_code + log.append(("INFO", " {0}: {1}".format(code, responses[code]))) + if code != codes["OK"]: + return False, file_name + file_name = file_name.replace("_info", "") + + dst_file_name = file_name.replace(".gz", "") log.append(("INFO", " Downloading the file '{0}' to '{1}' ...". - format(url, file_name))) + format(url, dst_file_name))) + with open(dst_file_name, "wb") as file_handle: + for chunk in response.iter_content(chunk_size=CHUNK_SIZE): + if chunk: + file_handle.write(chunk) + + if arch and ".gz" in file_name: + if session: + session.close() + log.append(("INFO", " Downloading the file '{0}' to '{1}' ...". + format(url, file_name))) + session = requests_retry_session() + response = session.get(url, stream=True) + if response.status_code == codes["OK"]: + with open(file_name, "wb") as file_handle: + file_handle.write(response.raw.read()) + else: + log.append(("ERROR", "Not possible to download the file '{0}' " + "to '{1}' ...".format(url, file_name))) - file_handle = open(file_name, "wb") - for chunk in response.iter_content(chunk_size=CHUNK_SIZE): - if chunk: - file_handle.write(chunk) - file_handle.close() success = True except ConnectionError as err: log.append(("ERROR", "Not possible to connect to '{0}'.".format(url))) - log.append(("DEBUG", str(err))) + log.append(("DEBUG", repr(err))) except HTTPError as err: log.append(("ERROR", "Invalid HTTP response from '{0}'.".format(url))) - log.append(("DEBUG", str(err))) + log.append(("DEBUG", repr(err))) except TooManyRedirects as err: log.append(("ERROR", "Request exceeded the configured number " "of maximum re-directions.")) - log.append(("DEBUG", str(err))) + log.append(("DEBUG", repr(err))) except Timeout as err: log.append(("ERROR", "Request timed out.")) - log.append(("DEBUG", str(err))) + log.append(("DEBUG", repr(err))) except RequestException as err: log.append(("ERROR", "Unexpected HTTP request exception.")) - log.append(("DEBUG", str(err))) + log.append(("DEBUG", repr(err))) except (IOError, ValueError, KeyError) as err: log.append(("ERROR", "Download failed.")) - log.append(("DEBUG", str(err))) + log.append(("DEBUG", repr(err))) + finally: + if session: + session.close() log.append(("INFO", " Download finished.")) - return success + return success, file_name def _unzip_file(spec, build, pid, log): @@ -108,8 +169,12 @@ def _unzip_file(spec, build, pid, log): :rtype: bool """ - data_file = spec.input["extract"] file_name = build["file-name"] + if ".zip" in file_name: + data_file = spec.input["zip-extract"] + else: + data_file = spec.input["extract"] + directory = spec.environment["paths"]["DIR[WORKING,DATA]"] tmp_dir = join(directory, str(pid)) try: @@ -157,45 +222,52 @@ def download_and_unzip_data_file(spec, job, build, pid, log): :rtype: bool """ - if job.startswith("csit-"): - if spec.input["file-name"].endswith(".zip"): - url = spec.environment["urls"]["URL[JENKINS,CSIT]"] - elif spec.input["file-name"].endswith(".gz"): - url = spec.environment["urls"]["URL[NEXUS,LOG]"] - else: - log.append(("ERROR", "Not supported file format.")) - return False - elif job.startswith("hc2vpp-"): - url = spec.environment["urls"]["URL[JENKINS,HC]"] - elif job.startswith("intel-dnv-"): - url = spec.environment["urls"]["URL[VIRL,DNV]"] - else: - raise PresentationError("No url defined for the job '{}'.". - format(job)) + # Try to download .gz from logs.fd.io + file_name = spec.input["file-name"] - full_name = spec.input["download-path"]. \ - format(job=job, build=build["build"], filename=file_name) - if not job.startswith("intel-dnv-"): - url = "{0}/{1}".format(url, full_name) + url = "{0}/{1}".format( + spec.environment["urls"]["URL[NEXUS,LOG]"], + spec.input["download-path"].format( + job=job, build=build["build"], filename=file_name)) new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"], - "{job}{sep}{build}{sep}{name}". - format(job=job, sep=SEPARATOR, build=build["build"], - name=file_name)) + "{job}{sep}{build}{sep}{name}".format( + job=job, sep=SEPARATOR, build=build["build"], + name=file_name)) + + logging.info("Trying to download {0}".format(url)) - logging.info(new_name) + arch = True if spec.configuration.get("archive-inputs", True) else False + success, downloaded_name = _download_file(url, new_name, log, arch=arch) - # Download the file from the defined source (Jenkins, logs.fd.io): - success = _download_file(url, new_name, log) + if not success: - logging.info("{}: {}".format(url, success)) + # Try to download .gz from docs.fd.io - if success and new_name.endswith(".zip"): - if not is_zipfile(new_name): - success = False + file_name = spec.input["file-name"] + url = "{0}/{1}".format( + spec.environment["urls"]["URL[NEXUS,DOC]"], + spec.input["download-path"].format( + job=job, build=build["build"], filename=file_name)) + new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"], + "{job}{sep}{build}{sep}{name}".format( + job=job, sep=SEPARATOR, build=build["build"], + name=file_name)) + + logging.info("Downloading {0}".format(url)) + + if spec.configuration.get("archive-inputs", True): + arch = True + success, downloaded_name = _download_file(url, new_name, log, arch=arch) - # If not successful, download from docs.fd.io: if not success: - log.append(("INFO", " Trying to download from https://docs.fd.io:")) + + # Try to download .zip from docs.fd.io + + file_name = spec.input["zip-file-name"] + new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"], + "{job}{sep}{build}{sep}{name}".format( + job=job, sep=SEPARATOR, build=build["build"], + name=file_name)) release = re.search(REGEX_RELEASE, job).group(2) for rls in (release, "master"): nexus_file_name = "{job}{sep}{build}{sep}{name}". \ @@ -204,36 +276,59 @@ def download_and_unzip_data_file(spec, job, build, pid, log): try: rls = "rls{0}".format(int(rls)) except ValueError: + # It is 'master' pass url = "{url}/{release}/{dir}/{file}". \ - format(url=spec.environment["urls"]["URL[NEXUS]"], + format(url=spec.environment["urls"]["URL[NEXUS,DOC]"], release=rls, - dir=spec.environment["urls"]["DIR[NEXUS]"], + dir=spec.environment["urls"]["DIR[NEXUS,DOC]"], file=nexus_file_name) - success = _download_file(url, new_name, log) + + logging.info("Downloading {0}".format(url)) + + success, downloaded_name = _download_file(url, new_name, log) if success: break - if success: - build["file-name"] = new_name - else: - return False + if not success: - if spec.input["file-name"].endswith(".gz"): - if "docs.fd.io" in url: - execute_command("gzip --decompress --keep --force {0}". - format(new_name)) - else: - rename(new_name, new_name[:-3]) - execute_command("gzip --keep {0}".format(new_name[:-3])) - build["file-name"] = new_name[:-3] + # Try to download .zip from jenkins.fd.io - if new_name.endswith(".zip"): - if is_zipfile(new_name): - return _unzip_file(spec, build, pid, log) + file_name = spec.input["zip-file-name"] + download_path = spec.input["zip-download-path"] + if job.startswith("csit-"): + url = spec.environment["urls"]["URL[JENKINS,CSIT]"] + elif job.startswith("hc2vpp-"): + url = spec.environment["urls"]["URL[JENKINS,HC]"] else: + raise PresentationError( + "No url defined for the job '{}'.".format(job)) + + full_name = download_path.format( + job=job, build=build["build"], filename=file_name) + url = "{0}/{1}".format(url, full_name) + new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"], + "{job}{sep}{build}{sep}{name}". + format(job=job, sep=SEPARATOR, build=build["build"], + name=file_name)) + + logging.info("Downloading {0}".format(url)) + + success, downloaded_name = _download_file(url, new_name, log) + + if success and downloaded_name.endswith(".zip"): + if not is_zipfile(downloaded_name): log.append(("ERROR", "Zip file '{0}' is corrupted.".format(new_name))) - return False - else: - return True + success = False + + if success: + build["file-name"] = downloaded_name + + if file_name.endswith(".gz"): + build["file-name"] = downloaded_name[:-3] + + if downloaded_name.endswith(".zip"): + success = _unzip_file(spec, build, pid, log) + + return success