resources/tools/presentation/input_data_files.py

   1 # Copyright (c) 2018 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Inputs
  15 Download all data.
  16 """
  17
  18 import re
  19 import requests
  20 import logging
  21
  22 from os import rename, mkdir
  23 from os.path import join
  24 from zipfile import ZipFile, is_zipfile, BadZipfile
  25 from httplib import responses
  26 from requests.adapters import HTTPAdapter
  27 from requests.packages.urllib3.util.retry import Retry
  28 from requests import codes, RequestException, Timeout, TooManyRedirects, \
  29     HTTPError, ConnectionError
  30
  31 from errors import PresentationError
  32
  33
  34 # Chunk size used for file download
  35 CHUNK_SIZE = 512
  36
  37 # Separator used in file names
  38 SEPARATOR = "__"
  39
  40 REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)')
  41
  42
  43 def _download_file(url, file_name, log, arch=False):
  44     """Download a file with input data.
  45
  46     :param url: URL to the file to download.
  47     :param file_name: Name of file to download.
  48     :param log: List of log messages.
  49     :param arch: If True, also .gz file is downloaded
  50     :type url: str
  51     :type file_name: str
  52     :type log: list of tuples (severity, msg)
  53     :type arch: bool
  54     :returns: True if the download was successful, otherwise False.
  55     :rtype: bool
  56     """
  57
  58     def requests_retry_session(retries=3,
  59                                backoff_factor=0.3,
  60                                status_forcelist=(500, 502, 504)):
  61         """
  62
  63         :param retries: Total number of retries to allow.
  64         :param backoff_factor: A backoff factor to apply between attempts after
  65             the second try.
  66         :param status_forcelist: A set of integer HTTP status codes that are
  67             forced to retry.
  68         :type retries: int
  69         :type backoff_factor: float
  70         :type status_forcelist: iterable
  71         :returns: Session object.
  72         :rtype: requests.Session
  73         """
  74
  75         retry = Retry(
  76             total=retries,
  77             read=retries,
  78             connect=retries,
  79             backoff_factor=backoff_factor,
  80             status_forcelist=status_forcelist,
  81         )
  82         adapter = HTTPAdapter(max_retries=retry)
  83         session = requests.Session()
  84         session.mount('http://', adapter)
  85         session.mount('https://', adapter)
  86         return session
  87
  88     success = False
  89     try:
  90         log.append(("INFO", "    Connecting to '{0}' ...".format(url)))
  91         response = requests_retry_session().get(url, stream=True)
  92         code = response.status_code
  93         log.append(("INFO", "    {0}: {1}".format(code, responses[code])))
  94
  95         if code != codes["OK"]:
  96             url = url.replace("_info", "")
  97             log.append(("INFO", "    Connecting to '{0}' ...".format(url)))
  98             response = requests_retry_session().get(url, stream=True)
  99             code = response.status_code
 100             log.append(("INFO", "    {0}: {1}".format(code, responses[code])))
 101             if code != codes["OK"]:
 102                 return False, file_name
 103             file_name = file_name.replace("_info", "")
 104
 105         log.append(("INFO", "    Downloading the file '{0}' to '{1}' ...".
 106                     format(url, file_name)))
 107
 108         dst_file_name = file_name.replace(".gz", "")
 109         with open(dst_file_name, "wb") as file_handle:
 110             for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
 111                 if chunk:
 112                     file_handle.write(chunk)
 113
 114         if arch and ".gz" in file_name:
 115             with open(file_name, "wb") as file_handle:
 116                 file_handle.write(response.raw.read())
 117
 118         success = True
 119     except ConnectionError as err:
 120         log.append(("ERROR", "Not possible to connect to '{0}'.".format(url)))
 121         log.append(("DEBUG", repr(err)))
 122     except HTTPError as err:
 123         log.append(("ERROR", "Invalid HTTP response from '{0}'.".format(url)))
 124         log.append(("DEBUG", repr(err)))
 125     except TooManyRedirects as err:
 126         log.append(("ERROR", "Request exceeded the configured number "
 127                              "of maximum re-directions."))
 128         log.append(("DEBUG", repr(err)))
 129     except Timeout as err:
 130         log.append(("ERROR", "Request timed out."))
 131         log.append(("DEBUG", repr(err)))
 132     except RequestException as err:
 133         log.append(("ERROR", "Unexpected HTTP request exception."))
 134         log.append(("DEBUG", repr(err)))
 135     except (IOError, ValueError, KeyError) as err:
 136         log.append(("ERROR", "Download failed."))
 137         log.append(("DEBUG", repr(err)))
 138
 139     log.append(("INFO", "    Download finished."))
 140     return success, file_name
 141
 142
 143 def _unzip_file(spec, build, pid, log):
 144     """Unzip downloaded source file.
 145
 146     :param spec: Specification read form the specification file.
 147     :param build: Information about the build.
 148     :param log: List of log messages.
 149     :type spec: Specification
 150     :type build: dict
 151     :type log: list of tuples (severity, msg)
 152     :returns: True if the download was successful, otherwise False.
 153     :rtype: bool
 154     """
 155
 156     file_name = build["file-name"]
 157     if ".zip" in file_name:
 158         data_file = spec.input["zip-extract"]
 159     else:
 160         data_file = spec.input["extract"]
 161
 162     directory = spec.environment["paths"]["DIR[WORKING,DATA]"]
 163     tmp_dir = join(directory, str(pid))
 164     try:
 165         mkdir(tmp_dir)
 166     except OSError:
 167         pass
 168     new_name = "{0}{1}{2}".format(file_name.rsplit('.')[-2],
 169                                   SEPARATOR,
 170                                   data_file.split("/")[-1])
 171
 172     log.append(("INFO", "    Unzipping: '{0}' from '{1}'.".
 173                 format(data_file, file_name)))
 174     try:
 175         with ZipFile(file_name, 'r') as zip_file:
 176             zip_file.extract(data_file, tmp_dir)
 177         log.append(("INFO", "    Renaming the file '{0}' to '{1}'".
 178                     format(join(tmp_dir, data_file), new_name)))
 179         rename(join(tmp_dir, data_file), new_name)
 180         build["file-name"] = new_name
 181         return True
 182     except (BadZipfile, RuntimeError) as err:
 183         log.append(("ERROR", "Failed to unzip the file '{0}': {1}.".
 184                     format(file_name, str(err))))
 185         return False
 186     except OSError as err:
 187         log.append(("ERROR", "Failed to rename the file '{0}': {1}.".
 188                     format(data_file, str(err))))
 189         return False
 190
 191
 192 def download_and_unzip_data_file(spec, job, build, pid, log):
 193     """Download and unzip a source file.
 194
 195     :param spec: Specification read form the specification file.
 196     :param job: Name of the Jenkins job.
 197     :param build: Information about the build.
 198     :param pid: PID of the process executing this method.
 199     :param log: List of log messages.
 200     :type spec: Specification
 201     :type job: str
 202     :type build: dict
 203     :type pid: int
 204     :type log: list of tuples (severity, msg)
 205     :returns: True if the download was successful, otherwise False.
 206     :rtype: bool
 207     """
 208
 209     # Try to download .gz from logs.fd.io
 210
 211     file_name = spec.input["file-name"]
 212     url = "{0}/{1}".format(
 213         spec.environment["urls"]["URL[NEXUS,LOG]"],
 214         spec.input["download-path"].format(
 215             job=job, build=build["build"], filename=file_name))
 216     new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
 217                     "{job}{sep}{build}{sep}{name}".format(
 218                         job=job, sep=SEPARATOR, build=build["build"],
 219                         name=file_name))
 220
 221     logging.info("Downloading {0}".format(url))
 222
 223     arch = True if spec.configuration.get("archive-inputs", True) else False
 224     success, downloaded_name = _download_file(url, new_name, log, arch=arch)
 225
 226     if not success:
 227
 228         # Try to download .gz from docs.fd.io
 229
 230         file_name = spec.input["file-name"]
 231         url = "{0}/{1}".format(
 232             spec.environment["urls"]["URL[NEXUS,DOC]"],
 233             spec.input["download-path"].format(
 234                 job=job, build=build["build"], filename=file_name))
 235         new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
 236                         "{job}{sep}{build}{sep}{name}".format(
 237                             job=job, sep=SEPARATOR, build=build["build"],
 238                             name=file_name))
 239
 240         logging.info("Downloading {0}".format(url))
 241
 242         if spec.configuration.get("archive-inputs", True):
 243             arch = True
 244         success, downloaded_name = _download_file(url, new_name, log, arch=arch)
 245
 246     if not success:
 247
 248         # Try to download .zip from docs.fd.io
 249
 250         file_name = spec.input["zip-file-name"]
 251         new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
 252                         "{job}{sep}{build}{sep}{name}".format(
 253                             job=job, sep=SEPARATOR, build=build["build"],
 254                             name=file_name))
 255         release = re.search(REGEX_RELEASE, job).group(2)
 256         for rls in (release, "master"):
 257             nexus_file_name = "{job}{sep}{build}{sep}{name}". \
 258                 format(job=job, sep=SEPARATOR, build=build["build"],
 259                        name=file_name)
 260             try:
 261                 rls = "rls{0}".format(int(rls))
 262             except ValueError:
 263                 # It is 'master'
 264                 pass
 265             url = "{url}/{release}/{dir}/{file}". \
 266                 format(url=spec.environment["urls"]["URL[NEXUS,DOC]"],
 267                        release=rls,
 268                        dir=spec.environment["urls"]["DIR[NEXUS,DOC]"],
 269                        file=nexus_file_name)
 270
 271             logging.info("Downloading {0}".format(url))
 272
 273             success, downloaded_name = _download_file(url, new_name, log)
 274             if success:
 275                 break
 276
 277     if not success:
 278
 279         # Try to download .zip from jenkins.fd.io
 280
 281         file_name = spec.input["zip-file-name"]
 282         download_path = spec.input["zip-download-path"]
 283         if job.startswith("csit-"):
 284             url = spec.environment["urls"]["URL[JENKINS,CSIT]"]
 285         elif job.startswith("hc2vpp-"):
 286             url = spec.environment["urls"]["URL[JENKINS,HC]"]
 287         else:
 288             raise PresentationError(
 289                 "No url defined for the job '{}'.".format(job))
 290
 291         full_name = download_path.format(
 292             job=job, build=build["build"], filename=file_name)
 293         url = "{0}/{1}".format(url, full_name)
 294         new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
 295                         "{job}{sep}{build}{sep}{name}".
 296                         format(job=job, sep=SEPARATOR, build=build["build"],
 297                                name=file_name))
 298
 299         logging.info("Downloading {0}".format(url))
 300
 301         success, downloaded_name = _download_file(url, new_name, log)
 302
 303     if success and downloaded_name.endswith(".zip"):
 304         if not is_zipfile(downloaded_name):
 305             log.append(("ERROR",
 306                         "Zip file '{0}' is corrupted.".format(new_name)))
 307             success = False
 308
 309     if success:
 310         build["file-name"] = downloaded_name
 311
 312         if file_name.endswith(".gz"):
 313             build["file-name"] = downloaded_name[:-3]
 314
 315         if downloaded_name.endswith(".zip"):
 316             success = _unzip_file(spec, build, pid, log)
 317
 318     return success