resources/tools/presentation/input_data_files.py

   1 # Copyright (c) 2018 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Inputs
  15 Download all data.
  16 """
  17
  18 import re
  19 import requests
  20 import logging
  21
  22 from os import rename, mkdir
  23 from os.path import join
  24 from zipfile import ZipFile, is_zipfile, BadZipfile
  25 from httplib import responses
  26 from requests.adapters import HTTPAdapter
  27 from requests.packages.urllib3.util.retry import Retry
  28 from requests import codes, RequestException, Timeout, TooManyRedirects, \
  29     HTTPError, ConnectionError
  30
  31 from errors import PresentationError
  32
  33
# Chunk size used for file download; passed as chunk_size to
# response.iter_content() when the payload is written to disk.
CHUNK_SIZE = 512

# Separator used in file names; it joins the job name, the build number and
# the name of the downloaded or extracted file.
SEPARATOR = "__"

# Applied to the Jenkins job name to find its release: group 2 is either four
# consecutive digits or the literal "master", groups 1 and 3 are the
# (possibly empty) runs of non-digit characters around it.
REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)')
  41
  42
  43 def _download_file(url, file_name, log, arch=False):
  44     """Download a file with input data.
  45
  46     :param url: URL to the file to download.
  47     :param file_name: Name of file to download.
  48     :param log: List of log messages.
  49     :param arch: If True, also .gz file is downloaded
  50     :type url: str
  51     :type file_name: str
  52     :type log: list of tuples (severity, msg)
  53     :type arch: bool
  54     :returns: True if the download was successful, otherwise False.
  55     :rtype: bool
  56     """
  57
  58     def requests_retry_session(retries=3,
  59                                backoff_factor=0.3,
  60                                status_forcelist=(500, 502, 504)):
  61         """
  62
  63         :param retries: Total number of retries to allow.
  64         :param backoff_factor: A backoff factor to apply between attempts after
  65             the second try.
  66         :param status_forcelist: A set of integer HTTP status codes that are
  67             forced to retry.
  68         :type retries: int
  69         :type backoff_factor: float
  70         :type status_forcelist: iterable
  71         :returns: Session object.
  72         :rtype: requests.Session
  73         """
  74
  75         retry = Retry(
  76             total=retries,
  77             read=retries,
  78             connect=retries,
  79             backoff_factor=backoff_factor,
  80             status_forcelist=status_forcelist,
  81         )
  82         adapter = HTTPAdapter(max_retries=retry)
  83         session = requests.Session()
  84         session.mount('http://', adapter)
  85         session.mount('https://', adapter)
  86         return session
  87
  88     success = False
  89     session = None
  90     try:
  91         log.append(("INFO", "    Connecting to '{0}' ...".format(url)))
  92         session = requests_retry_session()
  93         response = session.get(url, stream=True)
  94         code = response.status_code
  95         log.append(("INFO", "    {0}: {1}".format(code, responses[code])))
  96
  97         if code != codes["OK"]:
  98             if session:
  99                 session.close()
 100             url = url.replace("_info", "")
 101             log.append(("INFO", "    Connecting to '{0}' ...".format(url)))
 102             session = requests_retry_session()
 103             response = session.get(url, stream=True)
 104             code = response.status_code
 105             log.append(("INFO", "    {0}: {1}".format(code, responses[code])))
 106             if code != codes["OK"]:
 107                 return False, file_name
 108             file_name = file_name.replace("_info", "")
 109
 110         dst_file_name = file_name.replace(".gz", "")
 111         log.append(("INFO", "    Downloading the file '{0}' to '{1}' ...".
 112                     format(url, dst_file_name)))
 113         with open(dst_file_name, "wb") as file_handle:
 114             for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
 115                 if chunk:
 116                     file_handle.write(chunk)
 117
 118         if arch and ".gz" in file_name:
 119             if session:
 120                 session.close()
 121             log.append(("INFO", "    Downloading the file '{0}' to '{1}' ...".
 122                         format(url, file_name)))
 123             session = requests_retry_session()
 124             response = session.get(url, stream=True)
 125             if response.status_code == codes["OK"]:
 126                 with open(file_name, "wb") as file_handle:
 127                     file_handle.write(response.raw.read())
 128             else:
 129                 log.append(("ERROR", "Not possible to download the file '{0}' "
 130                                      "to '{1}' ...".format(url, file_name)))
 131
 132         success = True
 133     except ConnectionError as err:
 134         log.append(("ERROR", "Not possible to connect to '{0}'.".format(url)))
 135         log.append(("DEBUG", repr(err)))
 136     except HTTPError as err:
 137         log.append(("ERROR", "Invalid HTTP response from '{0}'.".format(url)))
 138         log.append(("DEBUG", repr(err)))
 139     except TooManyRedirects as err:
 140         log.append(("ERROR", "Request exceeded the configured number "
 141                              "of maximum re-directions."))
 142         log.append(("DEBUG", repr(err)))
 143     except Timeout as err:
 144         log.append(("ERROR", "Request timed out."))
 145         log.append(("DEBUG", repr(err)))
 146     except RequestException as err:
 147         log.append(("ERROR", "Unexpected HTTP request exception."))
 148         log.append(("DEBUG", repr(err)))
 149     except (IOError, ValueError, KeyError) as err:
 150         log.append(("ERROR", "Download failed."))
 151         log.append(("DEBUG", repr(err)))
 152     finally:
 153         if session:
 154             session.close()
 155
 156     log.append(("INFO", "    Download finished."))
 157     return success, file_name
 158
 159
 160 def _unzip_file(spec, build, pid, log):
 161     """Unzip downloaded source file.
 162
 163     :param spec: Specification read form the specification file.
 164     :param build: Information about the build.
 165     :param log: List of log messages.
 166     :type spec: Specification
 167     :type build: dict
 168     :type log: list of tuples (severity, msg)
 169     :returns: True if the download was successful, otherwise False.
 170     :rtype: bool
 171     """
 172
 173     file_name = build["file-name"]
 174     if ".zip" in file_name:
 175         data_file = spec.input["zip-extract"]
 176     else:
 177         data_file = spec.input["extract"]
 178
 179     directory = spec.environment["paths"]["DIR[WORKING,DATA]"]
 180     tmp_dir = join(directory, str(pid))
 181     try:
 182         mkdir(tmp_dir)
 183     except OSError:
 184         pass
 185     new_name = "{0}{1}{2}".format(file_name.rsplit('.')[-2],
 186                                   SEPARATOR,
 187                                   data_file.split("/")[-1])
 188
 189     log.append(("INFO", "    Unzipping: '{0}' from '{1}'.".
 190                 format(data_file, file_name)))
 191     try:
 192         with ZipFile(file_name, 'r') as zip_file:
 193             zip_file.extract(data_file, tmp_dir)
 194         log.append(("INFO", "    Renaming the file '{0}' to '{1}'".
 195                     format(join(tmp_dir, data_file), new_name)))
 196         rename(join(tmp_dir, data_file), new_name)
 197         build["file-name"] = new_name
 198         return True
 199     except (BadZipfile, RuntimeError) as err:
 200         log.append(("ERROR", "Failed to unzip the file '{0}': {1}.".
 201                     format(file_name, str(err))))
 202         return False
 203     except OSError as err:
 204         log.append(("ERROR", "Failed to rename the file '{0}': {1}.".
 205                     format(data_file, str(err))))
 206         return False
 207
 208
 209 def download_and_unzip_data_file(spec, job, build, pid, log):
 210     """Download and unzip a source file.
 211
 212     :param spec: Specification read form the specification file.
 213     :param job: Name of the Jenkins job.
 214     :param build: Information about the build.
 215     :param pid: PID of the process executing this method.
 216     :param log: List of log messages.
 217     :type spec: Specification
 218     :type job: str
 219     :type build: dict
 220     :type pid: int
 221     :type log: list of tuples (severity, msg)
 222     :returns: True if the download was successful, otherwise False.
 223     :rtype: bool
 224     """
 225
 226     # Try to download .gz from logs.fd.io
 227
 228     file_name = spec.input["file-name"]
 229     url = "{0}/{1}".format(
 230         spec.environment["urls"]["URL[NEXUS,LOG]"],
 231         spec.input["download-path"].format(
 232             job=job, build=build["build"], filename=file_name))
 233     new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
 234                     "{job}{sep}{build}{sep}{name}".format(
 235                         job=job, sep=SEPARATOR, build=build["build"],
 236                         name=file_name))
 237
 238     logging.info("Trying to download {0}".format(url))
 239
 240     arch = True if spec.configuration.get("archive-inputs", True) else False
 241     success, downloaded_name = _download_file(url, new_name, log, arch=arch)
 242
 243     if not success:
 244
 245         # Try to download .gz from docs.fd.io
 246
 247         file_name = spec.input["file-name"]
 248         url = "{0}/{1}".format(
 249             spec.environment["urls"]["URL[NEXUS,DOC]"],
 250             spec.input["download-path"].format(
 251                 job=job, build=build["build"], filename=file_name))
 252         new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
 253                         "{job}{sep}{build}{sep}{name}".format(
 254                             job=job, sep=SEPARATOR, build=build["build"],
 255                             name=file_name))
 256
 257         logging.info("Downloading {0}".format(url))
 258
 259         if spec.configuration.get("archive-inputs", True):
 260             arch = True
 261         success, downloaded_name = _download_file(url, new_name, log, arch=arch)
 262
 263     if not success:
 264
 265         # Try to download .zip from docs.fd.io
 266
 267         file_name = spec.input["zip-file-name"]
 268         new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
 269                         "{job}{sep}{build}{sep}{name}".format(
 270                             job=job, sep=SEPARATOR, build=build["build"],
 271                             name=file_name))
 272         release = re.search(REGEX_RELEASE, job).group(2)
 273         for rls in (release, "master"):
 274             nexus_file_name = "{job}{sep}{build}{sep}{name}". \
 275                 format(job=job, sep=SEPARATOR, build=build["build"],
 276                        name=file_name)
 277             try:
 278                 rls = "rls{0}".format(int(rls))
 279             except ValueError:
 280                 # It is 'master'
 281                 pass
 282             url = "{url}/{release}/{dir}/{file}". \
 283                 format(url=spec.environment["urls"]["URL[NEXUS,DOC]"],
 284                        release=rls,
 285                        dir=spec.environment["urls"]["DIR[NEXUS,DOC]"],
 286                        file=nexus_file_name)
 287
 288             logging.info("Downloading {0}".format(url))
 289
 290             success, downloaded_name = _download_file(url, new_name, log)
 291             if success:
 292                 break
 293
 294     if not success:
 295
 296         # Try to download .zip from jenkins.fd.io
 297
 298         file_name = spec.input["zip-file-name"]
 299         download_path = spec.input["zip-download-path"]
 300         if job.startswith("csit-"):
 301             url = spec.environment["urls"]["URL[JENKINS,CSIT]"]
 302         elif job.startswith("hc2vpp-"):
 303             url = spec.environment["urls"]["URL[JENKINS,HC]"]
 304         else:
 305             raise PresentationError(
 306                 "No url defined for the job '{}'.".format(job))
 307
 308         full_name = download_path.format(
 309             job=job, build=build["build"], filename=file_name)
 310         url = "{0}/{1}".format(url, full_name)
 311         new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
 312                         "{job}{sep}{build}{sep}{name}".
 313                         format(job=job, sep=SEPARATOR, build=build["build"],
 314                                name=file_name))
 315
 316         logging.info("Downloading {0}".format(url))
 317
 318         success, downloaded_name = _download_file(url, new_name, log)
 319
 320     if success and downloaded_name.endswith(".zip"):
 321         if not is_zipfile(downloaded_name):
 322             log.append(("ERROR",
 323                         "Zip file '{0}' is corrupted.".format(new_name)))
 324             success = False
 325
 326     if success:
 327         build["file-name"] = downloaded_name
 328
 329         if file_name.endswith(".gz"):
 330             build["file-name"] = downloaded_name[:-3]
 331
 332         if downloaded_name.endswith(".zip"):
 333             success = _unzip_file(spec, build, pid, log)
 334
 335     return success