resources/tools/presentation/input_data_files.py

   1 # Copyright (c) 2018 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Inputs
  15 Download all data.
  16 """
  17
  18 import re
  19 import requests
  20 import logging
  21
  22 from os import rename, mkdir
  23 from os.path import join
  24 from zipfile import ZipFile, is_zipfile, BadZipfile
  25 from httplib import responses
  26 from requests.adapters import HTTPAdapter
  27 from requests.packages.urllib3.util.retry import Retry
  28 from requests import codes, RequestException, Timeout, TooManyRedirects, \
  29     HTTPError, ConnectionError
  30
  31 from errors import PresentationError
  32
  33
  34 # Chunk size (in bytes) passed to iter_content() when a download is written to disk
  35 CHUNK_SIZE = 512
  36
  37 # Separator placed between the job, build and file name parts of local file names
  38 SEPARATOR = "__"
  39 # Searched for in Jenkins job names; group 2 is a four-digit release or 'master'
  40 REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)')
  41
  42
  43 def _download_file(url, file_name, log, arch=False):
  44     """Download a file with input data.
  45
  46     :param url: URL to the file to download.
  47     :param file_name: Name of file to download.
  48     :param log: List of log messages.
  49     :param arch: If True, also .gz file is downloaded
  50     :type url: str
  51     :type file_name: str
  52     :type log: list of tuples (severity, msg)
  53     :type arch: bool
  54     :returns: True if the download was successful, otherwise False.
  55     :rtype: bool
  56     """
  57
  58     def requests_retry_session(retries=3,
  59                                backoff_factor=0.3,
  60                                status_forcelist=(500, 502, 504)):
  61         """
  62
  63         :param retries: Total number of retries to allow.
  64         :param backoff_factor: A backoff factor to apply between attempts after
  65             the second try.
  66         :param status_forcelist: A set of integer HTTP status codes that are
  67             forced to retry.
  68         :type retries: int
  69         :type backoff_factor: float
  70         :type status_forcelist: iterable
  71         :returns: Session object.
  72         :rtype: requests.Session
  73         """
  74
  75         retry = Retry(
  76             total=retries,
  77             read=retries,
  78             connect=retries,
  79             backoff_factor=backoff_factor,
  80             status_forcelist=status_forcelist,
  81         )
  82         adapter = HTTPAdapter(max_retries=retry)
  83         session = requests.Session()
  84         session.mount('http://', adapter)
  85         session.mount('https://', adapter)
  86         return session
  87
  88     success = False
  89     try:
  90         log.append(("INFO", "    Connecting to '{0}' ...".format(url)))
  91         session = requests_retry_session()
  92         response = session.get(url, stream=True)
  93         code = response.status_code
  94         log.append(("INFO", "    {0}: {1}".format(code, responses[code])))
  95
  96         if code != codes["OK"]:
  97             if session:
  98                 session.close()
  99             url = url.replace("_info", "")
 100             log.append(("INFO", "    Connecting to '{0}' ...".format(url)))
 101             session = requests_retry_session()
 102             response = session.get(url, stream=True)
 103             code = response.status_code
 104             log.append(("INFO", "    {0}: {1}".format(code, responses[code])))
 105             if code != codes["OK"]:
 106                 return False, file_name
 107             file_name = file_name.replace("_info", "")
 108
 109         dst_file_name = file_name.replace(".gz", "")
 110         log.append(("INFO", "    Downloading the file '{0}' to '{1}' ...".
 111                     format(url, dst_file_name)))
 112         with open(dst_file_name, "wb") as file_handle:
 113             for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
 114                 if chunk:
 115                     file_handle.write(chunk)
 116
 117         if arch and ".gz" in file_name:
 118             if session:
 119                 session.close()
 120             log.append(("INFO", "    Downloading the file '{0}' to '{1}' ...".
 121                         format(url, file_name)))
 122             session = requests_retry_session()
 123             response = session.get(url, stream=True)
 124             if response.status_code == codes["OK"]:
 125                 with open(file_name, "wb") as file_handle:
 126                     file_handle.write(response.raw.read())
 127             else:
 128                 log.append(("ERROR", "Not possible to download the file '{0}' "
 129                                      "to '{1}' ...".format(url, file_name)))
 130
 131         success = True
 132     except ConnectionError as err:
 133         log.append(("ERROR", "Not possible to connect to '{0}'.".format(url)))
 134         log.append(("DEBUG", repr(err)))
 135     except HTTPError as err:
 136         log.append(("ERROR", "Invalid HTTP response from '{0}'.".format(url)))
 137         log.append(("DEBUG", repr(err)))
 138     except TooManyRedirects as err:
 139         log.append(("ERROR", "Request exceeded the configured number "
 140                              "of maximum re-directions."))
 141         log.append(("DEBUG", repr(err)))
 142     except Timeout as err:
 143         log.append(("ERROR", "Request timed out."))
 144         log.append(("DEBUG", repr(err)))
 145     except RequestException as err:
 146         log.append(("ERROR", "Unexpected HTTP request exception."))
 147         log.append(("DEBUG", repr(err)))
 148     except (IOError, ValueError, KeyError) as err:
 149         log.append(("ERROR", "Download failed."))
 150         log.append(("DEBUG", repr(err)))
 151     finally:
 152         if session:
 153             session.close()
 154
 155     log.append(("INFO", "    Download finished."))
 156     return success, file_name
 157
 158
 159 def _unzip_file(spec, build, pid, log):
 160     """Unzip downloaded source file.
 161
 162     :param spec: Specification read form the specification file.
 163     :param build: Information about the build.
 164     :param log: List of log messages.
 165     :type spec: Specification
 166     :type build: dict
 167     :type log: list of tuples (severity, msg)
 168     :returns: True if the download was successful, otherwise False.
 169     :rtype: bool
 170     """
 171
 172     file_name = build["file-name"]
 173     if ".zip" in file_name:
 174         data_file = spec.input["zip-extract"]
 175     else:
 176         data_file = spec.input["extract"]
 177
 178     directory = spec.environment["paths"]["DIR[WORKING,DATA]"]
 179     tmp_dir = join(directory, str(pid))
 180     try:
 181         mkdir(tmp_dir)
 182     except OSError:
 183         pass
 184     new_name = "{0}{1}{2}".format(file_name.rsplit('.')[-2],
 185                                   SEPARATOR,
 186                                   data_file.split("/")[-1])
 187
 188     log.append(("INFO", "    Unzipping: '{0}' from '{1}'.".
 189                 format(data_file, file_name)))
 190     try:
 191         with ZipFile(file_name, 'r') as zip_file:
 192             zip_file.extract(data_file, tmp_dir)
 193         log.append(("INFO", "    Renaming the file '{0}' to '{1}'".
 194                     format(join(tmp_dir, data_file), new_name)))
 195         rename(join(tmp_dir, data_file), new_name)
 196         build["file-name"] = new_name
 197         return True
 198     except (BadZipfile, RuntimeError) as err:
 199         log.append(("ERROR", "Failed to unzip the file '{0}': {1}.".
 200                     format(file_name, str(err))))
 201         return False
 202     except OSError as err:
 203         log.append(("ERROR", "Failed to rename the file '{0}': {1}.".
 204                     format(data_file, str(err))))
 205         return False
 206
 207
 208 def download_and_unzip_data_file(spec, job, build, pid, log):
 209     """Download and unzip a source file.
 210
 211     :param spec: Specification read form the specification file.
 212     :param job: Name of the Jenkins job.
 213     :param build: Information about the build.
 214     :param pid: PID of the process executing this method.
 215     :param log: List of log messages.
 216     :type spec: Specification
 217     :type job: str
 218     :type build: dict
 219     :type pid: int
 220     :type log: list of tuples (severity, msg)
 221     :returns: True if the download was successful, otherwise False.
 222     :rtype: bool
 223     """
 224
 225     # Try to download .gz from logs.fd.io
 226
 227     file_name = spec.input["file-name"]
 228     url = "{0}/{1}".format(
 229         spec.environment["urls"]["URL[NEXUS,LOG]"],
 230         spec.input["download-path"].format(
 231             job=job, build=build["build"], filename=file_name))
 232     new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
 233                     "{job}{sep}{build}{sep}{name}".format(
 234                         job=job, sep=SEPARATOR, build=build["build"],
 235                         name=file_name))
 236
 237     logging.info("Trying to download {0}".format(url))
 238
 239     arch = True if spec.configuration.get("archive-inputs", True) else False
 240     success, downloaded_name = _download_file(url, new_name, log, arch=arch)
 241
 242     if not success:
 243
 244         # Try to download .gz from docs.fd.io
 245
 246         file_name = spec.input["file-name"]
 247         url = "{0}/{1}".format(
 248             spec.environment["urls"]["URL[NEXUS,DOC]"],
 249             spec.input["download-path"].format(
 250                 job=job, build=build["build"], filename=file_name))
 251         new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
 252                         "{job}{sep}{build}{sep}{name}".format(
 253                             job=job, sep=SEPARATOR, build=build["build"],
 254                             name=file_name))
 255
 256         logging.info("Downloading {0}".format(url))
 257
 258         if spec.configuration.get("archive-inputs", True):
 259             arch = True
 260         success, downloaded_name = _download_file(url, new_name, log, arch=arch)
 261
 262     if not success:
 263
 264         # Try to download .zip from docs.fd.io
 265
 266         file_name = spec.input["zip-file-name"]
 267         new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
 268                         "{job}{sep}{build}{sep}{name}".format(
 269                             job=job, sep=SEPARATOR, build=build["build"],
 270                             name=file_name))
 271         release = re.search(REGEX_RELEASE, job).group(2)
 272         for rls in (release, "master"):
 273             nexus_file_name = "{job}{sep}{build}{sep}{name}". \
 274                 format(job=job, sep=SEPARATOR, build=build["build"],
 275                        name=file_name)
 276             try:
 277                 rls = "rls{0}".format(int(rls))
 278             except ValueError:
 279                 # It is 'master'
 280                 pass
 281             url = "{url}/{release}/{dir}/{file}". \
 282                 format(url=spec.environment["urls"]["URL[NEXUS,DOC]"],
 283                        release=rls,
 284                        dir=spec.environment["urls"]["DIR[NEXUS,DOC]"],
 285                        file=nexus_file_name)
 286
 287             logging.info("Downloading {0}".format(url))
 288
 289             success, downloaded_name = _download_file(url, new_name, log)
 290             if success:
 291                 break
 292
 293     if not success:
 294
 295         # Try to download .zip from jenkins.fd.io
 296
 297         file_name = spec.input["zip-file-name"]
 298         download_path = spec.input["zip-download-path"]
 299         if job.startswith("csit-"):
 300             url = spec.environment["urls"]["URL[JENKINS,CSIT]"]
 301         elif job.startswith("hc2vpp-"):
 302             url = spec.environment["urls"]["URL[JENKINS,HC]"]
 303         else:
 304             raise PresentationError(
 305                 "No url defined for the job '{}'.".format(job))
 306
 307         full_name = download_path.format(
 308             job=job, build=build["build"], filename=file_name)
 309         url = "{0}/{1}".format(url, full_name)
 310         new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
 311                         "{job}{sep}{build}{sep}{name}".
 312                         format(job=job, sep=SEPARATOR, build=build["build"],
 313                                name=file_name))
 314
 315         logging.info("Downloading {0}".format(url))
 316
 317         success, downloaded_name = _download_file(url, new_name, log)
 318
 319     if success and downloaded_name.endswith(".zip"):
 320         if not is_zipfile(downloaded_name):
 321             log.append(("ERROR",
 322                         "Zip file '{0}' is corrupted.".format(new_name)))
 323             success = False
 324
 325     if success:
 326         build["file-name"] = downloaded_name
 327
 328         if file_name.endswith(".gz"):
 329             build["file-name"] = downloaded_name[:-3]
 330
 331         if downloaded_name.endswith(".zip"):
 332             success = _unzip_file(spec, build, pid, log)
 333
 334     return success