resources/tools/presentation/input_data_files.py

   1 # Copyright (c) 2018 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Inputs
  15 Download all data.
  16 """
  17
  18 import re
  19 import logging
  20
  21 from os import rename, mkdir
  22 from os.path import join
  23 from http.client import responses
  24 from zipfile import ZipFile, is_zipfile, BadZipfile
  25
  26 import requests
  27
  28 from requests.adapters import HTTPAdapter, Retry
  29 from requests.exceptions import RequestException
  30 from requests import codes
  31
  32 from pal_errors import PresentationError
  33
  34
# Chunk size used for file download; passed as chunk_size to
# response.iter_content() when the downloaded data is written to disk.
CHUNK_SIZE = 512

# Separator used in file names; joins the job name, the build number and the
# file name into the name of the locally stored file.
SEPARATOR = u"__"

# Used to get the release from the name of a job: the second group is either
# four digits or the string "master".
REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)')
  42
  43
  44 def _download_file(url, file_name, log, arch=False):
  45     """Download a file with input data.
  46
  47     :param url: URL to the file to download.
  48     :param file_name: Name of file to download.
  49     :param log: List of log messages.
  50     :param arch: If True, also .gz file is downloaded
  51     :type url: str
  52     :type file_name: str
  53     :type log: list of tuples (severity, msg)
  54     :type arch: bool
  55     :returns: True if the download was successful, otherwise False.
  56     :rtype: bool
  57     """
  58
  59     def requests_retry_session(retries=3,
  60                                backoff_factor=0.3,
  61                                status_forcelist=(500, 502, 504)):
  62         """
  63
  64         :param retries: Total number of retries to allow.
  65         :param backoff_factor: A backoff factor to apply between attempts after
  66             the second try.
  67         :param status_forcelist: A set of integer HTTP status codes that are
  68             forced to retry.
  69         :type retries: int
  70         :type backoff_factor: float
  71         :type status_forcelist: iterable
  72         :returns: Session object.
  73         :rtype: requests.Session
  74         """
  75
  76         retry = Retry(
  77             total=retries,
  78             read=retries,
  79             connect=retries,
  80             backoff_factor=backoff_factor,
  81             status_forcelist=status_forcelist,
  82         )
  83         adapter = HTTPAdapter(max_retries=retry)
  84         session = requests.Session()
  85         session.mount(u"http://", adapter)
  86         session.mount(u"https://", adapter)
  87         return session
  88
  89     success = False
  90     session = None
  91     try:
  92         log.append((u"INFO", f"    Connecting to {url} ..."))
  93         session = requests_retry_session()
  94         response = session.get(url, stream=True)
  95         code = response.status_code
  96         log.append((u"INFO", f"    {code}: {responses[code]}"))
  97
  98         if code != codes[u"OK"]:
  99             if session:
 100                 session.close()
 101             url = url.replace(u"_info", u"")
 102             log.append((u"INFO", f"    Connecting to {url} ..."))
 103             session = requests_retry_session()
 104             response = session.get(url, stream=True)
 105             code = response.status_code
 106             log.append((u"INFO", f"    {code}: {responses[code]}"))
 107             if code != codes[u"OK"]:
 108                 return False, file_name
 109             file_name = file_name.replace(u"_info", u"")
 110
 111         dst_file_name = file_name.replace(u".gz", u"")
 112         log.append(
 113             (u"INFO", f"    Downloading the file {url} to {dst_file_name} ...")
 114         )
 115         with open(dst_file_name, u"wb") as file_handle:
 116             for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
 117                 if chunk:
 118                     file_handle.write(chunk)
 119
 120         if arch and u".gz" in file_name:
 121             if session:
 122                 session.close()
 123             log.append(
 124                 (u"INFO", f"    Downloading the file {url} to {file_name} ...")
 125             )
 126             session = requests_retry_session()
 127             response = session.get(url, stream=True)
 128             if response.status_code == codes[u"OK"]:
 129                 with open(file_name, u"wb") as file_handle:
 130                     file_handle.write(response.raw.read())
 131             else:
 132                 log.append(
 133                     (u"ERROR", f"Not possible to download the file {url} to "
 134                                f"{file_name} ...")
 135                 )
 136
 137         success = True
 138     except RequestException as err:
 139         log.append(
 140             (u"ERROR", f"HTTP Request exception:\n{repr(err)}")
 141         )
 142     except (IOError, ValueError, KeyError) as err:
 143         log.append((u"ERROR", f"Download failed.\n{repr(err)}"))
 144     finally:
 145         if session:
 146             session.close()
 147
 148     log.append((u"INFO", u"    Download finished."))
 149     return success, file_name
 150
 151
 152 def _unzip_file(spec, build, pid, log):
 153     """Unzip downloaded source file.
 154
 155     :param spec: Specification read form the specification file.
 156     :param build: Information about the build.
 157     :param log: List of log messages.
 158     :type spec: Specification
 159     :type build: dict
 160     :type log: list of tuples (severity, msg)
 161     :returns: True if the download was successful, otherwise False.
 162     :rtype: bool
 163     """
 164
 165     file_name = build[u"file-name"]
 166     if u".zip" in file_name:
 167         data_file = spec.input[u"zip-extract"]
 168     else:
 169         data_file = spec.input[u"extract"]
 170
 171     directory = spec.environment[u"paths"][u"DIR[WORKING,DATA]"]
 172     tmp_dir = join(directory, str(pid))
 173     try:
 174         mkdir(tmp_dir)
 175     except OSError:
 176         pass
 177     new_name = \
 178         f"{file_name.rsplit(u'.')[-2]}{SEPARATOR}{data_file.split(u'/')[-1]}"
 179
 180     log.append((u"INFO", f"    Unzipping: {data_file} from {file_name}."))
 181     try:
 182         with ZipFile(file_name, u'r') as zip_file:
 183             zip_file.extract(data_file, tmp_dir)
 184         log.append(
 185             (u"INFO", f"    Renaming the file {join(tmp_dir, data_file)} to "
 186                       f"{new_name}")
 187         )
 188         rename(join(tmp_dir, data_file), new_name)
 189         build[u"file-name"] = new_name
 190         return True
 191     except (BadZipfile, RuntimeError) as err:
 192         log.append(
 193             (u"ERROR", f"Failed to unzip the file {file_name}: {repr(err)}.")
 194         )
 195         return False
 196     except OSError as err:
 197         log.append(
 198             (u"ERROR", f"Failed to rename the file {data_file}: {repr(err)}.")
 199         )
 200         return False
 201
 202
 203 def download_and_unzip_data_file(spec, job, build, pid, log):
 204     """Download and unzip a source file.
 205
 206     :param spec: Specification read form the specification file.
 207     :param job: Name of the Jenkins job.
 208     :param build: Information about the build.
 209     :param pid: PID of the process executing this method.
 210     :param log: List of log messages.
 211     :type spec: Specification
 212     :type job: str
 213     :type build: dict
 214     :type pid: int
 215     :type log: list of tuples (severity, msg)
 216     :returns: True if the download was successful, otherwise False.
 217     :rtype: bool
 218     """
 219
 220     # Try to download .gz from logs.fd.io
 221
 222     file_name = spec.input[u"file-name"]
 223     url = u"{0}/{1}".format(
 224         spec.environment[u'urls'][u'URL[NEXUS,LOG]'],
 225         spec.input[u'download-path'].format(
 226             job=job, build=build[u'build'], filename=file_name
 227         )
 228     )
 229     new_name = join(
 230         spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 231         f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 232     )
 233
 234     logging.info(f"Trying to download {url}")
 235
 236     arch = bool(spec.configuration.get(u"archive-inputs", True))
 237     success, downloaded_name = _download_file(url, new_name, log, arch=arch)
 238
 239     if not success:
 240
 241         # Try to download .gz from docs.fd.io
 242
 243         file_name = spec.input[u"file-name"]
 244         url = u"{0}/{1}".format(
 245             spec.environment[u"urls"][u"URL[NEXUS,DOC]"],
 246             spec.input[u"download-path"].format(
 247                 job=job, build=build[u"build"], filename=file_name
 248             )
 249         )
 250         new_name = join(
 251             spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 252             f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 253         )
 254
 255         logging.info(f"Downloading {url}")
 256
 257         success, downloaded_name = _download_file(url, new_name, log, arch=arch)
 258
 259     if not success:
 260
 261         # Try to download .zip from docs.fd.io
 262
 263         file_name = spec.input[u"zip-file-name"]
 264         new_name = join(
 265             spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 266             f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 267         )
 268         release = re.search(REGEX_RELEASE, job).group(2)
 269         for rls in (release, u"master"):
 270             nexus_file_name = \
 271                 f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 272             try:
 273                 rls = f"rls{int(rls)}"
 274             except ValueError:
 275                 # It is master
 276                 pass
 277             url = (
 278                 f"{spec.environment[u'urls'][u'URL[NEXUS,DOC]']}/"
 279                 f"{rls}/"
 280                 f"{spec.environment[u'urls'][u'DIR[NEXUS,DOC]']}/"
 281                 f"{nexus_file_name}"
 282             )
 283
 284             logging.info(f"Downloading {url}")
 285
 286             success, downloaded_name = _download_file(url, new_name, log)
 287             if success:
 288                 break
 289
 290     if not success:
 291
 292         # Try to download .zip from jenkins.fd.io
 293
 294         file_name = spec.input[u"zip-file-name"]
 295         download_path = spec.input[u"zip-download-path"]
 296         if job.startswith(u"csit-"):
 297             url = spec.environment[u"urls"][u"URL[JENKINS,CSIT]"]
 298         elif job.startswith(u"hc2vpp-"):
 299             url = spec.environment[u"urls"][u"URL[JENKINS,HC]"]
 300         else:
 301             raise PresentationError(f"No url defined for the job {job}.")
 302
 303         full_name = download_path.format(
 304             job=job, build=build[u"build"], filename=file_name
 305         )
 306         url = u"{0}/{1}".format(url, full_name)
 307         new_name = join(
 308             spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 309             f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 310         )
 311
 312         logging.info(f"Downloading {url}")
 313
 314         success, downloaded_name = _download_file(url, new_name, log)
 315
 316     if success and downloaded_name.endswith(u".zip"):
 317         if not is_zipfile(downloaded_name):
 318             log.append((u"ERROR", f"Zip file {new_name} is corrupted."))
 319             success = False
 320
 321     if success:
 322         build[u"file-name"] = downloaded_name
 323
 324         if file_name.endswith(u".gz"):
 325             build[u"file-name"] = downloaded_name[:-3]
 326
 327         if downloaded_name.endswith(u".zip"):
 328             success = _unzip_file(spec, build, pid, log)
 329
 330     return success