resources/tools/presentation/input_data_files.py

   1 # Copyright (c) 2020 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Inputs
  15 Download all data.
  16 """
  17
  18 import re
  19 import logging
  20 import gzip
  21
  22 from os import rename, mkdir
  23 from os.path import join
  24 from http.client import responses, HTTPException
  25 from zipfile import ZipFile, is_zipfile, BadZipfile
  26
  27 import requests
  28
  29 from requests.adapters import HTTPAdapter, Retry
  30 from requests.exceptions import RequestException
  31 from requests import codes
  32
  33 from urllib3.exceptions import HTTPError
  34
  35 from pal_errors import PresentationError
  36
  37
# Chunk size passed as ``chunk_size`` to ``response.iter_content()`` when a
# downloaded file is written to disk.
CHUNK_SIZE = 512

# Separator placed between the job name, the build number and the file name
# when the names of local files are composed.
SEPARATOR = u"__"

# Applied to the job name; group 2 captures either a four-digit number
# (used as the release, e.g. "2001") or the literal "master".
REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)')
  45
  46
  47 def _download_file(url, file_name, arch=False, verify=True, repeat=1):
  48     """Download a file with input data.
  49
  50     :param url: URL to the file to download.
  51     :param file_name: Name of file to download.
  52     :param arch: If True, also .gz file is downloaded.
  53     :param verify: If true, verify the certificate.
  54     :param repeat: The number of attempts to download the file.
  55     :type url: str
  56     :type file_name: str
  57     :type arch: bool
  58     :type verify: bool
  59     :type repeat: int
  60     :returns: True if the download was successful, otherwise False.
  61     :rtype: bool
  62     """
  63
  64     def requests_retry_session(retries=3,
  65                                backoff_factor=0.3,
  66                                status_forcelist=(500, 502, 504)):
  67         """
  68
  69         :param retries: Total number of retries to allow.
  70         :param backoff_factor: A backoff factor to apply between attempts after
  71             the second try.
  72         :param status_forcelist: A set of integer HTTP status codes that are
  73             forced to retry.
  74         :type retries: int
  75         :type backoff_factor: float
  76         :type status_forcelist: iterable
  77         :returns: Session object.
  78         :rtype: requests.Session
  79         """
  80
  81         retry = Retry(
  82             total=retries,
  83             read=retries,
  84             connect=retries,
  85             backoff_factor=backoff_factor,
  86             status_forcelist=status_forcelist,
  87         )
  88         adapter = HTTPAdapter(max_retries=retry)
  89         session = requests.Session()
  90         session.mount(u"http://", adapter)
  91         session.mount(u"https://", adapter)
  92         return session
  93
  94     success = False
  95     while repeat:
  96         repeat -= 1
  97         session = None
  98         try:
  99             logging.info(f"    Connecting to {url} ...")
 100             session = requests_retry_session()
 101             response = session.get(url, stream=True, verify=verify)
 102             code = response.status_code
 103             logging.info(f"    {code}: {responses[code]}")
 104
 105             if code != codes[u"OK"]:
 106                 if session:
 107                     session.close()
 108                 url = url.replace(u"_info", u"")
 109                 logging.info(f"    Connecting to {url} ...")
 110                 session = requests_retry_session()
 111                 response = session.get(url, stream=True, verify=verify)
 112                 code = response.status_code
 113                 logging.info(f"    {code}: {responses[code]}")
 114                 if code != codes[u"OK"]:
 115                     return False, file_name
 116                 file_name = file_name.replace(u"_info", u"")
 117
 118             dst_file_name = file_name.replace(u".gz", u"")
 119             logging.info(f"    Downloading the file {url} to {dst_file_name}")
 120             with open(dst_file_name, u"wb") as file_handle:
 121                 for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
 122                     if chunk:
 123                         file_handle.write(chunk)
 124
 125             if arch and u".gz" in file_name:
 126                 if session:
 127                     session.close()
 128                 logging.info(f"    Downloading the file {url} to {file_name}")
 129                 session = requests_retry_session()
 130                 response = session.get(url, stream=True, verify=verify)
 131                 if response.status_code == codes[u"OK"]:
 132                     with open(file_name, u"wb") as file_handle:
 133                         file_handle.write(response.raw.read())
 134                 else:
 135                     logging.error(
 136                         f"Not possible to download the file "
 137                         f"{url} to {file_name}"
 138                     )
 139
 140             success = True
 141             repeat = 0
 142         except (HTTPException, HTTPError) as err:
 143             logging.error(f"Connection broken:\n{repr(err)}")
 144         except RequestException as err:
 145             logging.error(f"HTTP Request exception:\n{repr(err)}")
 146         except (IOError, ValueError, KeyError) as err:
 147             logging.error(f"Download failed.\n{repr(err)}")
 148         finally:
 149             if session:
 150                 session.close()
 151
 152     logging.info(u"    Download finished.")
 153     return success, file_name
 154
 155
 156 def _unzip_file(spec, build, pid):
 157     """Unzip downloaded source file.
 158
 159     :param spec: Specification read form the specification file.
 160     :param build: Information about the build.
 161     :type spec: Specification
 162     :type build: dict
 163     :returns: True if the download was successful, otherwise False.
 164     :rtype: bool
 165     """
 166
 167     file_name = build[u"file-name"]
 168     if u".zip" in file_name:
 169         data_file = spec.input[u"zip-extract"]
 170     else:
 171         data_file = spec.input[u"extract"]
 172
 173     directory = spec.environment[u"paths"][u"DIR[WORKING,DATA]"]
 174     tmp_dir = join(directory, str(pid))
 175     try:
 176         mkdir(tmp_dir)
 177     except OSError:
 178         pass
 179     new_name = \
 180         f"{file_name.rsplit(u'.')[-2]}{SEPARATOR}{data_file.split(u'/')[-1]}"
 181
 182     logging.info(f"    Unzipping: {data_file} from {file_name}.")
 183     try:
 184         with ZipFile(file_name, u'r') as zip_file:
 185             zip_file.extract(data_file, tmp_dir)
 186         logging.info(
 187             f"    Renaming the file {join(tmp_dir, data_file)} to {new_name}"
 188         )
 189         rename(join(tmp_dir, data_file), new_name)
 190         build[u"file-name"] = new_name
 191         return True
 192     except (BadZipfile, RuntimeError) as err:
 193         logging.error(f"Failed to unzip the file {file_name}: {repr(err)}.")
 194         return False
 195     except OSError as err:
 196         logging.error(f"Failed to rename the file {data_file}: {repr(err)}.")
 197         return False
 198
 199
 200 def download_and_unzip_data_file(spec, job, build, pid):
 201     """Download and unzip a source file.
 202
 203     :param spec: Specification read form the specification file.
 204     :param job: Name of the Jenkins job.
 205     :param build: Information about the build.
 206     :param pid: PID of the process executing this method.
 207     :type spec: Specification
 208     :type job: str
 209     :type build: dict
 210     :type pid: int
 211     :returns: True if the download was successful, otherwise False.
 212     :rtype: bool
 213     """
 214
 215     # Try to download .gz from s3_storage
 216     file_name = spec.input[u"file-name"]
 217     url = u"{0}/{1}".format(
 218         spec.environment[u'urls'][u'URL[S3_STORAGE,LOG]'],
 219         spec.input[u'download-path'].format(
 220             job=job, build=build[u'build'], filename=file_name
 221         )
 222     )
 223     new_name = join(
 224         spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 225         f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 226     )
 227
 228     logging.info(f"Trying to download {url}")
 229
 230     arch = bool(spec.configuration.get(u"archive-inputs", True))
 231     success, downloaded_name = _download_file(
 232         url, new_name, arch=arch, verify=False, repeat=3
 233     )
 234
 235     if not success:
 236         # Try to download .gz from logs.fd.io
 237         file_name = spec.input[u"file-name"]
 238         url = u"{0}/{1}".format(
 239             spec.environment[u'urls'][u'URL[NEXUS,LOG]'],
 240             spec.input[u'download-path'].format(
 241                 job=job, build=build[u'build'], filename=file_name
 242             )
 243         )
 244         new_name = join(
 245             spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 246             f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 247         )
 248
 249         logging.info(f"Trying to download {url}")
 250
 251         arch = bool(spec.configuration.get(u"archive-inputs", True))
 252         success, downloaded_name = _download_file(
 253             url, new_name, arch=arch, verify=True, repeat=3
 254         )
 255
 256     if not success:
 257
 258         # Try to download .gz or .zip from docs.fd.io
 259         file_name = (spec.input[u"file-name"], spec.input[u"zip-file-name"])
 260         release = re.search(REGEX_RELEASE, job).group(2)
 261         for idx, rls in enumerate((release, u"master", )):
 262             try:
 263                 rls = f"rls{int(rls)}"
 264             except ValueError:
 265                 # It is master
 266                 pass
 267             url = (
 268                 f"{spec.environment[u'urls'][u'URL[NEXUS,DOC]']}/"
 269                 f"{rls}/"
 270                 f"{spec.environment[u'urls'][u'DIR[NEXUS,DOC]']}/"
 271                 f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name[idx]}"
 272             )
 273
 274             logging.info(f"Downloading {url}")
 275
 276             new_name = join(
 277                 spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 278                 f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name[idx]}"
 279             )
 280             success, downloaded_name = _download_file(url, new_name, arch=arch)
 281             if success:
 282                 file_name = file_name[idx]
 283                 if file_name.endswith(u".gz"):
 284                     with gzip.open(downloaded_name[:-3], u"rb") as gzip_file:
 285                         file_content = gzip_file.read()
 286                     with open(downloaded_name[:-3], u"wb") as xml_file:
 287                         xml_file.write(file_content)
 288                 break
 289
 290     if not success:
 291
 292         # Try to download .zip from jenkins.fd.io
 293         file_name = spec.input[u"zip-file-name"]
 294         download_path = spec.input[u"zip-download-path"]
 295         if job.startswith(u"csit-"):
 296             url = spec.environment[u"urls"][u"URL[JENKINS,CSIT]"]
 297         else:
 298             raise PresentationError(f"No url defined for the job {job}.")
 299
 300         full_name = download_path.format(
 301             job=job, build=build[u"build"], filename=file_name
 302         )
 303         url = u"{0}/{1}".format(url, full_name)
 304         new_name = join(
 305             spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 306             f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 307         )
 308
 309         logging.info(f"Downloading {url}")
 310
 311         success, downloaded_name = _download_file(url, new_name)
 312
 313     if success and downloaded_name.endswith(u".zip"):
 314         if not is_zipfile(downloaded_name):
 315             logging.error(f"Zip file {new_name} is corrupted.")
 316             success = False
 317
 318     if success:
 319         build[u"file-name"] = downloaded_name
 320
 321         if file_name.endswith(u".gz"):
 322             build[u"file-name"] = downloaded_name[:-3]
 323
 324         if downloaded_name.endswith(u".zip"):
 325             success = _unzip_file(spec, build, pid)
 326
 327     return success