resources/tools/presentation/input_data_files.py

   1 # Copyright (c) 2021 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Inputs
  15 Download all data.
  16 """
  17
  18 import re
  19 import logging
  20 import gzip
  21
  22 from os import rename, mkdir
  23 from os.path import join
  24 from http.client import responses, HTTPException
  25 from zipfile import ZipFile, is_zipfile, BadZipfile
  26
  27 import requests
  28
  29 from requests.adapters import HTTPAdapter, Retry
  30 from requests.exceptions import RequestException
  31 from requests import codes
  32
  33 from urllib3.exceptions import HTTPError
  34
  35
# Chunk size used for file download; passed as ``chunk_size`` to
# ``response.iter_content`` when a download is streamed to disk.
CHUNK_SIZE = 512

# Separator used in file names; joins the job name, the build number and the
# source file name into one local file name.
SEPARATOR = u"__"

# Finds the release token in a job name. Group 2 is either four digits or the
# literal "master"; groups 1 and 3 are the surrounding non-digit text.
REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)')
  43
  44
  45 def _download_file(url, file_name, arch=False, verify=True, repeat=1):
  46     """Download a file with input data.
  47
  48     :param url: URL to the file to download.
  49     :param file_name: Name of file to download.
  50     :param arch: If True, also .gz file is downloaded.
  51     :param verify: If true, verify the certificate.
  52     :param repeat: The number of attempts to download the file.
  53     :type url: str
  54     :type file_name: str
  55     :type arch: bool
  56     :type verify: bool
  57     :type repeat: int
  58     :returns: True if the download was successful, otherwise False.
  59     :rtype: bool
  60     """
  61
  62     def requests_retry_session(retries=3,
  63                                backoff_factor=0.3,
  64                                status_forcelist=(500, 502, 504)):
  65         """
  66
  67         :param retries: Total number of retries to allow.
  68         :param backoff_factor: A backoff factor to apply between attempts after
  69             the second try.
  70         :param status_forcelist: A set of integer HTTP status codes that are
  71             forced to retry.
  72         :type retries: int
  73         :type backoff_factor: float
  74         :type status_forcelist: iterable
  75         :returns: Session object.
  76         :rtype: requests.Session
  77         """
  78
  79         retry = Retry(
  80             total=retries,
  81             read=retries,
  82             connect=retries,
  83             backoff_factor=backoff_factor,
  84             status_forcelist=status_forcelist,
  85         )
  86         adapter = HTTPAdapter(max_retries=retry)
  87         session = requests.Session()
  88         session.mount(u"http://", adapter)
  89         session.mount(u"https://", adapter)
  90         return session
  91
  92     success = False
  93     while repeat:
  94         repeat -= 1
  95         session = None
  96         try:
  97             logging.info(f"  Connecting to {url} ...")
  98             session = requests_retry_session()
  99             response = session.get(url, stream=True, verify=verify)
 100             code = response.status_code
 101             logging.info(f"  {code}: {responses[code]}")
 102
 103             if code != codes[u"OK"]:
 104                 if session:
 105                     session.close()
 106                 return False, file_name
 107
 108             dst_file_name = file_name.replace(u".gz", u"")
 109             logging.info(f"  Downloading the file {url} to {dst_file_name}")
 110             with open(dst_file_name, u"wb") as file_handle:
 111                 for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
 112                     if chunk:
 113                         file_handle.write(chunk)
 114
 115             if arch and u".gz" in file_name:
 116                 if session:
 117                     session.close()
 118                 logging.info(f"  Downloading the file {url} to {file_name}")
 119                 session = requests_retry_session()
 120                 response = session.get(url, stream=True, verify=verify)
 121                 if response.status_code == codes[u"OK"]:
 122                     with open(file_name, u"wb") as file_handle:
 123                         file_handle.write(response.raw.read())
 124                 else:
 125                     logging.error(
 126                         f"Not possible to download the file "
 127                         f"{url} to {file_name}"
 128                     )
 129
 130             success = True
 131             repeat = 0
 132         except (HTTPException, HTTPError) as err:
 133             logging.error(f"Connection broken:\n{repr(err)}")
 134         except RequestException as err:
 135             logging.error(f"HTTP Request exception:\n{repr(err)}")
 136         except (IOError, ValueError, KeyError) as err:
 137             logging.error(f"Download failed.\n{repr(err)}")
 138         finally:
 139             if session:
 140                 session.close()
 141     return success, file_name
 142
 143
 144 def _unzip_file(spec, build, pid):
 145     """Unzip downloaded source file.
 146
 147     :param spec: Specification read form the specification file.
 148     :param build: Information about the build.
 149     :type spec: Specification
 150     :type build: dict
 151     :returns: True if the download was successful, otherwise False.
 152     :rtype: bool
 153     """
 154
 155     file_name = build[u"file-name"]
 156     data_file = "robot-plugin/output.xml"
 157     directory = spec.environment[u"paths"][u"DIR[WORKING,DATA]"]
 158     tmp_dir = join(directory, str(pid))
 159     try:
 160         mkdir(tmp_dir)
 161     except OSError:
 162         pass
 163     new_name = \
 164         f"{file_name.rsplit(u'.')[-2]}{SEPARATOR}{data_file.split(u'/')[-1]}"
 165
 166     logging.info(f"    Unzipping: {data_file} from {file_name}.")
 167     try:
 168         with ZipFile(file_name, u'r') as zip_file:
 169             zip_file.extract(data_file, tmp_dir)
 170         logging.info(
 171             f"    Renaming the file {join(tmp_dir, data_file)} to {new_name}"
 172         )
 173         rename(join(tmp_dir, data_file), new_name)
 174         build[u"file-name"] = new_name
 175         return True
 176     except (BadZipfile, RuntimeError) as err:
 177         logging.error(f"Failed to unzip the file {file_name}: {repr(err)}.")
 178         return False
 179     except OSError as err:
 180         logging.error(f"Failed to rename the file {data_file}: {repr(err)}.")
 181         return False
 182
 183
 184 def _download_json(source, job, build, w_dir, arch):
 185     """
 186
 187     :param source:
 188     :param job:
 189     :param build:
 190     :param w_dir: Path to working directory
 191     :param arch:
 192     :return:
 193     """
 194     success = False
 195     downloaded_name = u""
 196
 197     return success, downloaded_name
 198
 199
 200 def _download_xml(source, job, build, w_dir, arch):
 201     """
 202
 203     :param source:
 204     :param job:
 205     :param build:
 206     :param w_dir: Path to working directory
 207     :param arch:
 208     :return:
 209     """
 210
 211     file_name = source.get(u"file-name", u"")
 212     new_name = join(
 213         w_dir,
 214         f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 215     )
 216     url = u"{0}/{1}".format(
 217         source.get(u"url", u""),
 218         source.get(u"path", u"").format(
 219             job=job, build=build[u'build'], filename=file_name
 220         )
 221     )
 222     verify = False if u"nginx" in url else True
 223     logging.info(f"  Trying to download {url}")
 224     success, downloaded_name = _download_file(
 225         url, new_name, arch=arch, verify=verify, repeat=3
 226     )
 227     return success, downloaded_name
 228
 229
 230 def _download_xml_docs(source, job, build, w_dir, arch):
 231     """
 232
 233     :param source:
 234     :param job:
 235     :param build:
 236     :param w_dir: Path to working directory
 237     :param arch:
 238     :return:
 239     """
 240
 241     file_name = source.get(u"file-name", u"")
 242     release = re.search(REGEX_RELEASE, job).group(2)
 243     for rls in (release, u"master"):
 244         try:
 245             rls = f"rls{int(rls)}"
 246         except ValueError:
 247             pass  # It is master
 248         url = (
 249             f"{source.get(u'url', u'')}/"
 250             f"{rls}/"
 251             f"{source.get(u'path', u'')}/"
 252             f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 253         )
 254         new_name = join(
 255             w_dir,
 256             f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 257         )
 258
 259         logging.info(f"  Trying to download {url}")
 260
 261         success, downloaded_name = _download_file(url, new_name, arch=arch)
 262         if success:
 263             if file_name.endswith(u".gz"):
 264                 with gzip.open(downloaded_name[:-3], u"rb") as gzip_file:
 265                     file_content = gzip_file.read()
 266                 with open(downloaded_name[:-3], u"wb") as xml_file:
 267                     xml_file.write(file_content)
 268             break
 269
 270     return success, downloaded_name
 271
 272
 273 def download_and_unzip_data_file(spec, job, build, pid):
 274     """Download and unzip a source file.
 275
 276     :param spec: Specification read form the specification file.
 277     :param job: Name of the Jenkins job.
 278     :param build: Information about the build.
 279     :param pid: PID of the process executing this method.
 280     :type spec: Specification
 281     :type job: str
 282     :type build: dict
 283     :type pid: int
 284     :returns: True if the download was successful, otherwise False.
 285     :rtype: bool
 286     """
 287
 288     download = {
 289         "json": _download_json,
 290         "xml": _download_xml,
 291         "xml-docs": _download_xml_docs
 292     }
 293
 294     success = False
 295     downloaded_name = u""
 296     arch = bool(spec.environment.get(u"archive-inputs", True))
 297
 298     for source in spec.environment.get(u"data-sources", tuple()):
 299         if not source.get(u"enabled", False):
 300             continue
 301         download_type = source.get(u"type", None)
 302         if not download_type:
 303             continue
 304         success, downloaded_name = download[download_type](
 305                 source,
 306                 job,
 307                 build,
 308                 spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 309                 arch
 310             )
 311         if success:
 312             source[u"successful-downloads"] += 1
 313             build[u"source"] = source[u"type"]
 314             break
 315
 316     # TODO: Remove when only .gz is used.
 317     if success and downloaded_name.endswith(u".zip"):
 318         if not is_zipfile(downloaded_name):
 319             logging.error(f"Zip file {downloaded_name} is corrupted.")
 320             success = False
 321
 322     if success:
 323         if downloaded_name.endswith(u".gz"):
 324             build[u"file-name"] = downloaded_name[:-3]
 325         # TODO: Remove when only .gz is used.
 326         elif downloaded_name.endswith(u".zip"):
 327             build[u"file-name"] = downloaded_name
 328             success = _unzip_file(spec, build, pid)
 329
 330     return success