resources/tools/presentation/input_data_files.py

   1 # Copyright (c) 2021 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Inputs
  15 Download all data.
  16 """
  17
  18 import re
  19 import logging
  20 import gzip
  21
  22 from os import rename, mkdir
  23 from os.path import join
  24 from http.client import responses, HTTPException
  25 from zipfile import ZipFile, is_zipfile, BadZipfile
  26
  27 import requests
  28
  29 from requests.adapters import HTTPAdapter, Retry
  30 from requests.exceptions import RequestException
  31 from requests import codes
  32
  33 from urllib3.exceptions import HTTPError
  34
  35
# Chunk size used for file download; passed as ``chunk_size`` to
# ``response.iter_content()`` when the response body is streamed to disk.
CHUNK_SIZE = 512

# Separator used in file names; joins the job name, the build number and the
# source file name into the name of the local copy.
SEPARATOR = u"__"

# Finds the release in a job name: the second group is either four
# consecutive digits or the literal "master"; the first and the third group
# capture the non-digit text around it.
REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)')
  43
  44
  45 def _download_file(url, file_name, arch=False, verify=True, repeat=1):
  46     """Download a file with input data.
  47
  48     :param url: URL to the file to download.
  49     :param file_name: Name of file to download.
  50     :param arch: If True, also .gz file is downloaded.
  51     :param verify: If true, verify the certificate.
  52     :param repeat: The number of attempts to download the file.
  53     :type url: str
  54     :type file_name: str
  55     :type arch: bool
  56     :type verify: bool
  57     :type repeat: int
  58     :returns: True if the download was successful, otherwise False.
  59     :rtype: bool
  60     """
  61
  62     def requests_retry_session(retries=3,
  63                                backoff_factor=0.3,
  64                                status_forcelist=(500, 502, 504)):
  65         """
  66
  67         :param retries: Total number of retries to allow.
  68         :param backoff_factor: A backoff factor to apply between attempts after
  69             the second try.
  70         :param status_forcelist: A set of integer HTTP status codes that are
  71             forced to retry.
  72         :type retries: int
  73         :type backoff_factor: float
  74         :type status_forcelist: iterable
  75         :returns: Session object.
  76         :rtype: requests.Session
  77         """
  78
  79         retry = Retry(
  80             total=retries,
  81             read=retries,
  82             connect=retries,
  83             backoff_factor=backoff_factor,
  84             status_forcelist=status_forcelist,
  85         )
  86         adapter = HTTPAdapter(max_retries=retry)
  87         session = requests.Session()
  88         session.mount(u"http://", adapter)
  89         session.mount(u"https://", adapter)
  90         return session
  91
  92     success = False
  93     while repeat:
  94         repeat -= 1
  95         session = None
  96         try:
  97             logging.info(f"  Connecting to {url} ...")
  98             session = requests_retry_session()
  99             response = session.get(url, stream=True, verify=verify)
 100             code = response.status_code
 101             logging.info(f"  {code}: {responses[code]}")
 102
 103             if code != codes[u"OK"]:
 104                 if session:
 105                     session.close()
 106                 return False, file_name
 107
 108             dst_file_name = file_name.replace(u".gz", u"")
 109             logging.info(f"  Downloading the file {url} to {dst_file_name}")
 110             with open(dst_file_name, u"wb") as file_handle:
 111                 for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
 112                     if chunk:
 113                         file_handle.write(chunk)
 114
 115             if arch and u".gz" in file_name:
 116                 if session:
 117                     session.close()
 118                 logging.info(f"  Downloading the file {url} to {file_name}")
 119                 session = requests_retry_session()
 120                 response = session.get(url, stream=True, verify=verify)
 121                 if response.status_code == codes[u"OK"]:
 122                     with open(file_name, u"wb") as file_handle:
 123                         file_handle.write(response.raw.read())
 124                 else:
 125                     logging.error(
 126                         f"Not possible to download the file "
 127                         f"{url} to {file_name}"
 128                     )
 129
 130             success = True
 131             repeat = 0
 132         except (HTTPException, HTTPError) as err:
 133             logging.error(f"Connection broken:\n{repr(err)}")
 134         except RequestException as err:
 135             logging.error(f"HTTP Request exception:\n{repr(err)}")
 136         except (IOError, ValueError, KeyError) as err:
 137             logging.error(f"Download failed.\n{repr(err)}")
 138         finally:
 139             if session:
 140                 session.close()
 141     return success, file_name
 142
 143
 144 def _unzip_file(spec, build, pid):
 145     """Unzip downloaded source file.
 146
 147     :param spec: Specification read form the specification file.
 148     :param build: Information about the build.
 149     :type spec: Specification
 150     :type build: dict
 151     :returns: True if the download was successful, otherwise False.
 152     :rtype: bool
 153     """
 154
 155     file_name = build[u"file-name"]
 156     data_file = "robot-plugin/output.xml"
 157     directory = spec.environment[u"paths"][u"DIR[WORKING,DATA]"]
 158     tmp_dir = join(directory, str(pid))
 159     try:
 160         mkdir(tmp_dir)
 161     except OSError:
 162         pass
 163     new_name = \
 164         f"{file_name.rsplit(u'.')[-2]}{SEPARATOR}{data_file.split(u'/')[-1]}"
 165
 166     logging.info(f"    Unzipping: {data_file} from {file_name}.")
 167     try:
 168         with ZipFile(file_name, u'r') as zip_file:
 169             zip_file.extract(data_file, tmp_dir)
 170         logging.info(
 171             f"    Renaming the file {join(tmp_dir, data_file)} to {new_name}"
 172         )
 173         rename(join(tmp_dir, data_file), new_name)
 174         build[u"file-name"] = new_name
 175         return True
 176     except (BadZipfile, RuntimeError) as err:
 177         logging.error(f"Failed to unzip the file {file_name}: {repr(err)}.")
 178         return False
 179     except OSError as err:
 180         logging.error(f"Failed to rename the file {data_file}: {repr(err)}.")
 181         return False
 182
 183
 184 def _download_xml(source, job, build, w_dir, arch):
 185     """
 186
 187     :param source:
 188     :param job:
 189     :param build:
 190     :param w_dir: Path to working directory
 191     :param arch:
 192     :return:
 193     """
 194
 195     file_name = source.get(u"file-name", u"")
 196     new_name = join(
 197         w_dir,
 198         f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 199     )
 200     url = u"{0}/{1}".format(
 201         source.get(u"url", u""),
 202         source.get(u"path", u"").format(
 203             job=job, build=build[u'build'], filename=file_name
 204         )
 205     )
 206     logging.info(f"  Trying to download {url}")
 207     success, downloaded_name = _download_file(
 208         url, new_name, arch=arch, verify=(u"nginx" not in url), repeat=3
 209     )
 210     return success, downloaded_name
 211
 212
 213 def _download_xml_docs(source, job, build, w_dir, arch):
 214     """
 215
 216     :param source:
 217     :param job:
 218     :param build:
 219     :param w_dir: Path to working directory
 220     :param arch:
 221     :return:
 222     """
 223
 224     file_name = source.get(u"file-name", u"")
 225     release = re.search(REGEX_RELEASE, job).group(2)
 226     for rls in (release, u"master"):
 227         try:
 228             rls = f"rls{int(rls)}"
 229         except ValueError:
 230             pass  # It is master
 231         url = (
 232             f"{source.get(u'url', u'')}/"
 233             f"{rls}/"
 234             f"{source.get(u'path', u'')}/"
 235             f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 236         )
 237         new_name = join(
 238             w_dir,
 239             f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 240         )
 241
 242         logging.info(f"  Trying to download {url}")
 243
 244         success, downloaded_name = _download_file(url, new_name, arch=arch)
 245         if success:
 246             if file_name.endswith(u".gz"):
 247                 with gzip.open(downloaded_name[:-3], u"rb") as gzip_file:
 248                     file_content = gzip_file.read()
 249                 with open(downloaded_name[:-3], u"wb") as xml_file:
 250                     xml_file.write(file_content)
 251             break
 252
 253     return success, downloaded_name
 254
 255
 256 def download_and_unzip_data_file(spec, job, build, pid):
 257     """Download and unzip a source file.
 258
 259     :param spec: Specification read form the specification file.
 260     :param job: Name of the Jenkins job.
 261     :param build: Information about the build.
 262     :param pid: PID of the process executing this method.
 263     :type spec: Specification
 264     :type job: str
 265     :type build: dict
 266     :type pid: int
 267     :returns: True if the download was successful, otherwise False.
 268     :rtype: bool
 269     """
 270
 271     download = {
 272         "xml": _download_xml,
 273         "xml-docs": _download_xml_docs
 274     }
 275
 276     success = False
 277     downloaded_name = u""
 278     arch = bool(spec.environment.get(u"archive-inputs", True))
 279
 280     for source in spec.environment.get(u"data-sources", tuple()):
 281         if not source.get(u"enabled", False):
 282             continue
 283         download_type = source.get(u"type", None)
 284         if not download_type:
 285             continue
 286         success, downloaded_name = download[download_type](
 287             source,
 288             job,
 289             build,
 290             spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 291             arch
 292         )
 293         if success:
 294             source[u"successful-downloads"] += 1
 295             build[u"source"] = source[u"type"]
 296             break
 297
 298     # TODO: Remove when only .gz is used.
 299     if success and downloaded_name.endswith(u".zip"):
 300         if not is_zipfile(downloaded_name):
 301             logging.error(f"Zip file {downloaded_name} is corrupted.")
 302             success = False
 303
 304     if success:
 305         if downloaded_name.endswith(u".gz"):
 306             build[u"file-name"] = downloaded_name[:-3]
 307         # TODO: Remove when only .gz is used.
 308         elif downloaded_name.endswith(u".zip"):
 309             build[u"file-name"] = downloaded_name
 310             success = _unzip_file(spec, build, pid)
 311
 312     return success