resources/tools/presentation/input_data_files.py

   1 # Copyright (c) 2021 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Inputs
  15 Download all data.
  16 """
  17
  18 import re
  19 import logging
  20 import gzip
  21
  22 from os import rename, mkdir
  23 from os.path import join
  24 from http.client import responses, HTTPException
  25 from zipfile import ZipFile, is_zipfile, BadZipfile
  26
  27 import requests
  28
  29 from requests.adapters import HTTPAdapter, Retry
  30 from requests.exceptions import RequestException
  31 from requests import codes
  32
  33 from urllib3.exceptions import HTTPError
  34
  35 from pal_errors import PresentationError
  36
  37
# Chunk size (passed as ``chunk_size`` to ``iter_content``) used for file
# download
CHUNK_SIZE = 512

# Separator used in file names, placed between the job name, the build number
# and the name of the downloaded file
SEPARATOR = u"__"

# Matches the release part of a job name: group 2 is either four digits or the
# string "master"; groups 1 and 3 are the surrounding non-digit text.
REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)')
  45
  46
  47 def _download_file(url, file_name, arch=False, verify=True, repeat=1):
  48     """Download a file with input data.
  49
  50     :param url: URL to the file to download.
  51     :param file_name: Name of file to download.
  52     :param arch: If True, also .gz file is downloaded.
  53     :param verify: If true, verify the certificate.
  54     :param repeat: The number of attempts to download the file.
  55     :type url: str
  56     :type file_name: str
  57     :type arch: bool
  58     :type verify: bool
  59     :type repeat: int
  60     :returns: True if the download was successful, otherwise False.
  61     :rtype: bool
  62     """
  63
  64     def requests_retry_session(retries=3,
  65                                backoff_factor=0.3,
  66                                status_forcelist=(500, 502, 504)):
  67         """
  68
  69         :param retries: Total number of retries to allow.
  70         :param backoff_factor: A backoff factor to apply between attempts after
  71             the second try.
  72         :param status_forcelist: A set of integer HTTP status codes that are
  73             forced to retry.
  74         :type retries: int
  75         :type backoff_factor: float
  76         :type status_forcelist: iterable
  77         :returns: Session object.
  78         :rtype: requests.Session
  79         """
  80
  81         retry = Retry(
  82             total=retries,
  83             read=retries,
  84             connect=retries,
  85             backoff_factor=backoff_factor,
  86             status_forcelist=status_forcelist,
  87         )
  88         adapter = HTTPAdapter(max_retries=retry)
  89         session = requests.Session()
  90         session.mount(u"http://", adapter)
  91         session.mount(u"https://", adapter)
  92         return session
  93
  94     success = False
  95     while repeat:
  96         repeat -= 1
  97         session = None
  98         try:
  99             logging.info(f"    Connecting to {url} ...")
 100             session = requests_retry_session()
 101             response = session.get(url, stream=True, verify=verify)
 102             code = response.status_code
 103             logging.info(f"    {code}: {responses[code]}")
 104
 105             if code != codes[u"OK"]:
 106                 if session:
 107                     session.close()
 108                 url = url.replace(u"_info", u"")
 109                 logging.info(f"    Connecting to {url} ...")
 110                 session = requests_retry_session()
 111                 response = session.get(url, stream=True, verify=verify)
 112                 code = response.status_code
 113                 logging.info(f"    {code}: {responses[code]}")
 114                 if code != codes[u"OK"]:
 115                     return False, file_name
 116                 file_name = file_name.replace(u"_info", u"")
 117
 118             dst_file_name = file_name.replace(u".gz", u"")
 119             logging.info(f"    Downloading the file {url} to {dst_file_name}")
 120             with open(dst_file_name, u"wb") as file_handle:
 121                 for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
 122                     if chunk:
 123                         file_handle.write(chunk)
 124
 125             if arch and u".gz" in file_name:
 126                 if session:
 127                     session.close()
 128                 logging.info(f"    Downloading the file {url} to {file_name}")
 129                 session = requests_retry_session()
 130                 response = session.get(url, stream=True, verify=verify)
 131                 if response.status_code == codes[u"OK"]:
 132                     with open(file_name, u"wb") as file_handle:
 133                         file_handle.write(response.raw.read())
 134                 else:
 135                     logging.error(
 136                         f"Not possible to download the file "
 137                         f"{url} to {file_name}"
 138                     )
 139
 140             success = True
 141             repeat = 0
 142         except (HTTPException, HTTPError) as err:
 143             logging.error(f"Connection broken:\n{repr(err)}")
 144         except RequestException as err:
 145             logging.error(f"HTTP Request exception:\n{repr(err)}")
 146         except (IOError, ValueError, KeyError) as err:
 147             logging.error(f"Download failed.\n{repr(err)}")
 148         finally:
 149             if session:
 150                 session.close()
 151
 152     logging.info(u"    Download finished.")
 153     return success, file_name
 154
 155
 156 def _unzip_file(spec, build, pid):
 157     """Unzip downloaded source file.
 158
 159     :param spec: Specification read form the specification file.
 160     :param build: Information about the build.
 161     :type spec: Specification
 162     :type build: dict
 163     :returns: True if the download was successful, otherwise False.
 164     :rtype: bool
 165     """
 166
 167     file_name = build[u"file-name"]
 168     if u".zip" in file_name:
 169         data_file = spec.input[u"zip-extract"]
 170     else:
 171         data_file = spec.input[u"extract"]
 172
 173     directory = spec.environment[u"paths"][u"DIR[WORKING,DATA]"]
 174     tmp_dir = join(directory, str(pid))
 175     try:
 176         mkdir(tmp_dir)
 177     except OSError:
 178         pass
 179     new_name = \
 180         f"{file_name.rsplit(u'.')[-2]}{SEPARATOR}{data_file.split(u'/')[-1]}"
 181
 182     logging.info(f"    Unzipping: {data_file} from {file_name}.")
 183     try:
 184         with ZipFile(file_name, u'r') as zip_file:
 185             zip_file.extract(data_file, tmp_dir)
 186         logging.info(
 187             f"    Renaming the file {join(tmp_dir, data_file)} to {new_name}"
 188         )
 189         rename(join(tmp_dir, data_file), new_name)
 190         build[u"file-name"] = new_name
 191         return True
 192     except (BadZipfile, RuntimeError) as err:
 193         logging.error(f"Failed to unzip the file {file_name}: {repr(err)}.")
 194         return False
 195     except OSError as err:
 196         logging.error(f"Failed to rename the file {data_file}: {repr(err)}.")
 197         return False
 198
 199
 200 def download_and_unzip_data_file(spec, job, build, pid):
 201     """Download and unzip a source file.
 202
 203     :param spec: Specification read form the specification file.
 204     :param job: Name of the Jenkins job.
 205     :param build: Information about the build.
 206     :param pid: PID of the process executing this method.
 207     :type spec: Specification
 208     :type job: str
 209     :type build: dict
 210     :type pid: int
 211     :returns: True if the download was successful, otherwise False.
 212     :rtype: bool
 213     """
 214
 215     success = False
 216
 217     file_name = spec.input[u"file-name"]
 218     new_name = join(
 219         spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 220         f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 221     )
 222     arch = bool(spec.configuration.get(u"archive-inputs", True))
 223     downloaded_name = u""
 224
 225     # Try to download .gz from s3_storage
 226     for path in spec.input[u'download-path']:
 227         url = u"{0}/{1}".format(
 228             spec.environment[u'urls'][u'URL[S3_STORAGE,LOG]'],
 229             path.format(job=job, build=build[u'build'], filename=file_name)
 230         )
 231         logging.info(f"Trying to download {url}")
 232         success, downloaded_name = _download_file(
 233             url, new_name, arch=arch, verify=False, repeat=3
 234         )
 235         if success:
 236             break
 237
 238     if not success:
 239         # Try to download .gz from logs.fd.io
 240         for path in spec.input[u'download-path']:
 241             url = u"{0}/{1}".format(
 242                 spec.environment[u'urls'][u'URL[NEXUS,LOG]'],
 243                 path.format(job=job, build=build[u'build'], filename=file_name)
 244             )
 245             logging.info(f"Trying to download {url}")
 246             success, downloaded_name = _download_file(
 247                 url, new_name, arch=arch, verify=True, repeat=3
 248             )
 249             if success:
 250                 break
 251
 252     if not success:
 253         # Try to download .gz or .zip from docs.fd.io
 254         file_name = (spec.input[u"file-name"], spec.input[u"zip-file-name"])
 255         release = re.search(REGEX_RELEASE, job).group(2)
 256         for idx, rls in enumerate((release, u"master", )):
 257             try:
 258                 rls = f"rls{int(rls)}"
 259             except ValueError:
 260                 # It is master
 261                 pass
 262             url = (
 263                 f"{spec.environment[u'urls'][u'URL[NEXUS,DOC]']}/"
 264                 f"{rls}/"
 265                 f"{spec.environment[u'urls'][u'DIR[NEXUS,DOC]']}/"
 266                 f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name[idx]}"
 267             )
 268
 269             logging.info(f"Downloading {url}")
 270
 271             new_name = join(
 272                 spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 273                 f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name[idx]}"
 274             )
 275             success, downloaded_name = _download_file(url, new_name, arch=arch)
 276             if success:
 277                 file_name = file_name[idx]
 278                 if file_name.endswith(u".gz"):
 279                     with gzip.open(downloaded_name[:-3], u"rb") as gzip_file:
 280                         file_content = gzip_file.read()
 281                     with open(downloaded_name[:-3], u"wb") as xml_file:
 282                         xml_file.write(file_content)
 283                 break
 284
 285     # if not success:
 286     #     # Try to download .zip from jenkins.fd.io
 287     #     file_name = spec.input[u"zip-file-name"]
 288     #     download_path = spec.input[u"zip-download-path"]
 289     #     if job.startswith(u"csit-"):
 290     #         url = spec.environment[u"urls"][u"URL[JENKINS,CSIT]"]
 291     #     else:
 292     #         raise PresentationError(f"No url defined for the job {job}.")
 293     #
 294     #     full_name = download_path.format(
 295     #         job=job, build=build[u"build"], filename=file_name
 296     #     )
 297     #     url = u"{0}/{1}".format(url, full_name)
 298     #     new_name = join(
 299     #         spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 300     #         f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 301     #     )
 302     #     logging.info(f"Downloading {url}")
 303     #     success, downloaded_name = _download_file(url, new_name)
 304
 305     if success and downloaded_name.endswith(u".zip"):
 306         if not is_zipfile(downloaded_name):
 307             logging.error(f"Zip file {new_name} is corrupted.")
 308             success = False
 309
 310     if success:
 311         build[u"file-name"] = downloaded_name
 312
 313         if file_name.endswith(u".gz"):
 314             build[u"file-name"] = downloaded_name[:-3]
 315
 316         if downloaded_name.endswith(u".zip"):
 317             success = _unzip_file(spec, build, pid)
 318
 319     return success