resources/tools/presentation/input_data_files.py

   1 # Copyright (c) 2020 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Inputs
  15 Download all data.
  16 """
  17
  18 import re
  19 import logging
  20 import gzip
  21
  22 from os import rename, mkdir
  23 from os.path import join
  24 from http.client import responses, IncompleteRead
  25 from zipfile import ZipFile, is_zipfile, BadZipfile
  26
  27 import requests
  28
  29 from requests.adapters import HTTPAdapter, Retry
  30 from requests.exceptions import RequestException
  31 from requests import codes
  32
  33 from pal_errors import PresentationError
  34
  35
  36 # Chunk size used for file download
  37 CHUNK_SIZE = 512
  38
  39 # Separator used in file names
  40 SEPARATOR = u"__"
  41
  42 REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)')
  43
  44
  45 def _download_file(url, file_name, arch=False, verify=True, repeat=1):
  46     """Download a file with input data.
  47
  48     :param url: URL to the file to download.
  49     :param file_name: Name of file to download.
  50     :param arch: If True, also .gz file is downloaded.
  51     :param verify: If true, verify the certificate.
  52     :param repeat: The number of attempts to download the file.
  53     :type url: str
  54     :type file_name: str
  55     :type arch: bool
  56     :type verify: bool
  57     :type repeat: int
  58     :returns: True if the download was successful, otherwise False.
  59     :rtype: bool
  60     """
  61
  62     def requests_retry_session(retries=3,
  63                                backoff_factor=0.3,
  64                                status_forcelist=(500, 502, 504)):
  65         """
  66
  67         :param retries: Total number of retries to allow.
  68         :param backoff_factor: A backoff factor to apply between attempts after
  69             the second try.
  70         :param status_forcelist: A set of integer HTTP status codes that are
  71             forced to retry.
  72         :type retries: int
  73         :type backoff_factor: float
  74         :type status_forcelist: iterable
  75         :returns: Session object.
  76         :rtype: requests.Session
  77         """
  78
  79         retry = Retry(
  80             total=retries,
  81             read=retries,
  82             connect=retries,
  83             backoff_factor=backoff_factor,
  84             status_forcelist=status_forcelist,
  85         )
  86         adapter = HTTPAdapter(max_retries=retry)
  87         session = requests.Session()
  88         session.mount(u"http://", adapter)
  89         session.mount(u"https://", adapter)
  90         return session
  91
  92     while repeat:
  93         repeat -= 1
  94         success = False
  95         session = None
  96         try:
  97             logging.info(f"    Connecting to {url} ...")
  98             session = requests_retry_session()
  99             response = session.get(url, stream=True, verify=verify)
 100             code = response.status_code
 101             logging.info(f"    {code}: {responses[code]}")
 102
 103             if code != codes[u"OK"]:
 104                 if session:
 105                     session.close()
 106                 url = url.replace(u"_info", u"")
 107                 logging.info(f"    Connecting to {url} ...")
 108                 session = requests_retry_session()
 109                 response = session.get(url, stream=True, verify=verify)
 110                 code = response.status_code
 111                 logging.info(f"    {code}: {responses[code]}")
 112                 if code != codes[u"OK"]:
 113                     return False, file_name
 114                 file_name = file_name.replace(u"_info", u"")
 115
 116             dst_file_name = file_name.replace(u".gz", u"")
 117             logging.info(f"    Downloading the file {url} to {dst_file_name}")
 118             with open(dst_file_name, u"wb") as file_handle:
 119                 for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
 120                     if chunk:
 121                         file_handle.write(chunk)
 122
 123             if arch and u".gz" in file_name:
 124                 if session:
 125                     session.close()
 126                 logging.info(f"    Downloading the file {url} to {file_name}")
 127                 session = requests_retry_session()
 128                 response = session.get(url, stream=True, verify=verify)
 129                 if response.status_code == codes[u"OK"]:
 130                     with open(file_name, u"wb") as file_handle:
 131                         file_handle.write(response.raw.read())
 132                 else:
 133                     logging.error(
 134                         f"Not possible to download the file "
 135                         f"{url} to {file_name}"
 136                     )
 137
 138             success = True
 139             repeat = 0
 140         except IncompleteRead as err:
 141             logging.error(f"Connection broken:\n{repr(err)}")
 142         except RequestException as err:
 143             logging.error(f"HTTP Request exception:\n{repr(err)}")
 144         except (IOError, ValueError, KeyError) as err:
 145             logging.error(f"Download failed.\n{repr(err)}")
 146         finally:
 147             if session:
 148                 session.close()
 149
 150     logging.info(u"    Download finished.")
 151     return success, file_name
 152
 153
 154 def _unzip_file(spec, build, pid):
 155     """Unzip downloaded source file.
 156
 157     :param spec: Specification read form the specification file.
 158     :param build: Information about the build.
 159     :type spec: Specification
 160     :type build: dict
 161     :returns: True if the download was successful, otherwise False.
 162     :rtype: bool
 163     """
 164
 165     file_name = build[u"file-name"]
 166     if u".zip" in file_name:
 167         data_file = spec.input[u"zip-extract"]
 168     else:
 169         data_file = spec.input[u"extract"]
 170
 171     directory = spec.environment[u"paths"][u"DIR[WORKING,DATA]"]
 172     tmp_dir = join(directory, str(pid))
 173     try:
 174         mkdir(tmp_dir)
 175     except OSError:
 176         pass
 177     new_name = \
 178         f"{file_name.rsplit(u'.')[-2]}{SEPARATOR}{data_file.split(u'/')[-1]}"
 179
 180     logging.info(f"    Unzipping: {data_file} from {file_name}.")
 181     try:
 182         with ZipFile(file_name, u'r') as zip_file:
 183             zip_file.extract(data_file, tmp_dir)
 184         logging.info(
 185             f"    Renaming the file {join(tmp_dir, data_file)} to {new_name}"
 186         )
 187         rename(join(tmp_dir, data_file), new_name)
 188         build[u"file-name"] = new_name
 189         return True
 190     except (BadZipfile, RuntimeError) as err:
 191         logging.error(f"Failed to unzip the file {file_name}: {repr(err)}.")
 192         return False
 193     except OSError as err:
 194         logging.error(f"Failed to rename the file {data_file}: {repr(err)}.")
 195         return False
 196
 197
 198 def download_and_unzip_data_file(spec, job, build, pid):
 199     """Download and unzip a source file.
 200
 201     :param spec: Specification read form the specification file.
 202     :param job: Name of the Jenkins job.
 203     :param build: Information about the build.
 204     :param pid: PID of the process executing this method.
 205     :type spec: Specification
 206     :type job: str
 207     :type build: dict
 208     :type pid: int
 209     :returns: True if the download was successful, otherwise False.
 210     :rtype: bool
 211     """
 212
 213     # Try to download .gz from s3_storage
 214     file_name = spec.input[u"file-name"]
 215     url = u"{0}/{1}".format(
 216         spec.environment[u'urls'][u'URL[S3_STORAGE,LOG]'],
 217         spec.input[u'download-path'].format(
 218             job=job, build=build[u'build'], filename=file_name
 219         )
 220     )
 221     new_name = join(
 222         spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 223         f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 224     )
 225
 226     logging.info(f"Trying to download {url}")
 227
 228     arch = bool(spec.configuration.get(u"archive-inputs", True))
 229     success, downloaded_name = _download_file(
 230         url, new_name, arch=arch, verify=False, repeat=3
 231     )
 232
 233     if not success:
 234         # Try to download .gz from logs.fd.io
 235         file_name = spec.input[u"file-name"]
 236         url = u"{0}/{1}".format(
 237             spec.environment[u'urls'][u'URL[NEXUS,LOG]'],
 238             spec.input[u'download-path'].format(
 239                 job=job, build=build[u'build'], filename=file_name
 240             )
 241         )
 242         new_name = join(
 243             spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 244             f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 245         )
 246
 247         logging.info(f"Trying to download {url}")
 248
 249         arch = bool(spec.configuration.get(u"archive-inputs", True))
 250         success, downloaded_name = _download_file(url, new_name, arch=arch)
 251
 252     if not success:
 253
 254         # Try to download .gz or .zip from docs.fd.io
 255         file_name = (spec.input[u"file-name"], spec.input[u"zip-file-name"])
 256         release = re.search(REGEX_RELEASE, job).group(2)
 257         for idx, rls in enumerate((release, u"master", )):
 258             try:
 259                 rls = f"rls{int(rls)}"
 260             except ValueError:
 261                 # It is master
 262                 pass
 263             url = (
 264                 f"{spec.environment[u'urls'][u'URL[NEXUS,DOC]']}/"
 265                 f"{rls}/"
 266                 f"{spec.environment[u'urls'][u'DIR[NEXUS,DOC]']}/"
 267                 f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name[idx]}"
 268             )
 269
 270             logging.info(f"Downloading {url}")
 271
 272             new_name = join(
 273                 spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 274                 f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name[idx]}"
 275             )
 276             success, downloaded_name = _download_file(url, new_name, arch=arch)
 277             if success:
 278                 file_name = file_name[idx]
 279                 if file_name.endswith(u".gz"):
 280                     with gzip.open(downloaded_name[:-3], u"rb") as gzip_file:
 281                         file_content = gzip_file.read()
 282                     with open(downloaded_name[:-3], u"wb") as xml_file:
 283                         xml_file.write(file_content)
 284                 break
 285
 286     if not success:
 287
 288         # Try to download .zip from jenkins.fd.io
 289         file_name = spec.input[u"zip-file-name"]
 290         download_path = spec.input[u"zip-download-path"]
 291         if job.startswith(u"csit-"):
 292             url = spec.environment[u"urls"][u"URL[JENKINS,CSIT]"]
 293         else:
 294             raise PresentationError(f"No url defined for the job {job}.")
 295
 296         full_name = download_path.format(
 297             job=job, build=build[u"build"], filename=file_name
 298         )
 299         url = u"{0}/{1}".format(url, full_name)
 300         new_name = join(
 301             spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 302             f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 303         )
 304
 305         logging.info(f"Downloading {url}")
 306
 307         success, downloaded_name = _download_file(url, new_name)
 308
 309     if success and downloaded_name.endswith(u".zip"):
 310         if not is_zipfile(downloaded_name):
 311             logging.error(f"Zip file {new_name} is corrupted.")
 312             success = False
 313
 314     if success:
 315         build[u"file-name"] = downloaded_name
 316
 317         if file_name.endswith(u".gz"):
 318             build[u"file-name"] = downloaded_name[:-3]
 319
 320         if downloaded_name.endswith(u".zip"):
 321             success = _unzip_file(spec, build, pid)
 322
 323     return success