resources/tools/presentation/input_data_files.py

   1 # Copyright (c) 2018 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Inputs
  15 Download all data.
  16 """
  17
  18 import re
  19 import logging
  20 import gzip
  21
  22 from os import rename, mkdir
  23 from os.path import join
  24 from http.client import responses
  25 from zipfile import ZipFile, is_zipfile, BadZipfile
  26
  27 import requests
  28
  29 from requests.adapters import HTTPAdapter, Retry
  30 from requests.exceptions import RequestException
  31 from requests import codes
  32
  33 from pal_errors import PresentationError
  34
  35
  36 # Chunk size used for file download
  37 CHUNK_SIZE = 512
  38
  39 # Separator used in file names
  40 SEPARATOR = u"__"
  41
  42 REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)')
  43
  44
  45 def _download_file(url, file_name, log, arch=False):
  46     """Download a file with input data.
  47
  48     :param url: URL to the file to download.
  49     :param file_name: Name of file to download.
  50     :param log: List of log messages.
  51     :param arch: If True, also .gz file is downloaded
  52     :type url: str
  53     :type file_name: str
  54     :type log: list of tuples (severity, msg)
  55     :type arch: bool
  56     :returns: True if the download was successful, otherwise False.
  57     :rtype: bool
  58     """
  59
  60     def requests_retry_session(retries=3,
  61                                backoff_factor=0.3,
  62                                status_forcelist=(500, 502, 504)):
  63         """
  64
  65         :param retries: Total number of retries to allow.
  66         :param backoff_factor: A backoff factor to apply between attempts after
  67             the second try.
  68         :param status_forcelist: A set of integer HTTP status codes that are
  69             forced to retry.
  70         :type retries: int
  71         :type backoff_factor: float
  72         :type status_forcelist: iterable
  73         :returns: Session object.
  74         :rtype: requests.Session
  75         """
  76
  77         retry = Retry(
  78             total=retries,
  79             read=retries,
  80             connect=retries,
  81             backoff_factor=backoff_factor,
  82             status_forcelist=status_forcelist,
  83         )
  84         adapter = HTTPAdapter(max_retries=retry)
  85         session = requests.Session()
  86         session.mount(u"http://", adapter)
  87         session.mount(u"https://", adapter)
  88         return session
  89
  90     success = False
  91     session = None
  92     try:
  93         log.append((u"INFO", f"    Connecting to {url} ..."))
  94         session = requests_retry_session()
  95         response = session.get(url, stream=True)
  96         code = response.status_code
  97         log.append((u"INFO", f"    {code}: {responses[code]}"))
  98
  99         if code != codes[u"OK"]:
 100             if session:
 101                 session.close()
 102             url = url.replace(u"_info", u"")
 103             log.append((u"INFO", f"    Connecting to {url} ..."))
 104             session = requests_retry_session()
 105             response = session.get(url, stream=True)
 106             code = response.status_code
 107             log.append((u"INFO", f"    {code}: {responses[code]}"))
 108             if code != codes[u"OK"]:
 109                 return False, file_name
 110             file_name = file_name.replace(u"_info", u"")
 111
 112         dst_file_name = file_name.replace(u".gz", u"")
 113         log.append(
 114             (u"INFO", f"    Downloading the file {url} to {dst_file_name} ...")
 115         )
 116         with open(dst_file_name, u"wb") as file_handle:
 117             for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
 118                 if chunk:
 119                     file_handle.write(chunk)
 120
 121         if arch and u".gz" in file_name:
 122             if session:
 123                 session.close()
 124             log.append(
 125                 (u"INFO", f"    Downloading the file {url} to {file_name} ...")
 126             )
 127             session = requests_retry_session()
 128             response = session.get(url, stream=True)
 129             if response.status_code == codes[u"OK"]:
 130                 with open(file_name, u"wb") as file_handle:
 131                     file_handle.write(response.raw.read())
 132             else:
 133                 log.append(
 134                     (u"ERROR", f"Not possible to download the file {url} to "
 135                                f"{file_name} ...")
 136                 )
 137
 138         success = True
 139     except RequestException as err:
 140         log.append(
 141             (u"ERROR", f"HTTP Request exception:\n{repr(err)}")
 142         )
 143     except (IOError, ValueError, KeyError) as err:
 144         log.append((u"ERROR", f"Download failed.\n{repr(err)}"))
 145     finally:
 146         if session:
 147             session.close()
 148
 149     log.append((u"INFO", u"    Download finished."))
 150     return success, file_name
 151
 152
 153 def _unzip_file(spec, build, pid, log):
 154     """Unzip downloaded source file.
 155
 156     :param spec: Specification read form the specification file.
 157     :param build: Information about the build.
 158     :param log: List of log messages.
 159     :type spec: Specification
 160     :type build: dict
 161     :type log: list of tuples (severity, msg)
 162     :returns: True if the download was successful, otherwise False.
 163     :rtype: bool
 164     """
 165
 166     file_name = build[u"file-name"]
 167     if u".zip" in file_name:
 168         data_file = spec.input[u"zip-extract"]
 169     else:
 170         data_file = spec.input[u"extract"]
 171
 172     directory = spec.environment[u"paths"][u"DIR[WORKING,DATA]"]
 173     tmp_dir = join(directory, str(pid))
 174     try:
 175         mkdir(tmp_dir)
 176     except OSError:
 177         pass
 178     new_name = \
 179         f"{file_name.rsplit(u'.')[-2]}{SEPARATOR}{data_file.split(u'/')[-1]}"
 180
 181     log.append((u"INFO", f"    Unzipping: {data_file} from {file_name}."))
 182     try:
 183         with ZipFile(file_name, u'r') as zip_file:
 184             zip_file.extract(data_file, tmp_dir)
 185         log.append(
 186             (u"INFO", f"    Renaming the file {join(tmp_dir, data_file)} to "
 187                       f"{new_name}")
 188         )
 189         rename(join(tmp_dir, data_file), new_name)
 190         build[u"file-name"] = new_name
 191         return True
 192     except (BadZipfile, RuntimeError) as err:
 193         log.append(
 194             (u"ERROR", f"Failed to unzip the file {file_name}: {repr(err)}.")
 195         )
 196         return False
 197     except OSError as err:
 198         log.append(
 199             (u"ERROR", f"Failed to rename the file {data_file}: {repr(err)}.")
 200         )
 201         return False
 202
 203
 204 def download_and_unzip_data_file(spec, job, build, pid, log):
 205     """Download and unzip a source file.
 206
 207     :param spec: Specification read form the specification file.
 208     :param job: Name of the Jenkins job.
 209     :param build: Information about the build.
 210     :param pid: PID of the process executing this method.
 211     :param log: List of log messages.
 212     :type spec: Specification
 213     :type job: str
 214     :type build: dict
 215     :type pid: int
 216     :type log: list of tuples (severity, msg)
 217     :returns: True if the download was successful, otherwise False.
 218     :rtype: bool
 219     """
 220
 221     # Try to download .gz from logs.fd.io
 222
 223     file_name = spec.input[u"file-name"]
 224     url = u"{0}/{1}".format(
 225         spec.environment[u'urls'][u'URL[NEXUS,LOG]'],
 226         spec.input[u'download-path'].format(
 227             job=job, build=build[u'build'], filename=file_name
 228         )
 229     )
 230     new_name = join(
 231         spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 232         f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 233     )
 234
 235     logging.info(f"Trying to download {url}")
 236
 237     arch = bool(spec.configuration.get(u"archive-inputs", True))
 238     success, downloaded_name = _download_file(url, new_name, log, arch=arch)
 239
 240     if not success:
 241
 242         # Try to download .gz or .zip from docs.fd.io
 243         file_name = (spec.input[u"file-name"], spec.input[u"zip-file-name"])
 244         release = re.search(REGEX_RELEASE, job).group(2)
 245         for idx, rls in enumerate((release, u"master", )):
 246             try:
 247                 rls = f"rls{int(rls)}"
 248             except ValueError:
 249                 # It is master
 250                 pass
 251             url = (
 252                 f"{spec.environment[u'urls'][u'URL[NEXUS,DOC]']}/"
 253                 f"{rls}/"
 254                 f"{spec.environment[u'urls'][u'DIR[NEXUS,DOC]']}/"
 255                 f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name[idx]}"
 256             )
 257
 258             logging.info(f"Downloading {url}")
 259
 260             new_name = join(
 261                 spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 262                 f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name[idx]}"
 263             )
 264             success, downloaded_name = _download_file(
 265                 url, new_name, log, arch=arch
 266             )
 267             if success:
 268                 file_name = file_name[idx]
 269                 if file_name.endswith(u".gz"):
 270                     with gzip.open(downloaded_name[:-3], u"rb") as gzip_file:
 271                         file_content = gzip_file.read()
 272                     with open(downloaded_name[:-3], u"wb") as xml_file:
 273                         xml_file.write(file_content)
 274                 break
 275
 276     if not success:
 277
 278         # Try to download .zip from jenkins.fd.io
 279         file_name = spec.input[u"zip-file-name"]
 280         download_path = spec.input[u"zip-download-path"]
 281         if job.startswith(u"csit-"):
 282             url = spec.environment[u"urls"][u"URL[JENKINS,CSIT]"]
 283         elif job.startswith(u"hc2vpp-"):
 284             url = spec.environment[u"urls"][u"URL[JENKINS,HC]"]
 285         else:
 286             raise PresentationError(f"No url defined for the job {job}.")
 287
 288         full_name = download_path.format(
 289             job=job, build=build[u"build"], filename=file_name
 290         )
 291         url = u"{0}/{1}".format(url, full_name)
 292         new_name = join(
 293             spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 294             f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 295         )
 296
 297         logging.info(f"Downloading {url}")
 298
 299         success, downloaded_name = _download_file(url, new_name, log)
 300
 301     if success and downloaded_name.endswith(u".zip"):
 302         if not is_zipfile(downloaded_name):
 303             log.append((u"ERROR", f"Zip file {new_name} is corrupted."))
 304             success = False
 305
 306     if success:
 307         build[u"file-name"] = downloaded_name
 308
 309         if file_name.endswith(u".gz"):
 310             build[u"file-name"] = downloaded_name[:-3]
 311
 312         if downloaded_name.endswith(u".zip"):
 313             success = _unzip_file(spec, build, pid, log)
 314
 315     return success