resources/tools/presentation/input_data_files.py

   1 # Copyright (c) 2020 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Inputs
  15 Download all data.
  16 """
  17
  18 import re
  19 import logging
  20 import gzip
  21
  22 from os import rename, mkdir
  23 from os.path import join
  24 from http.client import responses
  25 from zipfile import ZipFile, is_zipfile, BadZipfile
  26
  27 import requests
  28
  29 from requests.adapters import HTTPAdapter, Retry
  30 from requests.exceptions import RequestException
  31 from requests import codes
  32
  33 from pal_errors import PresentationError
  34
  35
  36 # Chunk size used for file download
  37 CHUNK_SIZE = 512
  38
  39 # Separator used in file names
  40 SEPARATOR = u"__"
  41
  42 REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)')
  43
  44
  45 def _download_file(url, file_name, arch=False):
  46     """Download a file with input data.
  47
  48     :param url: URL to the file to download.
  49     :param file_name: Name of file to download.
  50     :param arch: If True, also .gz file is downloaded
  51     :type url: str
  52     :type file_name: str
  53     :type arch: bool
  54     :returns: True if the download was successful, otherwise False.
  55     :rtype: bool
  56     """
  57
  58     def requests_retry_session(retries=3,
  59                                backoff_factor=0.3,
  60                                status_forcelist=(500, 502, 504)):
  61         """
  62
  63         :param retries: Total number of retries to allow.
  64         :param backoff_factor: A backoff factor to apply between attempts after
  65             the second try.
  66         :param status_forcelist: A set of integer HTTP status codes that are
  67             forced to retry.
  68         :type retries: int
  69         :type backoff_factor: float
  70         :type status_forcelist: iterable
  71         :returns: Session object.
  72         :rtype: requests.Session
  73         """
  74
  75         retry = Retry(
  76             total=retries,
  77             read=retries,
  78             connect=retries,
  79             backoff_factor=backoff_factor,
  80             status_forcelist=status_forcelist,
  81         )
  82         adapter = HTTPAdapter(max_retries=retry)
  83         session = requests.Session()
  84         session.mount(u"http://", adapter)
  85         session.mount(u"https://", adapter)
  86         return session
  87
  88     success = False
  89     session = None
  90     try:
  91         logging.info(f"    Connecting to {url} ...")
  92         session = requests_retry_session()
  93         response = session.get(url, stream=True)
  94         code = response.status_code
  95         logging.info(f"    {code}: {responses[code]}")
  96
  97         if code != codes[u"OK"]:
  98             if session:
  99                 session.close()
 100             url = url.replace(u"_info", u"")
 101             logging.info(f"    Connecting to {url} ...")
 102             session = requests_retry_session()
 103             response = session.get(url, stream=True)
 104             code = response.status_code
 105             logging.info(f"    {code}: {responses[code]}")
 106             if code != codes[u"OK"]:
 107                 return False, file_name
 108             file_name = file_name.replace(u"_info", u"")
 109
 110         dst_file_name = file_name.replace(u".gz", u"")
 111         logging.info(f"    Downloading the file {url} to {dst_file_name} ...")
 112         with open(dst_file_name, u"wb") as file_handle:
 113             for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
 114                 if chunk:
 115                     file_handle.write(chunk)
 116
 117         if arch and u".gz" in file_name:
 118             if session:
 119                 session.close()
 120             logging.info(f"    Downloading the file {url} to {file_name} ...")
 121             session = requests_retry_session()
 122             response = session.get(url, stream=True)
 123             if response.status_code == codes[u"OK"]:
 124                 with open(file_name, u"wb") as file_handle:
 125                     file_handle.write(response.raw.read())
 126             else:
 127                 logging.error(
 128                     f"Not possible to download the file {url} to {file_name}"
 129                 )
 130
 131         success = True
 132     except RequestException as err:
 133         logging.error(f"HTTP Request exception:\n{repr(err)}")
 134     except (IOError, ValueError, KeyError) as err:
 135         logging.error(f"Download failed.\n{repr(err)}")
 136     finally:
 137         if session:
 138             session.close()
 139
 140     logging.info(u"    Download finished.")
 141     return success, file_name
 142
 143
 144 def _unzip_file(spec, build, pid):
 145     """Unzip downloaded source file.
 146
 147     :param spec: Specification read form the specification file.
 148     :param build: Information about the build.
 149     :type spec: Specification
 150     :type build: dict
 151     :returns: True if the download was successful, otherwise False.
 152     :rtype: bool
 153     """
 154
 155     file_name = build[u"file-name"]
 156     if u".zip" in file_name:
 157         data_file = spec.input[u"zip-extract"]
 158     else:
 159         data_file = spec.input[u"extract"]
 160
 161     directory = spec.environment[u"paths"][u"DIR[WORKING,DATA]"]
 162     tmp_dir = join(directory, str(pid))
 163     try:
 164         mkdir(tmp_dir)
 165     except OSError:
 166         pass
 167     new_name = \
 168         f"{file_name.rsplit(u'.')[-2]}{SEPARATOR}{data_file.split(u'/')[-1]}"
 169
 170     logging.info(f"    Unzipping: {data_file} from {file_name}.")
 171     try:
 172         with ZipFile(file_name, u'r') as zip_file:
 173             zip_file.extract(data_file, tmp_dir)
 174         logging.info(
 175             f"    Renaming the file {join(tmp_dir, data_file)} to {new_name}"
 176         )
 177         rename(join(tmp_dir, data_file), new_name)
 178         build[u"file-name"] = new_name
 179         return True
 180     except (BadZipfile, RuntimeError) as err:
 181         logging.error(f"Failed to unzip the file {file_name}: {repr(err)}.")
 182         return False
 183     except OSError as err:
 184         logging.error(f"Failed to rename the file {data_file}: {repr(err)}.")
 185         return False
 186
 187
 188 def download_and_unzip_data_file(spec, job, build, pid):
 189     """Download and unzip a source file.
 190
 191     :param spec: Specification read form the specification file.
 192     :param job: Name of the Jenkins job.
 193     :param build: Information about the build.
 194     :param pid: PID of the process executing this method.
 195     :type spec: Specification
 196     :type job: str
 197     :type build: dict
 198     :type pid: int
 199     :returns: True if the download was successful, otherwise False.
 200     :rtype: bool
 201     """
 202
 203     # Try to download .gz from logs.fd.io
 204
 205     file_name = spec.input[u"file-name"]
 206     url = u"{0}/{1}".format(
 207         spec.environment[u'urls'][u'URL[NEXUS,LOG]'],
 208         spec.input[u'download-path'].format(
 209             job=job, build=build[u'build'], filename=file_name
 210         )
 211     )
 212     new_name = join(
 213         spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 214         f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 215     )
 216
 217     logging.info(f"Trying to download {url}")
 218
 219     arch = bool(spec.configuration.get(u"archive-inputs", True))
 220     success, downloaded_name = _download_file(url, new_name, arch=arch)
 221
 222     if not success:
 223
 224         # Try to download .gz or .zip from docs.fd.io
 225         file_name = (spec.input[u"file-name"], spec.input[u"zip-file-name"])
 226         release = re.search(REGEX_RELEASE, job).group(2)
 227         for idx, rls in enumerate((release, u"master", )):
 228             try:
 229                 rls = f"rls{int(rls)}"
 230             except ValueError:
 231                 # It is master
 232                 pass
 233             url = (
 234                 f"{spec.environment[u'urls'][u'URL[NEXUS,DOC]']}/"
 235                 f"{rls}/"
 236                 f"{spec.environment[u'urls'][u'DIR[NEXUS,DOC]']}/"
 237                 f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name[idx]}"
 238             )
 239
 240             logging.info(f"Downloading {url}")
 241
 242             new_name = join(
 243                 spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 244                 f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name[idx]}"
 245             )
 246             success, downloaded_name = _download_file(url, new_name, arch=arch)
 247             if success:
 248                 file_name = file_name[idx]
 249                 if file_name.endswith(u".gz"):
 250                     with gzip.open(downloaded_name[:-3], u"rb") as gzip_file:
 251                         file_content = gzip_file.read()
 252                     with open(downloaded_name[:-3], u"wb") as xml_file:
 253                         xml_file.write(file_content)
 254                 break
 255
 256     if not success:
 257
 258         # Try to download .zip from jenkins.fd.io
 259         file_name = spec.input[u"zip-file-name"]
 260         download_path = spec.input[u"zip-download-path"]
 261         if job.startswith(u"csit-"):
 262             url = spec.environment[u"urls"][u"URL[JENKINS,CSIT]"]
 263         elif job.startswith(u"hc2vpp-"):
 264             url = spec.environment[u"urls"][u"URL[JENKINS,HC]"]
 265         else:
 266             raise PresentationError(f"No url defined for the job {job}.")
 267
 268         full_name = download_path.format(
 269             job=job, build=build[u"build"], filename=file_name
 270         )
 271         url = u"{0}/{1}".format(url, full_name)
 272         new_name = join(
 273             spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
 274             f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
 275         )
 276
 277         logging.info(f"Downloading {url}")
 278
 279         success, downloaded_name = _download_file(url, new_name)
 280
 281     if success and downloaded_name.endswith(u".zip"):
 282         if not is_zipfile(downloaded_name):
 283             logging.error(f"Zip file {new_name} is corrupted.")
 284             success = False
 285
 286     if success:
 287         build[u"file-name"] = downloaded_name
 288
 289         if file_name.endswith(u".gz"):
 290             build[u"file-name"] = downloaded_name[:-3]
 291
 292         if downloaded_name.endswith(u".zip"):
 293             success = _unzip_file(spec, build, pid)
 294
 295     return success