PAL: Archiving of input data
[csit.git] / resources / tools / presentation / input_data_files.py
1 # Copyright (c) 2018 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
5 #
6 #     http://www.apache.org/licenses/LICENSE-2.0
7 #
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13
14 """Inputs
15 Download all data.
16 """
17
18 import re
19 import requests
20 import logging
21
22 from os import rename, mkdir
23 from os.path import join
24 from zipfile import ZipFile, is_zipfile, BadZipfile
25 from httplib import responses
26 from requests.adapters import HTTPAdapter
27 from requests.packages.urllib3.util.retry import Retry
28 from requests import codes, RequestException, Timeout, TooManyRedirects, \
29     HTTPError, ConnectionError
30
31 from errors import PresentationError
32
33
# Chunk size (in bytes) used for file download
CHUNK_SIZE = 512

# Separator used in file names
SEPARATOR = "__"

# Matches the release token in a job name: group 2 is either four digits or
# the literal "master"; groups 1 and 3 are the surrounding non-digit text.
REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)')
41
42
def _download_file(url, file_name, log, arch=False):
    """Download a file with input data.

    The URL is tried as given; if the response is not 200 OK, a second
    attempt is made with every "_info" removed from the URL (and, on success,
    from the local file name as well).

    The body is written through ``iter_content`` to ``file_name`` with a
    trailing ".gz" stripped. If ``arch`` is set and ``file_name`` ends with
    ".gz", the URL is fetched once more and the raw (undecoded) stream is
    stored under ``file_name`` itself.
    NOTE(review): presumably the server delivers ".gz" files with gzip
    content encoding, so the first copy is the unpacked data - confirm.

    :param url: URL to the file to download.
    :param file_name: Name of file to download.
    :param log: List of log messages.
    :param arch: If True, also .gz file is downloaded
    :type url: str
    :type file_name: str
    :type log: list of tuples (severity, msg)
    :type arch: bool
    :returns: Tuple (success, file_name); success is True if the download was
        successful, file_name is the local name really used (it differs from
        the argument only when the "_info" fallback was taken).
    :rtype: tuple(bool, str)
    """

    def requests_retry_session(retries=3,
                               backoff_factor=0.3,
                               status_forcelist=(500, 502, 504)):
        """Create a session which retries failed requests.

        :param retries: Total number of retries to allow.
        :param backoff_factor: A backoff factor to apply between attempts after
            the second try.
        :param status_forcelist: A set of integer HTTP status codes that are
            forced to retry.
        :type retries: int
        :type backoff_factor: float
        :type status_forcelist: iterable
        :returns: Session object.
        :rtype: requests.Session
        """

        retry = Retry(
            total=retries,
            read=retries,
            connect=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
        )
        adapter = HTTPAdapter(max_retries=retry)
        new_session = requests.Session()
        new_session.mount('http://', adapter)
        new_session.mount('https://', adapter)
        return new_session

    def log_status(status):
        """Log the HTTP status code with its reason phrase.

        :param status: HTTP status code.
        :type status: int
        """
        # .get(): a non-standard status code must not abort the download
        # with a KeyError just because it has no known reason phrase.
        log.append(("INFO", "    {0}: {1}".format(
            status, responses.get(status, "Unknown status"))))

    success = False
    # Bound before the try block so that the finally clause never hits an
    # unbound name when creating the session itself fails.
    session = None
    try:
        log.append(("INFO", "    Connecting to '{0}' ...".format(url)))
        session = requests_retry_session()
        response = session.get(url, stream=True)
        code = response.status_code
        log_status(code)

        if code != codes["OK"]:
            # Second chance: the same resource without "_info" in its name.
            session.close()
            url = url.replace("_info", "")
            log.append(("INFO", "    Connecting to '{0}' ...".format(url)))
            session = requests_retry_session()
            response = session.get(url, stream=True)
            code = response.status_code
            log_status(code)
            if code != codes["OK"]:
                return False, file_name
            file_name = file_name.replace("_info", "")

        # Only a trailing ".gz" is dropped; this matches the caller, which
        # derives the unpacked name by cutting the last three characters.
        is_gz = file_name.endswith(".gz")
        dst_file_name = file_name[:-3] if is_gz else file_name
        log.append(("INFO", "    Downloading the file '{0}' to '{1}' ...".
                    format(url, dst_file_name)))
        with open(dst_file_name, "wb") as file_handle:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                if chunk:
                    file_handle.write(chunk)

        if arch and is_gz:
            session.close()
            log.append(("INFO", "    Downloading the file '{0}' to '{1}' ...".
                        format(url, file_name)))
            session = requests_retry_session()
            response = session.get(url, stream=True)
            if response.status_code == codes["OK"]:
                # Copy the raw stream chunk by chunk instead of reading the
                # whole archive into memory at once.
                raw = response.raw
                with open(file_name, "wb") as file_handle:
                    while True:
                        chunk = raw.read(CHUNK_SIZE)
                        if not chunk:
                            break
                        file_handle.write(chunk)
            else:
                # Archiving is best effort; the data file itself is already
                # stored, so the download still counts as successful.
                log.append(("ERROR", "Not possible to download the file '{0}' "
                                     "to '{1}' ...".format(url, file_name)))

        success = True
    except ConnectionError as err:
        log.append(("ERROR", "Not possible to connect to '{0}'.".format(url)))
        log.append(("DEBUG", repr(err)))
    except HTTPError as err:
        log.append(("ERROR", "Invalid HTTP response from '{0}'.".format(url)))
        log.append(("DEBUG", repr(err)))
    except TooManyRedirects as err:
        log.append(("ERROR", "Request exceeded the configured number "
                             "of maximum re-directions."))
        log.append(("DEBUG", repr(err)))
    except Timeout as err:
        log.append(("ERROR", "Request timed out."))
        log.append(("DEBUG", repr(err)))
    except RequestException as err:
        log.append(("ERROR", "Unexpected HTTP request exception."))
        log.append(("DEBUG", repr(err)))
    except (IOError, ValueError, KeyError) as err:
        log.append(("ERROR", "Download failed."))
        log.append(("DEBUG", repr(err)))
    finally:
        if session is not None:
            session.close()

    log.append(("INFO", "    Download finished."))
    return success, file_name
157
158
def _unzip_file(spec, build, pid, log):
    """Unzip downloaded source file.

    The data file named in the specification is extracted from the archive
    ``build["file-name"]`` into a per-process temporary directory and then
    moved next to the archive under the name
    "<archive name without extension><SEPARATOR><data file base name>".
    On success ``build["file-name"]`` is updated to that new name.

    :param spec: Specification read from the specification file.
    :param build: Information about the build.
    :param pid: PID of the process executing this method; used as the name of
        the temporary directory.
    :param log: List of log messages.
    :type spec: Specification
    :type build: dict
    :type pid: int
    :type log: list of tuples (severity, msg)
    :returns: True if the file was extracted and renamed, otherwise False.
    :rtype: bool
    """

    file_name = build["file-name"]
    if ".zip" in file_name:
        data_file = spec.input["zip-extract"]
    else:
        data_file = spec.input["extract"]

    directory = spec.environment["paths"]["DIR[WORKING,DATA]"]
    tmp_dir = join(directory, str(pid))
    try:
        mkdir(tmp_dir)
    except OSError:
        # Best effort: typically the directory already exists from a previous
        # file handled by the same process. Any other problem shows up as a
        # logged extraction failure below.
        pass

    # Strip only the last extension. Splitting on every dot would drop the
    # directory and the beginning of the name whenever the path contains
    # another dot.
    base_name = file_name.rsplit('.', 1)[0]
    new_name = "{0}{1}{2}".format(base_name,
                                  SEPARATOR,
                                  data_file.split("/")[-1])

    log.append(("INFO", "    Unzipping: '{0}' from '{1}'.".
                format(data_file, file_name)))
    try:
        with ZipFile(file_name, 'r') as zip_file:
            zip_file.extract(data_file, tmp_dir)
    except (BadZipfile, RuntimeError, KeyError, IOError, OSError) as err:
        # KeyError: the member is not in the archive; IOError / OSError: the
        # archive cannot be read or the member cannot be written.
        log.append(("ERROR", "Failed to unzip the file '{0}': {1}.".
                    format(file_name, str(err))))
        return False

    extracted_file = join(tmp_dir, data_file)
    log.append(("INFO", "    Renaming the file '{0}' to '{1}'".
                format(extracted_file, new_name)))
    try:
        rename(extracted_file, new_name)
    except OSError as err:
        log.append(("ERROR", "Failed to rename the file '{0}': {1}.".
                    format(data_file, str(err))))
        return False

    build["file-name"] = new_name
    return True
206
207
def download_and_unzip_data_file(spec, job, build, pid, log):
    """Download and unzip a source file.

    The sources are tried in this order until one succeeds:

    1. the file ``spec.input["file-name"]`` from URL[NEXUS,LOG],
    2. the same file from URL[NEXUS,DOC],
    3. the file ``spec.input["zip-file-name"]`` from URL[NEXUS,DOC], first in
       the directory of the release found in the job name, then in "master",
    4. the zip file from Jenkins (URL[JENKINS,CSIT] or URL[JENKINS,HC],
       chosen by the job name prefix).

    On success ``build["file-name"]`` is set to the local data file: a
    trailing ".gz" is dropped from the downloaded name, and a ".zip" archive
    is checked and unpacked.

    :param spec: Specification read from the specification file.
    :param job: Name of the Jenkins job.
    :param build: Information about the build.
    :param pid: PID of the process executing this method.
    :param log: List of log messages.
    :type spec: Specification
    :type job: str
    :type build: dict
    :type pid: int
    :type log: list of tuples (severity, msg)
    :returns: True if the download was successful, otherwise False.
    :rtype: bool
    :raises PresentationError: If the Jenkins fallback is reached and no
        Jenkins URL is defined for the job.
    """

    build_nr = build["build"]
    data_dir = spec.environment["paths"]["DIR[WORKING,DATA]"]
    urls = spec.environment["urls"]

    def prefixed_name(name):
        """Return "<job><SEPARATOR><build><SEPARATOR><name>"."""
        return "{job}{sep}{build}{sep}{name}".format(
            job=job, sep=SEPARATOR, build=build_nr, name=name)

    arch = bool(spec.configuration.get("archive-inputs", True))

    success = False
    downloaded_name = None

    # Try to download .gz from logs.fd.io, then from docs.fd.io

    file_name = spec.input["file-name"]
    new_name = join(data_dir, prefixed_name(file_name))
    remote_path = spec.input["download-path"].format(
        job=job, build=build_nr, filename=file_name)
    for message, url_key in (("Trying to download {0}", "URL[NEXUS,LOG]"),
                             ("Downloading {0}", "URL[NEXUS,DOC]")):
        url = "{0}/{1}".format(urls[url_key], remote_path)
        logging.info(message.format(url))
        success, downloaded_name = _download_file(
            url, new_name, log, arch=arch)
        if success:
            break

    if not success:

        # Try to download .zip from docs.fd.io

        file_name = spec.input["zip-file-name"]
        nexus_file_name = prefixed_name(file_name)
        new_name = join(data_dir, nexus_file_name)

        # A job name without a release token falls back to "master" only;
        # "master" is not tried twice when it already is the release.
        releases = list()
        match = REGEX_RELEASE.search(job)
        if match:
            releases.append(match.group(2))
        if "master" not in releases:
            releases.append("master")

        for rls in releases:
            try:
                rls = "rls{0}".format(int(rls))
            except ValueError:
                # It is 'master'
                pass
            url = "{url}/{release}/{dir}/{file}".format(
                url=urls["URL[NEXUS,DOC]"],
                release=rls,
                dir=urls["DIR[NEXUS,DOC]"],
                file=nexus_file_name)

            logging.info("Downloading {0}".format(url))

            success, downloaded_name = _download_file(url, new_name, log)
            if success:
                break

    if not success:

        # Try to download .zip from jenkins.fd.io

        file_name = spec.input["zip-file-name"]
        download_path = spec.input["zip-download-path"]
        if job.startswith("csit-"):
            url = urls["URL[JENKINS,CSIT]"]
        elif job.startswith("hc2vpp-"):
            url = urls["URL[JENKINS,HC]"]
        else:
            raise PresentationError(
                "No url defined for the job '{}'.".format(job))

        full_name = download_path.format(
            job=job, build=build_nr, filename=file_name)
        url = "{0}/{1}".format(url, full_name)
        new_name = join(data_dir, prefixed_name(file_name))

        logging.info("Downloading {0}".format(url))

        success, downloaded_name = _download_file(url, new_name, log)

    # From here on work with the name reported by _download_file; it can
    # differ from new_name when the "_info" fallback was used.
    if success and downloaded_name.endswith(".zip"):
        if not is_zipfile(downloaded_name):
            log.append(("ERROR",
                        "Zip file '{0}' is corrupted.".format(downloaded_name)))
            success = False

    if success:
        build["file-name"] = downloaded_name

        if downloaded_name.endswith(".gz"):
            # The data were stored without the ".gz" suffix.
            build["file-name"] = downloaded_name[:-3]

        if downloaded_name.endswith(".zip"):
            success = _unzip_file(spec, build, pid, log)

    return success
333
334     return success