Report: Download first from logs.fd.io
[csit.git] / resources / tools / presentation / input_data_files.py
1 # Copyright (c) 2018 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
5 #
6 #     http://www.apache.org/licenses/LICENSE-2.0
7 #
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13
14 """Inputs
15 Download all data.
16 """
17
18 import re
19 import requests
20 import logging
21
22 from os import rename, mkdir
23 from os.path import join
24 from zipfile import ZipFile, is_zipfile, BadZipfile
25 from httplib import responses
26 from requests.adapters import HTTPAdapter
27 from requests.packages.urllib3.util.retry import Retry
28 from requests import codes, RequestException, Timeout, TooManyRedirects, \
29     HTTPError, ConnectionError
30
31 from errors import PresentationError
32
33
# Pattern applied to a Jenkins job name to find its release: group 2 is
# either a four-digit release number or the literal "master".
REGEX_RELEASE = re.compile(r"(\D*)(\d{4}|master)(\D*)")

# Delimiter placed between the job, build and file parts of local file names.
SEPARATOR = "__"

# Number of bytes requested per iteration while streaming a download to disk.
CHUNK_SIZE = 512
41
42
def _download_file(url, file_name, log, arch=False):
    """Download a file with input data.

    If the first request does not return 200 OK, the request is repeated with
    every "_info" removed from the URL; when that second request succeeds,
    "_info" is removed from the returned file name as well.

    The response body is written to file_name with ".gz" removed. If arch is
    True and file_name contains ".gz", the URL is fetched once more and the
    raw (not content-decoded) body is stored under file_name itself.

    :param url: URL to the file to download.
    :param file_name: Name of file to download.
    :param log: List of log messages.
    :param arch: If True, also .gz file is downloaded
    :type url: str
    :type file_name: str
    :type log: list of tuples (severity, msg)
    :type arch: bool
    :returns: Tuple (success, file_name): success is True if the download was
        successful, otherwise False; file_name is the (possibly adjusted)
        name of the downloaded file.
    :rtype: tuple(bool, str)
    """

    def requests_retry_session(retries=3,
                               backoff_factor=0.3,
                               status_forcelist=(500, 502, 504)):
        """Create a requests session which retries failed requests.

        :param retries: Total number of retries to allow.
        :param backoff_factor: A backoff factor to apply between attempts after
            the second try.
        :param status_forcelist: A set of integer HTTP status codes that are
            forced to retry.
        :type retries: int
        :type backoff_factor: float
        :type status_forcelist: iterable
        :returns: Session object.
        :rtype: requests.Session
        """

        retry = Retry(
            total=retries,
            read=retries,
            connect=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
        )
        adapter = HTTPAdapter(max_retries=retry)
        session = requests.Session()
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        return session

    success = False
    session = None
    try:
        log.append(("INFO", "    Connecting to '{0}' ...".format(url)))
        session = requests_retry_session()
        response = session.get(url, stream=True)
        code = response.status_code
        log.append(("INFO", "    {0}: {1}".format(code, responses[code])))

        if code != codes["OK"]:
            # Release the unread streamed response before trying the
            # alternative URL (the same one without "_info").
            response.close()
            url = url.replace("_info", "")
            log.append(("INFO", "    Connecting to '{0}' ...".format(url)))
            response = session.get(url, stream=True)
            code = response.status_code
            log.append(("INFO", "    {0}: {1}".format(code, responses[code])))
            if code != codes["OK"]:
                return False, file_name
            file_name = file_name.replace("_info", "")

        dst_file_name = file_name.replace(".gz", "")
        log.append(("INFO", "    Downloading the file '{0}' to '{1}' ...".
                    format(url, dst_file_name)))
        with open(dst_file_name, "wb") as file_handle:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                if chunk:
                    file_handle.write(chunk)

        if arch and ".gz" in file_name:
            # iter_content() above has consumed the whole stream, so reading
            # response.raw now would yield no data. Fetch the file again to
            # get the raw body for the archive copy.
            response.close()
            log.append(("INFO", "    Downloading the file '{0}' to '{1}' ...".
                        format(url, file_name)))
            response = session.get(url, stream=True)
            if response.status_code == codes["OK"]:
                with open(file_name, "wb") as file_handle:
                    file_handle.write(response.raw.read())
            else:
                # The data file itself was stored; only the archive copy is
                # missing, so the download is still reported as successful.
                log.append(("ERROR", "Not possible to download the file "
                                     "'{0}' to '{1}'.".format(url, file_name)))

        success = True
    except ConnectionError as err:
        log.append(("ERROR", "Not possible to connect to '{0}'.".format(url)))
        log.append(("DEBUG", repr(err)))
    except HTTPError as err:
        log.append(("ERROR", "Invalid HTTP response from '{0}'.".format(url)))
        log.append(("DEBUG", repr(err)))
    except TooManyRedirects as err:
        log.append(("ERROR", "Request exceeded the configured number "
                             "of maximum re-directions."))
        log.append(("DEBUG", repr(err)))
    except Timeout as err:
        log.append(("ERROR", "Request timed out."))
        log.append(("DEBUG", repr(err)))
    except RequestException as err:
        log.append(("ERROR", "Unexpected HTTP request exception."))
        log.append(("DEBUG", repr(err)))
    except (IOError, ValueError, KeyError) as err:
        # KeyError also covers a status code unknown to httplib.responses.
        log.append(("ERROR", "Download failed."))
        log.append(("DEBUG", repr(err)))
    finally:
        # Always release the pooled connections, also on the early return.
        if session is not None:
            session.close()

    log.append(("INFO", "    Download finished."))
    return success, file_name
141
142
def _unzip_file(spec, build, pid, log):
    """Unzip downloaded source file.

    The data file named in the specification is extracted from the archive
    build["file-name"] into a temporary directory named after pid and then
    renamed to "<archive path without extension><SEPARATOR><data file name>".
    On success build["file-name"] is updated to this new name.

    :param spec: Specification read from the specification file.
    :param build: Information about the build.
    :param pid: PID of the process executing this method.
    :param log: List of log messages.
    :type spec: Specification
    :type build: dict
    :type pid: int
    :type log: list of tuples (severity, msg)
    :returns: True if the file was extracted and renamed, otherwise False.
    :rtype: bool
    """

    file_name = build["file-name"]
    if ".zip" in file_name:
        data_file = spec.input["zip-extract"]
    else:
        data_file = spec.input["extract"]

    directory = spec.environment["paths"]["DIR[WORKING,DATA]"]
    tmp_dir = join(directory, str(pid))
    try:
        mkdir(tmp_dir)
    except OSError:
        # Best effort, typically the directory exists already. If it really
        # cannot be used, the extraction below fails and reports the error.
        pass

    # Strip only the last extension; splitting on every dot would break
    # paths which include a dot in a directory or in the file name itself.
    new_name = "{0}{1}{2}".format(file_name.rsplit('.', 1)[0],
                                  SEPARATOR,
                                  data_file.split("/")[-1])

    log.append(("INFO", "    Unzipping: '{0}' from '{1}'.".
                format(data_file, file_name)))
    try:
        with ZipFile(file_name, 'r') as zip_file:
            zip_file.extract(data_file, tmp_dir)
    except (BadZipfile, RuntimeError, KeyError, EnvironmentError) as err:
        # KeyError: data_file is not a member of the archive.
        # EnvironmentError: the archive cannot be read or the extracted file
        # cannot be written.
        log.append(("ERROR", "Failed to unzip the file '{0}': {1}.".
                    format(file_name, str(err))))
        return False

    extracted = join(tmp_dir, data_file)
    log.append(("INFO", "    Renaming the file '{0}' to '{1}'".
                format(extracted, new_name)))
    try:
        rename(extracted, new_name)
    except OSError as err:
        log.append(("ERROR", "Failed to rename the file '{0}': {1}.".
                    format(data_file, str(err))))
        return False

    build["file-name"] = new_name
    return True
190
191
def download_and_unzip_data_file(spec, job, build, pid, log):
    """Download and unzip a source file.

    The sources are tried in this order until one download succeeds:

    1. the file spec.input["file-name"] from URL[NEXUS,LOG],
    2. the same file from URL[NEXUS,DOC],
    3. the archive spec.input["zip-file-name"] from URL[NEXUS,DOC], first
       under the release parsed from the job name, then under "master",
    4. the same archive from the Jenkins URL matching the job name.

    A downloaded .zip file is checked and unzipped. On success
    build["file-name"] is set to the name of the local data file.

    :param spec: Specification read from the specification file.
    :param job: Name of the Jenkins job.
    :param build: Information about the build.
    :param pid: PID of the process executing this method.
    :param log: List of log messages.
    :type spec: Specification
    :type job: str
    :type build: dict
    :type pid: int
    :type log: list of tuples (severity, msg)
    :returns: True if the download was successful, otherwise False.
    :rtype: bool
    :raises PresentationError: If all previous sources failed and there is
        no Jenkins URL defined for the job.
    """

    def local_path(name):
        """Build the path of the file in the working data directory.

        :param name: Name of the file as given in the specification.
        :type name: str
        :returns: Path "<data dir>/<job><sep><build><sep><name>".
        :rtype: str
        """
        return join(spec.environment["paths"]["DIR[WORKING,DATA]"],
                    "{job}{sep}{build}{sep}{name}".format(
                        job=job, sep=SEPARATOR, build=build["build"],
                        name=name))

    urls = spec.environment["urls"]
    arch = bool(spec.configuration.get("archive-inputs", True))

    success = False
    downloaded_name = None

    # Try to download .gz from logs.fd.io, then from docs.fd.io

    file_name = spec.input["file-name"]
    new_name = local_path(file_name)
    for url_key in ("URL[NEXUS,LOG]", "URL[NEXUS,DOC]"):
        url = "{0}/{1}".format(
            urls[url_key],
            spec.input["download-path"].format(
                job=job, build=build["build"], filename=file_name))

        logging.info("Downloading {0}".format(url))

        success, downloaded_name = _download_file(
            url, new_name, log, arch=arch)
        if success:
            break

    if not success:

        # Try to download .zip from docs.fd.io

        file_name = spec.input["zip-file-name"]
        new_name = local_path(file_name)
        nexus_file_name = "{job}{sep}{build}{sep}{name}". \
            format(job=job, sep=SEPARATOR, build=build["build"],
                   name=file_name)

        # A job name without a release must not crash the download; in that
        # case only "master" is tried. "master" is never tried twice.
        match = REGEX_RELEASE.search(job)
        releases = [match.group(2)] if match else []
        if "master" not in releases:
            releases.append("master")

        for rls in releases:
            if rls != "master":
                # The regex guarantees four digits here.
                rls = "rls{0}".format(int(rls))
            url = "{url}/{release}/{dir}/{file}". \
                format(url=urls["URL[NEXUS,DOC]"],
                       release=rls,
                       dir=urls["DIR[NEXUS,DOC]"],
                       file=nexus_file_name)

            logging.info("Downloading {0}".format(url))

            success, downloaded_name = _download_file(url, new_name, log)
            if success:
                break

    if not success:

        # Try to download .zip from jenkins.fd.io

        file_name = spec.input["zip-file-name"]
        download_path = spec.input["zip-download-path"]
        if job.startswith("csit-"):
            url = urls["URL[JENKINS,CSIT]"]
        elif job.startswith("hc2vpp-"):
            url = urls["URL[JENKINS,HC]"]
        else:
            raise PresentationError(
                "No url defined for the job '{}'.".format(job))

        full_name = download_path.format(
            job=job, build=build["build"], filename=file_name)
        url = "{0}/{1}".format(url, full_name)
        new_name = local_path(file_name)

        logging.info("Downloading {0}".format(url))

        success, downloaded_name = _download_file(url, new_name, log)

    if success and downloaded_name.endswith(".zip"):
        if not is_zipfile(downloaded_name):
            # Report the file which was really written; it can differ from
            # the requested name if "_info" was removed during the download.
            log.append(("ERROR",
                        "Zip file '{0}' is corrupted.".
                        format(downloaded_name)))
            success = False

    if success:
        build["file-name"] = downloaded_name

        if downloaded_name.endswith(".gz"):
            # The data was stored without the ".gz" suffix.
            build["file-name"] = downloaded_name[:-3]

        if downloaded_name.endswith(".zip"):
            success = _unzip_file(spec, build, pid, log)

    return success