PAL: Process Hoststack data
[csit.git] / resources / tools / presentation / input_data_files.py
1 # Copyright (c) 2018 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
5 #
6 #     http://www.apache.org/licenses/LICENSE-2.0
7 #
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13
14 """Inputs
15 Download all data.
16 """
17
18 import re
19 import logging
20
21 from os import rename, mkdir
22 from os.path import join
23 from http.client import responses
24 from zipfile import ZipFile, is_zipfile, BadZipfile
25
26 import requests
27
28 from requests.adapters import HTTPAdapter, Retry
29 from requests.exceptions import RequestException
30 from requests import codes
31
32 from pal_errors import PresentationError
33
34
# Number of bytes requested per iteration while a download is written to disk.
CHUNK_SIZE = 512

# Token joining the job name, the build number and the file name in the names
# of locally stored files.
SEPARATOR = "__"

# Splits a job name around its release part; group 2 is either a four-digit
# release number or the literal "master".
REGEX_RELEASE = re.compile(r"(\D*)(\d{4}|master)(\D*)")
42
43
def _download_file(url, file_name, log, arch=False):
    """Download a file with input data.

    The URL is requested first as given. If the answer is not 200 OK, the
    request is repeated once with every "_info" removed from the URL; when this
    second attempt succeeds, "_info" is removed from the file name as well.
    The content is stored under the file name with ".gz" removed. If ``arch``
    is True and the file name contains ".gz", the URL is requested once more
    and the body read from the raw response stream is stored under the
    unchanged file name.

    :param url: URL to the file to download.
    :param file_name: Name of file to download.
    :param log: List of log messages.
    :param arch: If True, also .gz file is downloaded
    :type url: str
    :type file_name: str
    :type log: list of tuples (severity, msg)
    :type arch: bool
    :returns: Success flag (True if the download was successful, otherwise
        False) and the file name, which differs from the given one when the
        attempt without "_info" was the successful one.
    :rtype: tuple(bool, str)
    """

    # Needed only by the archive branch; imported here so the block does not
    # depend on a module-level import.
    from shutil import copyfileobj

    def requests_retry_session(retries=3,
                               backoff_factor=0.3,
                               status_forcelist=(500, 502, 504)):
        """Create a session whose http(s) requests are retried.

        :param retries: Total number of retries to allow.
        :param backoff_factor: A backoff factor to apply between attempts after
            the second try.
        :param status_forcelist: A set of integer HTTP status codes that are
            forced to retry.
        :type retries: int
        :type backoff_factor: float
        :type status_forcelist: iterable
        :returns: Session object.
        :rtype: requests.Session
        """

        retry = Retry(
            total=retries,
            read=retries,
            connect=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
        )
        adapter = HTTPAdapter(max_retries=retry)
        session = requests.Session()
        session.mount(u"http://", adapter)
        session.mount(u"https://", adapter)
        return session

    def log_status(status):
        """Log the status code together with its reason phrase.

        Codes missing in http.client.responses are logged with a placeholder
        instead of raising KeyError, so that the fallback request is still
        tried.

        :param status: HTTP status code of the response.
        :type status: int
        """
        reason = responses.get(status, u"Unknown status code")
        log.append((u"INFO", f"    {status}: {reason}"))

    success = False
    session = None
    try:
        # NOTE(review): no timeout is passed to get(); consider adding one so
        # that a stalled server cannot block the download forever.
        log.append((u"INFO", f"    Connecting to {url} ..."))
        session = requests_retry_session()
        response = session.get(url, stream=True)
        code = response.status_code
        log_status(code)

        if code != codes[u"OK"]:
            # Second chance: the same resource without the "_info" marker.
            session.close()
            url = url.replace(u"_info", u"")
            log.append((u"INFO", f"    Connecting to {url} ..."))
            session = requests_retry_session()
            response = session.get(url, stream=True)
            code = response.status_code
            log_status(code)
            if code != codes[u"OK"]:
                # The session is closed by the finally clause.
                return False, file_name
            file_name = file_name.replace(u"_info", u"")

        dst_file_name = file_name.replace(u".gz", u"")
        log.append(
            (u"INFO", f"    Downloading the file {url} to {dst_file_name} ...")
        )
        with open(dst_file_name, u"wb") as file_handle:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                if chunk:
                    file_handle.write(chunk)

        if arch and u".gz" in file_name:
            # Keep also a copy taken from the raw stream under the original
            # (.gz) name. A failure here is logged but does not change the
            # result, the file above is already stored.
            session.close()
            log.append(
                (u"INFO", f"    Downloading the file {url} to {file_name} ...")
            )
            session = requests_retry_session()
            response = session.get(url, stream=True)
            if response.status_code == codes[u"OK"]:
                with open(file_name, u"wb") as file_handle:
                    # Copy in pieces instead of reading the whole body into
                    # memory at once.
                    copyfileobj(response.raw, file_handle)
            else:
                log.append(
                    (u"ERROR", f"Not possible to download the file {url} to "
                               f"{file_name} ...")
                )

        success = True
    except RequestException as err:
        log.append(
            (u"ERROR", f"HTTP Request exception:\n{repr(err)}")
        )
    except (IOError, ValueError, KeyError) as err:
        log.append((u"ERROR", f"Download failed.\n{repr(err)}"))
    finally:
        if session is not None:
            session.close()

    log.append((u"INFO", u"    Download finished."))
    return success, file_name
150
151
def _unzip_file(spec, build, pid, log):
    """Unzip downloaded source file.

    One member is extracted from the archive named by build["file-name"] into
    a directory named after the PID, then renamed to
    "<archive name without its last extension><SEPARATOR><member base name>".
    On success build["file-name"] is set to the new name.

    :param spec: Specification read from the specification file.
    :param build: Information about the build.
    :param pid: PID of the process executing this method.
    :param log: List of log messages.
    :type spec: Specification
    :type build: dict
    :type pid: int
    :type log: list of tuples (severity, msg)
    :returns: True if the file was extracted and renamed, otherwise False.
    :rtype: bool
    """

    file_name = build[u"file-name"]
    if u".zip" in file_name:
        data_file = spec.input[u"zip-extract"]
    else:
        data_file = spec.input[u"extract"]

    directory = spec.environment[u"paths"][u"DIR[WORKING,DATA]"]
    tmp_dir = join(directory, str(pid))
    try:
        mkdir(tmp_dir)
    except OSError:
        # Best effort only: the directory may already exist, and extract()
        # below creates missing directories on its own.
        pass

    # Strip only the last extension; splitting on every dot would drop the
    # directory part whenever the path contains another dot.
    archive_stem = file_name.rsplit(u".", 1)[0]
    new_name = f"{archive_stem}{SEPARATOR}{data_file.split(u'/')[-1]}"

    log.append((u"INFO", f"    Unzipping: {data_file} from {file_name}."))
    try:
        with ZipFile(file_name, u'r') as zip_file:
            zip_file.extract(data_file, tmp_dir)
        log.append(
            (u"INFO", f"    Renaming the file {join(tmp_dir, data_file)} to "
                      f"{new_name}")
        )
        rename(join(tmp_dir, data_file), new_name)
        build[u"file-name"] = new_name
        return True
    except (BadZipfile, RuntimeError, KeyError) as err:
        # KeyError: the requested member is not present in the archive.
        log.append(
            (u"ERROR", f"Failed to unzip the file {file_name}: {repr(err)}.")
        )
        return False
    except OSError as err:
        log.append(
            (u"ERROR", f"Failed to rename the file {data_file}: {repr(err)}.")
        )
        return False
201
202
def download_and_unzip_data_file(spec, job, build, pid, log):
    """Download and unzip a source file.

    The sources are tried in this order until one download succeeds:

    1. spec.input["file-name"] from URL[NEXUS,LOG],
    2. spec.input["file-name"] from URL[NEXUS,DOC],
    3. spec.input["zip-file-name"] from URL[NEXUS,DOC], first in the directory
       of the release found in the job name, then in "master",
    4. spec.input["zip-file-name"] from the Jenkins URL chosen by the prefix
       of the job name.

    On success build["file-name"] is set to the downloaded file (without the
    ".gz" suffix, if there was one); a ".zip" file is checked and unzipped.

    :param spec: Specification read from the specification file.
    :param job: Name of the Jenkins job.
    :param build: Information about the build.
    :param pid: PID of the process executing this method.
    :param log: List of log messages.
    :type spec: Specification
    :type job: str
    :type build: dict
    :type pid: int
    :type log: list of tuples (severity, msg)
    :returns: True if the download was successful, otherwise False.
    :rtype: bool
    :raises PresentationError: If the first three sources failed and no
        Jenkins URL is defined for the job.
    """

    def local_path(name):
        """Build the path under which the downloaded file is stored.

        :param name: Name of the file as given in the specification.
        :type name: str
        :returns: Path in the working data directory.
        :rtype: str
        """
        return join(
            spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
            f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{name}"
        )

    # Try to download .gz from logs.fd.io

    file_name = spec.input[u"file-name"]
    url = u"{0}/{1}".format(
        spec.environment[u'urls'][u'URL[NEXUS,LOG]'],
        spec.input[u'download-path'].format(
            job=job, build=build[u'build'], filename=file_name
        )
    )
    new_name = local_path(file_name)

    logging.info(f"Trying to download {url}")

    arch = bool(spec.configuration.get(u"archive-inputs", True))
    success, downloaded_name = _download_file(url, new_name, log, arch=arch)

    if not success:

        # Try to download .gz from docs.fd.io

        url = u"{0}/{1}".format(
            spec.environment[u"urls"][u"URL[NEXUS,DOC]"],
            spec.input[u"download-path"].format(
                job=job, build=build[u"build"], filename=file_name
            )
        )

        logging.info(f"Downloading {url}")

        success, downloaded_name = _download_file(url, new_name, log, arch=arch)

    if not success:

        # Try to download .zip from docs.fd.io

        file_name = spec.input[u"zip-file-name"]
        new_name = local_path(file_name)
        nexus_file_name = \
            f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"

        # A job name without a recognisable release is looked up in "master"
        # only; "master" itself is not tried twice.
        match = REGEX_RELEASE.search(job)
        release = match.group(2) if match else u"master"
        if release == u"master":
            candidates = (u"master", )
        else:
            candidates = (release, u"master")

        for rls in candidates:
            try:
                rls = f"rls{int(rls)}"
            except ValueError:
                # It is master
                pass
            url = (
                f"{spec.environment[u'urls'][u'URL[NEXUS,DOC]']}/"
                f"{rls}/"
                f"{spec.environment[u'urls'][u'DIR[NEXUS,DOC]']}/"
                f"{nexus_file_name}"
            )

            logging.info(f"Downloading {url}")

            success, downloaded_name = _download_file(url, new_name, log)
            if success:
                break

    if not success:

        # Try to download .zip from jenkins.fd.io

        file_name = spec.input[u"zip-file-name"]
        download_path = spec.input[u"zip-download-path"]
        if job.startswith(u"csit-"):
            url = spec.environment[u"urls"][u"URL[JENKINS,CSIT]"]
        elif job.startswith(u"hc2vpp-"):
            url = spec.environment[u"urls"][u"URL[JENKINS,HC]"]
        else:
            raise PresentationError(f"No url defined for the job {job}.")

        full_name = download_path.format(
            job=job, build=build[u"build"], filename=file_name
        )
        url = u"{0}/{1}".format(url, full_name)
        new_name = local_path(file_name)

        logging.info(f"Downloading {url}")

        success, downloaded_name = _download_file(url, new_name, log)

    if success and downloaded_name.endswith(u".zip"):
        if not is_zipfile(downloaded_name):
            # Report the file which was really written and checked.
            log.append((u"ERROR", f"Zip file {downloaded_name} is corrupted."))
            success = False

    if success:
        build[u"file-name"] = downloaded_name

        # The content of a .gz download is stored without the suffix.
        if downloaded_name.endswith(u".gz"):
            build[u"file-name"] = downloaded_name[:-3]

        if downloaded_name.endswith(u".zip"):
            success = _unzip_file(spec, build, pid, log)

    return success