Trending: Partially remove 3n-hsw
[csit.git] / resources / tools / presentation / input_data_files.py
1 # Copyright (c) 2021 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
5 #
6 #     http://www.apache.org/licenses/LICENSE-2.0
7 #
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13
14 """Inputs
15 Download all data.
16 """
17
18 import re
19 import logging
20 import gzip
21
22 from os import rename, mkdir
23 from os.path import join
24 from http.client import responses, HTTPException
25 from zipfile import ZipFile, is_zipfile, BadZipfile
26
27 import requests
28
29 from requests.adapters import HTTPAdapter, Retry
30 from requests.exceptions import RequestException
31 from requests import codes
32
33 from urllib3.exceptions import HTTPError
34
35
# Chunk size used for file download; it is passed as chunk_size to
# response.iter_content() when a download is streamed to disk.
CHUNK_SIZE = 512

# Separator used in file names; it joins the job name, the build number and
# the source file name into the name of the downloaded file.
SEPARATOR = u"__"

# Splits a string into three groups: a run of non-digits, then either four
# digits or the word "master", then another run of non-digits. Group 2 is
# used as the release when the regex is applied to a job name.
REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)')
43
44
45 def _download_file(url, file_name, arch=False, verify=True, repeat=1):
46     """Download a file with input data.
47
48     :param url: URL to the file to download.
49     :param file_name: Name of file to download.
50     :param arch: If True, also .gz file is downloaded.
51     :param verify: If true, verify the certificate.
52     :param repeat: The number of attempts to download the file.
53     :type url: str
54     :type file_name: str
55     :type arch: bool
56     :type verify: bool
57     :type repeat: int
58     :returns: True if the download was successful, otherwise False.
59     :rtype: bool
60     """
61
62     def requests_retry_session(retries=3,
63                                backoff_factor=0.3,
64                                status_forcelist=(500, 502, 504)):
65         """
66
67         :param retries: Total number of retries to allow.
68         :param backoff_factor: A backoff factor to apply between attempts after
69             the second try.
70         :param status_forcelist: A set of integer HTTP status codes that are
71             forced to retry.
72         :type retries: int
73         :type backoff_factor: float
74         :type status_forcelist: iterable
75         :returns: Session object.
76         :rtype: requests.Session
77         """
78
79         retry = Retry(
80             total=retries,
81             read=retries,
82             connect=retries,
83             backoff_factor=backoff_factor,
84             status_forcelist=status_forcelist,
85         )
86         adapter = HTTPAdapter(max_retries=retry)
87         session = requests.Session()
88         session.mount(u"http://", adapter)
89         session.mount(u"https://", adapter)
90         return session
91
92     success = False
93     while repeat:
94         repeat -= 1
95         session = None
96         try:
97             logging.info(f"  Connecting to {url} ...")
98             session = requests_retry_session()
99             response = session.get(url, stream=True, verify=verify)
100             code = response.status_code
101             logging.info(f"  {code}: {responses[code]}")
102
103             if code != codes[u"OK"]:
104                 if session:
105                     session.close()
106                 return False, file_name
107
108             dst_file_name = file_name.replace(u".gz", u"")
109             logging.info(f"  Downloading the file {url} to {dst_file_name}")
110             with open(dst_file_name, u"wb") as file_handle:
111                 for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
112                     if chunk:
113                         file_handle.write(chunk)
114
115             if arch and u".gz" in file_name:
116                 if session:
117                     session.close()
118                 logging.info(f"  Downloading the file {url} to {file_name}")
119                 session = requests_retry_session()
120                 response = session.get(url, stream=True, verify=verify)
121                 if response.status_code == codes[u"OK"]:
122                     with open(file_name, u"wb") as file_handle:
123                         file_handle.write(response.raw.read())
124                 else:
125                     logging.error(
126                         f"Not possible to download the file "
127                         f"{url} to {file_name}"
128                     )
129
130             success = True
131             repeat = 0
132         except (HTTPException, HTTPError) as err:
133             logging.error(f"Connection broken:\n{repr(err)}")
134         except RequestException as err:
135             logging.error(f"HTTP Request exception:\n{repr(err)}")
136         except (IOError, ValueError, KeyError) as err:
137             logging.error(f"Download failed.\n{repr(err)}")
138         finally:
139             if session:
140                 session.close()
141     return success, file_name
142
143
def _unzip_file(spec, build, pid):
    """Unzip downloaded source file.

    The file robot-plugin/output.xml is extracted from the archive named in
    build["file-name"] into a directory named after pid inside the working
    data directory. It is then renamed to the archive name without its
    extension, followed by the separator and "output.xml". On success the new
    name is stored in build["file-name"].

    :param spec: Specification read form the specification file.
    :param build: Information about the build.
    :param pid: PID of the process executing this method; used as the name of
        the temporary directory.
    :type spec: Specification
    :type build: dict
    :type pid: int
    :returns: True if the file was extracted and renamed, otherwise False.
    :rtype: bool
    """

    file_name = build[u"file-name"]
    data_file = "robot-plugin/output.xml"
    directory = spec.environment[u"paths"][u"DIR[WORKING,DATA]"]
    tmp_dir = join(directory, str(pid))
    try:
        mkdir(tmp_dir)
    except OSError:
        # Best effort: any real problem with the directory surfaces as
        # an OSError from the extraction below, where it is reported.
        pass
    # Strip only the last extension. Splitting on every dot would drop the
    # leading part of the path whenever a directory or the job name contains
    # a dot.
    new_name = (
        f"{file_name.rsplit(u'.', 1)[0]}{SEPARATOR}"
        f"{data_file.split(u'/')[-1]}"
    )

    logging.info(f"    Unzipping: {data_file} from {file_name}.")
    try:
        with ZipFile(file_name, u'r') as zip_file:
            zip_file.extract(data_file, tmp_dir)
        logging.info(
            f"    Renaming the file {join(tmp_dir, data_file)} to {new_name}"
        )
        rename(join(tmp_dir, data_file), new_name)
        build[u"file-name"] = new_name
        return True
    except (BadZipfile, RuntimeError, KeyError) as err:
        # KeyError is raised by ZipFile.extract() when the archive does not
        # contain data_file.
        logging.error(f"Failed to unzip the file {file_name}: {repr(err)}.")
        return False
    except OSError as err:
        logging.error(f"Failed to rename the file {data_file}: {repr(err)}.")
        return False
182
183
184 def _download_json(source, job, build, w_dir, arch):
185     """
186
187     :param source:
188     :param job:
189     :param build:
190     :param w_dir: Path to working directory
191     :param arch:
192     :return:
193     """
194     success = False
195     downloaded_name = u""
196
197     return success, downloaded_name
198
199
def _download_xml(source, job, build, w_dir, arch):
    """Download the xml file of one build from the given data source.

    The URL is the source "url" joined by a slash with the source "path";
    the path is a template formatted with the job, build and filename
    keywords. The certificate is not verified when the URL contains "nginx".
    Up to three attempts are requested from the download function.

    :param source: Data source; its "file-name", "url" and "path" items are
        read, each defaulting to an empty string.
    :param job: Name of the job.
    :param build: Information about the build; its "build" item is used.
    :param w_dir: Path to working directory
    :param arch: Passed on to the download function as its arch argument.
    :returns: Success flag and the file name returned by the download
        function.
    :rtype: tuple(bool, str)
    """

    src_file = source.get(u"file-name", u"")
    build_nr = build[u'build']
    target = join(w_dir, f"{job}{SEPARATOR}{build_nr}{SEPARATOR}{src_file}")

    base_url = source.get(u"url", u"")
    rel_path = source.get(u"path", u"").format(
        job=job, build=build[u'build'], filename=src_file
    )
    url = u"{0}/{1}".format(base_url, rel_path)

    check_cert = u"nginx" not in url
    logging.info(f"  Trying to download {url}")
    return _download_file(
        url, target, arch=arch, verify=check_cert, repeat=3
    )
228
229
def _download_xml_docs(source, job, build, w_dir, arch):
    """Download the xml file of one build from the docs data source.

    The release is taken from the job name. The file is looked for in the
    directory of that release first and, if that fails, in the directory of
    master. If the job name contains no release, only master is tried. When
    the source file name ends with ".gz", the downloaded file (its name
    without ".gz") is decompressed in place.

    :param source: Data source; its "file-name", "url" and "path" items are
        read, each defaulting to an empty string.
    :param job: Name of the job.
    :param build: Information about the build; its "build" item is used.
    :param w_dir: Path to working directory
    :param arch: Passed on to the download function as its arch argument.
    :returns: Success flag and the file name returned by the last download
        attempt.
    :rtype: tuple(bool, str)
    """

    file_name = source.get(u"file-name", u"")

    match = re.search(REGEX_RELEASE, job)
    if match is None:
        # Without a release in the job name only the fallback is left.
        logging.warning(
            f"  No release found in the job name {job}, trying master only."
        )
        releases = (u"master", )
    else:
        # dict.fromkeys() keeps the order and drops the duplicate, so that
        # master is not downloaded twice for a job built from master.
        releases = tuple(dict.fromkeys((match.group(2), u"master")))

    # The same name is used on the server and in the working directory.
    full_name = f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
    new_name = join(w_dir, full_name)

    success = False
    downloaded_name = u""
    for rls in releases:
        try:
            rls = f"rls{int(rls)}"
        except ValueError:
            pass  # It is master
        url = (
            f"{source.get(u'url', u'')}/"
            f"{rls}/"
            f"{source.get(u'path', u'')}/"
            f"{full_name}"
        )

        logging.info(f"  Trying to download {url}")

        success, downloaded_name = _download_file(url, new_name, arch=arch)
        if success:
            if file_name.endswith(u".gz"):
                # The content is read completely before the same file is
                # opened for writing.
                with gzip.open(downloaded_name[:-3], u"rb") as gzip_file:
                    file_content = gzip_file.read()
                with open(downloaded_name[:-3], u"wb") as xml_file:
                    xml_file.write(file_content)
            break

    return success, downloaded_name
271
272
def download_and_unzip_data_file(spec, job, build, pid):
    """Download and unzip a source file.

    The enabled data sources from the specification are tried in the given
    order until one of them provides the file. A source with no type is
    skipped; a source with an unknown type is reported and skipped. For the
    successful source, its "successful-downloads" counter is incremented and
    its type is stored in build["source"].

    A downloaded ".gz" file sets build["file-name"] to its name without
    ".gz". A downloaded ".zip" file is checked and unzipped. For any other
    file name build["file-name"] is left untouched.

    :param spec: Specification read form the specification file.
    :param job: Name of the Jenkins job.
    :param build: Information about the build.
    :param pid: PID of the process executing this method.
    :type spec: Specification
    :type job: str
    :type build: dict
    :type pid: int
    :returns: True if the download was successful, otherwise False.
    :rtype: bool
    """

    download = {
        "json": _download_json,
        "xml": _download_xml,
        "xml-docs": _download_xml_docs
    }

    success = False
    downloaded_name = u""
    arch = bool(spec.environment.get(u"archive-inputs", True))

    for source in spec.environment.get(u"data-sources", tuple()):
        if not source.get(u"enabled", False):
            continue
        download_type = source.get(u"type", None)
        if not download_type:
            continue
        downloader = download.get(download_type, None)
        if downloader is None:
            # A misconfigured source must not stop the remaining sources
            # from being tried.
            logging.error(
                f"Unknown type of the data source: {download_type}"
            )
            continue
        success, downloaded_name = downloader(
            source,
            job,
            build,
            spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
            arch
        )
        if success:
            # The counter may not be initialised yet.
            source[u"successful-downloads"] = \
                source.get(u"successful-downloads", 0) + 1
            build[u"source"] = download_type
            break

    if not success:
        return success

    if downloaded_name.endswith(u".gz"):
        build[u"file-name"] = downloaded_name[:-3]
    # TODO: Remove when only .gz is used.
    elif downloaded_name.endswith(u".zip"):
        if not is_zipfile(downloaded_name):
            logging.error(f"Zip file {downloaded_name} is corrupted.")
            return False
        build[u"file-name"] = downloaded_name
        success = _unzip_file(spec, build, pid)

    return success