PAL: Convert XML to JSON
[csit.git] / resources / tools / presentation / input_data_files.py
1 # Copyright (c) 2021 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
5 #
6 #     http://www.apache.org/licenses/LICENSE-2.0
7 #
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13
14 """Inputs
15 Download all data.
16 """
17
18 import re
19 import logging
20 import gzip
21
22 from os import rename, mkdir
23 from os.path import join
24 from http.client import responses, HTTPException
25 from zipfile import ZipFile, is_zipfile, BadZipfile
26
27 import requests
28
29 from requests.adapters import HTTPAdapter, Retry
30 from requests.exceptions import RequestException
31 from requests import codes
32
33 from urllib3.exceptions import HTTPError
34
35
# Number of bytes requested per chunk when a response body is streamed to
# a file.
CHUNK_SIZE = 512

# Delimiter placed between the job name, the build number and the file name
# when the name of a downloaded file is composed.
SEPARATOR = u"__"

# Finds the release in a job name: group 2 is either a four-digit release
# number or the word "master"; groups 1 and 3 are the surrounding non-digits.
REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)')
43
44
def _download_file(url, file_name, arch=False, verify=True, repeat=1):
    """Download a file with input data.

    The response body is streamed to ``file_name`` with a trailing ".gz"
    removed. If ``arch`` is set and ``file_name`` ends with ".gz", the URL is
    requested once more and the body, read through ``response.raw``, is
    stored under ``file_name`` itself.

    A response with a status other than OK ends the download immediately
    without further attempts; only exceptions raised while connecting,
    transferring or writing lead to another attempt.

    :param url: URL to the file to download.
    :param file_name: Name of file to download.
    :param arch: If True, also .gz file is downloaded.
    :param verify: If true, verify the certificate.
    :param repeat: The number of attempts to download the file. No attempt is
        made if the value is lower than one.
    :type url: str
    :type file_name: str
    :type arch: bool
    :type verify: bool
    :type repeat: int
    :returns: Pair consisting of the success flag (True if the download was
        successful, otherwise False) and the unchanged ``file_name``.
    :rtype: tuple(bool, str)
    """

    def requests_retry_session(retries=3,
                               backoff_factor=0.3,
                               status_forcelist=(500, 502, 504)):
        """Create a session whose http(s) adapters retry failed requests.

        :param retries: Total number of retries to allow.
        :param backoff_factor: A backoff factor to apply between attempts after
            the second try.
        :param status_forcelist: A set of integer HTTP status codes that are
            forced to retry.
        :type retries: int
        :type backoff_factor: float
        :type status_forcelist: iterable
        :returns: Session object.
        :rtype: requests.Session
        """

        retry = Retry(
            total=retries,
            read=retries,
            connect=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
        )
        adapter = HTTPAdapter(max_retries=retry)
        session = requests.Session()
        session.mount(u"http://", adapter)
        session.mount(u"https://", adapter)
        return session

    # Only a trailing ".gz" is removed, which is what the callers do as well
    # (they slice off the last three characters); an occurrence elsewhere in
    # the path, e.g. in a directory name, must stay untouched.
    gz_suffix = u".gz"
    is_gz = file_name.endswith(gz_suffix)
    dst_file_name = file_name[:-len(gz_suffix)] if is_gz else file_name

    # A non-positive (or missing) number of attempts means no attempt at all;
    # counting down from a negative value would never reach zero.
    attempts_left = repeat if repeat and repeat > 0 else 0

    success = False
    while attempts_left > 0 and not success:
        attempts_left -= 1
        session = None
        try:
            logging.info(f"  Connecting to {url} ...")
            session = requests_retry_session()
            response = session.get(url, stream=True, verify=verify)
            code = response.status_code
            # Non-standard status codes are missing in http.client.responses;
            # they must not abort the attempt with a KeyError.
            reason = responses.get(code, u"Unknown status code")
            logging.info(f"  {code}: {reason}")

            if code != codes[u"OK"]:
                # The session is closed by the "finally" clause below.
                return False, file_name

            logging.info(f"  Downloading the file {url} to {dst_file_name}")
            with open(dst_file_name, u"wb") as file_handle:
                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                    if chunk:
                        file_handle.write(chunk)

            if arch and is_gz:
                # The archive copy is fetched with a fresh session; a failure
                # to get it is only logged and does not fail the download.
                session.close()
                logging.info(f"  Downloading the file {url} to {file_name}")
                session = requests_retry_session()
                response = session.get(url, stream=True, verify=verify)
                if response.status_code == codes[u"OK"]:
                    with open(file_name, u"wb") as file_handle:
                        file_handle.write(response.raw.read())
                else:
                    logging.error(
                        f"Not possible to download the file "
                        f"{url} to {file_name}"
                    )

            success = True
        except (HTTPException, HTTPError) as err:
            logging.error(f"Connection broken:\n{repr(err)}")
        except RequestException as err:
            logging.error(f"HTTP Request exception:\n{repr(err)}")
        except (IOError, ValueError, KeyError) as err:
            logging.error(f"Download failed.\n{repr(err)}")
        finally:
            if session is not None:
                session.close()
    return success, file_name
142
143
def _unzip_file(spec, build, pid):
    """Unzip downloaded source file.

    The member "robot-plugin/output.xml" is extracted from the archive named
    by ``build["file-name"]`` into a directory named after ``pid`` inside
    DIR[WORKING,DATA]. The extracted file is then renamed to the archive name
    with its extension replaced by the separator and "output.xml", and this
    new name is stored in ``build["file-name"]``.

    :param spec: Specification read form the specification file.
    :param build: Information about the build.
    :param pid: PID of the process executing this method; used as the name of
        the directory the file is extracted to.
    :type spec: Specification
    :type build: dict
    :type pid: int
    :returns: True if the file was extracted and renamed, otherwise False.
    :rtype: bool
    """

    file_name = build[u"file-name"]
    data_file = "robot-plugin/output.xml"
    directory = spec.environment[u"paths"][u"DIR[WORKING,DATA]"]
    tmp_dir = join(directory, str(pid))
    try:
        mkdir(tmp_dir)
    except OSError:
        # Best effort only (the directory may already exist); a directory
        # which really cannot be used makes the extraction below fail and
        # that failure is reported.
        pass

    # Cut off only the last extension. Splitting on every dot would pick a
    # wrong piece of the path whenever a directory name includes a dot
    # (e.g. "./data/x.zip") and would fail for a name with no dot at all.
    base_name = file_name.rsplit(u".", 1)[0]
    new_name = f"{base_name}{SEPARATOR}{data_file.split(u'/')[-1]}"

    logging.info(f"    Unzipping: {data_file} from {file_name}.")
    try:
        with ZipFile(file_name, u'r') as zip_file:
            zip_file.extract(data_file, tmp_dir)
        logging.info(
            f"    Renaming the file {join(tmp_dir, data_file)} to {new_name}"
        )
        rename(join(tmp_dir, data_file), new_name)
        build[u"file-name"] = new_name
        return True
    except (BadZipfile, RuntimeError, KeyError) as err:
        # KeyError: the archive does not include the requested member.
        logging.error(f"Failed to unzip the file {file_name}: {repr(err)}.")
        return False
    except OSError as err:
        logging.error(f"Failed to rename the file {data_file}: {repr(err)}.")
        return False
182
183
def _download_xml(source, job, build, w_dir, arch):
    """Download the input file of a build from the given data source.

    The URL is the "url" item of the source joined by a slash with its "path"
    item, in which the fields ``job``, ``build`` and ``filename`` are
    substituted. The file is stored in the working directory under the name
    composed of the job name, the build number and the file name.
    The certificate is not verified if the URL includes "nginx", and the
    download is attempted up to three times.

    :param source: Specification of the data source; its items "file-name",
        "url" and "path" are used.
    :param job: Name of the job.
    :param build: Information about the build; its item "build" is used.
    :param w_dir: Path to working directory
    :param arch: Handed over to _download_file as its ``arch`` argument.
    :type source: dict
    :type job: str
    :type build: dict
    :type w_dir: str
    :type arch: bool
    :returns: The pair (success, file name) as returned by _download_file.
    :rtype: tuple
    """

    build_nr = build[u'build']
    remote_name = source.get(u"file-name", u"")

    # Local target: <w_dir>/<job>__<build>__<file name>
    local_path = join(
        w_dir, f"{job}{SEPARATOR}{build_nr}{SEPARATOR}{remote_name}"
    )

    # Remote location: the base URL followed by the filled-in path template.
    base_url = source.get(u"url", u"")
    remote_path = source.get(u"path", u"").format(
        job=job, build=build_nr, filename=remote_name
    )
    url = f"{base_url}/{remote_path}"

    logging.info(f"  Trying to download {url}")
    skip_verification = u"nginx" in url
    return _download_file(
        url, local_path, arch=arch, verify=not skip_verification, repeat=3
    )
211
212
def _download_xml_docs(source, job, build, w_dir, arch):
    """Download the input file of a build from a release-specific location.

    The release is taken from the job name (a four-digit number or "master").
    The file is looked for first in the location of this release ("rls"
    followed by the number) and then in the "master" location; the first
    successful download wins. If the job name includes no release, only the
    "master" location is tried.

    If the file name ends with ".gz", the downloaded file (stored without
    this suffix) is decompressed in place.

    :param source: Specification of the data source; its items "file-name",
        "url" and "path" are used.
    :param job: Name of the job.
    :param build: Information about the build; its item "build" is used.
    :param w_dir: Path to working directory
    :param arch: Handed over to _download_file as its ``arch`` argument.
    :type source: dict
    :type job: str
    :type build: dict
    :type w_dir: str
    :type arch: bool
    :returns: The pair (success, file name) of the last download attempt.
    :rtype: tuple
    """

    file_name = source.get(u"file-name", u"")
    # The same composed name is used remotely and locally.
    full_name = f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}{file_name}"
    new_name = join(w_dir, full_name)

    # A job name with no release must not break the download, and "master"
    # must not be tried twice when it already is the release of the job.
    match = REGEX_RELEASE.search(job)
    releases = [match.group(2)] if match else []
    if u"master" not in releases:
        releases.append(u"master")

    success = False
    downloaded_name = new_name
    for rls in releases:
        try:
            rls = f"rls{int(rls)}"
        except ValueError:
            pass  # It is master
        url = (
            f"{source.get(u'url', u'')}/"
            f"{rls}/"
            f"{source.get(u'path', u'')}/"
            f"{full_name}"
        )

        logging.info(f"  Trying to download {url}")

        success, downloaded_name = _download_file(url, new_name, arch=arch)
        if not success:
            continue

        if file_name.endswith(u".gz"):
            # The slice drops the ".gz" suffix to get the name of the file
            # written by _download_file; its content is replaced by the
            # decompressed data.
            with gzip.open(downloaded_name[:-3], u"rb") as gzip_file:
                file_content = gzip_file.read()
            with open(downloaded_name[:-3], u"wb") as xml_file:
                xml_file.write(file_content)
        break

    return success, downloaded_name
254
255
def download_and_unzip_data_file(spec, job, build, pid):
    """Download and unzip a source file.

    The enabled data sources listed in the specification are tried in the
    given order until one of them provides the file. For the successful
    source, its counter "successful-downloads" is increased and its type is
    stored in ``build["source"]``.

    On success ``build["file-name"]`` is set to the downloaded name without
    ".gz" for a .gz file, or to the name of the extracted file for a .zip
    file; for any other file name it is left as it is.

    :param spec: Specification read form the specification file.
    :param job: Name of the Jenkins job.
    :param build: Information about the build.
    :param pid: PID of the process executing this method.
    :type spec: Specification
    :type job: str
    :type build: dict
    :type pid: int
    :returns: True if the download was successful, otherwise False.
    :rtype: bool
    """

    # Download function for each supported type of data source.
    download = {
        "xml": _download_xml,
        "xml-docs": _download_xml_docs
    }

    success = False
    downloaded_name = u""
    arch = bool(spec.environment.get(u"archive-inputs", True))

    for source in spec.environment.get(u"data-sources", tuple()):
        if not source.get(u"enabled", False):
            continue
        download_type = source.get(u"type", None)
        if not download_type:
            continue
        downloader = download.get(download_type)
        if downloader is None:
            # A misconfigured source must not stop the remaining ones from
            # being tried.
            logging.error(
                f"Unsupported type of data source: {download_type}"
            )
            continue
        success, downloaded_name = downloader(
            source,
            job,
            build,
            spec.environment[u"paths"][u"DIR[WORKING,DATA]"],
            arch
        )
        if success:
            # The counter may not have been initialised for this source yet.
            source[u"successful-downloads"] = \
                source.get(u"successful-downloads", 0) + 1
            build[u"source"] = download_type
            break

    # TODO: Remove when only .gz is used.
    if success and downloaded_name.endswith(u".zip"):
        if not is_zipfile(downloaded_name):
            logging.error(f"Zip file {downloaded_name} is corrupted.")
            success = False

    if success:
        if downloaded_name.endswith(u".gz"):
            # The data itself is stored under the name without ".gz".
            build[u"file-name"] = downloaded_name[:-3]
        # TODO: Remove when only .gz is used.
        elif downloaded_name.endswith(u".zip"):
            build[u"file-name"] = downloaded_name
            success = _unzip_file(spec, build, pid)

    return success