Report: Configure rls2101.09
[csit.git] / resources / tools / presentation / input_data_files.py
1 # Copyright (c) 2020 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
5 #
6 #     http://www.apache.org/licenses/LICENSE-2.0
7 #
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13
14 """Inputs
15 Download all data.
16 """
17
18 import re
19 import logging
20 import gzip
21
22 from os import rename, mkdir
23 from os.path import join
24 from http.client import responses
25 from zipfile import ZipFile, is_zipfile, BadZipfile
26
27 import requests
28
29 from requests.adapters import HTTPAdapter, Retry
30 from requests.exceptions import RequestException
31 from requests import codes
32
33 from pal_errors import PresentationError
34
35
# Size (in bytes) of the chunks read from the HTTP response during a download.
CHUNK_SIZE = 512

# Delimiter placed between the job name, the build number and the file name
# when the name of a local data file is composed.
SEPARATOR = "__"

# Locates the release in a job name: group 2 is either four digits or "master".
REGEX_RELEASE = re.compile(r"(\D*)(\d{4}|master)(\D*)")
43
44
def _download_file(url, file_name, arch=False):
    """Download a file with input data.

    The response body is streamed to ``file_name`` with a trailing ".gz"
    removed. If the first request does not return 200 OK, the request is
    repeated once with every "_info" removed from the URL; when that second
    request succeeds, "_info" is removed from the file name as well.

    :param url: URL to the file to download.
    :param file_name: Name of file to download.
    :param arch: If True and the file name ends with ".gz", the file is
        requested once more and its raw (undecoded) body is stored under the
        unchanged ``file_name``. A failure of this extra request is only
        logged, it does not make the download unsuccessful.
    :type url: str
    :type file_name: str
    :type arch: bool
    :returns: Tuple of the success flag (True if the download was successful,
        otherwise False) and the file name, possibly with "_info" removed.
    :rtype: tuple(bool, str)
    """

    def requests_retry_session(retries=3,
                               backoff_factor=0.3,
                               status_forcelist=(500, 502, 504)):
        """Create a session which retries failed HTTP(S) requests.

        :param retries: Total number of retries to allow.
        :param backoff_factor: A backoff factor to apply between attempts after
            the second try.
        :param status_forcelist: A set of integer HTTP status codes that are
            forced to retry.
        :type retries: int
        :type backoff_factor: float
        :type status_forcelist: iterable
        :returns: Session object.
        :rtype: requests.Session
        """

        retry = Retry(
            total=retries,
            read=retries,
            connect=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
        )
        adapter = HTTPAdapter(max_retries=retry)
        session = requests.Session()
        session.mount(u"http://", adapter)
        session.mount(u"https://", adapter)
        return session

    def get_and_log(session, target):
        """Send a streaming GET request and log the returned status.

        :param session: Session used to send the request.
        :param target: URL to request.
        :type session: requests.Session
        :type target: str
        :returns: The received response.
        :rtype: requests.Response
        """
        logging.info(f"    Connecting to {target} ...")
        response = session.get(target, stream=True)
        code = response.status_code
        # A non-standard status code has no entry in http.client.responses;
        # it must not abort the download with KeyError.
        logging.info(f"    {code}: {responses.get(code, u'Unknown status')}")
        return response

    success = False
    try:
        # The context manager closes the session (and its connections) on
        # every exit path, including the early return below.
        with requests_retry_session() as session:
            response = get_and_log(session, url)

            if response.status_code != codes[u"OK"]:
                # Second attempt: the same resource without "_info".
                response.close()
                url = url.replace(u"_info", u"")
                response = get_and_log(session, url)
                if response.status_code != codes[u"OK"]:
                    return False, file_name
                file_name = file_name.replace(u"_info", u"")

            # Only a trailing ".gz" is removed; the caller derives the same
            # name by cutting the last three characters.
            is_gz = file_name.endswith(u".gz")
            dst_file_name = file_name[:-3] if is_gz else file_name
            logging.info(
                f"    Downloading the file {url} to {dst_file_name} ..."
            )
            with open(dst_file_name, u"wb") as file_handle:
                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                    if chunk:
                        file_handle.write(chunk)

            if arch and is_gz:
                # Keep also the archive itself: request the file again and
                # store the raw body without any content decoding.
                logging.info(
                    f"    Downloading the file {url} to {file_name} ..."
                )
                response = session.get(url, stream=True)
                if response.status_code == codes[u"OK"]:
                    with open(file_name, u"wb") as file_handle:
                        file_handle.write(response.raw.read())
                else:
                    logging.error(
                        f"Not possible to download the file {url} to "
                        f"{file_name}"
                    )

        success = True
    except RequestException as err:
        logging.error(f"HTTP Request exception:\n{repr(err)}")
    except (IOError, ValueError, KeyError) as err:
        logging.error(f"Download failed.\n{repr(err)}")

    logging.info(u"    Download finished.")
    return success, file_name
142
143
def _unzip_file(spec, build, pid):
    """Unzip downloaded source file.

    The archive member to extract is spec.input["zip-extract"] when ".zip"
    occurs in the archive name, otherwise spec.input["extract"]. The member is
    extracted into the directory "<DIR[WORKING,DATA]>/<pid>" and then renamed
    to "<archive name without its extension>" + SEPARATOR + "<base name of the
    member>". On success, build["file-name"] is set to this new name.

    :param spec: Specification read from the specification file.
    :param build: Information about the build. The item "file-name" is read
        and, on success, overwritten.
    :param pid: Value used to name the temporary extraction directory.
    :type spec: Specification
    :type build: dict
    :type pid: int
    :returns: True if the file was extracted and renamed, otherwise False.
    :rtype: bool
    """

    file_name = build[u"file-name"]
    if u".zip" in file_name:
        data_file = spec.input[u"zip-extract"]
    else:
        data_file = spec.input[u"extract"]

    directory = spec.environment[u"paths"][u"DIR[WORKING,DATA]"]
    tmp_dir = join(directory, str(pid))
    try:
        mkdir(tmp_dir)
    except OSError:
        # Best effort only: the directory may already exist. Any other
        # problem with it surfaces (and is logged) during the extraction.
        pass

    # Strip only the last extension. Splitting on every dot would drop the
    # part of the path in front of an earlier dot (e.g. "./dir/x.zip").
    new_name = (
        f"{file_name.rsplit(u'.', 1)[0]}{SEPARATOR}"
        f"{data_file.split(u'/')[-1]}"
    )

    logging.info(f"    Unzipping: {data_file} from {file_name}.")
    try:
        with ZipFile(file_name, u'r') as zip_file:
            zip_file.extract(data_file, tmp_dir)
    except (BadZipfile, RuntimeError, KeyError, OSError) as err:
        # KeyError: the requested member is not present in the archive.
        # OSError: the archive cannot be read or the member cannot be written.
        logging.error(f"Failed to unzip the file {file_name}: {repr(err)}.")
        return False

    extracted = join(tmp_dir, data_file)
    logging.info(f"    Renaming the file {extracted} to {new_name}")
    try:
        rename(extracted, new_name)
    except OSError as err:
        logging.error(f"Failed to rename the file {data_file}: {repr(err)}.")
        return False

    build[u"file-name"] = new_name
    return True
186
187
def download_and_unzip_data_file(spec, job, build, pid):
    """Download and unzip a source file.

    The sources are tried in this order until one download succeeds:

    1. URL[NEXUS,LOG] with spec.input["file-name"],
    2. URL[NEXUS,DOC], first the release found in the job name together with
       spec.input["file-name"], then "master" together with
       spec.input["zip-file-name"],
    3. URL[JENKINS,CSIT] or URL[JENKINS,HC] (chosen by the job name prefix)
       with spec.input["zip-file-name"].

    On success, build["file-name"] is set to the downloaded file (without the
    trailing ".gz", if any); a zip archive is verified and unzipped.

    :param spec: Specification read from the specification file.
    :param job: Name of the Jenkins job.
    :param build: Information about the build.
    :param pid: PID of the process executing this method.
    :type spec: Specification
    :type job: str
    :type build: dict
    :type pid: int
    :returns: True if the download was successful, otherwise False.
    :rtype: bool
    :raises PresentationError: If the Jenkins source is needed and the job
        name starts neither with "csit-" nor with "hc2vpp-".
    """

    data_dir = spec.environment[u"paths"][u"DIR[WORKING,DATA]"]
    local_prefix = f"{job}{SEPARATOR}{build[u'build']}{SEPARATOR}"

    # Try to download .gz from logs.fd.io

    file_name = spec.input[u"file-name"]
    url = u"{0}/{1}".format(
        spec.environment[u'urls'][u'URL[NEXUS,LOG]'],
        spec.input[u'download-path'].format(
            job=job, build=build[u'build'], filename=file_name
        )
    )
    new_name = join(data_dir, f"{local_prefix}{file_name}")

    logging.info(f"Trying to download {url}")

    arch = bool(spec.configuration.get(u"archive-inputs", True))
    success, downloaded_name = _download_file(url, new_name, arch=arch)

    if not success:

        # Try to download .gz or .zip from docs.fd.io
        file_names = (spec.input[u"file-name"], spec.input[u"zip-file-name"])
        # A job name without a release must not crash the download; in that
        # case only the "master" location is tried.
        match = re.search(REGEX_RELEASE, job)
        release = match.group(2) if match else None
        for idx, rls in enumerate((release, u"master", )):
            if rls is None:
                logging.warning(
                    f"No release found in the job name {job}, skipping the "
                    f"release specific location."
                )
                continue
            try:
                rls = f"rls{int(rls)}"
            except ValueError:
                # It is master
                pass
            url = (
                f"{spec.environment[u'urls'][u'URL[NEXUS,DOC]']}/"
                f"{rls}/"
                f"{spec.environment[u'urls'][u'DIR[NEXUS,DOC]']}/"
                f"{local_prefix}{file_names[idx]}"
            )

            logging.info(f"Downloading {url}")

            new_name = join(data_dir, f"{local_prefix}{file_names[idx]}")
            success, downloaded_name = _download_file(url, new_name, arch=arch)
            if success:
                if file_names[idx].endswith(u".gz"):
                    # The content is stored under the name without ".gz" and
                    # is handled here as still gzip-compressed: decompress it
                    # in place.
                    with gzip.open(downloaded_name[:-3], u"rb") as gzip_file:
                        file_content = gzip_file.read()
                    with open(downloaded_name[:-3], u"wb") as xml_file:
                        xml_file.write(file_content)
                break

    if not success:

        # Try to download .zip from jenkins.fd.io
        file_name = spec.input[u"zip-file-name"]
        download_path = spec.input[u"zip-download-path"]
        if job.startswith(u"csit-"):
            url = spec.environment[u"urls"][u"URL[JENKINS,CSIT]"]
        elif job.startswith(u"hc2vpp-"):
            url = spec.environment[u"urls"][u"URL[JENKINS,HC]"]
        else:
            raise PresentationError(f"No url defined for the job {job}.")

        full_name = download_path.format(
            job=job, build=build[u"build"], filename=file_name
        )
        url = u"{0}/{1}".format(url, full_name)
        new_name = join(data_dir, f"{local_prefix}{file_name}")

        logging.info(f"Downloading {url}")

        success, downloaded_name = _download_file(url, new_name)

    if success and downloaded_name.endswith(u".zip"):
        if not is_zipfile(downloaded_name):
            # Report the file which was really checked; it differs from
            # new_name when "_info" was removed during the download.
            logging.error(f"Zip file {downloaded_name} is corrupted.")
            success = False

    if success:
        if downloaded_name.endswith(u".gz"):
            # The data is stored in the file without the ".gz" suffix.
            build[u"file-name"] = downloaded_name[:-3]
        else:
            build[u"file-name"] = downloaded_name

        if downloaded_name.endswith(u".zip"):
            success = _unzip_file(spec, build, pid)

    return success