PAL: Add replacing of data in tables
[csit.git] / resources / tools / presentation / input_data_files.py
1 # Copyright (c) 2018 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
5 #
6 #     http://www.apache.org/licenses/LICENSE-2.0
7 #
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13
"""Inputs

Download the input data files and extract the data from zip archives.
"""
17
18 import re
19 import requests
20 import logging
21
22 from os import rename, mkdir
23 from os.path import join
24 from zipfile import ZipFile, is_zipfile, BadZipfile
25 from httplib import responses
26 from requests.adapters import HTTPAdapter
27 from requests.packages.urllib3.util.retry import Retry
28 from requests import codes, RequestException, Timeout, TooManyRedirects, \
29     HTTPError, ConnectionError
30
31 from errors import PresentationError
32
33
# Chunk size used for file download; passed as chunk_size to
# Response.iter_content() when the downloaded data is written to disk.
CHUNK_SIZE = 512

# Separator used in file names; it joins the job name, the build number and
# the file name of the local copies of the downloaded files.
SEPARATOR = "__"

# Regular expression applied to a Jenkins job name; its second group is the
# release: either four digits or the string "master".
REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)')
41
42
def _download_file(url, file_name, log, arch=False):
    """Download a file with input data.

    The file is requested from 'url' first. If the response status is not
    200 OK, the request is repeated once with every "_info" removed from the
    URL; if this second request succeeds, "_info" is removed from the file
    name as well.

    The content of the response is written to the file 'file_name' with
    ".gz" removed from its name. If 'arch' is True and the file name includes
    ".gz", the file is requested once more and the raw body of the response
    is stored under 'file_name' itself. A failure of this additional download
    is only logged, it does not change the result.

    :param url: URL to the file to download.
    :param file_name: Name of the local file the downloaded data is saved to.
    :param log: List of log messages.
    :param arch: If True, also .gz file is downloaded
    :type url: str
    :type file_name: str
    :type log: list of tuples (severity, msg)
    :type arch: bool
    :returns: Tuple of two items: True if the download was successful,
        otherwise False; and the file name (with "_info" removed if the
        file was downloaded from the URL without "_info").
    :rtype: tuple(bool, str)
    """

    def requests_retry_session(retries=3,
                               backoff_factor=0.3,
                               status_forcelist=(500, 502, 504)):
        """Create a session which retries failed requests.

        :param retries: Total number of retries to allow.
        :param backoff_factor: A backoff factor to apply between attempts after
            the second try.
        :param status_forcelist: A set of integer HTTP status codes that are
            forced to retry.
        :type retries: int
        :type backoff_factor: float
        :type status_forcelist: iterable
        :returns: Session object.
        :rtype: requests.Session
        """

        retry = Retry(
            total=retries,
            read=retries,
            connect=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
        )
        adapter = HTTPAdapter(max_retries=retry)
        session = requests.Session()
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        return session

    def status_message(status_code):
        """Format the log message with the status code and its description.

        A status code which is not in the 'responses' table must not break
        the download, so a generic description is used for it.

        :param status_code: HTTP status code of a response.
        :type status_code: int
        :returns: Log message.
        :rtype: str
        """
        return "    {0}: {1}".format(
            status_code, responses.get(status_code, "Unknown status code"))

    success = False
    session = None
    try:
        log.append(("INFO", "    Connecting to '{0}' ...".format(url)))
        session = requests_retry_session()
        response = session.get(url, stream=True)
        code = response.status_code
        log.append(("INFO", status_message(code)))

        if code != codes["OK"]:
            # Second chance: the same URL without "_info".
            if session:
                session.close()
            url = url.replace("_info", "")
            log.append(("INFO", "    Connecting to '{0}' ...".format(url)))
            session = requests_retry_session()
            response = session.get(url, stream=True)
            code = response.status_code
            log.append(("INFO", status_message(code)))
            if code != codes["OK"]:
                # The session is closed in the 'finally' clause.
                return False, file_name
            file_name = file_name.replace("_info", "")

        dst_file_name = file_name.replace(".gz", "")
        log.append(("INFO", "    Downloading the file '{0}' to '{1}' ...".
                    format(url, dst_file_name)))
        with open(dst_file_name, "wb") as file_handle:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                if chunk:
                    file_handle.write(chunk)

        if arch and ".gz" in file_name:
            # Keep also a copy of the file as it was sent by the server.
            if session:
                session.close()
            log.append(("INFO", "    Downloading the file '{0}' to '{1}' ...".
                        format(url, file_name)))
            session = requests_retry_session()
            response = session.get(url, stream=True)
            if response.status_code == codes["OK"]:
                with open(file_name, "wb") as file_handle:
                    file_handle.write(response.raw.read())
            else:
                # Best effort only, the data file itself is already saved.
                log.append(("ERROR", "Not possible to download the file '{0}' "
                                     "to '{1}' ...".format(url, file_name)))

        success = True
    except ConnectionError as err:
        log.append(("ERROR", "Not possible to connect to '{0}'.".format(url)))
        log.append(("DEBUG", repr(err)))
    except HTTPError as err:
        log.append(("ERROR", "Invalid HTTP response from '{0}'.".format(url)))
        log.append(("DEBUG", repr(err)))
    except TooManyRedirects as err:
        log.append(("ERROR", "Request exceeded the configured number "
                             "of maximum re-directions."))
        log.append(("DEBUG", repr(err)))
    except Timeout as err:
        log.append(("ERROR", "Request timed out."))
        log.append(("DEBUG", repr(err)))
    except RequestException as err:
        log.append(("ERROR", "Unexpected HTTP request exception."))
        log.append(("DEBUG", repr(err)))
    except (IOError, ValueError, KeyError) as err:
        log.append(("ERROR", "Download failed."))
        log.append(("DEBUG", repr(err)))
    finally:
        if session:
            session.close()

    log.append(("INFO", "    Download finished."))
    return success, file_name
158
159
def _unzip_file(spec, build, pid, log):
    """Unzip downloaded source file.

    The data file is extracted from the archive build["file-name"] to
    a directory named by 'pid' inside DIR[WORKING,DATA]. Then it is renamed
    to the name of the archive without its extension, followed by SEPARATOR
    and the base name of the data file. On success build["file-name"] is set
    to this new name.

    :param spec: Specification read from the specification file.
    :param build: Information about the build.
    :param pid: PID of the process executing this method; it is used as the
        name of the directory the data file is extracted to.
    :param log: List of log messages.
    :type spec: Specification
    :type build: dict
    :type pid: int
    :type log: list of tuples (severity, msg)
    :returns: True if the data file was extracted and renamed, otherwise
        False.
    :rtype: bool
    """

    file_name = build["file-name"]
    if ".zip" in file_name:
        data_file = spec.input["zip-extract"]
    else:
        data_file = spec.input["extract"]

    directory = spec.environment["paths"]["DIR[WORKING,DATA]"]
    tmp_dir = join(directory, str(pid))
    try:
        mkdir(tmp_dir)
    except OSError:
        # Typically the directory already exists (created by a previous call
        # with the same pid). Any other problem makes the extraction below
        # fail and it is reported there.
        pass

    # Remove only the last extension. Splitting on every dot would return
    # just a fragment of the path if a directory or the file name itself
    # includes another dot.
    new_name = "{0}{1}{2}".format(file_name.rsplit('.', 1)[0],
                                  SEPARATOR,
                                  data_file.split("/")[-1])

    log.append(("INFO", "    Unzipping: '{0}' from '{1}'.".
                format(data_file, file_name)))
    try:
        with ZipFile(file_name, 'r') as zip_file:
            zip_file.extract(data_file, tmp_dir)
    except (BadZipfile, RuntimeError, KeyError, IOError, OSError) as err:
        # KeyError: the data file is not in the archive.
        # IOError / OSError: the archive cannot be read or the extracted
        # file cannot be written.
        log.append(("ERROR", "Failed to unzip the file '{0}': {1}.".
                    format(file_name, str(err))))
        return False

    extracted_file = join(tmp_dir, data_file)
    log.append(("INFO", "    Renaming the file '{0}' to '{1}'".
                format(extracted_file, new_name)))
    try:
        rename(extracted_file, new_name)
    except OSError as err:
        log.append(("ERROR", "Failed to rename the file '{0}': {1}.".
                    format(data_file, str(err))))
        return False

    build["file-name"] = new_name
    return True
207
208
def download_and_unzip_data_file(spec, job, build, pid, log):
    """Download and unzip a source file.

    The sources are tried in this order until a download succeeds:

    1. the file spec.input["file-name"] from URL[NEXUS,LOG],
    2. the same file from URL[NEXUS,DOC],
    3. the file spec.input["zip-file-name"] from URL[NEXUS,DOC]; first for
       the release found in the job name, then for "master",
    4. the file spec.input["zip-file-name"] from the Jenkins URL chosen by
       the prefix of the job name.

    On success build["file-name"] is set to the name of the local file with
    the data. A downloaded zip file is verified and the data file is
    extracted from it.

    :param spec: Specification read from the specification file.
    :param job: Name of the Jenkins job.
    :param build: Information about the build.
    :param pid: PID of the process executing this method.
    :param log: List of log messages.
    :type spec: Specification
    :type job: str
    :type build: dict
    :type pid: int
    :type log: list of tuples (severity, msg)
    :returns: True if the download (and the extraction if a zip file was
        downloaded) was successful, otherwise False.
    :rtype: bool
    :raises PresentationError: If the Jenkins source is reached and no
        Jenkins URL is defined for the job.
    """

    urls = spec.environment["urls"]
    build_nr = build["build"]

    def local_name(name):
        """Return the path to the local copy of the downloaded file.

        :param name: Name of the file on the server.
        :type name: str
        :returns: Path in DIR[WORKING,DATA] with the job name and the build
            number prepended to the file name.
        :rtype: str
        """
        return join(spec.environment["paths"]["DIR[WORKING,DATA]"],
                    "{job}{sep}{build}{sep}{name}".format(
                        job=job, sep=SEPARATOR, build=build_nr, name=name))

    # Try to download the data file from URL[NEXUS,LOG], then from
    # URL[NEXUS,DOC]. The two attempts differ only in the base URL and in the
    # message written to the log.

    file_name = spec.input["file-name"]
    new_name = local_name(file_name)
    arch = bool(spec.configuration.get("archive-inputs", True))
    success = False
    downloaded_name = new_name
    for url_key, message in (("URL[NEXUS,LOG]", "Trying to download {0}"),
                             ("URL[NEXUS,DOC]", "Downloading {0}")):
        url = "{0}/{1}".format(
            urls[url_key],
            spec.input["download-path"].format(
                job=job, build=build_nr, filename=file_name))

        logging.info(message.format(url))

        success, downloaded_name = _download_file(
            url, new_name, log, arch=arch)
        if success:
            break

    if not success:

        # Try to download the zip file from URL[NEXUS,DOC]

        file_name = spec.input["zip-file-name"]
        new_name = local_name(file_name)
        nexus_file_name = "{job}{sep}{build}{sep}{name}". \
            format(job=job, sep=SEPARATOR, build=build_nr, name=file_name)

        # A job name without a release must not crash the download, only
        # "master" is tried then. "master" is also not tried twice if it is
        # the release found in the job name.
        releases = list()
        match = re.search(REGEX_RELEASE, job)
        if match:
            releases.append(match.group(2))
        if "master" not in releases:
            releases.append("master")

        for rls in releases:
            try:
                rls = "rls{0}".format(int(rls))
            except ValueError:
                # It is 'master'
                pass
            url = "{url}/{release}/{dir}/{file}". \
                format(url=urls["URL[NEXUS,DOC]"],
                       release=rls,
                       dir=urls["DIR[NEXUS,DOC]"],
                       file=nexus_file_name)

            logging.info("Downloading {0}".format(url))

            success, downloaded_name = _download_file(url, new_name, log)
            if success:
                break

    if not success:

        # Try to download the zip file from Jenkins

        file_name = spec.input["zip-file-name"]
        download_path = spec.input["zip-download-path"]
        if job.startswith("csit-"):
            url = urls["URL[JENKINS,CSIT]"]
        elif job.startswith("hc2vpp-"):
            url = urls["URL[JENKINS,HC]"]
        else:
            raise PresentationError(
                "No url defined for the job '{}'.".format(job))

        full_name = download_path.format(
            job=job, build=build_nr, filename=file_name)
        url = "{0}/{1}".format(url, full_name)
        new_name = local_name(file_name)

        logging.info("Downloading {0}".format(url))

        success, downloaded_name = _download_file(url, new_name, log)

    if success and downloaded_name.endswith(".zip"):
        if not is_zipfile(downloaded_name):
            # Report the file which was really checked.
            log.append(("ERROR", "Zip file '{0}' is corrupted.".
                        format(downloaded_name)))
            success = False

    if success:
        build["file-name"] = downloaded_name

        if downloaded_name.endswith(".gz"):
            # _download_file saves the data without the ".gz" extension.
            build["file-name"] = downloaded_name[:-3]

        if downloaded_name.endswith(".zip"):
            success = _unzip_file(spec, build, pid, log)

    return success