Trending: Implement retries to requests
[csit.git] / resources / tools / presentation / input_data_files.py
1 # Copyright (c) 2018 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
5 #
6 #     http://www.apache.org/licenses/LICENSE-2.0
7 #
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13
14 """Inputs
15 Download all data.
16 """
17
18 import re
19 import requests
20
21 from os import rename, mkdir
22 from os.path import join
23 from zipfile import ZipFile, is_zipfile, BadZipfile
24 from httplib import responses
25 from requests.adapters import HTTPAdapter
26 from requests.packages.urllib3.util.retry import Retry
27 from requests import codes, RequestException, Timeout, TooManyRedirects, \
28     HTTPError, ConnectionError
29
30 from errors import PresentationError
31 from utils import execute_command
32
# Number of bytes requested per iteration while streaming a download to disk.
CHUNK_SIZE = 512

# Delimiter placed between the individual parts of generated file names.
SEPARATOR = "__"

# Locates the release in a job name: group 2 is either four digits or the
# word "master"; groups 1 and 3 are the surrounding non-digit text.
REGEX_RELEASE = re.compile(r"(\D*)(\d{4}|master)(\D*)")
40
41
def _download_file(url, file_name, log, timeout=(30, 300)):
    """Download a file with input data.

    If the first request does not return 200 OK, the request is repeated once
    with every "_info" substring removed from the URL. When this second
    request succeeds, "_info" is removed from the target file name as well.

    :param url: URL to the file to download.
    :param file_name: Name of file to download.
    :param log: List of log messages.
    :param timeout: Timeout handed over to requests, either one number or
        a tuple (connect timeout, read timeout) in seconds. None disables the
        timeout, i.e. a stalled server blocks the caller indefinitely.
    :type url: str
    :type file_name: str
    :type log: list of tuples (severity, msg)
    :type timeout: float, tuple(float, float) or None
    :returns: Tuple (success, file_name). The first item is True if the
        download was successful, otherwise False. The second item is the
        name of the file the data was (or would have been) written to.
    :rtype: tuple(bool, str)
    """

    def requests_retry_session(retries=3,
                               backoff_factor=0.3,
                               status_forcelist=(500, 502, 504)):
        """Create a requests session which retries failed requests.

        :param retries: Number of retries; used as the total limit and also
            as the limit for read errors and for connection errors.
        :param backoff_factor: Back-off factor passed to urllib3 Retry.
        :param status_forcelist: HTTP status codes which force a retry.
        :type retries: int
        :type backoff_factor: float
        :type status_forcelist: tuple of int
        :returns: Session with the retrying adapter mounted for both
            "http://" and "https://".
        :rtype: requests.Session
        """

        retry = Retry(
            total=retries,
            read=retries,
            connect=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
        )
        adapter = HTTPAdapter(max_retries=retry)
        session = requests.Session()
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        return session

    def log_status(code):
        """Log the status code together with its reason phrase.

        :param code: HTTP status code of the response.
        :type code: int
        """
        # Non-standard status codes are not listed in httplib.responses; a
        # plain lookup would raise KeyError and abort the whole download.
        log.append(("INFO", "    {0}: {1}".format(
            code, responses.get(code, "Unknown status code"))))

    success = False
    response = None
    session = requests_retry_session()
    try:
        log.append(("INFO", "    Connecting to '{0}' ...".format(url)))
        response = session.get(url, stream=True, timeout=timeout)
        code = response.status_code
        log_status(code)

        if code != codes["OK"]:
            # With stream=True the connection stays open until the body is
            # consumed or the response is closed, so release it explicitly
            # before issuing the second request.
            response.close()
            url = url.replace("_info", "")
            log.append(("INFO", "    Connecting to '{0}' ...".format(url)))
            response = session.get(url, stream=True, timeout=timeout)
            code = response.status_code
            log_status(code)
            if code != codes["OK"]:
                return False, file_name
            file_name = file_name.replace("_info", "")

        log.append(("INFO", "    Downloading the file '{0}' to '{1}' ...".
                    format(url, file_name)))

        with open(file_name, "wb") as file_handle:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                if chunk:
                    file_handle.write(chunk)
        success = True
    except ConnectionError as err:
        log.append(("ERROR", "Not possible to connect to '{0}'.".format(url)))
        log.append(("DEBUG", repr(err)))
    except HTTPError as err:
        log.append(("ERROR", "Invalid HTTP response from '{0}'.".format(url)))
        log.append(("DEBUG", repr(err)))
    except TooManyRedirects as err:
        log.append(("ERROR", "Request exceeded the configured number "
                             "of maximum re-directions."))
        log.append(("DEBUG", repr(err)))
    except Timeout as err:
        log.append(("ERROR", "Request timed out."))
        log.append(("DEBUG", repr(err)))
    except RequestException as err:
        log.append(("ERROR", "Unexpected HTTP request exception."))
        log.append(("DEBUG", repr(err)))
    except (IOError, ValueError, KeyError) as err:
        log.append(("ERROR", "Download failed."))
        log.append(("DEBUG", repr(err)))
    finally:
        # Runs on every exit path, including the early return above, so
        # neither the streamed response nor the session leaks a connection.
        if response is not None:
            response.close()
        session.close()

    log.append(("INFO", "    Download finished."))
    return success, file_name
126
127
def _unzip_file(spec, build, pid, log):
    """Unzip downloaded source file.

    The member spec.input["extract"] is extracted from the archive
    build["file-name"] into a temporary directory named after the pid. It is
    then renamed to "<archive name without extension><SEPARATOR><base name of
    the member>" and build["file-name"] is updated to this new name.

    :param spec: Specification read form the specification file.
    :param build: Information about the build.
    :param pid: PID of the process executing this method; used as the name
        of the temporary directory.
    :param log: List of log messages.
    :type spec: Specification
    :type build: dict
    :type pid: int
    :type log: list of tuples (severity, msg)
    :returns: True if the file was extracted and renamed, otherwise False.
    :rtype: bool
    """

    data_file = spec.input["extract"]
    file_name = build["file-name"]
    directory = spec.environment["paths"]["DIR[WORKING,DATA]"]
    tmp_dir = join(directory, str(pid))
    try:
        mkdir(tmp_dir)
    except OSError:
        # Deliberately best effort (e.g. the directory already exists). If
        # the directory is really unusable, the extraction below fails and
        # the failure is logged there.
        pass
    # Strip only the last extension. Splitting on every dot would cut the
    # path at the first dot found in a directory or job name.
    new_name = "{0}{1}{2}".format(file_name.rsplit('.', 1)[0],
                                  SEPARATOR,
                                  data_file.split("/")[-1])

    log.append(("INFO", "    Unzipping: '{0}' from '{1}'.".
                format(data_file, file_name)))
    try:
        with ZipFile(file_name, 'r') as zip_file:
            zip_file.extract(data_file, tmp_dir)
    except (BadZipfile, RuntimeError, KeyError, IOError, OSError) as err:
        # KeyError: the requested member is not in the archive.
        # IOError / OSError: the archive or the target cannot be accessed.
        log.append(("ERROR", "Failed to unzip the file '{0}': {1}.".
                    format(file_name, str(err))))
        return False

    extracted = join(tmp_dir, data_file)
    log.append(("INFO", "    Renaming the file '{0}' to '{1}'".
                format(extracted, new_name)))
    try:
        rename(extracted, new_name)
    except OSError as err:
        log.append(("ERROR", "Failed to rename the file '{0}': {1}.".
                    format(data_file, str(err))))
        return False

    build["file-name"] = new_name
    return True
171
172
def download_and_unzip_data_file(spec, job, build, pid, log):
    """Download and unzip a source file.

    The file is first downloaded from the source selected by the job name
    prefix. If this fails, or if a ".zip" file is not a valid zip archive,
    the download is retried from the URL[NEXUS] location, first for the
    release found in the job name and then for "master". On success
    build["file-name"] is set to the name of the resulting file.

    :param spec: Specification read form the specification file.
    :param job: Name of the Jenkins job.
    :param build: Information about the build.
    :param pid: PID of the process executing this method.
    :param log: List of log messages.
    :type spec: Specification
    :type job: str
    :type build: dict
    :type pid: int
    :type log: list of tuples (severity, msg)
    :returns: True if the download (and the unzipping in case of a ".zip"
        file) was successful, otherwise False.
    :rtype: bool
    :raises PresentationError: If no URL is defined for the job.
    """

    file_name = spec.input["file-name"]
    urls = spec.environment["urls"]

    if job.startswith("csit-"):
        if file_name.endswith(".zip"):
            url = urls["URL[JENKINS,CSIT]"]
        elif file_name.endswith(".gz"):
            url = urls["URL[NEXUS,LOG]"]
        else:
            log.append(("ERROR", "Not supported file format."))
            return False
    elif job.startswith("hc2vpp-"):
        url = urls["URL[JENKINS,HC]"]
    elif job.startswith("intel-dnv-"):
        # The last four characters of the job name are used as the release.
        url = urls["URL[VIRL,DNV]"].format(release=job[-4:])
    else:
        raise PresentationError("No url defined for the job '{}'.".format(job))

    full_name = spec.input["download-path"]. \
        format(job=job, build=build["build"], filename=file_name)
    if not job.startswith("intel-dnv-"):
        url = "{0}/{1}".format(url, full_name)
    local_name = "{job}{sep}{build}{sep}{name}". \
        format(job=job, sep=SEPARATOR, build=build["build"], name=file_name)
    new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
                    local_name)

    # Download the file from the defined source (Jenkins, logs.fd.io):
    success, downloaded_name = _download_file(url, new_name, log)
    if success:
        new_name = downloaded_name

    # A downloaded ".zip" which is not a zip archive counts as a failure.
    if success and new_name.endswith(".zip"):
        if not is_zipfile(new_name):
            success = False

    # If not successful, download from docs.fd.io:
    if not success:
        log.append(("INFO", "    Trying to download from https://docs.fd.io:"))
        # The job name need not contain a release (four digits or "master");
        # in that case only "master" is tried instead of failing on None.
        match = re.search(REGEX_RELEASE, job)
        releases = [match.group(2)] if match else []
        if "master" not in releases:
            # Avoid requesting the very same URL twice.
            releases.append("master")
        for rls in releases:
            try:
                # Numeric releases are stored in "rls<number>" directories.
                rls = "rls{0}".format(int(rls))
            except ValueError:
                # Not a number ("master"), use it as it is.
                pass
            url = "{url}/{release}/{dir}/{file}". \
                format(url=urls["URL[NEXUS]"],
                       release=rls,
                       dir=urls["DIR[NEXUS]"],
                       file=local_name)
            success, new_name = _download_file(url, new_name, log)
            if success:
                break

    if not success:
        return False
    build["file-name"] = new_name

    if file_name.endswith(".gz"):
        if "docs.fd.io" in url:
            execute_command("gzip --decompress --keep --force {0}".
                            format(new_name))
        else:
            # NOTE(review): the file is only renamed here, not decompressed;
            # presumably requests has already decoded the gzip content
            # encoding while downloading - confirm against the server.
            rename(new_name, new_name[:-3])
            if spec.configuration.get("archive-inputs", True):
                execute_command("gzip --keep {0}".format(new_name[:-3]))
        # In both cases the usable file is the one without the ".gz" suffix.
        build["file-name"] = new_name[:-3]

    if not new_name.endswith(".zip"):
        return True
    if not is_zipfile(new_name):
        log.append(("ERROR",
                    "Zip file '{0}' is corrupted.".format(new_name)))
        return False
    return _unzip_file(spec, build, pid, log)