CSIT-1101: Optimize input data processing
[csit.git] / resources / tools / presentation / input_data_files.py
1 # Copyright (c) 2018 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
5 #
6 #     http://www.apache.org/licenses/LICENSE-2.0
7 #
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13
14 """Inputs
15 Download all data.
16 """
17
18 import re
19
20 from os import rename, mkdir
21 from os.path import join
22 from zipfile import ZipFile, is_zipfile, BadZipfile
23 from httplib import responses
24 from requests import get, codes, RequestException, Timeout, TooManyRedirects, \
25     HTTPError, ConnectionError
26
27 from errors import PresentationError
28 from utils import execute_command
29
# Chunk size (in bytes) requested from the HTTP response while streaming a
# downloaded file to disk
CHUNK_SIZE = 512

# Separator used in file names
SEPARATOR = "__"

# Applied to the Jenkins job name to find the release it belongs to: group 2
# captures either a four-digit number or the literal "master".
REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)')
37
38
def _download_file(url, file_name, log):
    """Download a file with input data.

    The response body is streamed to disk in chunks of CHUNK_SIZE bytes.
    Expected failures (connection problems, HTTP errors, I/O errors) are
    recorded in the log and reported through the return value; they are not
    raised to the caller.

    :param url: URL to the file to download.
    :param file_name: Name of file to download.
    :param log: List of log messages.
    :type url: str
    :type file_name: str
    :type log: list of tuples (severity, msg)
    :returns: True if the download was successful, otherwise False.
    :rtype: bool
    """

    success = False
    response = None
    try:
        log.append(("INFO", "    Connecting to '{0}' ...".format(url)))

        response = get(url, stream=True)
        code = response.status_code

        # Not every status code has a registered reason phrase, so do not
        # let a missing entry turn into a misleading "Download failed.".
        log.append(("INFO", "    {0}: {1}".
                    format(code, responses.get(code, "Unknown status"))))

        if code != codes["OK"]:
            return False

        log.append(("INFO", "    Downloading the file '{0}' to '{1}' ...".
                    format(url, file_name)))

        # The context manager closes the file even if streaming fails
        # half-way through.
        with open(file_name, "wb") as file_handle:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                # Skip empty keep-alive chunks.
                if chunk:
                    file_handle.write(chunk)
        success = True
    except ConnectionError as err:
        log.append(("ERROR", "Not possible to connect to '{0}'.".format(url)))
        log.append(("DEBUG", str(err)))
    except HTTPError as err:
        log.append(("ERROR", "Invalid HTTP response from '{0}'.".format(url)))
        log.append(("DEBUG", str(err)))
    except TooManyRedirects as err:
        log.append(("ERROR", "Request exceeded the configured number "
                             "of maximum re-directions."))
        log.append(("DEBUG", str(err)))
    except Timeout as err:
        log.append(("ERROR", "Request timed out."))
        log.append(("DEBUG", str(err)))
    except RequestException as err:
        log.append(("ERROR", "Unexpected HTTP request exception."))
        log.append(("DEBUG", str(err)))
    except (IOError, ValueError, KeyError) as err:
        log.append(("ERROR", "Download failed."))
        log.append(("DEBUG", str(err)))
    finally:
        # With stream=True the connection is only released once the body is
        # fully consumed or the response is closed explicitly; close it on
        # every path (early return and errors included).
        if response is not None:
            response.close()

    log.append(("INFO", "    Download finished."))
    return success
95
96
def _unzip_file(spec, build, pid, log):
    """Unzip downloaded source file.

    The file named by spec.input["extract"] is extracted from the archive
    build["file-name"] into a per-process temporary directory and then moved
    next to the archive under the name
    "<archive name without extension><SEPARATOR><extracted file name>".
    On success build["file-name"] is updated to that new name.

    :param spec: Specification read from the specification file.
    :param build: Information about the build.
    :param pid: PID of the process executing this method; used as the name
        of the temporary directory.
    :param log: List of log messages.
    :type spec: Specification
    :type build: dict
    :type pid: int
    :type log: list of tuples (severity, msg)
    :returns: True if the file was extracted and renamed, otherwise False.
    :rtype: bool
    """

    data_file = spec.input["extract"]
    file_name = build["file-name"]
    directory = spec.environment["paths"]["DIR[WORKING,DATA]"]
    tmp_dir = join(directory, str(pid))
    try:
        mkdir(tmp_dir)
    except OSError:
        # Best effort: the directory may already exist from a previous file
        # processed by the same process.
        pass
    # Strip only the last extension. Splitting on every dot would mangle
    # paths with dots elsewhere (e.g. "./dir/a.zip" or "a.b/c.zip").
    new_name = "{0}{1}{2}".format(file_name.rsplit('.', 1)[0],
                                  SEPARATOR,
                                  data_file.split("/")[-1])

    log.append(("INFO", "    Unzipping: '{0}' from '{1}'.".
                format(data_file, file_name)))
    try:
        with ZipFile(file_name, 'r') as zip_file:
            zip_file.extract(data_file, tmp_dir)
        log.append(("INFO", "    Renaming the file '{0}' to '{1}'".
                    format(join(tmp_dir, data_file), new_name)))
        rename(join(tmp_dir, data_file), new_name)
        build["file-name"] = new_name
        return True
    except (BadZipfile, RuntimeError, KeyError) as err:
        # KeyError is raised by ZipFile.extract when the requested member is
        # not present in the archive.
        log.append(("ERROR", "Failed to unzip the file '{0}': {1}.".
                    format(file_name, str(err))))
        return False
    except OSError as err:
        log.append(("ERROR", "Failed to rename the file '{0}': {1}.".
                    format(data_file, str(err))))
        return False
140
141
def download_and_unzip_data_file(spec, job, build, pid, log):
    """Download and unzip a source file.

    The file is first requested from the primary source selected by the job
    name prefix and the file extension. If that fails, or the downloaded
    ".zip" is not a valid archive, the file is requested from the URL[NEXUS]
    location, first for the release found in the job name and then for
    "master". A ".gz" file is (de)compressed so that the uncompressed file is
    available, a ".zip" file is unzipped. On success build["file-name"] is
    set to the resulting file.

    :param spec: Specification read from the specification file.
    :param job: Name of the Jenkins job.
    :param build: Information about the build.
    :param pid: PID of the process executing this method.
    :param log: List of log messages.
    :type spec: Specification
    :type job: str
    :type build: dict
    :type pid: int
    :type log: list of tuples (severity, msg)
    :returns: True if the download was successful, otherwise False.
    :rtype: bool
    :raises PresentationError: If no URL is defined for the given job.
    """

    if job.startswith("csit-"):
        if spec.input["file-name"].endswith(".zip"):
            url = spec.environment["urls"]["URL[JENKINS,CSIT]"]
        elif spec.input["file-name"].endswith(".gz"):
            url = spec.environment["urls"]["URL[NEXUS,LOG]"]
        else:
            log.append(("ERROR", "Not supported file format."))
            return False
    elif job.startswith("hc2vpp-"):
        url = spec.environment["urls"]["URL[JENKINS,HC]"]
    else:
        raise PresentationError("No url defined for the job '{}'.".
                                format(job))
    file_name = spec.input["file-name"]
    full_name = spec.input["download-path"]. \
        format(job=job, build=build["build"], filename=file_name)
    url = "{0}/{1}".format(url, full_name)
    # The same "<job><sep><build><sep><name>" string names both the local
    # file and the file stored on docs.fd.io.
    stored_name = "{job}{sep}{build}{sep}{name}". \
        format(job=job, sep=SEPARATOR, build=build["build"], name=file_name)
    new_name = join(spec.environment["paths"]["DIR[WORKING,DATA]"],
                    stored_name)
    # Download the file from the defined source (Jenkins, logs.fd.io):
    success = _download_file(url, new_name, log)

    # A ".zip" which is not a valid archive counts as a failed download.
    if success and new_name.endswith(".zip"):
        if not is_zipfile(new_name):
            success = False

    # If not successful, download from docs.fd.io:
    if not success:
        log.append(("INFO", "    Trying to download from https://docs.fd.io:"))
        # Try the release encoded in the job name first (if there is one),
        # then "master"; do not try the same location twice.
        releases = []
        match = re.search(REGEX_RELEASE, job)
        if match:
            releases.append(match.group(2))
        if "master" not in releases:
            releases.append("master")
        for rls in releases:
            # Numeric releases are stored in "rls<number>" directories.
            try:
                rls = "rls{0}".format(int(rls))
            except ValueError:
                pass
            url = "{url}/{release}/{dir}/{file}". \
                format(url=spec.environment["urls"]["URL[NEXUS]"],
                       release=rls,
                       dir=spec.environment["urls"]["DIR[NEXUS]"],
                       file=stored_name)
            success = _download_file(url, new_name, log)
            if success:
                break

    if success:
        build["file-name"] = new_name
    else:
        return False

    if spec.input["file-name"].endswith(".gz"):
        # "url" is the last location tried, i.e. the one the file came from.
        if "docs.fd.io" in url:
            execute_command("gzip --decompress --keep --force {0}".
                            format(new_name))
        else:
            # NOTE(review): this branch treats the downloaded content as
            # already uncompressed despite the ".gz" name - confirm against
            # the server behaviour.
            rename(new_name, new_name[:-3])
            execute_command("gzip --keep {0}".format(new_name[:-3]))
        build["file-name"] = new_name[:-3]

    if new_name.endswith(".zip"):
        # The fallback download is not validated above, so check again.
        if is_zipfile(new_name):
            return _unzip_file(spec, build, pid, log)
        else:
            log.append(("ERROR",
                        "Zip file '{0}' is corrupted.".format(new_name)))
            return False
    else:
        return True

©2016 FD.io a Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation. Linux is a registered trademark of Linus Torvalds.
Please see our privacy policy and terms of use.