Line length: Fix recent merges
[csit.git] / resources / tools / presentation / convert_xml_json.py
1 # Copyright (c) 2021 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
5 #
6 #     http://www.apache.org/licenses/LICENSE-2.0
7 #
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13
14 """Convert output_info.xml files into JSON structures.
15
16 Version: 0.1.0
17 Date:    8th June 2021
18
19 The json structure is defined in https://gerrit.fd.io/r/c/csit/+/28992
20 """
21
22 import os
23 import re
24 import json
25 import logging
26 import gzip
27
28 from os.path import join
29 from shutil import rmtree
30 from copy import deepcopy
31
32 from pal_utils import get_files
33
34
class JSONData:
    """A Class storing and manipulating data from tests.

    The stored data is a nested structure of dictionaries and lists.
    Elements are inserted via a path of (key, type) steps, see add_element().
    """

    def __init__(self, template=None):
        """Initialization.

        :param template: JSON formatted template used to store data. It can
            include default values.
        :type template: dict
        """
        # Deep-copy so later modifications of the stored data cannot leak
        # back into the caller's template object.
        self._template = deepcopy(template)
        self._data = self._template if self._template else dict()

    def __str__(self):
        """Return a string with human readable data.

        :returns: Readable description.
        :rtype: str
        """
        return str(self._data)

    def __repr__(self):
        """Return a string executable as Python constructor call.

        :returns: Executable constructor call.
        :rtype: str
        """
        return f"JSONData(template={self._template!r})"

    @property
    def data(self):
        """Getter

        :return: Data stored in the object.
        :rtype: dict
        """
        return self._data

    def add_element(self, value, path_to_value):
        """Add an element to the json structure.

        Containers (dicts / lists) on the path are created as needed. For a
        list, index -1 (or an index past the end) means "append".

        :param value: Element value.
        :param path_to_value: List of tuples where the first item is the
            element on the path and the second one is its type.
        :type value: dict, list, str, int, float, bool
        :type path_to_value: list
        :raises: IndexError if the path is empty.
        :raises: TypeError if the val is of not supported type.
        """

        def _add_element(val, path, structure):
            """Add an element to the given path.

            :param val: Element value.
            :param path: List of tuples where the first item is the element
                on the path and the second one is its type.
            :param structure: The structure where the element is added.
            :type val: dict, list, str, int, float, bool
            :type path: list
            :type structure: dict or list
            :raises TypeError: If there is a wrong type in the path.
            """
            key, k_type = path[0][0], path[0][1]
            if len(path) == 1:
                # Last step on the path: store the value.
                if isinstance(structure, dict):
                    if k_type is dict:
                        if key not in structure:
                            structure[key] = dict()
                        structure[key].update(val)
                    elif k_type is list:
                        if key not in structure:
                            structure[key] = list()
                        if isinstance(val, list):
                            structure[key].extend(val)
                        else:
                            structure[key].append(val)
                    else:
                        structure[key] = val
                elif isinstance(structure, list):
                    # Index -1 or past-the-end means "append".
                    if key == -1 or key >= len(structure):
                        if isinstance(val, list):
                            structure.extend(val)
                        else:
                            structure.append(val)
                    else:
                        structure[key] = val
                return

            # Intermediate step: make sure the container on the path exists.
            if isinstance(structure, dict):
                if k_type is dict:
                    if key not in structure:
                        structure[key] = dict()
                elif k_type is list:
                    if key not in structure:
                        structure[key] = list()
            elif isinstance(structure, list):
                if key == -1 or key >= len(structure):
                    if k_type is list:
                        structure.append(list())
                    elif k_type is dict:
                        structure.append(dict())
                    else:
                        structure.append(0)
                    # Descend into the newly appended container. Track the
                    # index locally instead of writing it back into the path
                    # (the original code did ``path[0][0] = ...`` which
                    # raises TypeError, as path items are tuples).
                    key = len(structure) - 1
            else:
                raise TypeError(
                    u"Only the last item in the path can be different type "
                    u"then list or dictionary."
                )
            _add_element(val, path[1:], structure[key])

        if not isinstance(value, (dict, list, str, int, float, bool)):
            raise TypeError(
                u"The value must be one of these types: dict, list, str, int, "
                u"float, bool.\n"
                f"Value: {value}\n"
                f"Path: {path_to_value}"
            )
        # Deep-copy the value so repeated calls re-using the same (mutated)
        # input object do not alias entries already stored in the structure.
        _add_element(deepcopy(value), path_to_value, self._data)

    def get_element(self, path):
        """Get the element specified by the path.

        Not implemented yet; reserved for future use.

        :param path: List of keys and indices to the requested element or
            sub-tree.
        :type path: list
        :returns: Element specified by the path.
        :rtype: any
        """
        raise NotImplementedError

    def dump(self, file_out, indent=None):
        """Write JSON data to a file.

        A failure to open the output file is logged and ignored (best
        effort), not raised.

        :param file_out: Path to the output JSON file.
        :param indent: Indentation of items in JSON string. It is directly
            passed to json.dump method.
        :type file_out: str
        :type indent: str
        """
        try:
            with open(file_out, u"w") as file_handler:
                json.dump(self._data, file_handler, indent=indent)
        except OSError as err:
            logging.warning(f"{repr(err)} Skipping")

    def load(self, file_in):
        """Load JSON data from a file.

        :param file_in: Path to the input JSON file.
        :type file_in: str
        :raises: ValueError if the data being deserialized is not a valid
            JSON document.
        :raises: IOError if the file is not found or corrupted.
        """
        with open(file_in, u"r") as file_handler:
            self._data = json.load(file_handler)
193
194
def _export_test_from_xml_to_json(tid, in_data, out, template, metadata):
    """Export data from a test to a json structure.

    NOTE: metadata is mutated: the keys u"suite-id", u"suite-doc",
    u"testbed" and u"sut-version" are popped (removed) from it; every
    remaining metadata item is merged into the u"execution" sub-structure.

    :param tid: Test ID.
    :param in_data: Test data.
    :param out: Path to output json file.
    :param template: JSON template with optional default values.
    :param metadata: Data which are not stored in XML structure.
    :type tid: str
    :type in_data: dict
    :type out: str
    :type template: dict
    :type metadata: dict
    """

    # Paths (lists of (key, type) tuples) into the JSON structure, consumed
    # by JSONData.add_element().
    p_metadata = [(u"metadata", dict), ]
    p_test = [(u"test", dict), ]
    p_log = [(u"log", list), (-1, list)]

    data = JSONData(template=template)

    data.add_element({u"suite-id": metadata.pop(u"suite-id", u"")}, p_metadata)
    data.add_element(
        {u"suite-doc": metadata.pop(u"suite-doc", u"")}, p_metadata
    )
    data.add_element({u"testbed": metadata.pop(u"testbed", u"")}, p_metadata)
    data.add_element(
        {u"sut-version": metadata.pop(u"sut-version", u"")}, p_metadata
    )

    data.add_element({u"test-id": tid}, p_test)
    t_type = in_data.get(u"type", u"")
    t_type = u"NDRPDR" if t_type == u"CPS" else t_type  # It is NDRPDR
    data.add_element({u"test-type": t_type}, p_test)
    tags = in_data.get(u"tags", list())
    data.add_element({u"tags": tags}, p_test)
    data.add_element(
        {u"documentation": in_data.get(u"documentation", u"")}, p_test
    )
    data.add_element({u"message": in_data.get(u"msg", u"")}, p_test)
    execution = {
        u"start_time": in_data.get(u"starttime", u""),
        u"end_time": in_data.get(u"endtime", u""),
        u"status": in_data.get(u"status", u"FAILED"),
    }
    # Whatever is left in metadata after the pops above (e.g. ci, job,
    # build) is merged into the "execution" item.
    execution.update(metadata)
    data.add_element({u"execution": execution}, p_test)

    # Template of a single log item; partly overwritten and re-submitted
    # for each log entry. JSONData.add_element() deep-copies the value, so
    # re-using the same dict object is safe.
    log_item = {
        u"source": {
            u"type": u"node",
            u"id": ""
        },
        u"msg-type": u"",
        u"log-level": u"INFO",
        u"timestamp": in_data.get(u"starttime", u""),  # replacement
        u"msg": u"",
        u"data": []
    }

    # Process configuration history:
    in_papi = deepcopy(in_data.get(u"conf-history", None))
    if in_papi:
        regex_dut = re.compile(r'\*\*DUT(\d):\*\*')
        node_id = u"dut1"
        for line in in_papi.split(u"\n"):
            if not line:
                continue
            groups = re.search(regex_dut, line)
            if groups:
                # A "**DUT<n>:**" header line switches the current node;
                # subsequent lines are attributed to it.
                node_id = f"dut{groups.group(1)}"
            else:
                log_item[u"source"][u"id"] = node_id
                log_item[u"msg-type"] = u"papi"
                log_item[u"msg"] = line
                data.add_element(log_item, p_log)

    # Process show runtime:
    in_sh_run = deepcopy(in_data.get(u"show-run", None))
    if in_sh_run:
        # Transform to openMetrics format
        for key, val in in_sh_run.items():
            log_item[u"source"][u"id"] = key
            log_item[u"msg-type"] = u"metric"
            log_item[u"msg"] = u"show-runtime"
            # Reset the accumulated metric samples for this node.
            log_item[u"data"] = list()
            for item in val.get(u"runtime", list()):
                for metric, m_data in item.items():
                    # The "name" entry labels the graph node, it is not a
                    # metric itself.
                    if metric == u"name":
                        continue
                    # One sample per worker thread.
                    for idx, m_item in enumerate(m_data):
                        log_item[u"data"].append(
                            {
                                u"name": metric,
                                u"value": m_item,
                                u"labels": {
                                    u"host": val.get(u"host", u""),
                                    u"socket": val.get(u"socket", u""),
                                    u"graph-node": item.get(u"name", u""),
                                    u"thread-id": str(idx)
                                }
                            }
                        )
            data.add_element(log_item, p_log)

    # Process results:
    # Each test type stores a differently shaped "results" structure;
    # missing values default to the string "NaN".
    results = dict()
    if t_type == u"DEVICETEST":
        pass  # Nothing to add.
    elif t_type == u"NDRPDR":
        results = {
            u"throughput": {
                u"unit":
                    u"cps" if u"TCP_CPS" in tags or u"UDP_CPS" in tags
                    else u"pps",
                u"ndr": {
                    u"value": {
                        u"lower": in_data.get(u"throughput", dict()).
                                  get(u"NDR", dict()).get(u"LOWER", u"NaN"),
                        u"upper": in_data.get(u"throughput", dict()).
                                  get(u"NDR", dict()).get(u"UPPER", u"NaN")
                    },
                    u"value_gbps": {
                        u"lower": in_data.get(u"gbps", dict()).
                                  get(u"NDR", dict()).get(u"LOWER", u"NaN"),
                        u"upper": in_data.get(u"gbps", dict()).
                                  get(u"NDR", dict()).get(u"UPPER", u"NaN")
                    }
                },
                u"pdr": {
                    u"value": {
                        u"lower": in_data.get(u"throughput", dict()).
                                  get(u"PDR", dict()).get(u"LOWER", u"NaN"),
                        u"upper": in_data.get(u"throughput", dict()).
                                  get(u"PDR", dict()).get(u"UPPER", u"NaN")
                    },
                    u"value_gbps": {
                        u"lower": in_data.get(u"gbps", dict()).
                                  get(u"PDR", dict()).get(u"LOWER", u"NaN"),
                        u"upper": in_data.get(u"gbps", dict()).
                                  get(u"PDR", dict()).get(u"UPPER", u"NaN")
                    }
                }
            },
            u"latency": {
                u"forward": {
                    u"pdr-90": in_data.get(u"latency", dict()).
                               get(u"PDR90", dict()).get(u"direction1", u"NaN"),
                    u"pdr-50": in_data.get(u"latency", dict()).
                               get(u"PDR50", dict()).get(u"direction1", u"NaN"),
                    u"pdr-10": in_data.get(u"latency", dict()).
                               get(u"PDR10", dict()).get(u"direction1", u"NaN"),
                    u"pdr-0": in_data.get(u"latency", dict()).
                              get(u"LAT0", dict()).get(u"direction1", u"NaN")
                },
                u"reverse": {
                    u"pdr-90": in_data.get(u"latency", dict()).
                               get(u"PDR90", dict()).get(u"direction2", u"NaN"),
                    u"pdr-50": in_data.get(u"latency", dict()).
                               get(u"PDR50", dict()).get(u"direction2", u"NaN"),
                    u"pdr-10": in_data.get(u"latency", dict()).
                               get(u"PDR10", dict()).get(u"direction2", u"NaN"),
                    u"pdr-0": in_data.get(u"latency", dict()).
                              get(u"LAT0", dict()).get(u"direction2", u"NaN")
                }
            }
        }
    elif t_type == "MRR":
        results = {
            u"unit": u"pps",  # Old data use only pps
            u"samples": in_data.get(u"result", dict()).get(u"samples", list()),
            u"avg": in_data.get(u"result", dict()).get(u"receive-rate", u"NaN"),
            u"stdev": in_data.get(u"result", dict()).
                      get(u"receive-stdev", u"NaN")
        }
    elif t_type == "SOAK":
        results = {
            u"critical-rate": {
                u"lower": in_data.get(u"throughput", dict()).
                          get(u"LOWER", u"NaN"),
                u"upper": in_data.get(u"throughput", dict()).
                          get(u"UPPER", u"NaN"),
            }
        }
    elif t_type == "HOSTSTACK":
        results = in_data.get(u"result", dict())
    # elif t_type == "TCP":  # Not used ???
    #     results = in_data.get(u"result", u"NaN")
    elif t_type == "RECONF":
        results = {
            u"loss": in_data.get(u"result", dict()).get(u"loss", u"NaN"),
            u"time": in_data.get(u"result", dict()).get(u"time", u"NaN")
        }
    else:
        # Unknown test type: results stay an empty dict.
        pass
    data.add_element({u"results": results}, p_test)

    data.dump(out, indent=u"    ")
393
394
def convert_xml_to_json(spec, data):
    """Convert downloaded XML files into JSON.

    Procedure:
    - create one json file for each test,
    - gzip all json files one by one,
    - delete json files.

    :param spec: Specification read from the specification files.
    :param data: Input data parsed from output.xml files.
    :type spec: Specification
    :type data: InputData
    """

    logging.info(u"Converting downloaded XML files to JSON ...")

    template_name = spec.output.get(u"use-template", None)
    structure = spec.output.get(u"structure", u"tree")
    if template_name:
        with open(template_name, u"r") as file_handler:
            template = json.load(file_handler)
    else:
        template = None

    # Start from an empty output directory.
    build_dir = spec.environment[u"paths"][u"DIR[BUILD,JSON]"]
    try:
        rmtree(build_dir)
    except FileNotFoundError:
        pass  # It does not exist

    os.mkdir(build_dir)

    # Compiled once, not per test (the original compiled it inside the
    # innermost loop). Matches e.g. "-2t1c-"; used to shorten the
    # thread/core part of the test ID ("2t1c" -> "1c").
    regex_tc = re.compile(r'-(\d+[tT](\d+[cC]))-')

    for job, builds in data.data.items():
        logging.info(f"  Processing job {job}")
        if structure == "tree":
            os.makedirs(join(build_dir, job), exist_ok=True)
        for build_nr, build in builds.items():
            logging.info(f"  Processing build {build_nr}")
            if structure == "tree":
                os.makedirs(join(build_dir, job, build_nr), exist_ok=True)
            for test_id, test_data in build[u"tests"].items():
                groups = regex_tc.search(test_id)
                if groups:
                    test_id = test_id.replace(groups.group(1), groups.group(2))
                logging.info(f"  Processing test {test_id}")
                if structure == "tree":
                    # One directory per dotted test-ID component, the last
                    # component is the file name.
                    dirs = test_id.split(u".")[:-1]
                    name = test_id.split(u".")[-1]
                    os.makedirs(
                        join(build_dir, job, build_nr, *dirs), exist_ok=True
                    )
                    file_name = \
                        f"{join(build_dir, job, build_nr, *dirs, name)}.json"
                else:
                    # Flat structure: everything encoded in the file name.
                    file_name = join(
                        build_dir,
                        u'.'.join((job, build_nr, test_id, u'json'))
                    )
                suite_id = test_id.rsplit(u".", 1)[0].replace(u" ", u"_")
                _export_test_from_xml_to_json(
                    test_id, test_data, file_name, template,
                    {
                        u"ci": u"jenkins.fd.io",
                        u"job": job,
                        u"build": build_nr,
                        u"suite-id": suite_id,
                        u"suite-doc": build[u"suites"].get(suite_id, dict()).
                                      get(u"doc", u""),
                        u"testbed": build[u"metadata"].get(u"testbed", u""),
                        u"sut-version": build[u"metadata"].get(u"version", u"")
                    }
                )

    # gzip the json files:
    for json_file in get_files(build_dir, u"json"):
        with open(json_file, u"rb") as src:
            with gzip.open(f"{json_file}.gz", u"wb") as dst:
                dst.writelines(src)
        # Remove the plain json file only after it is closed (deleting an
        # open file fails on Windows).
        os.remove(json_file)

    logging.info(u"Done.")