# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Data pre-processing

- extract data from output.xml files generated by Jenkins jobs and store in
  pandas' Series,
- provide access to the data,
- filter the data using tags.
"""
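
# Typical usage, a minimal sketch (assumes a prepared Specification object
# `spec`; the job name and build number below are placeholders):
#
#     input_data = InputData(spec, for_output="report")
#     input_data.download_and_parse_data(repeat=2)
#     tests = input_data.tests("csit-vpp-perf-report-iterative", "42")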

import re
import copy
import resource
import logging

from collections import OrderedDict
from os import remove, walk, listdir
from os.path import isfile, isdir, join
from datetime import datetime as dt
from datetime import timedelta
from json import loads
from json.decoder import JSONDecodeError

import hdrh.histogram
import hdrh.codec
import prettytable
import pandas as pd

from robot.api import ExecutionResult, ResultVisitor
from robot import errors

from resources.libraries.python import jumpavg
from input_data_files import download_and_unzip_data_file
from pal_errors import PresentationError


# Separator used in file names
SEPARATOR = "__"


class ExecutionChecker(ResultVisitor):
    """Class to traverse through the test suite structure.
    """

    REGEX_PLR_RATE = re.compile(
        r'PLRsearch lower bound::?\s(\d+.\d+).*\n'
        r'PLRsearch upper bound::?\s(\d+.\d+)'
    )
    REGEX_NDRPDR_RATE = re.compile(
        r'NDR_LOWER:\s(\d+.\d+).*\n.*\n'
        r'NDR_UPPER:\s(\d+.\d+).*\n'
        r'PDR_LOWER:\s(\d+.\d+).*\n.*\n'
        r'PDR_UPPER:\s(\d+.\d+)'
    )
    REGEX_NDRPDR_GBPS = re.compile(
        r'NDR_LOWER:.*,\s(\d+.\d+).*\n.*\n'
        r'NDR_UPPER:.*,\s(\d+.\d+).*\n'
        r'PDR_LOWER:.*,\s(\d+.\d+).*\n.*\n'
        r'PDR_UPPER:.*,\s(\d+.\d+)'
    )
    REGEX_PERF_MSG_INFO = re.compile(
        r'NDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*\n'
        r'PDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*\n'
        r'Latency at 90% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
        r'Latency at 50% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
        r'Latency at 10% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
    )
    REGEX_CPS_MSG_INFO = re.compile(
        r'NDR_LOWER:\s(\d+.\d+)\s.*\s.*\n.*\n.*\n'
        r'PDR_LOWER:\s(\d+.\d+)\s.*\s.*\n.*\n.*'
    )
    REGEX_PPS_MSG_INFO = re.compile(
        r'NDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*\n'
        r'PDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*'
    )
    REGEX_MRR_MSG_INFO = re.compile(r'.*\[(.*)\]')

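    # REGEX_VSAP_MSG_INFO matches vsap test output of roughly this shape
    # (illustrative, reconstructed from the pattern itself):
    #   Transfer Rate: 123.45 ...
    #   Latency: 0.95 ...
    #   Connection cps rate: 10000 ...
    #   Total data transferred: 4096 ...
    #   Completed requests: 100 ...
    #   Failed requests: 0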
    REGEX_VSAP_MSG_INFO = re.compile(
        r'Transfer Rate: (\d*.\d*).*\n'
        r'Latency: (\d*.\d*).*\n'
        r'Connection [cr]ps rate: (\d*).*\n'
        r'Total data transferred: (\d*).*\n'
        r'Completed requests: (\d*).*\n'
        r'Failed requests:\s*(\d*.\d*)'
    )

    # Needed for CPS and PPS tests
    REGEX_NDRPDR_LAT_BASE = re.compile(
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]'
    )
    REGEX_NDRPDR_LAT = re.compile(
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]'
    )

    REGEX_VERSION_VPP = re.compile(
        r"(VPP Version:\s*|VPP version:\s*)(.*)"
    )
    REGEX_VERSION_DPDK = re.compile(
        r"(DPDK version:\s*|DPDK Version:\s*)(.*)"
    )
    REGEX_TCP = re.compile(
        r'Total\s(rps|cps|throughput):\s(\d*).*$'
    )
    REGEX_MRR = re.compile(
        r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s'
        r'tx\s(\d*),\srx\s(\d*)'
    )
    REGEX_BMRR = re.compile(
        r'.*trial results.*: \[(.*)\]'
    )
    REGEX_RECONF_LOSS = re.compile(
        r'Packets lost due to reconfig: (\d*)'
    )
    REGEX_RECONF_TIME = re.compile(
        r'Implied time lost: (\d*.[\de-]*)'
    )
    REGEX_TC_TAG = re.compile(r'\d+[tT]\d+[cC]')

    REGEX_TC_NAME_NEW = re.compile(r'-\d+[cC]-')

    REGEX_TC_NUMBER = re.compile(r'tc\d{2}-')

    REGEX_TC_PAPI_CLI = re.compile(r'.*\((\d+.\d+.\d+.\d+.) - (.*)\)')

    REGEX_SH_RUN_HOST = re.compile(
        r'hostname=\"(\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3})\",hook=\"(.*)\"'
    )

    def __init__(self, metadata, mapping, ignore, process_oper):
        """Initialisation.

        :param metadata: Key-value pairs to be included in "metadata" part of
            JSON structure.
        :param mapping: Mapping of the old names of test cases to the new
            (actual) ones.
        :param ignore: List of TCs to be ignored.
        :param process_oper: If True, operational data (show run, telemetry) is
            processed.
        :type metadata: dict
        :type mapping: dict
        :type ignore: list
        :type process_oper: bool
        """

        # Mapping of TCs long names
        self._mapping = mapping

        # Ignore list
        self._ignore = ignore

        # Process operational data
        self._process_oper = process_oper

        # Name of currently processed keyword
        self._kw_name = None

        # VPP version
        self._version = None

        # Timestamp
        self._timestamp = None

        # Testbed. The testbed is identified by TG node IP address.
        self._testbed = None

        # Number of PAPI History messages found:
        # 0 - no message
        # 1 - PAPI History of DUT1
        # 2 - PAPI History of DUT2
        self._conf_history_lookup_nr = 0

        self._sh_run_counter = 0
        self._telemetry_kw_counter = 0
        self._telemetry_msg_counter = 0

        # Test ID of the currently processed test - the lowercase full path
        # to the test
        self._test_id = None

        # The main data structure
        self._data = {
            "metadata": dict(),
            "suites": dict(),
            "tests": dict()
        }

        # Save the provided metadata
        for key, val in metadata.items():
            self._data["metadata"][key] = val

    @property
    def data(self):
        """Getter - Data parsed from the XML file.

        :returns: Data parsed from the XML file.
        :rtype: dict
        """
        return self._data

    def _get_data_from_mrr_test_msg(self, msg):
        """Get info from message of MRR performance tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or "Test Failed." if a problem occurs.
        :rtype: str
        """

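        # Example (illustrative): a message containing "[10000000.0,
        # 10000000.0]" is rendered as "[10.00, 10.00]", i.e. the trial
        # receive rates scaled to millions of packets per second.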
        groups = re.search(self.REGEX_MRR_MSG_INFO, msg)
        if not groups or groups.lastindex != 1:
            return "Test Failed."

        try:
            data = groups.group(1).split(", ")
        except (AttributeError, IndexError, ValueError, KeyError):
            return "Test Failed."

        out_str = "["
        try:
            for item in data:
                out_str += f"{(float(item) / 1e6):.2f}, "
            return out_str[:-2] + "]"
        except (AttributeError, IndexError, ValueError, KeyError):
            return "Test Failed."

    def _get_data_from_cps_test_msg(self, msg):
        """Get info from message of NDRPDR CPS tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or "Test Failed." if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_CPS_MSG_INFO, msg)
        if not groups or groups.lastindex != 2:
            return "Test Failed."

        try:
            return (
                f"1. {(float(groups.group(1)) / 1e6):5.2f}\n"
                f"2. {(float(groups.group(2)) / 1e6):5.2f}"
            )
        except (AttributeError, IndexError, ValueError, KeyError):
            return "Test Failed."

    def _get_data_from_pps_test_msg(self, msg):
        """Get info from message of NDRPDR PPS tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or "Test Failed." if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_PPS_MSG_INFO, msg)
        if not groups or groups.lastindex != 4:
            return "Test Failed."

        try:
            return (
                f"1. {(float(groups.group(1)) / 1e6):5.2f}      "
                f"{float(groups.group(2)):5.2f}\n"
                f"2. {(float(groups.group(3)) / 1e6):5.2f}      "
                f"{float(groups.group(4)):5.2f}"
            )
        except (AttributeError, IndexError, ValueError, KeyError):
            return "Test Failed."

    def _get_data_from_perf_test_msg(self, msg):
        """Get info from message of NDRPDR performance tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or "Test Failed." if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_PERF_MSG_INFO, msg)
        if not groups or groups.lastindex != 10:
            return "Test Failed."

        try:
            data = {
                "ndr_low": float(groups.group(1)),
                "ndr_low_b": float(groups.group(2)),
                "pdr_low": float(groups.group(3)),
                "pdr_low_b": float(groups.group(4)),
                "pdr_lat_90_1": groups.group(5),
                "pdr_lat_90_2": groups.group(6),
                "pdr_lat_50_1": groups.group(7),
                "pdr_lat_50_2": groups.group(8),
                "pdr_lat_10_1": groups.group(9),
                "pdr_lat_10_2": groups.group(10),
            }
        except (AttributeError, IndexError, ValueError, KeyError):
            return "Test Failed."

        def _process_lat(in_str_1, in_str_2):
            """Extract P50, P90 and P99 latencies or min, avg, max values from
            latency string.

            :param in_str_1: Latency string for one direction produced by robot
                framework.
            :param in_str_2: Latency string for second direction produced by
                robot framework.
            :type in_str_1: str
            :type in_str_2: str
            :returns: Processed latency string or None if a problem occurs.
            :rtype: tuple
            """
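            # Example input (illustrative): "10/15/25/HISTEwAAA..." in
            # min/avg/max/hdrh order; the fourth field is a base64-encoded
            # HDR histogram, decoded below via the hdrh library.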
            in_list_1 = in_str_1.split('/', 3)
            in_list_2 = in_str_2.split('/', 3)

            if len(in_list_1) != 4 or len(in_list_2) != 4:
                return None

            # Pad the base64-encoded hdrh data to a multiple of four
            # characters before decoding.
            in_list_1[3] += "=" * ((4 - len(in_list_1[3]) % 4) % 4)
            try:
                hdr_lat_1 = hdrh.histogram.HdrHistogram.decode(in_list_1[3])
            except hdrh.codec.HdrLengthException:
                hdr_lat_1 = None

            in_list_2[3] += "=" * ((4 - len(in_list_2[3]) % 4) % 4)
            try:
                hdr_lat_2 = hdrh.histogram.HdrHistogram.decode(in_list_2[3])
            except hdrh.codec.HdrLengthException:
                hdr_lat_2 = None

            if hdr_lat_1 and hdr_lat_2:
                hdr_lat = (
                    hdr_lat_1.get_value_at_percentile(50.0),
                    hdr_lat_1.get_value_at_percentile(90.0),
                    hdr_lat_1.get_value_at_percentile(99.0),
                    hdr_lat_2.get_value_at_percentile(50.0),
                    hdr_lat_2.get_value_at_percentile(90.0),
                    hdr_lat_2.get_value_at_percentile(99.0)
                )
                if all(hdr_lat):
                    return hdr_lat

            hdr_lat = (
                int(in_list_1[0]), int(in_list_1[1]), int(in_list_1[2]),
                int(in_list_2[0]), int(in_list_2[1]), int(in_list_2[2])
            )
            for item in hdr_lat:
                if item in (-1, 4294967295, 0):
                    return None
            return hdr_lat

        try:
            out_msg = (
                f"1. {(data['ndr_low'] / 1e6):5.2f}      "
                f"{data['ndr_low_b']:5.2f}"
                f"\n2. {(data['pdr_low'] / 1e6):5.2f}      "
                f"{data['pdr_low_b']:5.2f}"
            )
            latency = (
                _process_lat(data['pdr_lat_10_1'], data['pdr_lat_10_2']),
                _process_lat(data['pdr_lat_50_1'], data['pdr_lat_50_2']),
                _process_lat(data['pdr_lat_90_1'], data['pdr_lat_90_2'])
            )
            if all(latency):
                max_len = len(str(max((max(item) for item in latency))))
                max_len = 4 if max_len < 4 else max_len

                for idx, lat in enumerate(latency):
                    if not idx:
                        out_msg += "\n"
                    out_msg += (
                        f"\n{idx + 3}. "
                        f"{lat[0]:{max_len}d} "
                        f"{lat[1]:{max_len}d} "
                        f"{lat[2]:{max_len}d}      "
                        f"{lat[3]:{max_len}d} "
                        f"{lat[4]:{max_len}d} "
                        f"{lat[5]:{max_len}d} "
                    )

            return out_msg

        except (AttributeError, IndexError, ValueError, KeyError):
            return "Test Failed."

    def _get_testbed(self, msg):
        """Called when extraction of testbed IP is required.
        The testbed is identified by TG node IP address.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count("Setup of TG node") or \
                msg.message.count("Setup of node TG host"):
            reg_tg_ip = re.compile(
                r'.*TG .* (\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}).*')
            try:
                self._testbed = str(re.search(reg_tg_ip, msg.message).group(1))
            except (KeyError, ValueError, IndexError, AttributeError):
                pass
            finally:
                self._data["metadata"]["testbed"] = self._testbed

    def _get_vpp_version(self, msg):
        """Called when extraction of VPP version is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count("VPP version:") or \
                msg.message.count("VPP Version:"):
            self._version = str(
                re.search(self.REGEX_VERSION_VPP, msg.message).group(2)
            )
            self._data["metadata"]["version"] = self._version

    def _get_dpdk_version(self, msg):
        """Called when extraction of DPDK version is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count("DPDK Version:"):
            try:
                self._version = str(re.search(
                    self.REGEX_VERSION_DPDK, msg.message).group(2))
                self._data["metadata"]["version"] = self._version
            except IndexError:
                pass

    def _get_papi_history(self, msg):
        """Called when extraction of PAPI command history is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if msg.message.count("PAPI command history:"):
            self._conf_history_lookup_nr += 1
            if self._conf_history_lookup_nr == 1:
                self._data["tests"][self._test_id]["conf-history"] = str()
            text = re.sub(
                r"\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3} PAPI command history:",
                "",
                msg.message,
                count=1
            ).replace('"', "'")
            self._data["tests"][self._test_id]["conf-history"] += \
                f"**DUT{str(self._conf_history_lookup_nr)}:** {text}"

    def _get_show_run(self, msg):
        """Called when extraction of VPP operational data (output of CLI command
        Show Runtime) is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if not msg.message.count("stats runtime"):
            return

        # Temporary solution
        if self._sh_run_counter > 1:
            return

        if "show-run" not in self._data["tests"][self._test_id].keys():
            self._data["tests"][self._test_id]["show-run"] = dict()

        groups = re.search(self.REGEX_TC_PAPI_CLI, msg.message)
        if not groups:
            return
        try:
            host = groups.group(1)
        except (AttributeError, IndexError):
            host = ""
        try:
            sock = groups.group(2)
        except (AttributeError, IndexError):
            sock = ""

        dut = "dut{nr}".format(
            nr=len(self._data['tests'][self._test_id]['show-run'].keys()) + 1)

        self._data['tests'][self._test_id]['show-run'][dut] = \
            copy.copy(
                {
                    "host": host,
                    "socket": sock,
                    "runtime": str(msg.message).replace(' ', '').
                                replace('\n', '').replace("'", '"').
                                replace('b"', '"').replace('u"', '"').
                                split(":", 1)[1]
                }
            )

    def _get_telemetry(self, msg):
        """Called when extraction of VPP telemetry data is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if self._telemetry_kw_counter > 1:
            return
        if not msg.message.count("# TYPE vpp_runtime_calls"):
            return

        if "telemetry-show-run" not in \
                self._data["tests"][self._test_id].keys():
            self._data["tests"][self._test_id]["telemetry-show-run"] = dict()

        self._telemetry_msg_counter += 1
        groups = re.search(self.REGEX_SH_RUN_HOST, msg.message)
        if not groups:
            return
        try:
            host = groups.group(1)
        except (AttributeError, IndexError):
            host = ""
        try:
            sock = groups.group(2)
        except (AttributeError, IndexError):
            sock = ""
        runtime = {
            "source_type": "node",
            "source_id": host,
            "msg_type": "metric",
            "log_level": "INFO",
            "timestamp": msg.timestamp,
            "msg": "show_runtime",
            "host": host,
            "socket": sock,
            "data": list()
        }
        for line in msg.message.splitlines():
            if not line.startswith("vpp_runtime_"):
                continue
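            # Each remaining line is expected in a Prometheus-style form,
            # e.g. (illustrative):
            #   vpp_runtime_calls{name="ip4-lookup",state="active",thread_id="0"} 123 1234567890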
            try:
                params, value, timestamp = line.rsplit(" ", maxsplit=2)
                cut = params.index("{")
                name = params[:cut].split("_", maxsplit=2)[-1]
                labels = eval(
                    "dict" + params[cut:].replace('{', '(').replace('}', ')')
                )
                labels["graph_node"] = labels.pop("name")
                runtime["data"].append(
                    {
                        "name": name,
                        "value": value,
                        "timestamp": timestamp,
                        "labels": labels
                    }
                )
            except (TypeError, ValueError, IndexError):
                continue
        self._data['tests'][self._test_id]['telemetry-show-run']\
            [f"dut{self._telemetry_msg_counter}"] = copy.copy(
                {
                    "host": host,
                    "socket": sock,
                    "runtime": runtime
                }
            )

    def _get_ndrpdr_throughput(self, msg):
        """Get NDR_LOWER, NDR_UPPER, PDR_LOWER and PDR_UPPER from the test
        message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        throughput = {
            "NDR": {"LOWER": -1.0, "UPPER": -1.0},
            "PDR": {"LOWER": -1.0, "UPPER": -1.0}
        }
        status = "FAIL"
        groups = re.search(self.REGEX_NDRPDR_RATE, msg)

        if groups is not None:
            try:
                throughput["NDR"]["LOWER"] = float(groups.group(1))
                throughput["NDR"]["UPPER"] = float(groups.group(2))
                throughput["PDR"]["LOWER"] = float(groups.group(3))
                throughput["PDR"]["UPPER"] = float(groups.group(4))
                status = "PASS"
            except (IndexError, ValueError):
                pass

        return throughput, status

    def _get_ndrpdr_throughput_gbps(self, msg):
        """Get NDR_LOWER, NDR_UPPER, PDR_LOWER and PDR_UPPER in Gbps from the
        test message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        gbps = {
            "NDR": {"LOWER": -1.0, "UPPER": -1.0},
            "PDR": {"LOWER": -1.0, "UPPER": -1.0}
        }
        status = "FAIL"
        groups = re.search(self.REGEX_NDRPDR_GBPS, msg)

        if groups is not None:
            try:
                gbps["NDR"]["LOWER"] = float(groups.group(1))
                gbps["NDR"]["UPPER"] = float(groups.group(2))
                gbps["PDR"]["LOWER"] = float(groups.group(3))
                gbps["PDR"]["UPPER"] = float(groups.group(4))
                status = "PASS"
            except (IndexError, ValueError):
                pass

        return gbps, status

    def _get_plr_throughput(self, msg):
        """Get PLRsearch lower bound and PLRsearch upper bound from the test
        message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        throughput = {
            "LOWER": -1.0,
            "UPPER": -1.0
        }
        status = "FAIL"
        groups = re.search(self.REGEX_PLR_RATE, msg)

        if groups is not None:
            try:
                throughput["LOWER"] = float(groups.group(1))
                throughput["UPPER"] = float(groups.group(2))
                status = "PASS"
            except (IndexError, ValueError):
                pass

        return throughput, status

    def _get_ndrpdr_latency(self, msg):
        """Get LATENCY from the test message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """
        latency_default = {
            "min": -1.0,
            "avg": -1.0,
            "max": -1.0,
            "hdrh": ""
        }
        latency = {
            "NDR": {
                "direction1": copy.copy(latency_default),
                "direction2": copy.copy(latency_default)
            },
            "PDR": {
                "direction1": copy.copy(latency_default),
                "direction2": copy.copy(latency_default)
            },
            "LAT0": {
                "direction1": copy.copy(latency_default),
                "direction2": copy.copy(latency_default)
            },
            "PDR10": {
                "direction1": copy.copy(latency_default),
                "direction2": copy.copy(latency_default)
            },
            "PDR50": {
                "direction1": copy.copy(latency_default),
                "direction2": copy.copy(latency_default)
            },
            "PDR90": {
                "direction1": copy.copy(latency_default),
                "direction2": copy.copy(latency_default)
            },
        }

        groups = re.search(self.REGEX_NDRPDR_LAT, msg)
        if groups is None:
            groups = re.search(self.REGEX_NDRPDR_LAT_BASE, msg)
        if groups is None:
            return latency, "FAIL"

        def process_latency(in_str):
            """Return object with parsed latency values.

            TODO: Define class for the return type.

            :param in_str: Input string, min/avg/max/hdrh format.
            :type in_str: str
            :returns: Dict with float values for min/avg/max and a string for
                hdrh.
            :rtype: dict
            :throws IndexError: If in_str does not have enough substrings.
            :throws ValueError: If a substring does not convert to float.
            """
            in_list = in_str.split('/', 3)

            rval = {
                "min": float(in_list[0]),
                "avg": float(in_list[1]),
                "max": float(in_list[2]),
                "hdrh": ""
            }

            if len(in_list) == 4:
                rval["hdrh"] = str(in_list[3])

            return rval

        try:
            latency["NDR"]["direction1"] = process_latency(groups.group(1))
            latency["NDR"]["direction2"] = process_latency(groups.group(2))
            latency["PDR"]["direction1"] = process_latency(groups.group(3))
            latency["PDR"]["direction2"] = process_latency(groups.group(4))
            if groups.lastindex == 4:
                return latency, "PASS"
        except (IndexError, ValueError):
            pass

        try:
            latency["PDR90"]["direction1"] = process_latency(groups.group(5))
            latency["PDR90"]["direction2"] = process_latency(groups.group(6))
            latency["PDR50"]["direction1"] = process_latency(groups.group(7))
            latency["PDR50"]["direction2"] = process_latency(groups.group(8))
            latency["PDR10"]["direction1"] = process_latency(groups.group(9))
            latency["PDR10"]["direction2"] = process_latency(groups.group(10))
            latency["LAT0"]["direction1"] = process_latency(groups.group(11))
            latency["LAT0"]["direction2"] = process_latency(groups.group(12))
            if groups.lastindex == 12:
                return latency, "PASS"
        except (IndexError, ValueError):
            pass

        return latency, "FAIL"

    @staticmethod
    def _get_hoststack_data(msg, tags):
        """Get data from the hoststack test message.

        :param msg: The test message to be parsed.
        :param tags: Test tags.
        :type msg: str
        :type tags: list
        :returns: Parsed data as a JSON dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """
        result = dict()
        status = "FAIL"

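        # The message body is expected to be either a single JSON object
        # (LDPRELOAD tests) or two concatenated JSON objects, client then
        # server (VPPECHO tests), e.g. '{"time":"3.0",...}{"time":"3.1",...}'
        # (illustrative).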
        msg = msg.replace("'", '"').replace(" ", "")
        if "LDPRELOAD" in tags:
            try:
                result = loads(msg)
                status = "PASS"
            except JSONDecodeError:
                pass
        elif "VPPECHO" in tags:
            try:
                msg_lst = msg.replace("}{", "} {").split(" ")
                result = dict(
                    client=loads(msg_lst[0]),
                    server=loads(msg_lst[1])
                )
                status = "PASS"
            except (JSONDecodeError, IndexError):
                pass

        return result, status

    def _get_vsap_data(self, msg, tags):
        """Get data from the vsap test message.

        :param msg: The test message to be parsed.
        :param tags: Test tags.
        :type msg: str
        :type tags: list
        :returns: Parsed data as a JSON dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """
        result = dict()
        status = "FAIL"

        groups = re.search(self.REGEX_VSAP_MSG_INFO, msg)
        if groups is not None:
            try:
                result["transfer-rate"] = float(groups.group(1)) * 1e3
                result["latency"] = float(groups.group(2))
                result["completed-requests"] = int(groups.group(5))
                result["failed-requests"] = int(groups.group(6))
                result["bytes-transferred"] = int(groups.group(4))
                if "TCP_CPS" in tags:
                    result["cps"] = float(groups.group(3))
                elif "TCP_RPS" in tags:
                    result["rps"] = float(groups.group(3))
                else:
                    return result, status
                status = "PASS"
            except (IndexError, ValueError) as err:
                logging.warning(err)
        return result, status

    def visit_suite(self, suite):
        """Implements traversing through the suite and its direct children.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """
        if self.start_suite(suite) is not False:
            suite.setup.visit(self)
            suite.suites.visit(self)
            suite.tests.visit(self)
            suite.teardown.visit(self)
            self.end_suite(suite)

    def start_suite(self, suite):
        """Called when suite starts.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """
        try:
            parent_name = suite.parent.name
        except AttributeError:
            return

        self._data["suites"][suite.longname.lower().replace('"', "'").\
            replace(" ", "_")] = {
                "name": suite.name.lower(),
                "doc": suite.doc,
                "parent": parent_name,
                "level": len(suite.longname.split("."))
            }

    def visit_test(self, test):
        """Implements traversing through the test.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """
        if self.start_test(test) is not False:
            test.setup.visit(self)
            test.body.visit(self)
            test.teardown.visit(self)
            self.end_test(test)

    def start_test(self, test):
        """Called when test starts.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """

        self._sh_run_counter = 0
        self._telemetry_kw_counter = 0
        self._telemetry_msg_counter = 0

        longname_orig = test.longname.lower()

        # Check the ignore list
        if longname_orig in self._ignore:
            return

        tags = [str(tag) for tag in test.tags]
        test_result = dict()

        # Change the TC long name and name if defined in the mapping table
        longname = self._mapping.get(longname_orig, None)
        if longname is not None:
            name = longname.split('.')[-1]
        else:
            longname = longname_orig
            name = test.name.lower()

        # Remove TC number from the TC long name (backward compatibility):
        self._test_id = re.sub(self.REGEX_TC_NUMBER, "", longname)
        # Remove TC number from the TC name (not needed):
        test_result["name"] = re.sub(self.REGEX_TC_NUMBER, "", name)

        test_result["parent"] = test.parent.name.lower()
        test_result["tags"] = tags
        test_result["doc"] = test.doc
        test_result["type"] = ""
        test_result["status"] = test.status
        test_result["starttime"] = test.starttime
        test_result["endtime"] = test.endtime

        if test.status == "PASS":
            if "NDRPDR" in tags:
                if "TCP_PPS" in tags or "UDP_PPS" in tags:
                    test_result["msg"] = self._get_data_from_pps_test_msg(
                        test.message)
                elif "TCP_CPS" in tags or "UDP_CPS" in tags:
                    test_result["msg"] = self._get_data_from_cps_test_msg(
                        test.message)
                else:
                    test_result["msg"] = self._get_data_from_perf_test_msg(
                        test.message)
            elif "MRR" in tags or "FRMOBL" in tags or "BMRR" in tags:
                test_result["msg"] = self._get_data_from_mrr_test_msg(
                    test.message)
            else:
                test_result["msg"] = test.message
        else:
            test_result["msg"] = test.message

        if "PERFTEST" in tags and "TREX" not in tags:
            # Replace info about cores (e.g. -1c-) with the info about threads
            # and cores (e.g. -1t1c-) in the long test case names and in the
            # test case names if necessary.
            tag_count = 0
            tag_tc = str()
            for tag in test_result["tags"]:
                groups = re.search(self.REGEX_TC_TAG, tag)
                if groups:
                    tag_count += 1
                    tag_tc = tag

            if tag_count == 1:
                self._test_id = re.sub(
                    self.REGEX_TC_NAME_NEW, f"-{tag_tc.lower()}-",
                    self._test_id, count=1
                )
                test_result["name"] = re.sub(
                    self.REGEX_TC_NAME_NEW, f"-{tag_tc.lower()}-",
                    test_result["name"], count=1
                )
            else:
                test_result["status"] = "FAIL"
                self._data["tests"][self._test_id] = test_result
                logging.debug(
                    f"The test {self._test_id} has either no multi-threading "
                    f"tag or more than one.\n"
                    f"Tags: {test_result['tags']}"
                )
                return

        if "DEVICETEST" in tags:
            test_result["type"] = "DEVICETEST"
        elif "NDRPDR" in tags:
            if "TCP_CPS" in tags or "UDP_CPS" in tags:
                test_result["type"] = "CPS"
            else:
                test_result["type"] = "NDRPDR"
            if test.status == "PASS":
                test_result["throughput"], test_result["status"] = \
                    self._get_ndrpdr_throughput(test.message)
                test_result["gbps"], test_result["status"] = \
                    self._get_ndrpdr_throughput_gbps(test.message)
                test_result["latency"], test_result["status"] = \
                    self._get_ndrpdr_latency(test.message)
        elif "MRR" in tags or "FRMOBL" in tags or "BMRR" in tags:
            if "MRR" in tags:
                test_result["type"] = "MRR"
            else:
                test_result["type"] = "BMRR"
            if test.status == "PASS":
                test_result["result"] = dict()
                groups = re.search(self.REGEX_BMRR, test.message)
                if groups is not None:
                    items_str = groups.group(1)
                    items_float = [
                        float(item.strip().replace("'", ""))
                        for item in items_str.split(",")
                    ]
                    # Use whole list in CSIT-1180.
                    stats = jumpavg.AvgStdevStats.for_runs(items_float)
                    test_result["result"]["samples"] = items_float
                    test_result["result"]["receive-rate"] = stats.avg
                    test_result["result"]["receive-stdev"] = stats.stdev
                else:
                    groups = re.search(self.REGEX_MRR, test.message)
                    test_result["result"]["receive-rate"] = \
                        float(groups.group(3)) / float(groups.group(1))
        elif "SOAK" in tags:
            test_result["type"] = "SOAK"
            if test.status == "PASS":
                test_result["throughput"], test_result["status"] = \
                    self._get_plr_throughput(test.message)
        elif "LDP_NGINX" in tags:
            test_result["type"] = "LDP_NGINX"
            test_result["result"], test_result["status"] = \
                self._get_vsap_data(test.message, tags)
        elif "HOSTSTACK" in tags:
            test_result["type"] = "HOSTSTACK"
            if test.status == "PASS":
                test_result["result"], test_result["status"] = \
                    self._get_hoststack_data(test.message, tags)
        elif "RECONF" in tags:
            test_result["type"] = "RECONF"
            if test.status == "PASS":
                test_result["result"] = None
                try:
                    grps_loss = re.search(self.REGEX_RECONF_LOSS, test.message)
                    grps_time = re.search(self.REGEX_RECONF_TIME, test.message)
                    test_result["result"] = {
                        "loss": int(grps_loss.group(1)),
                        "time": float(grps_time.group(1))
                    }
                except (AttributeError, IndexError, ValueError, TypeError):
                    test_result["status"] = "FAIL"
        else:
            test_result["status"] = "FAIL"

        self._data["tests"][self._test_id] = test_result

    def visit_keyword(self, kw):
        """Implements traversing through the keyword and its child keywords.

        :param kw: Keyword to process.
        :type kw: Keyword
        :returns: Nothing.
        """
        if self.start_keyword(kw) is not False:
            if hasattr(kw, "body"):
                kw.body.visit(self)
            kw.teardown.visit(self)
            self.end_keyword(kw)

    def start_keyword(self, keyword):
        """Called when keyword starts. Stores the name of the keyword.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """
        self._kw_name = keyword.name

    def end_keyword(self, keyword):
        """Called when keyword ends. Clears the stored keyword name.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """
        _ = keyword
        self._kw_name = None

    def visit_message(self, msg):
        """Implements visiting the message.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if self.start_message(msg) is not False:
            self.end_message(msg)

    def start_message(self, msg):
        """Called when message starts. Get required information from messages,
        e.g. VPP/DPDK version, operational data, or the testbed identity.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if self._kw_name is None:
            return
        elif self._kw_name.count("Run Telemetry On All Duts"):
            if self._process_oper:
                self._telemetry_kw_counter += 1
                self._get_telemetry(msg)
        elif self._kw_name.count("Show Runtime On All Duts"):
            if self._process_oper:
                self._sh_run_counter += 1
                self._get_show_run(msg)
        elif self._kw_name.count("Show Vpp Version On All Duts"):
            if not self._version:
                self._get_vpp_version(msg)
        elif self._kw_name.count("Install Dpdk Framework On All Duts"):
            if not self._version:
                self._get_dpdk_version(msg)
        elif self._kw_name.count("Setup Framework"):
            if not self._testbed:
                self._get_testbed(msg)
        elif self._kw_name.count("Show Papi History On All Duts"):
            self._conf_history_lookup_nr = 0
            self._get_papi_history(msg)

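# A minimal sketch of driving ExecutionChecker directly (assumes a local
# output.xml produced by a CSIT job; the metadata, mapping and ignore
# arguments below are placeholders):
#
#     result = ExecutionResult("output.xml")
#     checker = ExecutionChecker(
#         metadata=dict(), mapping=dict(), ignore=list(), process_oper=False
#     )
#     result.visit(checker)
#     version = checker.data["metadata"].get("version")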

class InputData:
    """Input data

    The data is extracted from output.xml files generated by Jenkins jobs and
    stored in pandas' Series.

    The data structure:
    - job name
      - build number
        - metadata
          (as described in ExecutionChecker documentation)
        - suites
          (as described in ExecutionChecker documentation)
        - tests
          (as described in ExecutionChecker documentation)
    """

    def __init__(self, spec, for_output):
        """Initialization.

        :param spec: Specification.
        :param for_output: Output to be generated from downloaded data.
        :type spec: Specification
        :type for_output: str
        """

        # Specification:
        self._cfg = spec

        self._for_output = for_output

        # Data store:
        self._input_data = pd.Series(dtype="float64")

    @property
    def data(self):
        """Getter - Input data.

        :returns: Input data
        :rtype: pandas.Series
        """
        return self._input_data

    def metadata(self, job, build):
        """Getter - metadata

        :param job: Job whose metadata we want.
        :param build: Build whose metadata we want.
        :type job: str
        :type build: str
        :returns: Metadata.
        :rtype: pandas.Series
        """
        return self.data[job][build]["metadata"]

    def suites(self, job, build):
        """Getter - suites

        :param job: Job whose suites we want.
        :param build: Build whose suites we want.
        :type job: str
        :type build: str
        :returns: Suites.
        :rtype: pandas.Series
        """
        return self.data[job][str(build)]["suites"]

    def tests(self, job, build):
        """Getter - tests

        :param job: Job whose tests we want.
        :param build: Build whose tests we want.
        :type job: str
        :type build: str
        :returns: Tests.
        :rtype: pandas.Series
        """
        return self.data[job][build]["tests"]

    def _parse_tests(self, job, build):
        """Process data from robot output.xml file and return JSON structured
        data.

        :param job: The name of the job whose build output data will be
            processed.
        :param build: The build whose output data will be processed.
        :type job: str
        :type build: dict
        :returns: JSON data structure.
        :rtype: dict
        """

        metadata = {
            "job": job,
            "build": build
        }

        with open(build["file-name"], 'r') as data_file:
            try:
                result = ExecutionResult(data_file)
            except errors.DataError as err:
                logging.error(
                    f"Error occurred while parsing output.xml: {repr(err)}"
                )
                return None

        process_oper = False
        if "-vpp-perf-report-coverage-" in job:
            process_oper = True
        # elif "-vpp-perf-report-iterative-" in job:
        #     # Exceptions for TBs where we do not have coverage data:
        #     for item in ("-2n-icx", ):
        #         if item in job:
        #             process_oper = True
        #             break
        checker = ExecutionChecker(
            metadata, self._cfg.mapping, self._cfg.ignore, process_oper
        )
        result.visit(checker)

        checker.data["metadata"]["tests_total"] = \
            result.statistics.total.total
        checker.data["metadata"]["tests_passed"] = \
            result.statistics.total.passed
        checker.data["metadata"]["tests_failed"] = \
            result.statistics.total.failed
        checker.data["metadata"]["elapsedtime"] = result.suite.elapsedtime
        checker.data["metadata"]["generated"] = result.suite.endtime[:14]

        return checker.data

    def _download_and_parse_build(self, job, build, repeat, pid=10000):
        """Download and parse the input data file.

        :param pid: PID of the process executing this method.
        :param job: Name of the Jenkins job which generated the processed input
            file.
        :param build: Information about the Jenkins build which generated the
            processed input file.
        :param repeat: Repeat the download specified number of times if not
            successful.
        :type pid: int
        :type job: str
        :type build: dict
        :type repeat: int
        """

        logging.info(f"Processing the job/build: {job}: {build['build']}")

        state = "failed"
        success = False
        data = None
        do_repeat = repeat
        while do_repeat:
            success = download_and_unzip_data_file(self._cfg, job, build, pid)
            if success:
                break
            do_repeat -= 1
        if not success:
            logging.error(
                f"It is not possible to download the input data file from the "
                f"job {job}, build {build['build']}, or it is damaged. "
                f"Skipped."
            )
        if success:
            logging.info(f"  Processing data from build {build['build']}")
            data = self._parse_tests(job, build)
            if data is None:
                logging.error(
                    f"Input data file from the job {job}, build "
                    f"{build['build']} is damaged. Skipped."
                )
            else:
                state = "processed"

            try:
                remove(build["file-name"])
            except OSError as err:
                logging.error(
                    f"Cannot remove the file {build['file-name']}: {repr(err)}"
                )

        # If the time-period is defined in the specification file, remove all
        # files which are outside the time period.
        is_last = False
        timeperiod = self._cfg.environment.get("time-period", None)
        if timeperiod and data:
            now = dt.utcnow()
            timeperiod = timedelta(int(timeperiod))
            metadata = data.get("metadata", None)
            if metadata:
                generated = metadata.get("generated", None)
                if generated:
                    generated = dt.strptime(generated, "%Y%m%d %H:%M")
                    if (now - generated) > timeperiod:
                        # Remove the data and the file:
                        state = "removed"
                        data = None
                        is_last = True
                        logging.info(
                            f"  The build {job}/{build['build']} is "
                            f"outdated, will be removed."
                        )
        return {
            "data": data,
            "state": state,
            "job": job,
            "build": build,
            "last": is_last
        }

    def download_and_parse_data(self, repeat=1):
        """Download the input data files, parse input data from input files and
        store in pandas' Series.

        :param repeat: Repeat the download specified number of times if not
            successful.
        :type repeat: int
        """

        logging.info("Downloading and parsing input files ...")

        for job, builds in self._cfg.input.items():
            for build in builds:

                result = self._download_and_parse_build(job, build, repeat)
                if result["last"]:
                    break
                build_nr = result["build"]["build"]

                if result["data"]:
                    data = result["data"]
                    build_data = pd.Series({
                        "metadata": pd.Series(
                            list(data["metadata"].values()),
                            index=list(data["metadata"].keys())
                        ),
                        "suites": pd.Series(
                            list(data["suites"].values()),
                            index=list(data["suites"].keys())
                        ),
                        "tests": pd.Series(
                            list(data["tests"].values()),
                            index=list(data["tests"].keys())
                        )
                    })

                    if self._input_data.get(job, None) is None:
                        self._input_data[job] = pd.Series(dtype="float64")
                    self._input_data[job][str(build_nr)] = build_data
                    self._cfg.set_input_file_name(
                        job, build_nr, result["build"]["file-name"]
                    )
                self._cfg.set_input_state(job, build_nr, result["state"])

                mem_alloc = \
                    resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000
                logging.info(f"Memory allocation: {mem_alloc:.0f}MB")

        logging.info("Done.")

        msg = f"Successful downloads from the sources:\n"
        for source in self._cfg.environment["data-sources"]:
            if source["successful-downloads"]:
                msg += (
                    f"{source['url']}/{source['path']}/"
                    f"{source['file-name']}: "
                    f"{source['successful-downloads']}\n"
                )
        logging.info(msg)

    def process_local_file(self, local_file, job="local", build_nr=1,
                           replace=True):
        """Process local XML file given as a command-line parameter.

        :param local_file: The file to process.
        :param job: Job name.
        :param build_nr: Build number.
        :param replace: If True, the information about jobs and builds is
            replaced by the new one, otherwise the new jobs and builds are
            added.
        :type local_file: str
        :type job: str
        :type build_nr: int
        :type replace: bool
        :raises: PresentationError if an error occurs.
        """
        if not isfile(local_file):
            raise PresentationError(f"The file {local_file} does not exist.")

        try:
            build_nr = int(local_file.split("/")[-1].split(".")[0])
        except (IndexError, ValueError):
            pass

        build = {
            "build": build_nr,
            "status": "failed",
            "file-name": local_file
        }
        if replace:
            self._cfg.input = dict()
        self._cfg.add_build(job, build)

        logging.info(f"Processing {job}: {build_nr:2d}: {local_file}")
        data = self._parse_tests(job, build)
        if data is None:
            raise PresentationError(
                f"Error occurred while parsing the file {local_file}"
            )

        build_data = pd.Series({
            "metadata": pd.Series(
                list(data["metadata"].values()),
                index=list(data["metadata"].keys())
            ),
            "suites": pd.Series(
                list(data["suites"].values()),
                index=list(data["suites"].keys())
            ),
            "tests": pd.Series(
                list(data["tests"].values()),
                index=list(data["tests"].keys())
            )
        })

        if self._input_data.get(job, None) is None:
            self._input_data[job] = pd.Series(dtype="float64")
        self._input_data[job][str(build_nr)] = build_data

        self._cfg.set_input_state(job, build_nr, "processed")

    def process_local_directory(self, local_dir, replace=True):
        """Process a local directory with XML file(s). The directory is
        processed as a 'job' and the XML files in it as its builds.
        If the given directory contains only sub-directories, these
        sub-directories are processed as jobs and the corresponding XML files
        as builds of their jobs.

        :param local_dir: Local directory to process.
        :param replace: If True, the information about jobs and builds is
            replaced by the new one, otherwise the new jobs and builds are
            added.
        :type local_dir: str
        :type replace: bool
        :raises: PresentationError if an error occurs.
        """
1466         if not isdir(local_dir):
1467             raise PresentationError(
1468                 f"The directory {local_dir} does not exist."
1469             )
1470
1471         # Check if the given directory includes only files, or only directories
1472         _, dirnames, filenames = next(walk(local_dir))
1473
1474         if filenames and not dirnames:
1475             filenames.sort()
1476             # local_builds:
1477             # key: dir (job) name, value: list of file names (builds)
1478             local_builds = {
1479                 local_dir: [join(local_dir, name) for name in filenames]
1480             }
1481
1482         elif dirnames and not filenames:
1483             dirnames.sort()
1484             # local_builds:
1485             # key: dir (job) name, value: list of file names (builds)
1486             local_builds = dict()
1487             for dirname in dirnames:
1488                 builds = [
1489                     join(local_dir, dirname, name)
1490                     for name in listdir(join(local_dir, dirname))
1491                     if isfile(join(local_dir, dirname, name))
1492                 ]
1493                 if builds:
1494                     local_builds[dirname] = sorted(builds)
1495
1496         elif not filenames and not dirnames:
1497             raise PresentationError(f"The directory {local_dir} is empty.")
1498         else:
1499             raise PresentationError(
1500                 f"The directory {local_dir} can include only files or only "
1501                 f"directories, not both.\nThe directory {local_dir} includes "
1502                 f"file(s):\n{filenames}\nand directories:\n{dirnames}"
1503             )
1504
1505         if replace:
1506             self._cfg.input = dict()
1507
1508         for job, files in local_builds.items():
1509             for idx, local_file in enumerate(files):
1510                 self.process_local_file(local_file, job, idx + 1, replace=False)
1511
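    # Illustrative directory layouts accepted above (a sketch; the paths are
    # hypothetical):
    #
    #     results/output1.xml, results/output2.xml
    #         -> one job "results" with builds 1 and 2
    #     results/job_a/output.xml, results/job_b/output.xml
    #         -> jobs "job_a" and "job_b", each with build 1
    #
    # A directory mixing files and sub-directories raises PresentationError.
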
    @staticmethod
    def _end_of_tag(tag_filter, start=0, closer="'"):
        """Return the index of the character which closes the first tag found
        in the string.

        :param tag_filter: The string where the end of tag is searched for.
        :param start: The index where the searching is started.
        :param closer: The character which closes the tag.
        :type tag_filter: str
        :type start: int
        :type closer: str
        :returns: The index of the tag closer, or None if no complete tag is
            found.
        :rtype: int
        """
        try:
            idx_opener = tag_filter.index(closer, start)
            return tag_filter.index(closer, idx_opener + 1)
        except ValueError:
            return None

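    # Illustrative call (a sketch): in "'NDRPDR' and '1T1C'" the first tag is
    # closed by the apostrophe at index 7, so
    #
    #     InputData._end_of_tag("'NDRPDR' and '1T1C'")  # -> 7
    #
    # None is returned when no complete pair of apostrophes remains.
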
    @staticmethod
    def _condition(tag_filter):
        """Create a conditional statement from the given tag filter.

        :param tag_filter: Filter based on tags from the element specification.
        :type tag_filter: str
        :returns: Conditional statement which can be evaluated.
        :rtype: str
        """
        index = 0
        while True:
            index = InputData._end_of_tag(tag_filter, index)
            if index is None:
                return tag_filter
            index += 1
            tag_filter = tag_filter[:index] + " in tags" + tag_filter[index:]

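    # Illustrative transformation (a sketch): each quoted tag is turned into a
    # membership test, so
    #
    #     InputData._condition("'NDRPDR' and '1T1C'")
    #
    # returns "'NDRPDR' in tags and '1T1C' in tags", which can be evaluated
    # against a test's list of tags.
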
    def filter_data(self, element, params=None, data=None, data_set="tests",
                    continue_on_error=False):
        """Filter required data from the given jobs and builds.

        The output data structure is:
        - job 1
          - build 1
            - test (or suite) 1 ID:
              - param 1
              - param 2
              ...
              - param n
            ...
            - test (or suite) n ID:
            ...
          ...
          - build n
        ...
        - job n

        :param element: Element which will use the filtered data.
        :param params: Parameters which will be included in the output. If
            None, all parameters are included.
        :param data: If not None, this data is used instead of the data
            specified in the element.
        :param data_set: The set of data to be filtered: tests, suites,
            metadata.
        :param continue_on_error: Continue if there is an error while reading
            the data. The item will be empty then.
        :type element: pandas.Series
        :type params: list
        :type data: dict
        :type data_set: str
        :type continue_on_error: bool
        :returns: Filtered data.
        :rtype: pandas.Series
        """

        try:
            if data_set == "suites":
                cond = "True"
            elif element["filter"] in ("all", "template"):
                cond = "True"
            else:
                cond = InputData._condition(element["filter"])
            logging.debug(f"   Filter: {cond}")
        except KeyError:
            logging.error("  No filter defined.")
            return None

        if params is None:
            params = element.get("parameters", None)
            if params:
                params.extend(("type", "status"))

        data_to_filter = data if data else element["data"]
        data = pd.Series(dtype="float64")
        try:
            for job, builds in data_to_filter.items():
                data[job] = pd.Series(dtype="float64")
                for build in builds:
                    data[job][str(build)] = pd.Series(dtype="float64")
                    try:
                        data_dict = dict(
                            self.data[job][str(build)][data_set].items())
                    except KeyError:
                        if continue_on_error:
                            continue
                        return None

                    for test_id, test_data in data_dict.items():
                        # Include the test only if its tags satisfy the filter
                        # condition.
                        if eval(cond, {"tags": test_data.get("tags", "")}):
                            data[job][str(build)][test_id] = \
                                pd.Series(dtype="float64")
                            if params is None:
                                for param, val in test_data.items():
                                    data[job][str(build)][test_id][param] = val
                            else:
                                for param in params:
                                    try:
                                        data[job][str(build)][test_id][param] =\
                                            test_data[param]
                                    except KeyError:
                                        data[job][str(build)][test_id][param] =\
                                            "No Data"
            return data

        except (KeyError, IndexError, ValueError) as err:
            logging.error(
                f"Missing mandatory parameter in the element specification: "
                f"{repr(err)}"
            )
            return None
        except AttributeError as err:
            logging.error(repr(err))
            return None
        except SyntaxError as err:
            logging.error(
                f"The filter {cond} is not correct. Check if all tags are "
                f"enclosed by apostrophes.\n{repr(err)}"
            )
            return None

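    # Illustrative filter evaluation (a sketch with hypothetical tags): the
    # condition built by _condition() is evaluated once per test, e.g.
    #
    #     cond = "'NDRPDR' in tags and '1T1C' in tags"
    #     tags = ["NDRPDR", "1T1C", "64B"]
    #     eval(cond, {"tags": tags})  # -> True, the test is included
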
    def filter_tests_by_name(self, element, params=None, data_set="tests",
                             continue_on_error=False):
        """Filter required data from the given jobs and builds.

        The output data structure is:
        - job 1
          - build 1
            - test (or suite) 1 ID:
              - param 1
              - param 2
              ...
              - param n
            ...
            - test (or suite) n ID:
            ...
          ...
          - build n
        ...
        - job n

        :param element: Element which will use the filtered data.
        :param params: Parameters which will be included in the output. If
            None, all parameters are included.
        :param data_set: The set of data to be filtered: tests, suites,
            metadata.
        :param continue_on_error: Continue if there is an error while reading
            the data. The item will be empty then.
        :type element: pandas.Series
        :type params: list
        :type data_set: str
        :type continue_on_error: bool
        :returns: Filtered data.
        :rtype: pandas.Series
        """

        include = element.get("include", None)
        if not include:
            logging.warning("No tests to include, skipping the element.")
            return None

        if params is None:
            params = element.get("parameters", None)
            if params and "type" not in params:
                params.append("type")

        # Expand the test name patterns for each core configuration, see the
        # sketch after this method.
        cores = element.get("core", None)
        if cores:
            tests = list()
            for core in cores:
                for test in include:
                    tests.append(test.format(core=core))
        else:
            tests = include

        data = pd.Series(dtype="float64")
        try:
            for job, builds in element["data"].items():
                data[job] = pd.Series(dtype="float64")
                for build in builds:
                    data[job][str(build)] = pd.Series(dtype="float64")
                    for test in tests:
                        try:
                            reg_ex = re.compile(str(test).lower())
                            for test_id in self.data[job][
                                    str(build)][data_set].keys():
                                if reg_ex.match(str(test_id).lower()):
                                    test_data = self.data[job][
                                        str(build)][data_set][test_id]
                                    data[job][str(build)][test_id] = \
                                        pd.Series(dtype="float64")
                                    if params is None:
                                        for param, val in test_data.items():
                                            data[job][str(build)][test_id]\
                                                [param] = val
                                    else:
                                        for param in params:
                                            try:
                                                data[job][str(build)][
                                                    test_id][param] = \
                                                    test_data[param]
                                            except KeyError:
                                                data[job][str(build)][
                                                    test_id][param] = "No Data"
                        except KeyError as err:
                            if continue_on_error:
                                logging.debug(repr(err))
                                continue
                            logging.error(repr(err))
                            return None
            return data

        except (KeyError, IndexError, ValueError) as err:
            logging.error(
                f"Missing mandatory parameter in the element "
                f"specification: {repr(err)}"
            )
            return None
        except AttributeError as err:
            logging.error(repr(err))
            return None

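    # Illustrative "include" expansion (a sketch with hypothetical patterns):
    # with
    #
    #     include = [".*-{core}-ethip4-ip4base-ndrpdr"]
    #     cores = ["1c", "2c"]
    #
    # the patterns become ".*-1c-ethip4-ip4base-ndrpdr" and
    # ".*-2c-ethip4-ip4base-ndrpdr"; each pattern and each test ID is
    # lower-cased before the regex match.
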
    @staticmethod
    def merge_data(data):
        """Merge data from multiple jobs and builds into a simple data
        structure.

        The output data structure is:

        - test (suite) 1 ID:
          - param 1
          - param 2
          ...
          - param n
        ...
        - test (suite) n ID:
        ...

        :param data: Data to merge.
        :type data: pandas.Series
        :returns: Merged data.
        :rtype: pandas.Series
        """

        logging.info("    Merging data ...")

        merged_data = pd.Series(dtype="float64")
        for builds in data.values:
            for item in builds.values:
                for item_id, item_data in item.items():
                    merged_data[item_id] = item_data
        return merged_data

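    # Illustrative merge (a sketch): items are written in iteration order, so
    # if the same test ID appears in several builds, the entry processed last
    # overwrites the earlier ones:
    #
    #     merged = InputData.merge_data(filtered)  # filtered: filter_data()
    #     test = merged["some-test-id"]            # hypothetical test ID
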
    def print_all_oper_data(self):
        """Print all operational data to the console.
        """

        for job in self._input_data.values:
            for build in job.values:
                for test_id, test_data in build["tests"].items():
                    print(f"{test_id}")
                    if test_data.get("show-run", None) is None:
                        continue
                    for dut_name, data in test_data["show-run"].items():
                        if data.get("runtime", None) is None:
                            continue
                        runtime = loads(data["runtime"])
                        try:
                            threads_nr = len(runtime[0]["clocks"])
                        except (IndexError, KeyError):
                            continue
                        threads = OrderedDict(
                            {idx: list() for idx in range(threads_nr)})
                        for item in runtime:
                            for idx in range(threads_nr):
                                # Clocks per unit of work: prefer vectors,
                                # then calls, then suspends as the divisor.
                                if item["vectors"][idx] > 0:
                                    clocks = item["clocks"][idx] / \
                                             item["vectors"][idx]
                                elif item["calls"][idx] > 0:
                                    clocks = item["clocks"][idx] / \
                                             item["calls"][idx]
                                elif item["suspends"][idx] > 0:
                                    clocks = item["clocks"][idx] / \
                                             item["suspends"][idx]
                                else:
                                    clocks = 0.0

                                if item["calls"][idx] > 0:
                                    vectors_call = item["vectors"][idx] / \
                                                   item["calls"][idx]
                                else:
                                    vectors_call = 0.0

                                # Skip nodes which did no work on this thread.
                                if int(item["calls"][idx]) + int(
                                        item["vectors"][idx]) + \
                                        int(item["suspends"][idx]):
                                    threads[idx].append([
                                        item["name"],
                                        item["calls"][idx],
                                        item["vectors"][idx],
                                        item["suspends"][idx],
                                        clocks,
                                        vectors_call
                                    ])

                        print(f"Host IP: {data.get('host', '')}, "
                              f"Socket: {data.get('socket', '')}")
                        for thread_nr, thread in threads.items():
                            # Column order matches the rows appended above:
                            # name, calls, vectors (packets), suspends,
                            # clocks per packet, vectors per call.
                            txt_table = prettytable.PrettyTable(
                                (
                                    "Name",
                                    "Nr of Calls",
                                    "Nr of Packets",
                                    "Suspends",
                                    "Cycles per Packet",
                                    "Average Vector Size"
                                )
                            )
                            avg = 0.0
                            for row in thread:
                                txt_table.add_row(row)
                                avg += row[-1]
                            if len(thread) == 0:
                                avg = ""
                            else:
                                avg = f", Average Vector Size per Node: " \
                                      f"{(avg / len(thread)):.2f}"
                            th_name = "main" if thread_nr == 0 \
                                else f"worker_{thread_nr}"
                            print(f"{dut_name}, {th_name}{avg}")
                            txt_table.float_format = ".2"
                            txt_table.align = "r"
                            txt_table.align["Name"] = "l"
                            print(f"{txt_table.get_string()}\n")
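        # Worked example for the derived columns above (hypothetical counter
        # values): a node with clocks=1.2e6, vectors=4000, calls=1000 and
        # suspends=0 is shown with
        #     Cycles per Packet   = 1.2e6 / 4000 = 300.00
        #     Average Vector Size = 4000 / 1000 = 4.00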