# Copyright (c) 2019 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

14 """Data pre-processing
15
16 - extract data from output.xml files generated by Jenkins jobs and store in
17   pandas' Series,
18 - provide access to the data.
19 - filter the data using tags,
20 """
21
import re
import copy
import resource
import logging

from collections import OrderedDict
from os import remove, walk, listdir
from os.path import isfile, isdir, join
from datetime import datetime as dt
from datetime import timedelta
from json import loads
from json.decoder import JSONDecodeError

import hdrh.histogram
import hdrh.codec
import prettytable
import pandas as pd

from robot.api import ExecutionResult, ResultVisitor
from robot import errors

from resources.libraries.python import jumpavg
from input_data_files import download_and_unzip_data_file
from pal_errors import PresentationError


# Separator used in file names
SEPARATOR = u"__"


class ExecutionChecker(ResultVisitor):
    """Class to traverse through the test suite structure.

    The functionality implemented in this class generates a json structure:

    Performance tests:

    {
        "metadata": {
            "generated": "Timestamp",
            "version": "SUT version",
            "job": "Jenkins job name",
            "build": "Information about the build"
        },
        "suites": {
            "Suite long name 1": {
                "name": "Suite name",
                "doc": "Suite 1 documentation",
                "parent": "Suite 1 parent",
                "level": "Level of the suite in the suite hierarchy"
            },
            "Suite long name N": {
                "name": "Suite name",
                "doc": "Suite N documentation",
                "parent": "Suite N parent",
                "level": "Level of the suite in the suite hierarchy"
            }
        },
        "tests": {
            # NDRPDR tests:
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "conf-history": "DUT1 and DUT2 VAT History",
                "show-run": "Show Run",
                "tags": ["tag 1", "tag 2", "tag n"],
                "type": "NDRPDR",
                "status": "PASS" | "FAIL",
                "throughput": {
                    "NDR": {
                        "LOWER": float,
                        "UPPER": float
                    },
                    "PDR": {
                        "LOWER": float,
                        "UPPER": float
                    }
                },
                "latency": {
                    "NDR": {
                        "direction1": {
                            "min": float,
                            "avg": float,
                            "max": float,
                            "hdrh": str
                        },
                        "direction2": {
                            "min": float,
                            "avg": float,
                            "max": float,
                            "hdrh": str
                        }
                    },
                    "PDR": {
                        "direction1": {
                            "min": float,
                            "avg": float,
                            "max": float,
                            "hdrh": str
                        },
                        "direction2": {
                            "min": float,
                            "avg": float,
                            "max": float,
                            "hdrh": str
                        }
                    }
                }
            },

            # TCP tests:
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "tags": ["tag 1", "tag 2", "tag n"],
                "type": "TCP",
                "status": "PASS" | "FAIL",
                "result": int
            },

            # MRR, BMRR tests:
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "tags": ["tag 1", "tag 2", "tag n"],
                "type": "MRR" | "BMRR",
                "status": "PASS" | "FAIL",
                "result": {
                    "receive-rate": float,
                    "receive-stdev": float
                    # Average and stdev of a list of trial results,
                    # computed using AvgStdevStats.
                    # In CSIT-1180, replace with List[float].
                }
            },

            "ID": {
                # next test
            }
        }
    }


    Functional tests:

    {
        "metadata": {  # Optional
            "version": "VPP version",
            "job": "Jenkins job name",
            "build": "Information about the build"
        },
        "suites": {
            "Suite name 1": {
                "doc": "Suite 1 documentation",
                "parent": "Suite 1 parent",
                "level": "Level of the suite in the suite hierarchy"
            },
            "Suite name N": {
                "doc": "Suite N documentation",
                "parent": "Suite N parent",
                "level": "Level of the suite in the suite hierarchy"
            }
        },
        "tests": {
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "tags": ["tag 1", "tag 2", "tag n"],
                "conf-history": "DUT1 and DUT2 VAT History",
                "show-run": "Show Run",
                "status": "PASS" | "FAIL"
            },
            "ID": {
                # next test
            }
        }
    }

    .. note:: ID is the lowercase full path to the test.
    """

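    # A minimal illustration (not from the source; the test ID value is
    # hypothetical) of how the generated structure is consumed downstream:
    #   result = data[u"tests"][test_id][u"result"]
    #   rate, stdev = result[u"receive-rate"], result[u"receive-stdev"]
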
    REGEX_PLR_RATE = re.compile(
        r'PLRsearch lower bound::?\s(\d+\.\d+).*\n'
        r'PLRsearch upper bound::?\s(\d+\.\d+)'
    )
    REGEX_NDRPDR_RATE = re.compile(
        r'NDR_LOWER:\s(\d+\.\d+).*\n.*\n'
        r'NDR_UPPER:\s(\d+\.\d+).*\n'
        r'PDR_LOWER:\s(\d+\.\d+).*\n.*\n'
        r'PDR_UPPER:\s(\d+\.\d+)'
    )
    REGEX_PERF_MSG_INFO = re.compile(
        r'NDR_LOWER:\s(\d+\.\d+)\s.*\s(\d+\.\d+)\s.*\n.*\n.*\n'
        r'PDR_LOWER:\s(\d+\.\d+)\s.*\s(\d+\.\d+)\s.*\n.*\n.*\n'
        r'Latency at 90% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
        r'Latency at 50% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
        r'Latency at 10% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
    )
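    # Shape of the message REGEX_PERF_MSG_INFO is built for, sketched from
    # the pattern itself (values illustrative, not from a real run):
    #   NDR_LOWER: 12.34 <unit> 5.67 <unit> ... (two more lines)
    #   PDR_LOWER: 12.30 <unit> 5.60 <unit> ... (two more lines)
    #   Latency at 90% PDR: ['min/avg/max/hdrh', 'min/avg/max/hdrh'] ...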
    REGEX_MRR_MSG_INFO = re.compile(r'.*\[(.*)\]')

    # TODO: Remove when not needed
    REGEX_NDRPDR_LAT_BASE = re.compile(
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]'
    )
    REGEX_NDRPDR_LAT = re.compile(
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]'
    )
    # TODO: Remove when not needed
    REGEX_NDRPDR_LAT_LONG = re.compile(
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]'
    )
    REGEX_VERSION_VPP = re.compile(
        r"(return STDOUT Version:\s*|"
        r"VPP Version:\s*|VPP version:\s*)(.*)"
    )
    REGEX_VERSION_DPDK = re.compile(
        r"(DPDK version:\s*|DPDK Version:\s*)(.*)"
    )
    REGEX_TCP = re.compile(
        r'Total\s(rps|cps|throughput):\s(\d*).*$'
    )
    REGEX_MRR = re.compile(
        r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s'
        r'tx\s(\d*),\srx\s(\d*)'
    )
    REGEX_BMRR = re.compile(
        r'Maximum Receive Rate trial results'
        r' in packets per second: \[(.*)\]'
    )
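    # Example of a message REGEX_BMRR is meant to match (values illustrative,
    # not from a real run): "Maximum Receive Rate trial results in packets
    # per second: [11083684.0, 11084905.0, 11081833.0]"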
    REGEX_RECONF_LOSS = re.compile(
        r'Packets lost due to reconfig: (\d*)'
    )
    REGEX_RECONF_TIME = re.compile(
        r'Implied time lost: (\d*\.[\de-]*)'
    )
    REGEX_TC_TAG = re.compile(r'\d+[tT]\d+[cC]')

    REGEX_TC_NAME_OLD = re.compile(r'-\d+[tT]\d+[cC]-')

    REGEX_TC_NAME_NEW = re.compile(r'-\d+[cC]-')

    REGEX_TC_NUMBER = re.compile(r'tc\d{2}-')

    REGEX_TC_PAPI_CLI = re.compile(r'.*\((\d+.\d+.\d+.\d+.) - (.*)\)')

    def __init__(self, metadata, mapping, ignore):
        """Initialisation.

        :param metadata: Key-value pairs to be included in "metadata" part of
            JSON structure.
        :param mapping: Mapping of the old names of test cases to the new
            (actual) ones.
        :param ignore: List of TCs to be ignored.
        :type metadata: dict
        :type mapping: dict
        :type ignore: list
        """

        # Type of message to parse out from the test messages
        self._msg_type = None

        # VPP version
        self._version = None

        # Timestamp
        self._timestamp = None

        # Testbed. The testbed is identified by TG node IP address.
        self._testbed = None

        # Mapping of TCs long names
        self._mapping = mapping

        # Ignore list
        self._ignore = ignore

        # Number of PAPI History messages found:
        # 0 - no message
        # 1 - PAPI History of DUT1
        # 2 - PAPI History of DUT2
        self._conf_history_lookup_nr = 0

        self._sh_run_counter = 0

        # Test ID of the currently processed test - the lowercase full path
        # to the test
        self._test_id = None

        # The main data structure
        self._data = {
            u"metadata": OrderedDict(),
            u"suites": OrderedDict(),
            u"tests": OrderedDict()
        }

        # Save the provided metadata
        for key, val in metadata.items():
            self._data[u"metadata"][key] = val

        # Dictionary defining the methods used to parse different types of
        # messages
        self.parse_msg = {
            u"timestamp": self._get_timestamp,
            u"vpp-version": self._get_vpp_version,
            u"dpdk-version": self._get_dpdk_version,
            # TODO: Remove when not needed:
            u"teardown-vat-history": self._get_vat_history,
            u"teardown-papi-history": self._get_papi_history,
            u"test-show-runtime": self._get_show_run,
            u"testbed": self._get_testbed
        }

    @property
    def data(self):
        """Getter - Data parsed from the XML file.

        :returns: Data parsed from the XML file.
        :rtype: dict
        """
        return self._data

    def _get_data_from_mrr_test_msg(self, msg):
        """Get info from the message of MRR performance tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message, or u"Test Failed." if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_MRR_MSG_INFO, msg)
        if not groups or groups.lastindex != 1:
            return u"Test Failed."

        try:
            data = groups.group(1).split(u", ")
        except (AttributeError, IndexError, ValueError, KeyError):
            return u"Test Failed."

        out_str = u"["
        try:
            for item in data:
                out_str += f"{(float(item) / 1e6):.2f}, "
            return out_str[:-2] + u"]"
        except (AttributeError, IndexError, ValueError, KeyError):
            return u"Test Failed."

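    # For illustration (hypothetical input): a trial-results message
    # containing "[10022345.0, 10123456.0]" is rendered above as
    # "[10.02, 10.12]", i.e. per-trial receive rates in Mpps.
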
    def _get_data_from_perf_test_msg(self, msg):
        """Get info from the message of NDRPDR performance tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message, or u"Test Failed." if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_PERF_MSG_INFO, msg)
        if not groups or groups.lastindex != 10:
            return u"Test Failed."

        try:
            data = {
                u"ndr_low": float(groups.group(1)),
                u"ndr_low_b": float(groups.group(2)),
                u"pdr_low": float(groups.group(3)),
                u"pdr_low_b": float(groups.group(4)),
                u"pdr_lat_90_1": groups.group(5),
                u"pdr_lat_90_2": groups.group(6),
                u"pdr_lat_50_1": groups.group(7),
                u"pdr_lat_50_2": groups.group(8),
                u"pdr_lat_10_1": groups.group(9),
                u"pdr_lat_10_2": groups.group(10),
            }
        except (AttributeError, IndexError, ValueError, KeyError):
            return u"Test Failed."

        def _process_lat(in_str_1, in_str_2):
            """Extract percentile latency values from latency strings.

            :param in_str_1: Latency string for one direction produced by
                robot framework.
            :param in_str_2: Latency string for second direction produced by
                robot framework.
            :type in_str_1: str
            :type in_str_2: str
            :returns: Processed latency values or None if a problem occurs.
            :rtype: tuple
            """
            in_list_1 = in_str_1.split('/', 3)
            in_list_2 = in_str_2.split('/', 3)

            if len(in_list_1) != 4 or len(in_list_2) != 4:
                return None

            # Pad the base64-encoded hdrh data to a multiple of four
            # characters, as required by the decoder.
            in_list_1[3] += u"=" * ((4 - len(in_list_1[3]) % 4) % 4)
            try:
                hdr_lat_1 = hdrh.histogram.HdrHistogram.decode(in_list_1[3])
            except hdrh.codec.HdrLengthException:
                return None

            in_list_2[3] += u"=" * ((4 - len(in_list_2[3]) % 4) % 4)
            try:
                hdr_lat_2 = hdrh.histogram.HdrHistogram.decode(in_list_2[3])
            except hdrh.codec.HdrLengthException:
                return None

            if hdr_lat_1 and hdr_lat_2:
                hdr_lat = (
                    hdr_lat_1.get_value_at_percentile(50.0),
                    hdr_lat_1.get_value_at_percentile(90.0),
                    hdr_lat_1.get_value_at_percentile(99.0),
                    hdr_lat_2.get_value_at_percentile(50.0),
                    hdr_lat_2.get_value_at_percentile(90.0),
                    hdr_lat_2.get_value_at_percentile(99.0)
                )

                if all(hdr_lat):
                    return hdr_lat

            return None

        try:
            out_msg = (
                f"1. {(data[u'ndr_low'] / 1e6):5.2f}      "
                f"{data[u'ndr_low_b']:5.2f}"
                f"\n2. {(data[u'pdr_low'] / 1e6):5.2f}      "
                f"{data[u'pdr_low_b']:5.2f}"
            )
            latency = (
                _process_lat(data[u'pdr_lat_10_1'], data[u'pdr_lat_10_2']),
                _process_lat(data[u'pdr_lat_50_1'], data[u'pdr_lat_50_2']),
                _process_lat(data[u'pdr_lat_90_1'], data[u'pdr_lat_90_2'])
            )
            if all(latency):
                max_len = len(str(max((max(item) for item in latency))))
                max_len = 4 if max_len < 4 else max_len

                for idx, lat in enumerate(latency):
                    if not idx:
                        out_msg += u"\n"
                    out_msg += (
                        f"\n{idx + 3}. "
                        f"{lat[0]:{max_len}d} "
                        f"{lat[1]:{max_len}d} "
                        f"{lat[2]:{max_len}d}      "
                        f"{lat[3]:{max_len}d} "
                        f"{lat[4]:{max_len}d} "
                        f"{lat[5]:{max_len}d} "
                    )

            return out_msg

        except (AttributeError, IndexError, ValueError, KeyError):
            return u"Test Failed."

    def _get_testbed(self, msg):
        """Called when extraction of testbed IP is required.
        The testbed is identified by TG node IP address.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count(u"Setup of TG node") or \
                msg.message.count(u"Setup of node TG host"):
            reg_tg_ip = re.compile(
                r'.*TG .* (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).*')
            try:
                self._testbed = str(re.search(reg_tg_ip, msg.message).group(1))
            except (KeyError, ValueError, IndexError, AttributeError):
                pass
            finally:
                self._data[u"metadata"][u"testbed"] = self._testbed
                self._msg_type = None

    def _get_vpp_version(self, msg):
        """Called when extraction of VPP version is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count(u"return STDOUT Version:") or \
            msg.message.count(u"VPP Version:") or \
            msg.message.count(u"VPP version:"):
            self._version = str(re.search(self.REGEX_VERSION_VPP, msg.message).
                                group(2))
            self._data[u"metadata"][u"version"] = self._version
            self._msg_type = None

    def _get_dpdk_version(self, msg):
        """Called when extraction of DPDK version is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count(u"DPDK Version:"):
            try:
                self._version = str(re.search(
                    self.REGEX_VERSION_DPDK, msg.message).group(2))
                self._data[u"metadata"][u"version"] = self._version
            except IndexError:
                pass
            finally:
                self._msg_type = None

    def _get_timestamp(self, msg):
        """Called when extraction of timestamp is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        self._timestamp = msg.timestamp[:14]
        self._data[u"metadata"][u"generated"] = self._timestamp
        self._msg_type = None

    def _get_vat_history(self, msg):
        """Called when extraction of VAT command history is required.

        TODO: Remove when not needed.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if msg.message.count(u"VAT command history:"):
            self._conf_history_lookup_nr += 1
            if self._conf_history_lookup_nr == 1:
                self._data[u"tests"][self._test_id][u"conf-history"] = str()
            else:
                self._msg_type = None
            text = re.sub(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} "
                          r"VAT command history:", u"",
                          msg.message, count=1).replace(u'\n', u' |br| ').\
                replace(u'"', u"'")

            self._data[u"tests"][self._test_id][u"conf-history"] += (
                f" |br| **DUT{str(self._conf_history_lookup_nr)}:** {text}"
            )

    def _get_papi_history(self, msg):
        """Called when extraction of PAPI command history is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if msg.message.count(u"PAPI command history:"):
            self._conf_history_lookup_nr += 1
            if self._conf_history_lookup_nr == 1:
                self._data[u"tests"][self._test_id][u"conf-history"] = str()
            else:
                self._msg_type = None
            text = re.sub(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} "
                          r"PAPI command history:", u"",
                          msg.message, count=1).replace(u'\n', u' |br| ').\
                replace(u'"', u"'")
            self._data[u"tests"][self._test_id][u"conf-history"] += (
                f" |br| **DUT{str(self._conf_history_lookup_nr)}:** {text}"
            )

    def _get_show_run(self, msg):
        """Called when extraction of VPP operational data (output of CLI command
        Show Runtime) is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if not msg.message.count(u"stats runtime"):
            return

        # Temporary solution
        if self._sh_run_counter > 1:
            return

        if u"show-run" not in self._data[u"tests"][self._test_id].keys():
            self._data[u"tests"][self._test_id][u"show-run"] = dict()

        groups = re.search(self.REGEX_TC_PAPI_CLI, msg.message)
        if not groups:
            return
        try:
            host = groups.group(1)
        except (AttributeError, IndexError):
            host = u""
        try:
            sock = groups.group(2)
        except (AttributeError, IndexError):
            sock = u""

        runtime = loads(str(msg.message).replace(u' ', u'').replace(u'\n', u'').
                        replace(u"'", u'"').replace(u'b"', u'"').
                        replace(u'u"', u'"').split(u":", 1)[1])

        try:
            threads_nr = len(runtime[0][u"clocks"])
        except (IndexError, KeyError):
            return

        dut = u"DUT{nr}".format(
            nr=len(self._data[u'tests'][self._test_id][u'show-run'].keys()) + 1)

        oper = {
            u"host": host,
            u"socket": sock,
            u"threads": OrderedDict({idx: list() for idx in range(threads_nr)})
        }

        for item in runtime:
            for idx in range(threads_nr):
                if item[u"vectors"][idx] > 0:
                    clocks = item[u"clocks"][idx] / item[u"vectors"][idx]
                elif item[u"calls"][idx] > 0:
                    clocks = item[u"clocks"][idx] / item[u"calls"][idx]
                elif item[u"suspends"][idx] > 0:
                    clocks = item[u"clocks"][idx] / item[u"suspends"][idx]
                else:
                    clocks = 0.0

                if item[u"calls"][idx] > 0:
                    vectors_call = item[u"vectors"][idx] / item[u"calls"][idx]
                else:
                    vectors_call = 0.0

                if int(item[u"calls"][idx]) + int(item[u"vectors"][idx]) + \
                        int(item[u"suspends"][idx]):
                    oper[u"threads"][idx].append([
                        item[u"name"],
                        item[u"calls"][idx],
                        item[u"vectors"][idx],
                        item[u"suspends"][idx],
                        clocks,
                        vectors_call
                    ])

        self._data[u'tests'][self._test_id][u'show-run'][dut] = copy.copy(oper)

    def _get_ndrpdr_throughput(self, msg):
        """Get NDR_LOWER, NDR_UPPER, PDR_LOWER and PDR_UPPER from the test
        message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        throughput = {
            u"NDR": {u"LOWER": -1.0, u"UPPER": -1.0},
            u"PDR": {u"LOWER": -1.0, u"UPPER": -1.0}
        }
        status = u"FAIL"
        groups = re.search(self.REGEX_NDRPDR_RATE, msg)

        if groups is not None:
            try:
                throughput[u"NDR"][u"LOWER"] = float(groups.group(1))
                throughput[u"NDR"][u"UPPER"] = float(groups.group(2))
                throughput[u"PDR"][u"LOWER"] = float(groups.group(3))
                throughput[u"PDR"][u"UPPER"] = float(groups.group(4))
                status = u"PASS"
            except (IndexError, ValueError):
                pass

        return throughput, status

    def _get_plr_throughput(self, msg):
        """Get PLRsearch lower bound and PLRsearch upper bound from the test
        message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        throughput = {
            u"LOWER": -1.0,
            u"UPPER": -1.0
        }
        status = u"FAIL"
        groups = re.search(self.REGEX_PLR_RATE, msg)

        if groups is not None:
            try:
                throughput[u"LOWER"] = float(groups.group(1))
                throughput[u"UPPER"] = float(groups.group(2))
                status = u"PASS"
            except (IndexError, ValueError):
                pass

        return throughput, status

    def _get_ndrpdr_latency(self, msg):
        """Get LATENCY from the test message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """
        latency_default = {
            u"min": -1.0,
            u"avg": -1.0,
            u"max": -1.0,
            u"hdrh": u""
        }
        latency = {
            u"NDR": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"PDR": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"LAT0": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"PDR10": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"PDR50": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"PDR90": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
        }

        # TODO: Rewrite when long and base are not needed
        groups = re.search(self.REGEX_NDRPDR_LAT_LONG, msg)
        if groups is None:
            groups = re.search(self.REGEX_NDRPDR_LAT, msg)
        if groups is None:
            groups = re.search(self.REGEX_NDRPDR_LAT_BASE, msg)
        if groups is None:
            return latency, u"FAIL"

        def process_latency(in_str):
            """Return object with parsed latency values.

            TODO: Define class for the return type.

            :param in_str: Input string, min/avg/max/hdrh format.
            :type in_str: str
            :returns: Dict with corresponding keys; all values except hdrh
                are floats.
            :rtype: dict
            :throws IndexError: If in_str does not have enough substrings.
            :throws ValueError: If a substring does not convert to float.
            """
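            # Illustrative input (hdrh payload shortened, hypothetical):
            #   "1/5/10/HISTFAAAA..." ->
            #   {"min": 1.0, "avg": 5.0, "max": 10.0, "hdrh": "HISTFAAAA..."}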
            in_list = in_str.split('/', 3)

            rval = {
                u"min": float(in_list[0]),
                u"avg": float(in_list[1]),
                u"max": float(in_list[2]),
                u"hdrh": u""
            }

            if len(in_list) == 4:
                rval[u"hdrh"] = str(in_list[3])

            return rval

        try:
            latency[u"NDR"][u"direction1"] = process_latency(groups.group(1))
            latency[u"NDR"][u"direction2"] = process_latency(groups.group(2))
            latency[u"PDR"][u"direction1"] = process_latency(groups.group(3))
            latency[u"PDR"][u"direction2"] = process_latency(groups.group(4))
            if groups.lastindex == 4:
                return latency, u"PASS"
        except (IndexError, ValueError):
            pass

        try:
            latency[u"PDR90"][u"direction1"] = process_latency(groups.group(5))
            latency[u"PDR90"][u"direction2"] = process_latency(groups.group(6))
            latency[u"PDR50"][u"direction1"] = process_latency(groups.group(7))
            latency[u"PDR50"][u"direction2"] = process_latency(groups.group(8))
            latency[u"PDR10"][u"direction1"] = process_latency(groups.group(9))
            latency[u"PDR10"][u"direction2"] = process_latency(groups.group(10))
            latency[u"LAT0"][u"direction1"] = process_latency(groups.group(11))
            latency[u"LAT0"][u"direction2"] = process_latency(groups.group(12))
            if groups.lastindex == 12:
                return latency, u"PASS"
        except (IndexError, ValueError):
            pass

        # TODO: Remove when not needed
        latency[u"NDR10"] = {
            u"direction1": copy.copy(latency_default),
            u"direction2": copy.copy(latency_default)
        }
        latency[u"NDR50"] = {
            u"direction1": copy.copy(latency_default),
            u"direction2": copy.copy(latency_default)
        }
        latency[u"NDR90"] = {
            u"direction1": copy.copy(latency_default),
            u"direction2": copy.copy(latency_default)
        }
        try:
            latency[u"LAT0"][u"direction1"] = process_latency(groups.group(5))
            latency[u"LAT0"][u"direction2"] = process_latency(groups.group(6))
            latency[u"NDR10"][u"direction1"] = process_latency(groups.group(7))
            latency[u"NDR10"][u"direction2"] = process_latency(groups.group(8))
            latency[u"NDR50"][u"direction1"] = process_latency(groups.group(9))
            latency[u"NDR50"][u"direction2"] = process_latency(groups.group(10))
            latency[u"NDR90"][u"direction1"] = process_latency(groups.group(11))
            latency[u"NDR90"][u"direction2"] = process_latency(groups.group(12))
            latency[u"PDR10"][u"direction1"] = process_latency(groups.group(13))
            latency[u"PDR10"][u"direction2"] = process_latency(groups.group(14))
            latency[u"PDR50"][u"direction1"] = process_latency(groups.group(15))
            latency[u"PDR50"][u"direction2"] = process_latency(groups.group(16))
            latency[u"PDR90"][u"direction1"] = process_latency(groups.group(17))
            latency[u"PDR90"][u"direction2"] = process_latency(groups.group(18))
            return latency, u"PASS"
        except (IndexError, ValueError):
            pass

        return latency, u"FAIL"

    @staticmethod
    def _get_hoststack_data(msg, tags):
        """Get data from the hoststack test message.

        :param msg: The test message to be parsed.
        :param tags: Test tags.
        :type msg: str
        :type tags: list
        :returns: Parsed data as a JSON dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """
        result = dict()
        status = u"FAIL"

        msg = msg.replace(u"'", u'"').replace(u" ", u"")
        if u"LDPRELOAD" in tags:
            try:
                result = loads(msg)
                status = u"PASS"
            except JSONDecodeError:
                pass
        elif u"VPPECHO" in tags:
            try:
                msg_lst = msg.replace(u"}{", u"} {").split(u" ")
                result = dict(
                    client=loads(msg_lst[0]),
                    server=loads(msg_lst[1])
                )
                status = u"PASS"
            except (JSONDecodeError, IndexError):
                pass

        return result, status

    def visit_suite(self, suite):
        """Implements traversing through the suite and its direct children.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """
        if self.start_suite(suite) is not False:
            suite.suites.visit(self)
            suite.tests.visit(self)
            self.end_suite(suite)

    def start_suite(self, suite):
        """Called when suite starts.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """

        try:
            parent_name = suite.parent.name
        except AttributeError:
            return

        doc_str = suite.doc.\
            replace(u'"', u"'").\
            replace(u'\n', u' ').\
            replace(u'\r', u'').\
            replace(u'*[', u' |br| *[').\
            replace(u"*", u"**").\
            replace(u' |br| *[', u'*[', 1)

        self._data[u"suites"][suite.longname.lower().
                              replace(u'"', u"'").
                              replace(u" ", u"_")] = {
                                  u"name": suite.name.lower(),
                                  u"doc": doc_str,
                                  u"parent": parent_name,
                                  u"level": len(suite.longname.split(u"."))
                              }

        suite.keywords.visit(self)

    def end_suite(self, suite):
        """Called when suite ends.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """

    def visit_test(self, test):
        """Implements traversing through the test.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """
        if self.start_test(test) is not False:
            test.keywords.visit(self)
            self.end_test(test)

    def start_test(self, test):
        """Called when test starts.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """

        self._sh_run_counter = 0

        longname_orig = test.longname.lower()

        # Check the ignore list
        if longname_orig in self._ignore:
            return

        tags = [str(tag) for tag in test.tags]
        test_result = dict()

        # Change the TC long name and name if defined in the mapping table
        longname = self._mapping.get(longname_orig, None)
        if longname is not None:
            name = longname.split(u'.')[-1]
            logging.debug(
                f"{self._data[u'metadata']}\n{longname_orig}\n{longname}\n"
                f"{name}"
            )
        else:
            longname = longname_orig
            name = test.name.lower()

        # Remove TC number from the TC long name (backward compatibility):
        self._test_id = re.sub(self.REGEX_TC_NUMBER, u"", longname)
        # Remove TC number from the TC name (not needed):
        test_result[u"name"] = re.sub(self.REGEX_TC_NUMBER, u"", name)

        test_result[u"parent"] = test.parent.name.lower()
        test_result[u"tags"] = tags
        test_result[u"doc"] = test.doc.\
            replace(u'"', u"'").\
            replace(u'\n', u' ').\
            replace(u'\r', u'').\
            replace(u'[', u' |br| [').\
            replace(u' |br| [', u'[', 1)
        test_result[u"type"] = u"FUNC"
        test_result[u"status"] = test.status

        if test.status == u"PASS":
            if u"NDRPDR" in tags:
                test_result[u"msg"] = self._get_data_from_perf_test_msg(
                    test.message).replace(u'\n', u' |br| ').\
                    replace(u'\r', u'').replace(u'"', u"'")
            elif u"MRR" in tags or u"FRMOBL" in tags or u"BMRR" in tags:
                test_result[u"msg"] = self._get_data_from_mrr_test_msg(
                    test.message).replace(u'\n', u' |br| ').\
                    replace(u'\r', u'').replace(u'"', u"'")
            else:
                test_result[u"msg"] = test.message.replace(u'\n', u' |br| ').\
                    replace(u'\r', u'').replace(u'"', u"'")
        else:
            test_result[u"msg"] = u"Test Failed."

        if u"PERFTEST" in tags:
            # Replace info about cores (e.g. -1c-) with the info about threads
            # and cores (e.g. -1t1c-) in the long test case names and in the
            # test case names if necessary.
            groups = re.search(self.REGEX_TC_NAME_OLD, self._test_id)
            if not groups:
                tag_count = 0
                tag_tc = str()
                for tag in test_result[u"tags"]:
                    groups = re.search(self.REGEX_TC_TAG, tag)
                    if groups:
                        tag_count += 1
                        tag_tc = tag

                if tag_count == 1:
                    self._test_id = re.sub(
                        self.REGEX_TC_NAME_NEW, f"-{tag_tc.lower()}-",
                        self._test_id, count=1
                    )
                    test_result[u"name"] = re.sub(
                        self.REGEX_TC_NAME_NEW, f"-{tag_tc.lower()}-",
                        test_result[u"name"], count=1
                    )
                else:
                    test_result[u"status"] = u"FAIL"
                    self._data[u"tests"][self._test_id] = test_result
                    logging.debug(
                        f"The test {self._test_id} has none or more than one "
                        f"multi-threading tag.\n"
                        f"Tags: {test_result[u'tags']}"
                    )
                    return

        if test.status == u"PASS":
            if u"NDRPDR" in tags:
                test_result[u"type"] = u"NDRPDR"
                test_result[u"throughput"], test_result[u"status"] = \
                    self._get_ndrpdr_throughput(test.message)
                test_result[u"latency"], test_result[u"status"] = \
                    self._get_ndrpdr_latency(test.message)
            elif u"SOAK" in tags:
                test_result[u"type"] = u"SOAK"
                test_result[u"throughput"], test_result[u"status"] = \
                    self._get_plr_throughput(test.message)
            elif u"HOSTSTACK" in tags:
                test_result[u"type"] = u"HOSTSTACK"
                test_result[u"result"], test_result[u"status"] = \
                    self._get_hoststack_data(test.message, tags)
            elif u"TCP" in tags:
                test_result[u"type"] = u"TCP"
                groups = re.search(self.REGEX_TCP, test.message)
                test_result[u"result"] = int(groups.group(2))
            elif u"MRR" in tags or u"FRMOBL" in tags or u"BMRR" in tags:
                if u"MRR" in tags:
                    test_result[u"type"] = u"MRR"
                else:
                    test_result[u"type"] = u"BMRR"

                test_result[u"result"] = dict()
                groups = re.search(self.REGEX_BMRR, test.message)
                if groups is not None:
                    items_str = groups.group(1)
                    items_float = [float(item.strip()) for item
                                   in items_str.split(",")]
                    # Use the whole list in CSIT-1180.
                    stats = jumpavg.AvgStdevStats.for_runs(items_float)
                    test_result[u"result"][u"receive-rate"] = stats.avg
                    test_result[u"result"][u"receive-stdev"] = stats.stdev
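                    # Illustration (hypothetical trials): for_runs(
                    # [10.0, 11.0, 12.0]) would give avg 11.0 and, assuming
                    # jumpavg computes a population stdev, stdev ~0.816;
                    # both values feed the comparison tables.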
                else:
                    groups = re.search(self.REGEX_MRR, test.message)
                    test_result[u"result"][u"receive-rate"] = \
                        float(groups.group(3)) / float(groups.group(1))
            elif u"RECONF" in tags:
                test_result[u"type"] = u"RECONF"
                test_result[u"result"] = None
                try:
                    grps_loss = re.search(self.REGEX_RECONF_LOSS, test.message)
                    grps_time = re.search(self.REGEX_RECONF_TIME, test.message)
                    test_result[u"result"] = {
                        u"loss": int(grps_loss.group(1)),
                        u"time": float(grps_time.group(1))
                    }
                except (AttributeError, IndexError, ValueError, TypeError):
                    test_result[u"status"] = u"FAIL"
            elif u"DEVICETEST" in tags:
                test_result[u"type"] = u"DEVICETEST"
            else:
                test_result[u"status"] = u"FAIL"
                self._data[u"tests"][self._test_id] = test_result
                return

        self._data[u"tests"][self._test_id] = test_result

    def end_test(self, test):
        """Called when test ends.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """

    def visit_keyword(self, keyword):
        """Implements traversing through the keyword and its child keywords.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """
        if self.start_keyword(keyword) is not False:
            self.end_keyword(keyword)

    def start_keyword(self, keyword):
        """Called when keyword starts. Dispatches to the setup, teardown or
        test keyword visitor.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """
        try:
            if keyword.type == u"setup":
                self.visit_setup_kw(keyword)
            elif keyword.type == u"teardown":
                self.visit_teardown_kw(keyword)
            else:
                self.visit_test_kw(keyword)
        except AttributeError:
            pass

    def end_keyword(self, keyword):
        """Called when keyword ends. Default implementation does nothing.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """

    def visit_test_kw(self, test_kw):
        """Implements traversing through the test keyword and its child
        keywords.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """
        for keyword in test_kw.keywords:
            if self.start_test_kw(keyword) is not False:
                self.visit_test_kw(keyword)
                self.end_test_kw(keyword)

    def start_test_kw(self, test_kw):
        """Called when test keyword starts. Sets the message type for the
        keywords which provide data to be parsed.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """
        if test_kw.name.count(u"Show Runtime On All Duts") or \
                test_kw.name.count(u"Show Runtime Counters On All Duts"):
            self._msg_type = u"test-show-runtime"
            self._sh_run_counter += 1
        elif test_kw.name.count(u"Install Dpdk Test On All Duts") and \
                not self._version:
            self._msg_type = u"dpdk-version"
        else:
            return
        test_kw.messages.visit(self)

    def end_test_kw(self, test_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """

    def visit_setup_kw(self, setup_kw):
        """Implements traversing through the setup keyword and its child
        keywords.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """
        for keyword in setup_kw.keywords:
            if self.start_setup_kw(keyword) is not False:
                self.visit_setup_kw(keyword)
                self.end_setup_kw(keyword)

    def start_setup_kw(self, setup_kw):
        """Called when setup keyword starts. Sets the message type for the
        keywords which provide data to be parsed.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """
        if setup_kw.name.count(u"Show Vpp Version On All Duts") \
                and not self._version:
            self._msg_type = u"vpp-version"
        elif setup_kw.name.count(u"Set Global Variable") \
                and not self._timestamp:
            self._msg_type = u"timestamp"
        elif setup_kw.name.count(u"Setup Framework") and not self._testbed:
            self._msg_type = u"testbed"
        else:
            return
        setup_kw.messages.visit(self)

    def end_setup_kw(self, setup_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """

    def visit_teardown_kw(self, teardown_kw):
        """Implements traversing through the teardown keyword and its child
        keywords.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """
        for keyword in teardown_kw.keywords:
            if self.start_teardown_kw(keyword) is not False:
                self.visit_teardown_kw(keyword)
                self.end_teardown_kw(keyword)

    def start_teardown_kw(self, teardown_kw):
        """Called when teardown keyword starts.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """

        if teardown_kw.name.count(u"Show Vat History On All Duts"):
            # TODO: Remove when not needed:
            self._conf_history_lookup_nr = 0
            self._msg_type = u"teardown-vat-history"
            teardown_kw.messages.visit(self)
        elif teardown_kw.name.count(u"Show Papi History On All Duts"):
            self._conf_history_lookup_nr = 0
            self._msg_type = u"teardown-papi-history"
            teardown_kw.messages.visit(self)

    def end_teardown_kw(self, teardown_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """

    def visit_message(self, msg):
        """Implements visiting the message.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if self.start_message(msg) is not False:
            self.end_message(msg)

    def start_message(self, msg):
        """Called when message starts. Gets the required information from the
        message, e.g. the VPP version, using the parser selected by the
        current message type.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if self._msg_type:
            self.parse_msg[self._msg_type](msg)

    def end_message(self, msg):
        """Called when message ends. Default implementation does nothing.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """


class InputData:
    """Input data

    The data is extracted from output.xml files generated by Jenkins jobs and
    stored in pandas' DataFrames.

    The data structure:
    - job name
      - build number
        - metadata
          (as described in ExecutionChecker documentation)
        - suites
          (as described in ExecutionChecker documentation)
        - tests
          (as described in ExecutionChecker documentation)
    """

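    # A sketch of the intended use, with hypothetical job/build identifiers:
    #   in_data = InputData(spec)
    #   in_data.download_and_parse_data(repeat=2)
    #   tests = in_data.tests(u"csit-vpp-perf-mrr-daily-master", u"123")
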
    def __init__(self, spec):
        """Initialisation.

        :param spec: Specification.
        :type spec: Specification
        """

        # Specification:
        self._cfg = spec

        # Data store:
        self._input_data = pd.Series()

    @property
    def data(self):
        """Getter - Input data.

        :returns: Input data.
        :rtype: pandas.Series
        """
        return self._input_data

    def metadata(self, job, build):
        """Getter - metadata.

        :param job: Job which metadata we want.
        :param build: Build which metadata we want.
        :type job: str
        :type build: str
        :returns: Metadata.
        :rtype: pandas.Series
        """
        return self.data[job][build][u"metadata"]

    def suites(self, job, build):
        """Getter - suites.

        :param job: Job which suites we want.
        :param build: Build which suites we want.
        :type job: str
        :type build: str
        :returns: Suites.
        :rtype: pandas.Series
        """
        return self.data[job][str(build)][u"suites"]

    def tests(self, job, build):
        """Getter - tests.

        :param job: Job which tests we want.
        :param build: Build which tests we want.
        :type job: str
        :type build: str
        :returns: Tests.
        :rtype: pandas.Series
        """
        return self.data[job][build][u"tests"]

    def _parse_tests(self, job, build, log):
        """Process data from robot output.xml file and return JSON structured
        data.

        :param job: The name of the job whose build output data will be
            processed.
        :param build: The build whose output data will be processed.
        :param log: List of log messages.
        :type job: str
        :type build: dict
        :type log: list of tuples (severity, msg)
        :returns: JSON data structure.
        :rtype: dict
        """

        metadata = {
            u"job": job,
            u"build": build
        }

        with open(build[u"file-name"], u'r') as data_file:
            try:
                result = ExecutionResult(data_file)
            except errors.DataError as err:
                log.append(
                    (u"ERROR", f"Error occurred while parsing output.xml: "
                               f"{repr(err)}")
                )
                return None
        checker = ExecutionChecker(metadata, self._cfg.mapping,
                                   self._cfg.ignore)
        result.visit(checker)

        return checker.data

1431     def _download_and_parse_build(self, job, build, repeat, pid=10000):
1432         """Download and parse the input data file.
1433
1434         :param pid: PID of the process executing this method.
1435         :param job: Name of the Jenkins job which generated the processed input
1436             file.
1437         :param build: Information about the Jenkins build which generated the
1438             processed input file.
1439         :param repeat: Repeat the download specified number of times if not
1440             successful.
1441         :type pid: int
1442         :type job: str
1443         :type build: dict
1444         :type repeat: int
1445         """
1446
1447         logs = list()
1448
1449         logs.append(
1450             (u"INFO", f"  Processing the job/build: {job}: {build[u'build']}")
1451         )
1452
1453         state = u"failed"
1454         success = False
1455         data = None
1456         do_repeat = repeat
1457         while do_repeat:
1458             success = download_and_unzip_data_file(self._cfg, job, build, pid,
1459                                                    logs)
1460             if success:
1461                 break
1462             do_repeat -= 1
1463         if not success:
1464             logs.append(
1465                 (u"ERROR",
                 f"The input data file from the job {job}, build "
                 f"{build[u'build']} could not be downloaded or it is damaged. "
                 f"Skipped.")
1469             )
1470         if success:
1471             logs.append(
1472                 (u"INFO",
1473                  f"    Processing data from the build {build[u'build']} ...")
1474             )
1475             data = self._parse_tests(job, build, logs)
1476             if data is None:
1477                 logs.append(
1478                     (u"ERROR",
1479                      f"Input data file from the job {job}, build "
1480                      f"{build[u'build']} is damaged. Skipped.")
1481                 )
1482             else:
1483                 state = u"processed"
1484
1485             try:
1486                 remove(build[u"file-name"])
1487             except OSError as err:
1488                 logs.append(
                    (u"ERROR", f"Cannot remove the file {build[u'file-name']}: "
                               f"{repr(err)}")
1491                 )
1492
1493         # If the time-period is defined in the specification file, remove all
1494         # files which are outside the time period.
1495         timeperiod = self._cfg.input.get(u"time-period", None)
1496         if timeperiod and data:
1497             now = dt.utcnow()
1498             timeperiod = timedelta(int(timeperiod))
1499             metadata = data.get(u"metadata", None)
1500             if metadata:
1501                 generated = metadata.get(u"generated", None)
1502                 if generated:
1503                     generated = dt.strptime(generated, u"%Y%m%d %H:%M")
1504                     if (now - generated) > timeperiod:
1505                         # Remove the data and the file:
1506                         state = u"removed"
1507                         data = None
1508                         logs.append(
1509                             (u"INFO",
1510                              f"    The build {job}/{build[u'build']} is "
1511                              f"outdated, will be removed.")
1512                         )
1513         logs.append((u"INFO", u"  Done."))
1514
1515         for level, line in logs:
1516             if level == u"INFO":
1517                 logging.info(line)
1518             elif level == u"ERROR":
1519                 logging.error(line)
1520             elif level == u"DEBUG":
1521                 logging.debug(line)
1522             elif level == u"CRITICAL":
1523                 logging.critical(line)
1524             elif level == u"WARNING":
1525                 logging.warning(line)
1526
1527         return {u"data": data, u"state": state, u"job": job, u"build": build}
1528
1529     def download_and_parse_data(self, repeat=1):
1530         """Download the input data files, parse input data from input files and
1531         store in pandas' Series.
1532
        :param repeat: Repeat the download the specified number of times if it
            is not successful.
1535         :type repeat: int
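
        A minimal usage sketch (the specification object spec and the
        constructor call are assumptions, as the constructor is not shown
        here)::

            input_data = InputData(spec)
            input_data.download_and_parse_data(repeat=2)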
1536         """
1537
1538         logging.info(u"Downloading and parsing input files ...")
1539
1540         for job, builds in self._cfg.builds.items():
1541             for build in builds:
1542
1543                 result = self._download_and_parse_build(job, build, repeat)
1544                 build_nr = result[u"build"][u"build"]
1545
1546                 if result[u"data"]:
1547                     data = result[u"data"]
1548                     build_data = pd.Series({
1549                         u"metadata": pd.Series(
1550                             list(data[u"metadata"].values()),
1551                             index=list(data[u"metadata"].keys())
1552                         ),
1553                         u"suites": pd.Series(
1554                             list(data[u"suites"].values()),
1555                             index=list(data[u"suites"].keys())
1556                         ),
1557                         u"tests": pd.Series(
1558                             list(data[u"tests"].values()),
1559                             index=list(data[u"tests"].keys())
1560                         )
1561                     })
1562
1563                     if self._input_data.get(job, None) is None:
1564                         self._input_data[job] = pd.Series()
1565                     self._input_data[job][str(build_nr)] = build_data
1566
1567                     self._cfg.set_input_file_name(
1568                         job, build_nr, result[u"build"][u"file-name"])
1569
1570                 self._cfg.set_input_state(job, build_nr, result[u"state"])
1571
1572                 mem_alloc = \
1573                     resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000
1574                 logging.info(f"Memory allocation: {mem_alloc:.0f}MB")
1575
1576         logging.info(u"Done.")
1577
1578     def process_local_file(self, local_file, job=u"local", build_nr=1,
1579                            replace=True):
1580         """Process local XML file given as a command-line parameter.
1581
1582         :param local_file: The file to process.
1583         :param job: Job name.
1584         :param build_nr: Build number.
1585         :param replace: If True, the information about jobs and builds is
1586             replaced by the new one, otherwise the new jobs and builds are
1587             added.
1588         :type local_file: str
1589         :type job: str
1590         :type build_nr: int
1591         :type replace: bool
        :raises PresentationError: If the given file does not exist or cannot
            be parsed.
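
        Example (the file name is illustrative)::

            input_data.process_local_file(u"output.xml", job=u"local",
                                          build_nr=1)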
1593         """
1594         if not isfile(local_file):
1595             raise PresentationError(f"The file {local_file} does not exist.")
1596
1597         build = {
1598             u"build": build_nr,
1599             u"status": u"failed",
1600             u"file-name": local_file
1601         }
1602         if replace:
1603             self._cfg.builds = dict()
1604         self._cfg.add_build(job, build)
1605
1606         logging.info(f"Processing {job}: {build_nr:2d}: {local_file}")
1607         data = self._parse_tests(job, build, list())
1608         if data is None:
1609             raise PresentationError(
1610                 f"Error occurred while parsing the file {local_file}"
1611             )
1612
1613         build_data = pd.Series({
1614             u"metadata": pd.Series(
1615                 list(data[u"metadata"].values()),
1616                 index=list(data[u"metadata"].keys())
1617             ),
1618             u"suites": pd.Series(
1619                 list(data[u"suites"].values()),
1620                 index=list(data[u"suites"].keys())
1621             ),
1622             u"tests": pd.Series(
1623                 list(data[u"tests"].values()),
1624                 index=list(data[u"tests"].keys())
1625             )
1626         })
1627
1628         if self._input_data.get(job, None) is None:
1629             self._input_data[job] = pd.Series()
1630         self._input_data[job][str(build_nr)] = build_data
1631
1632         self._cfg.set_input_state(job, build_nr, u"processed")
1633
1634     def process_local_directory(self, local_dir, replace=True):
        """Process a local directory with XML file(s). The directory is
        processed as a 'job' and the XML files in it as builds.
        If the given directory contains only sub-directories, these
        sub-directories are processed as jobs and the corresponding XML files
        as builds of their jobs.
1640
1641         :param local_dir: Local directory to process.
1642         :param replace: If True, the information about jobs and builds is
1643             replaced by the new one, otherwise the new jobs and builds are
1644             added.
1645         :type local_dir: str
        :type replace: bool
        :raises PresentationError: If the directory does not exist, is empty,
            or mixes files and sub-directories.
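
        The two accepted layouts (names are illustrative)::

            local_dir/            local_dir/
                build_1.xml           job_1/
                build_2.xml               build_1.xml
                                      job_2/
                                          build_1.xml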
1647         """
1648         if not isdir(local_dir):
1649             raise PresentationError(
1650                 f"The directory {local_dir} does not exist."
1651             )
1652
1653         # Check if the given directory includes only files, or only directories
1654         _, dirnames, filenames = next(walk(local_dir))
1655
1656         if filenames and not dirnames:
1657             filenames.sort()
1658             # local_builds:
1659             # key: dir (job) name, value: list of file names (builds)
1660             local_builds = {
1661                 local_dir: [join(local_dir, name) for name in filenames]
1662             }
1663
1664         elif dirnames and not filenames:
1665             dirnames.sort()
1666             # local_builds:
1667             # key: dir (job) name, value: list of file names (builds)
1668             local_builds = dict()
1669             for dirname in dirnames:
1670                 builds = [
1671                     join(local_dir, dirname, name)
1672                     for name in listdir(join(local_dir, dirname))
1673                     if isfile(join(local_dir, dirname, name))
1674                 ]
1675                 if builds:
1676                     local_builds[dirname] = sorted(builds)
1677
1678         elif not filenames and not dirnames:
1679             raise PresentationError(f"The directory {local_dir} is empty.")
1680         else:
1681             raise PresentationError(
1682                 f"The directory {local_dir} can include only files or only "
1683                 f"directories, not both.\nThe directory {local_dir} includes "
1684                 f"file(s):\n{filenames}\nand directories:\n{dirnames}"
1685             )
1686
1687         if replace:
1688             self._cfg.builds = dict()
1689
1690         for job, files in local_builds.items():
1691             for idx, local_file in enumerate(files):
1692                 self.process_local_file(local_file, job, idx + 1, replace=False)
1693
1694     @staticmethod
1695     def _end_of_tag(tag_filter, start=0, closer=u"'"):
        """Return the index of the character in the string which ends the tag.

        :param tag_filter: The string in which the end of the tag is searched
            for.
        :param start: The index at which the search is started.
        :param closer: The character which closes the tag.
        :type tag_filter: str
        :type start: int
        :type closer: str
        :returns: The index of the tag closer, or None if no complete tag is
            found.
        :rtype: int
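
        A doctest-style illustration (the tag is an arbitrary example; the
        returned index points at the apostrophe closing the first tag)::

            >>> InputData._end_of_tag(u"'64B' and '1T1C'")
            4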
1706         """
1707         try:
1708             idx_opener = tag_filter.index(closer, start)
1709             return tag_filter.index(closer, idx_opener + 1)
1710         except ValueError:
1711             return None
1712
1713     @staticmethod
1714     def _condition(tag_filter):
1715         """Create a conditional statement from the given tag filter.
1716
1717         :param tag_filter: Filter based on tags from the element specification.
1718         :type tag_filter: str
1719         :returns: Conditional statement which can be evaluated.
1720         :rtype: str
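
        A doctest-style illustration (the tags are arbitrary examples)::

            >>> InputData._condition(u"'64B' and '1T1C'")
            "'64B' in tags and '1T1C' in tags"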
1721         """
1722         index = 0
1723         while True:
1724             index = InputData._end_of_tag(tag_filter, index)
1725             if index is None:
1726                 return tag_filter
1727             index += 1
1728             tag_filter = tag_filter[:index] + u" in tags" + tag_filter[index:]
1729
1730     def filter_data(self, element, params=None, data=None, data_set=u"tests",
1731                     continue_on_error=False):
1732         """Filter required data from the given jobs and builds.
1733
1734         The output data structure is:
1735         - job 1
1736           - build 1
1737             - test (or suite) 1 ID:
1738               - param 1
1739               - param 2
1740               ...
1741               - param n
1742             ...
1743             - test (or suite) n ID:
1744             ...
1745           ...
1746           - build n
1747         ...
1748         - job n
1749
1750         :param element: Element which will use the filtered data.
1751         :param params: Parameters which will be included in the output. If None,
1752             all parameters are included.
1753         :param data: If not None, this data is used instead of data specified
1754             in the element.
1755         :param data_set: The set of data to be filtered: tests, suites,
1756             metadata.
        :param continue_on_error: Continue if an error occurs while reading
            the data. The item will be empty in that case.
        :type element: pandas.Series
        :type params: list
        :type data: dict
        :type data_set: str
        :type continue_on_error: bool
        :returns: Filtered data.
        :rtype: pandas.Series
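
        A minimal usage sketch (the job name, build number, filter and
        parameter are illustrative)::

            element = pd.Series({
                u"data": {u"csit-vpp-perf-mrr-daily-master": [1, ]},
                u"filter": u"'64B' and '1T1C'",
                u"parameters": [u"throughput", ]
            })
            filtered = input_data.filter_data(element)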
1766         """
1767
1768         try:
            if data_set == u"suites":
1770                 cond = u"True"
1771             elif element[u"filter"] in (u"all", u"template"):
1772                 cond = u"True"
1773             else:
1774                 cond = InputData._condition(element[u"filter"])
1775             logging.debug(f"   Filter: {cond}")
1776         except KeyError:
1777             logging.error(u"  No filter defined.")
1778             return None
1779
1780         if params is None:
1781             params = element.get(u"parameters", None)
1782             if params:
1783                 params.append(u"type")
1784
1785         data_to_filter = data if data else element[u"data"]
1786         data = pd.Series()
1787         try:
1788             for job, builds in data_to_filter.items():
1789                 data[job] = pd.Series()
1790                 for build in builds:
1791                     data[job][str(build)] = pd.Series()
1792                     try:
1793                         data_dict = dict(
1794                             self.data[job][str(build)][data_set].items())
1795                     except KeyError:
1796                         if continue_on_error:
1797                             continue
1798                         return None
1799
1800                     for test_id, test_data in data_dict.items():
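                        # Evaluate the tag-filter condition; u"tags" is the
                        # only name made available to the expression.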
1801                         if eval(cond, {u"tags": test_data.get(u"tags", u"")}):
1802                             data[job][str(build)][test_id] = pd.Series()
1803                             if params is None:
1804                                 for param, val in test_data.items():
1805                                     data[job][str(build)][test_id][param] = val
1806                             else:
1807                                 for param in params:
1808                                     try:
1809                                         data[job][str(build)][test_id][param] =\
1810                                             test_data[param]
1811                                     except KeyError:
1812                                         data[job][str(build)][test_id][param] =\
1813                                             u"No Data"
1814             return data
1815
1816         except (KeyError, IndexError, ValueError) as err:
1817             logging.error(
1818                 f"Missing mandatory parameter in the element specification: "
1819                 f"{repr(err)}"
1820             )
1821             return None
1822         except AttributeError as err:
1823             logging.error(repr(err))
1824             return None
1825         except SyntaxError as err:
1826             logging.error(
1827                 f"The filter {cond} is not correct. Check if all tags are "
1828                 f"enclosed by apostrophes.\n{repr(err)}"
1829             )
1830             return None
1831
1832     def filter_tests_by_name(self, element, params=None, data_set=u"tests",
1833                              continue_on_error=False):
        """Filter required data from the given jobs and builds, selecting
        tests by the names given in the element's 'include' list.
1835
1836         The output data structure is:
1837         - job 1
1838           - build 1
1839             - test (or suite) 1 ID:
1840               - param 1
1841               - param 2
1842               ...
1843               - param n
1844             ...
1845             - test (or suite) n ID:
1846             ...
1847           ...
1848           - build n
1849         ...
1850         - job n
1851
        :param element: Element which will use the filtered data.
        :param params: Parameters which will be included in the output. If None,
            all parameters are included.
        :param data_set: The set of data to be filtered: tests, suites,
            metadata.
        :param continue_on_error: Continue if an error occurs while reading
            the data. The item will be empty in that case.
        :type element: pandas.Series
        :type params: list
        :type data_set: str
        :type continue_on_error: bool
        :returns: Filtered data.
        :rtype: pandas.Series
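
        A minimal usage sketch (the include pattern is an illustrative
        regular expression matched against lower-cased test IDs)::

            element = pd.Series({
                u"data": {u"csit-vpp-perf-mrr-daily-master": [1, ]},
                u"include": [u".*64b-1t1c-.*-mrr$", ],
                u"parameters": [u"result", ]
            })
            filtered = input_data.filter_tests_by_name(element)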
1865         """
1866
1867         include = element.get(u"include", None)
1868         if not include:
1869             logging.warning(u"No tests to include, skipping the element.")
1870             return None
1871
1872         if params is None:
1873             params = element.get(u"parameters", None)
1874             if params:
1875                 params.append(u"type")
1876
1877         data = pd.Series()
1878         try:
1879             for job, builds in element[u"data"].items():
1880                 data[job] = pd.Series()
1881                 for build in builds:
1882                     data[job][str(build)] = pd.Series()
1883                     for test in include:
1884                         try:
1885                             reg_ex = re.compile(str(test).lower())
1886                             for test_id in self.data[job][
1887                                     str(build)][data_set].keys():
1888                                 if re.match(reg_ex, str(test_id).lower()):
1889                                     test_data = self.data[job][
1890                                         str(build)][data_set][test_id]
1891                                     data[job][str(build)][test_id] = pd.Series()
1892                                     if params is None:
1893                                         for param, val in test_data.items():
1894                                             data[job][str(build)][test_id]\
1895                                                 [param] = val
1896                                     else:
1897                                         for param in params:
1898                                             try:
1899                                                 data[job][str(build)][
1900                                                     test_id][param] = \
1901                                                     test_data[param]
1902                                             except KeyError:
1903                                                 data[job][str(build)][
1904                                                     test_id][param] = u"No Data"
1905                         except KeyError as err:
1906                             logging.error(repr(err))
1907                             if continue_on_error:
1908                                 continue
1909                             return None
1910             return data
1911
1912         except (KeyError, IndexError, ValueError) as err:
1913             logging.error(
1914                 f"Missing mandatory parameter in the element "
1915                 f"specification: {repr(err)}"
1916             )
1917             return None
1918         except AttributeError as err:
1919             logging.error(repr(err))
1920             return None
1921
1922     @staticmethod
1923     def merge_data(data):
        """Merge data from multiple jobs and builds into a simple data
        structure.
1925
1926         The output data structure is:
1927
1928         - test (suite) 1 ID:
1929           - param 1
1930           - param 2
1931           ...
1932           - param n
1933         ...
1934         - test (suite) n ID:
1935         ...
1936
1937         :param data: Data to merge.
1938         :type data: pandas.Series
1939         :returns: Merged data.
1940         :rtype: pandas.Series
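
        Example (continues the filter_data sketch above; the job and build
        levels are collapsed into a single series keyed by test ID)::

            merged = InputData.merge_data(filtered)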
1941         """
1942
1943         logging.info(u"    Merging data ...")
1944
1945         merged_data = pd.Series()
1946         for builds in data.values:
1947             for item in builds.values:
1948                 for item_id, item_data in item.items():
1949                     merged_data[item_id] = item_data
1950         return merged_data
1951
1952     def print_all_oper_data(self):
        """Print all operational data to the console.
        """
1955
1956         tbl_hdr = (
1957             u"Name",
1958             u"Nr of Vectors",
1959             u"Nr of Packets",
1960             u"Suspends",
1961             u"Cycles per Packet",
1962             u"Average Vector Size"
1963         )
1964
1965         for job in self._input_data.values:
1966             for build in job.values:
1967                 for test_id, test_data in build[u"tests"].items():
1968                     print(f"{test_id}")
1969                     if test_data.get(u"show-run", None) is None:
1970                         continue
1971                     for dut_name, data in test_data[u"show-run"].items():
1972                         if data.get(u"threads", None) is None:
1973                             continue
1974                         print(f"Host IP: {data.get(u'host', '')}, "
1975                               f"Socket: {data.get(u'socket', '')}")
1976                         for thread_nr, thread in data[u"threads"].items():
1977                             txt_table = prettytable.PrettyTable(tbl_hdr)
1978                             avg = 0.0
1979                             for row in thread:
1980                                 txt_table.add_row(row)
1981                                 avg += row[-1]
1982                             if len(thread) == 0:
1983                                 avg = u""
1984                             else:
1985                                 avg = f", Average Vector Size per Node: " \
1986                                       f"{(avg / len(thread)):.2f}"
1987                             th_name = u"main" if thread_nr == 0 \
1988                                 else f"worker_{thread_nr}"
1989                             print(f"{dut_name}, {th_name}{avg}")
1990                             txt_table.float_format = u".2"
1991                             txt_table.align = u"r"
1992                             txt_table.align[u"Name"] = u"l"
1993                             print(f"{txt_table.get_string()}\n")