# Copyright (c) 2021 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

14 """Data pre-processing
15
16 - extract data from output.xml files generated by Jenkins jobs and store in
17   pandas' Series,
18 - provide access to the data.
19 - filter the data using tags,
20 """
21
import re
import copy
import resource
import logging

from collections import OrderedDict
from os import remove, walk, listdir
from os.path import isfile, isdir, join
from datetime import datetime as dt
from datetime import timedelta
from json import loads
from json.decoder import JSONDecodeError

import hdrh.histogram
import hdrh.codec
import prettytable
import pandas as pd

from robot.api import ExecutionResult, ResultVisitor
from robot import errors

from resources.libraries.python import jumpavg
from input_data_files import download_and_unzip_data_file
from pal_errors import PresentationError


# Separator used in file names
SEPARATOR = u"__"


class ExecutionChecker(ResultVisitor):
    """Class to traverse through the test suite structure.

    The functionality implemented in this class generates a JSON structure:

    Performance tests:

    {
        "metadata": {
            "generated": "Timestamp",
            "version": "SUT version",
            "job": "Jenkins job name",
            "build": "Information about the build"
        },
        "suites": {
            "Suite long name 1": {
                "name": "Suite name",
                "doc": "Suite 1 documentation",
                "parent": "Suite 1 parent",
                "level": "Level of the suite in the suite hierarchy"
            },
            "Suite long name N": {
                "name": "Suite name",
                "doc": "Suite N documentation",
                "parent": "Suite N parent",
                "level": "Level of the suite in the suite hierarchy"
            }
        },
        "tests": {
            # NDRPDR tests:
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "conf-history": "DUT1 and DUT2 VAT History",
                "show-run": "Show Run",
                "tags": ["tag 1", "tag 2", "tag n"],
                "type": "NDRPDR",
                "status": "PASS" | "FAIL",
                "throughput": {
                    "NDR": {
                        "LOWER": float,
                        "UPPER": float
                    },
                    "PDR": {
                        "LOWER": float,
                        "UPPER": float
                    }
                },
                "latency": {
                    "NDR": {
                        "direction1": {
                            "min": float,
                            "avg": float,
                            "max": float,
                            "hdrh": str
                        },
                        "direction2": {
                            "min": float,
                            "avg": float,
                            "max": float,
                            "hdrh": str
                        }
                    },
                    "PDR": {
                        "direction1": {
                            "min": float,
                            "avg": float,
                            "max": float,
                            "hdrh": str
                        },
                        "direction2": {
                            "min": float,
                            "avg": float,
                            "max": float,
                            "hdrh": str
                        }
                    }
                }
            },

            # TCP tests:
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "tags": ["tag 1", "tag 2", "tag n"],
                "type": "TCP",
                "status": "PASS" | "FAIL",
                "result": int
            },

            # MRR, BMRR tests:
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "tags": ["tag 1", "tag 2", "tag n"],
                "type": "MRR" | "BMRR",
                "status": "PASS" | "FAIL",
                "result": {
                    "receive-rate": float,
                    # Average of a list, computed using AvgStdevStats.
                    # In CSIT-1180, replace with List[float].
                }
            },

            "ID": {
                # next test
            }
        }
    }


    Functional tests:

    {
        "metadata": {  # Optional
            "version": "VPP version",
            "job": "Jenkins job name",
            "build": "Information about the build"
        },
        "suites": {
            "Suite name 1": {
                "doc": "Suite 1 documentation",
                "parent": "Suite 1 parent",
                "level": "Level of the suite in the suite hierarchy"
            },
            "Suite name N": {
                "doc": "Suite N documentation",
                "parent": "Suite N parent",
                "level": "Level of the suite in the suite hierarchy"
            }
        },
        "tests": {
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "tags": ["tag 1", "tag 2", "tag n"],
                "conf-history": "DUT1 and DUT2 VAT History",
                "show-run": "Show Run",
                "status": "PASS" | "FAIL"
            },
            "ID": {
                # next test
            }
        }
    }

    .. note:: ID is the lowercase full path to the test.
    """
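    # A minimal, hypothetical instance of the structure documented above
    # (an MRR test; all names and values are illustrative only):
    #
    #     {
    #         "metadata": {"job": "a-job", "build": "1"},
    #         "suites": {...},
    #         "tests": {
    #             "tests.a.suite.mrr-test": {
    #                 "name": "mrr-test",
    #                 "type": "MRR",
    #                 "status": "PASS",
    #                 "result": {"receive-rate": 12.34}
    #             }
    #         }
    #     }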

    REGEX_PLR_RATE = re.compile(
        r'PLRsearch lower bound::?\s(\d+.\d+).*\n'
        r'PLRsearch upper bound::?\s(\d+.\d+)'
    )
    REGEX_NDRPDR_RATE = re.compile(
        r'NDR_LOWER:\s(\d+.\d+).*\n.*\n'
        r'NDR_UPPER:\s(\d+.\d+).*\n'
        r'PDR_LOWER:\s(\d+.\d+).*\n.*\n'
        r'PDR_UPPER:\s(\d+.\d+)'
    )
    REGEX_NDRPDR_GBPS = re.compile(
        r'NDR_LOWER:.*,\s(\d+.\d+).*\n.*\n'
        r'NDR_UPPER:.*,\s(\d+.\d+).*\n'
        r'PDR_LOWER:.*,\s(\d+.\d+).*\n.*\n'
        r'PDR_UPPER:.*,\s(\d+.\d+)'
    )
    REGEX_PERF_MSG_INFO = re.compile(
        r'NDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*\n'
        r'PDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*\n'
        r'Latency at 90% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
        r'Latency at 50% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
        r'Latency at 10% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
    )
    REGEX_CPS_MSG_INFO = re.compile(
        r'NDR_LOWER:\s(\d+.\d+)\s.*\s.*\n.*\n.*\n'
        r'PDR_LOWER:\s(\d+.\d+)\s.*\s.*\n.*\n.*'
    )
    REGEX_PPS_MSG_INFO = re.compile(
        r'NDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*\n'
        r'PDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*'
    )
    REGEX_MRR_MSG_INFO = re.compile(r'.*\[(.*)\]')

    REGEX_VSAP_MSG_INFO = re.compile(
        r'Transfer Rate: (\d*.\d*).*\n'
        r'Latency: (\d*.\d*).*\n'
        r'Completed requests: (\d*).*\n'
        r'Failed requests: (\d*).*\n'
        r'Total data transferred: (\d*).*\n'
        r'Connection [cr]ps rate:\s*(\d*.\d*)'
    )

    # Needed for CPS and PPS tests
    REGEX_NDRPDR_LAT_BASE = re.compile(
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]'
    )
    REGEX_NDRPDR_LAT = re.compile(
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]'
    )

    REGEX_VERSION_VPP = re.compile(
        r"(return STDOUT Version:\s*|"
        r"VPP Version:\s*|VPP version:\s*)(.*)"
    )
    REGEX_VERSION_DPDK = re.compile(
        r"(DPDK version:\s*|DPDK Version:\s*)(.*)"
    )
    REGEX_TCP = re.compile(
        r'Total\s(rps|cps|throughput):\s(\d*).*$'
    )
    REGEX_MRR = re.compile(
        r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s'
        r'tx\s(\d*),\srx\s(\d*)'
    )
    REGEX_BMRR = re.compile(
        r'.*trial results.*: \[(.*)\]'
    )
    REGEX_RECONF_LOSS = re.compile(
        r'Packets lost due to reconfig: (\d*)'
    )
    REGEX_RECONF_TIME = re.compile(
        r'Implied time lost: (\d*.[\de-]*)'
    )
    REGEX_TC_TAG = re.compile(r'\d+[tT]\d+[cC]')

    REGEX_TC_NAME_OLD = re.compile(r'-\d+[tT]\d+[cC]-')

    REGEX_TC_NAME_NEW = re.compile(r'-\d+[cC]-')

    REGEX_TC_NUMBER = re.compile(r'tc\d{2}-')

    REGEX_TC_PAPI_CLI = re.compile(r'.*\((\d+.\d+.\d+.\d+.) - (.*)\)')

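    # Illustrative only: text of the shape REGEX_NDRPDR_RATE above expects
    # (values and filler lines are hypothetical):
    #
    #     NDR_LOWER: 12345678.5 pps (...)
    #     <one intervening line>
    #     NDR_UPPER: 12345679.5 pps (...)
    #     PDR_LOWER: 23456788.5 pps (...)
    #     <one intervening line>
    #     PDR_UPPER: 23456789.5 pps (...)
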
    def __init__(self, metadata, mapping, ignore, for_output):
        """Initialisation.

        :param metadata: Key-value pairs to be included in "metadata" part of
            JSON structure.
        :param mapping: Mapping of the old names of test cases to the new
            (actual) ones.
        :param ignore: List of TCs to be ignored.
        :param for_output: Output to be generated from downloaded data.
        :type metadata: dict
        :type mapping: dict
        :type ignore: list
        :type for_output: str
        """

        # Type of message to parse out from the test messages
        self._msg_type = None

        # VPP version
        self._version = None

        # Timestamp
        self._timestamp = None

        # Testbed. The testbed is identified by TG node IP address.
        self._testbed = None

        # Mapping of TCs long names
        self._mapping = mapping

        # Ignore list
        self._ignore = ignore

        self._for_output = for_output

        # Number of PAPI History messages found:
        # 0 - no message
        # 1 - PAPI History of DUT1
        # 2 - PAPI History of DUT2
        self._conf_history_lookup_nr = 0

        self._sh_run_counter = 0

        # Test ID of currently processed test - the lowercase full path to
        # the test
        self._test_id = None

        # The main data structure
        self._data = {
            u"metadata": OrderedDict(),
            u"suites": OrderedDict(),
            u"tests": OrderedDict()
        }

        # Save the provided metadata
        for key, val in metadata.items():
            self._data[u"metadata"][key] = val

        # Dictionary defining the methods used to parse different types of
        # messages
        self.parse_msg = {
            u"timestamp": self._get_timestamp,
            u"vpp-version": self._get_vpp_version,
            u"dpdk-version": self._get_dpdk_version,
            u"teardown-papi-history": self._get_papi_history,
            u"test-show-runtime": self._get_show_run,
            u"testbed": self._get_testbed
        }

    @property
    def data(self):
        """Getter - Data parsed from the XML file.

        :returns: Data parsed from the XML file.
        :rtype: dict
        """
        return self._data

    def _get_data_from_mrr_test_msg(self, msg):
        """Get info from message of MRR performance tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or "Test Failed." if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_MRR_MSG_INFO, msg)
        if not groups or groups.lastindex != 1:
            return u"Test Failed."

        try:
            data = groups.group(1).split(u", ")
        except (AttributeError, IndexError, ValueError, KeyError):
            return u"Test Failed."

        out_str = u"["
        try:
            for item in data:
                out_str += f"{(float(item) / 1e6):.2f}, "
            return out_str[:-2] + u"]"
        except (AttributeError, IndexError, ValueError, KeyError):
            return u"Test Failed."

    def _get_data_from_cps_test_msg(self, msg):
        """Get info from message of NDRPDR CPS tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or "Test Failed." if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_CPS_MSG_INFO, msg)
        if not groups or groups.lastindex != 2:
            return u"Test Failed."

        try:
            return (
                f"1. {(float(groups.group(1)) / 1e6):5.2f}\n"
                f"2. {(float(groups.group(2)) / 1e6):5.2f}"
            )
        except (AttributeError, IndexError, ValueError, KeyError):
            return u"Test Failed."

    def _get_data_from_pps_test_msg(self, msg):
        """Get info from message of NDRPDR PPS tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or "Test Failed." if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_PPS_MSG_INFO, msg)
        if not groups or groups.lastindex != 4:
            return u"Test Failed."

        try:
            return (
                f"1. {(float(groups.group(1)) / 1e6):5.2f}      "
                f"{float(groups.group(2)):5.2f}\n"
                f"2. {(float(groups.group(3)) / 1e6):5.2f}      "
                f"{float(groups.group(4)):5.2f}"
            )
        except (AttributeError, IndexError, ValueError, KeyError):
            return u"Test Failed."

    def _get_data_from_perf_test_msg(self, msg):
        """Get info from message of NDRPDR performance tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or "Test Failed." if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_PERF_MSG_INFO, msg)
        if not groups or groups.lastindex != 10:
            return u"Test Failed."

        try:
            data = {
                u"ndr_low": float(groups.group(1)),
                u"ndr_low_b": float(groups.group(2)),
                u"pdr_low": float(groups.group(3)),
                u"pdr_low_b": float(groups.group(4)),
                u"pdr_lat_90_1": groups.group(5),
                u"pdr_lat_90_2": groups.group(6),
                u"pdr_lat_50_1": groups.group(7),
                u"pdr_lat_50_2": groups.group(8),
                u"pdr_lat_10_1": groups.group(9),
                u"pdr_lat_10_2": groups.group(10),
            }
        except (AttributeError, IndexError, ValueError, KeyError):
            return u"Test Failed."

        def _process_lat(in_str_1, in_str_2):
            """Extract min, avg, max values from latency string.

            :param in_str_1: Latency string for one direction produced by robot
                framework.
            :param in_str_2: Latency string for second direction produced by
                robot framework.
            :type in_str_1: str
            :type in_str_2: str
            :returns: Processed latency string or None if a problem occurs.
            :rtype: tuple
            """
            in_list_1 = in_str_1.split('/', 3)
            in_list_2 = in_str_2.split('/', 3)

            if len(in_list_1) != 4 or len(in_list_2) != 4:
                return None

            # Pad the base64 hdrh string to a multiple of four characters.
            in_list_1[3] += u"=" * ((4 - len(in_list_1[3]) % 4) % 4)
            try:
                hdr_lat_1 = hdrh.histogram.HdrHistogram.decode(in_list_1[3])
            except hdrh.codec.HdrLengthException:
                return None

            in_list_2[3] += u"=" * ((4 - len(in_list_2[3]) % 4) % 4)
            try:
                hdr_lat_2 = hdrh.histogram.HdrHistogram.decode(in_list_2[3])
            except hdrh.codec.HdrLengthException:
                return None

            if hdr_lat_1 and hdr_lat_2:
                hdr_lat = (
                    hdr_lat_1.get_value_at_percentile(50.0),
                    hdr_lat_1.get_value_at_percentile(90.0),
                    hdr_lat_1.get_value_at_percentile(99.0),
                    hdr_lat_2.get_value_at_percentile(50.0),
                    hdr_lat_2.get_value_at_percentile(90.0),
                    hdr_lat_2.get_value_at_percentile(99.0)
                )

                if all(hdr_lat):
                    return hdr_lat

            return None

        try:
            out_msg = (
                f"1. {(data[u'ndr_low'] / 1e6):5.2f}      "
                f"{data[u'ndr_low_b']:5.2f}"
                f"\n2. {(data[u'pdr_low'] / 1e6):5.2f}      "
                f"{data[u'pdr_low_b']:5.2f}"
            )
            latency = (
                _process_lat(data[u'pdr_lat_10_1'], data[u'pdr_lat_10_2']),
                _process_lat(data[u'pdr_lat_50_1'], data[u'pdr_lat_50_2']),
                _process_lat(data[u'pdr_lat_90_1'], data[u'pdr_lat_90_2'])
            )
            if all(latency):
                max_len = len(str(max((max(item) for item in latency))))
                max_len = 4 if max_len < 4 else max_len

                for idx, lat in enumerate(latency):
                    if not idx:
                        out_msg += u"\n"
                    out_msg += (
                        f"\n{idx + 3}. "
                        f"{lat[0]:{max_len}d} "
                        f"{lat[1]:{max_len}d} "
                        f"{lat[2]:{max_len}d}      "
                        f"{lat[3]:{max_len}d} "
                        f"{lat[4]:{max_len}d} "
                        f"{lat[5]:{max_len}d} "
                    )

            return out_msg

        except (AttributeError, IndexError, ValueError, KeyError):
            return u"Test Failed."

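    # Shape of the latency strings consumed by _process_lat above (values are
    # hypothetical): u"10/15/25/HISTFAA..." - min/avg/max followed by a
    # base64-encoded HdrHistogram ("hdrh") blob.
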
    def _get_testbed(self, msg):
        """Called when extraction of testbed IP is required.
        The testbed is identified by TG node IP address.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count(u"Setup of TG node") or \
                msg.message.count(u"Setup of node TG host"):
            reg_tg_ip = re.compile(
                r'.*TG .* (\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}).*')
            try:
                self._testbed = str(re.search(reg_tg_ip, msg.message).group(1))
            except (KeyError, ValueError, IndexError, AttributeError):
                pass
            finally:
                self._data[u"metadata"][u"testbed"] = self._testbed
                self._msg_type = None

    def _get_vpp_version(self, msg):
        """Called when extraction of VPP version is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count(u"return STDOUT Version:") or \
                msg.message.count(u"VPP Version:") or \
                msg.message.count(u"VPP version:"):
            self._version = str(
                re.search(self.REGEX_VERSION_VPP, msg.message).group(2)
            )
            self._data[u"metadata"][u"version"] = self._version
            self._msg_type = None

    def _get_dpdk_version(self, msg):
        """Called when extraction of DPDK version is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count(u"DPDK Version:"):
            try:
                self._version = str(re.search(
                    self.REGEX_VERSION_DPDK, msg.message).group(2))
                self._data[u"metadata"][u"version"] = self._version
            except IndexError:
                pass
            finally:
                self._msg_type = None

    def _get_timestamp(self, msg):
        """Called when extraction of timestamp is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        self._timestamp = msg.timestamp[:14]
        self._data[u"metadata"][u"generated"] = self._timestamp
        self._msg_type = None

    def _get_papi_history(self, msg):
        """Called when extraction of PAPI command history is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if msg.message.count(u"PAPI command history:"):
            self._conf_history_lookup_nr += 1
            if self._conf_history_lookup_nr == 1:
                self._data[u"tests"][self._test_id][u"conf-history"] = str()
            else:
                self._msg_type = None
            text = re.sub(
                r"\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3} PAPI command history:",
                u"",
                msg.message,
                count=1
            ).replace(u'"', u"'")
            self._data[u"tests"][self._test_id][u"conf-history"] += (
                f"**DUT{str(self._conf_history_lookup_nr)}:** {text}"
            )

    def _get_show_run(self, msg):
        """Called when extraction of VPP operational data (output of CLI command
        Show Runtime) is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if not msg.message.count(u"stats runtime"):
            return

        # Temporary solution
        if self._sh_run_counter > 1:
            return

        if u"show-run" not in self._data[u"tests"][self._test_id].keys():
            self._data[u"tests"][self._test_id][u"show-run"] = dict()

        groups = re.search(self.REGEX_TC_PAPI_CLI, msg.message)
        if not groups:
            return
        try:
            host = groups.group(1)
        except (AttributeError, IndexError):
            host = u""
        try:
            sock = groups.group(2)
        except (AttributeError, IndexError):
            sock = u""

        dut = u"dut{nr}".format(
            nr=len(self._data[u'tests'][self._test_id][u'show-run'].keys()) + 1)

        self._data[u'tests'][self._test_id][u'show-run'][dut] = \
            copy.copy(
                {
                    u"host": host,
                    u"socket": sock,
                    u"runtime": str(msg.message).replace(u' ', u'').
                                replace(u'\n', u'').replace(u"'", u'"').
                                replace(u'b"', u'"').replace(u'u"', u'"').
                                split(u":", 1)[1]
                }
            )

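    # REGEX_TC_PAPI_CLI above expects a message containing a fragment of the
    # (hypothetical) form u"...(10.0.0.1 - /run/vpp/api.sock)...", from which
    # the host IP and the socket path are extracted.
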
    def _get_ndrpdr_throughput(self, msg):
        """Get NDR_LOWER, NDR_UPPER, PDR_LOWER and PDR_UPPER from the test
        message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        throughput = {
            u"NDR": {u"LOWER": -1.0, u"UPPER": -1.0},
            u"PDR": {u"LOWER": -1.0, u"UPPER": -1.0}
        }
        status = u"FAIL"
        groups = re.search(self.REGEX_NDRPDR_RATE, msg)

        if groups is not None:
            try:
                throughput[u"NDR"][u"LOWER"] = float(groups.group(1))
                throughput[u"NDR"][u"UPPER"] = float(groups.group(2))
                throughput[u"PDR"][u"LOWER"] = float(groups.group(3))
                throughput[u"PDR"][u"UPPER"] = float(groups.group(4))
                status = u"PASS"
            except (IndexError, ValueError):
                pass

        return throughput, status

    def _get_ndrpdr_throughput_gbps(self, msg):
        """Get NDR_LOWER, NDR_UPPER, PDR_LOWER and PDR_UPPER in Gbps from the
        test message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        gbps = {
            u"NDR": {u"LOWER": -1.0, u"UPPER": -1.0},
            u"PDR": {u"LOWER": -1.0, u"UPPER": -1.0}
        }
        status = u"FAIL"
        groups = re.search(self.REGEX_NDRPDR_GBPS, msg)

        if groups is not None:
            try:
                gbps[u"NDR"][u"LOWER"] = float(groups.group(1))
                gbps[u"NDR"][u"UPPER"] = float(groups.group(2))
                gbps[u"PDR"][u"LOWER"] = float(groups.group(3))
                gbps[u"PDR"][u"UPPER"] = float(groups.group(4))
                status = u"PASS"
            except (IndexError, ValueError):
                pass

        return gbps, status

    def _get_plr_throughput(self, msg):
        """Get PLRsearch lower bound and PLRsearch upper bound from the test
        message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        throughput = {
            u"LOWER": -1.0,
            u"UPPER": -1.0
        }
        status = u"FAIL"
        groups = re.search(self.REGEX_PLR_RATE, msg)

        if groups is not None:
            try:
                throughput[u"LOWER"] = float(groups.group(1))
                throughput[u"UPPER"] = float(groups.group(2))
                status = u"PASS"
            except (IndexError, ValueError):
                pass

        return throughput, status

    def _get_ndrpdr_latency(self, msg):
        """Get LATENCY from the test message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """
        latency_default = {
            u"min": -1.0,
            u"avg": -1.0,
            u"max": -1.0,
            u"hdrh": u""
        }
        latency = {
            u"NDR": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"PDR": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"LAT0": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"PDR10": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"PDR50": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"PDR90": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
        }

        groups = re.search(self.REGEX_NDRPDR_LAT, msg)
        if groups is None:
            groups = re.search(self.REGEX_NDRPDR_LAT_BASE, msg)
        if groups is None:
            return latency, u"FAIL"

        def process_latency(in_str):
            """Return object with parsed latency values.

            TODO: Define class for the return type.

            :param in_str: Input string, min/avg/max/hdrh format.
            :type in_str: str
            :returns: Dict with corresponding keys; float values except hdrh,
                which stays a string.
            :rtype: dict
            :throws IndexError: If in_str does not have enough substrings.
            :throws ValueError: If a substring does not convert to float.
            """
            in_list = in_str.split('/', 3)

            rval = {
                u"min": float(in_list[0]),
                u"avg": float(in_list[1]),
                u"max": float(in_list[2]),
                u"hdrh": u""
            }

            if len(in_list) == 4:
                rval[u"hdrh"] = str(in_list[3])

            return rval

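        # E.g. (hypothetical values) process_latency(u"1/5/10/HISTFAA...")
        # returns {u"min": 1.0, u"avg": 5.0, u"max": 10.0,
        # u"hdrh": u"HISTFAA..."}.
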
        try:
            latency[u"NDR"][u"direction1"] = process_latency(groups.group(1))
            latency[u"NDR"][u"direction2"] = process_latency(groups.group(2))
            latency[u"PDR"][u"direction1"] = process_latency(groups.group(3))
            latency[u"PDR"][u"direction2"] = process_latency(groups.group(4))
            if groups.lastindex == 4:
                return latency, u"PASS"
        except (IndexError, ValueError):
            pass

        try:
            latency[u"PDR90"][u"direction1"] = process_latency(groups.group(5))
            latency[u"PDR90"][u"direction2"] = process_latency(groups.group(6))
            latency[u"PDR50"][u"direction1"] = process_latency(groups.group(7))
            latency[u"PDR50"][u"direction2"] = process_latency(groups.group(8))
            latency[u"PDR10"][u"direction1"] = process_latency(groups.group(9))
            latency[u"PDR10"][u"direction2"] = process_latency(groups.group(10))
            latency[u"LAT0"][u"direction1"] = process_latency(groups.group(11))
            latency[u"LAT0"][u"direction2"] = process_latency(groups.group(12))
            if groups.lastindex == 12:
                return latency, u"PASS"
        except (IndexError, ValueError):
            pass

        return latency, u"FAIL"

    @staticmethod
    def _get_hoststack_data(msg, tags):
        """Get data from the hoststack test message.

        :param msg: The test message to be parsed.
        :param tags: Test tags.
        :type msg: str
        :type tags: list
        :returns: Parsed data as a JSON dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """
        result = dict()
        status = u"FAIL"

        msg = msg.replace(u"'", u'"').replace(u" ", u"")
        if u"LDPRELOAD" in tags:
            try:
                result = loads(msg)
                status = u"PASS"
            except JSONDecodeError:
                pass
        elif u"VPPECHO" in tags:
            try:
                msg_lst = msg.replace(u"}{", u"} {").split(u" ")
                result = dict(
                    client=loads(msg_lst[0]),
                    server=loads(msg_lst[1])
                )
                status = u"PASS"
            except (JSONDecodeError, IndexError):
                pass

        return result, status

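    # For u"VPPECHO" tests the message is expected to carry two JSON objects
    # back to back - e.g. (hypothetical) u'{...client...}{...server...}' -
    # which _get_hoststack_data above splits into client and server parts.
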
    def _get_vsap_data(self, msg, tags):
        """Get data from the vsap test message.

        :param msg: The test message to be parsed.
        :param tags: Test tags.
        :type msg: str
        :type tags: list
        :returns: Parsed data as a JSON dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """
        result = dict()
        status = u"FAIL"

        groups = re.search(self.REGEX_VSAP_MSG_INFO, msg)
        if groups is not None:
            try:
                result[u"transfer-rate"] = float(groups.group(1)) * 1e3
                result[u"latency"] = float(groups.group(2))
                result[u"completed-requests"] = int(groups.group(3))
                result[u"failed-requests"] = int(groups.group(4))
                result[u"bytes-transferred"] = int(groups.group(5))
                if u"TCP_CPS" in tags:
                    result[u"cps"] = float(groups.group(6))
                elif u"TCP_RPS" in tags:
                    result[u"rps"] = float(groups.group(6))
                else:
                    return result, status
                status = u"PASS"
            except (IndexError, ValueError):
                pass

        return result, status

    def visit_suite(self, suite):
        """Implements traversing through the suite and its direct children.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """
        if self.start_suite(suite) is not False:
            suite.suites.visit(self)
            suite.tests.visit(self)
            self.end_suite(suite)

    def start_suite(self, suite):
        """Called when suite starts.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """

        try:
            parent_name = suite.parent.name
        except AttributeError:
            return

        self._data[u"suites"][suite.longname.lower().
                              replace(u'"', u"'").
                              replace(u" ", u"_")] = {
                                  u"name": suite.name.lower(),
                                  u"doc": suite.doc,
                                  u"parent": parent_name,
                                  u"level": len(suite.longname.split(u"."))
                              }

        suite.keywords.visit(self)

    def end_suite(self, suite):
        """Called when suite ends.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """

    def visit_test(self, test):
        """Implements traversing through the test.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """
        if self.start_test(test) is not False:
            test.keywords.visit(self)
            self.end_test(test)

    def start_test(self, test):
        """Called when test starts.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """

        self._sh_run_counter = 0

        longname_orig = test.longname.lower()

        # Check the ignore list
        if longname_orig in self._ignore:
            return

        tags = [str(tag) for tag in test.tags]
        test_result = dict()

        # Change the TC long name and name if defined in the mapping table
        longname = self._mapping.get(longname_orig, None)
        if longname is not None:
            name = longname.split(u'.')[-1]
            logging.debug(
                f"{self._data[u'metadata']}\n{longname_orig}\n{longname}\n"
                f"{name}"
            )
        else:
            longname = longname_orig
            name = test.name.lower()

        # Remove TC number from the TC long name (backward compatibility):
        self._test_id = re.sub(self.REGEX_TC_NUMBER, u"", longname)
        # Remove TC number from the TC name (not needed):
        test_result[u"name"] = re.sub(self.REGEX_TC_NUMBER, u"", name)

        test_result[u"parent"] = test.parent.name.lower()
        test_result[u"tags"] = tags
        test_result[u"doc"] = test.doc
        test_result[u"type"] = u""
        test_result[u"status"] = test.status
        test_result[u"starttime"] = test.starttime
        test_result[u"endtime"] = test.endtime

        if test.status == u"PASS":
            if u"NDRPDR" in tags:
                if u"TCP_PPS" in tags or u"UDP_PPS" in tags:
                    test_result[u"msg"] = self._get_data_from_pps_test_msg(
                        test.message)
                elif u"TCP_CPS" in tags or u"UDP_CPS" in tags:
                    test_result[u"msg"] = self._get_data_from_cps_test_msg(
                        test.message)
                else:
                    test_result[u"msg"] = self._get_data_from_perf_test_msg(
                        test.message)
            elif u"MRR" in tags or u"FRMOBL" in tags or u"BMRR" in tags:
                test_result[u"msg"] = self._get_data_from_mrr_test_msg(
                    test.message)
            else:
                test_result[u"msg"] = test.message
        else:
            test_result[u"msg"] = test.message

        if u"PERFTEST" in tags:
            # Replace info about cores (e.g. -1c-) with the info about threads
            # and cores (e.g. -1t1c-) in the long test case names and in the
            # test case names if necessary.
            groups = re.search(self.REGEX_TC_NAME_OLD, self._test_id)
            if not groups:
                tag_count = 0
                tag_tc = str()
                for tag in test_result[u"tags"]:
                    groups = re.search(self.REGEX_TC_TAG, tag)
                    if groups:
                        tag_count += 1
                        tag_tc = tag

                if tag_count == 1:
                    self._test_id = re.sub(
                        self.REGEX_TC_NAME_NEW, f"-{tag_tc.lower()}-",
                        self._test_id, count=1
                    )
                    test_result[u"name"] = re.sub(
                        self.REGEX_TC_NAME_NEW, f"-{tag_tc.lower()}-",
                        test_result[u"name"], count=1
                    )
                else:
                    test_result[u"status"] = u"FAIL"
                    self._data[u"tests"][self._test_id] = test_result
                    logging.debug(
                        f"The test {self._test_id} has either no or more than "
                        f"one multi-threading tag.\n"
                        f"Tags: {test_result[u'tags']}"
                    )
                    return

        if u"DEVICETEST" in tags:
            test_result[u"type"] = u"DEVICETEST"
        elif u"NDRPDR" in tags:
            if u"TCP_CPS" in tags or u"UDP_CPS" in tags:
                test_result[u"type"] = u"CPS"
            else:
                test_result[u"type"] = u"NDRPDR"
            if test.status == u"PASS":
                test_result[u"throughput"], test_result[u"status"] = \
                    self._get_ndrpdr_throughput(test.message)
                test_result[u"gbps"], test_result[u"status"] = \
                    self._get_ndrpdr_throughput_gbps(test.message)
                test_result[u"latency"], test_result[u"status"] = \
                    self._get_ndrpdr_latency(test.message)
        elif u"MRR" in tags or u"FRMOBL" in tags or u"BMRR" in tags:
            if u"MRR" in tags:
                test_result[u"type"] = u"MRR"
            else:
                test_result[u"type"] = u"BMRR"
            if test.status == u"PASS":
                test_result[u"result"] = dict()
                groups = re.search(self.REGEX_BMRR, test.message)
                if groups is not None:
                    items_str = groups.group(1)
                    items_float = [
                        float(item.strip().replace(u"'", u""))
                        for item in items_str.split(",")
                    ]
                    # Use whole list in CSIT-1180.
                    stats = jumpavg.AvgStdevStats.for_runs(items_float)
                    test_result[u"result"][u"samples"] = items_float
                    test_result[u"result"][u"receive-rate"] = stats.avg
                    test_result[u"result"][u"receive-stdev"] = stats.stdev
                else:
                    groups = re.search(self.REGEX_MRR, test.message)
                    test_result[u"result"][u"receive-rate"] = \
                        float(groups.group(3)) / float(groups.group(1))
        elif u"SOAK" in tags:
            test_result[u"type"] = u"SOAK"
            if test.status == u"PASS":
                test_result[u"throughput"], test_result[u"status"] = \
                    self._get_plr_throughput(test.message)
        elif u"HOSTSTACK" in tags:
            test_result[u"type"] = u"HOSTSTACK"
            if test.status == u"PASS":
                test_result[u"result"], test_result[u"status"] = \
                    self._get_hoststack_data(test.message, tags)
        elif u"LDP_NGINX" in tags:
            test_result[u"type"] = u"LDP_NGINX"
            test_result[u"result"], test_result[u"status"] = \
                self._get_vsap_data(test.message, tags)
        # elif u"TCP" in tags:  # This might be not used
        #     test_result[u"type"] = u"TCP"
        #     if test.status == u"PASS":
        #         groups = re.search(self.REGEX_TCP, test.message)
        #         test_result[u"result"] = int(groups.group(2))
        elif u"RECONF" in tags:
            test_result[u"type"] = u"RECONF"
            if test.status == u"PASS":
                test_result[u"result"] = None
                try:
                    grps_loss = re.search(self.REGEX_RECONF_LOSS, test.message)
                    grps_time = re.search(self.REGEX_RECONF_TIME, test.message)
                    test_result[u"result"] = {
                        u"loss": int(grps_loss.group(1)),
                        u"time": float(grps_time.group(1))
                    }
                except (AttributeError, IndexError, ValueError, TypeError):
                    test_result[u"status"] = u"FAIL"
        else:
            test_result[u"status"] = u"FAIL"

        self._data[u"tests"][self._test_id] = test_result

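    # Illustration of the renaming in start_test above (hypothetical id):
    # with the single multi-threading tag u"2T2C", a long name such as
    # u"...-2c-eth-l2xcbase-..." is rewritten to u"...-2t2c-eth-l2xcbase-...",
    # restoring the thread/core information.
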
    def end_test(self, test):
        """Called when test ends.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """

    def visit_keyword(self, keyword):
        """Implements traversing through the keyword and its child keywords.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """
        if self.start_keyword(keyword) is not False:
            self.end_keyword(keyword)

    def start_keyword(self, keyword):
        """Called when keyword starts. Default implementation does nothing.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """
        try:
            if keyword.type == u"setup":
                self.visit_setup_kw(keyword)
            elif keyword.type == u"teardown":
                self.visit_teardown_kw(keyword)
            else:
                self.visit_test_kw(keyword)
        except AttributeError:
            pass

    def end_keyword(self, keyword):
        """Called when keyword ends. Default implementation does nothing.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """

    def visit_test_kw(self, test_kw):
        """Implements traversing through the test keyword and its child
        keywords.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """
        for keyword in test_kw.keywords:
            if self.start_test_kw(keyword) is not False:
                self.visit_test_kw(keyword)
                self.end_test_kw(keyword)

    def start_test_kw(self, test_kw):
        """Called when test keyword starts. Default implementation does
        nothing.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """
        if ((self._for_output != u"trending") and
            (test_kw.name.count(u"Show Runtime On All Duts") or
             test_kw.name.count(u"Show Runtime Counters On All Duts") or
             test_kw.name.count(u"Vpp Show Runtime On All Duts"))):
            self._msg_type = u"test-show-runtime"
            self._sh_run_counter += 1
        else:
            return
        test_kw.messages.visit(self)

    def end_test_kw(self, test_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """

    def visit_setup_kw(self, setup_kw):
        """Implements traversing through the setup keyword and its child
        keywords.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """
        for keyword in setup_kw.keywords:
            if self.start_setup_kw(keyword) is not False:
                self.visit_setup_kw(keyword)
                self.end_setup_kw(keyword)

    def start_setup_kw(self, setup_kw):
        """Called when setup keyword starts. Default implementation does
        nothing.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """
        if setup_kw.name.count(u"Show Vpp Version On All Duts") \
                and not self._version:
            self._msg_type = u"vpp-version"
        elif setup_kw.name.count(u"Install Dpdk Framework On All Duts") and \
                not self._version:
            self._msg_type = u"dpdk-version"
        elif setup_kw.name.count(u"Set Global Variable") \
                and not self._timestamp:
            self._msg_type = u"timestamp"
        elif setup_kw.name.count(u"Setup Framework") and not self._testbed:
            self._msg_type = u"testbed"
        else:
            return
        setup_kw.messages.visit(self)

    def end_setup_kw(self, setup_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """

    def visit_teardown_kw(self, teardown_kw):
        """Implements traversing through the teardown keyword and its child
        keywords.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """
        for keyword in teardown_kw.keywords:
            if self.start_teardown_kw(keyword) is not False:
                self.visit_teardown_kw(keyword)
                self.end_teardown_kw(keyword)

    def start_teardown_kw(self, teardown_kw):
        """Called when teardown keyword starts.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """
        if teardown_kw.name.count(u"Show Papi History On All Duts"):
            self._conf_history_lookup_nr = 0
            self._msg_type = u"teardown-papi-history"
            teardown_kw.messages.visit(self)

    def end_teardown_kw(self, teardown_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """

    def visit_message(self, msg):
        """Implements visiting the message.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if self.start_message(msg) is not False:
            self.end_message(msg)

    def start_message(self, msg):
        """Called when message starts. Get required information from messages:
        - VPP version.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if self._msg_type:
            self.parse_msg[self._msg_type](msg)

    def end_message(self, msg):
        """Called when message ends. Default implementation does nothing.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """


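# A minimal usage sketch of ExecutionChecker (illustrative only; it mirrors
# what InputData._parse_tests below does):
#
#     from robot.api import ExecutionResult
#     checker = ExecutionChecker(
#         metadata={u"job": u"a-job", u"build": u"1"},
#         mapping=dict(), ignore=list(), for_output=u"report"
#     )
#     ExecutionResult(u"output.xml").visit(checker)
#     parsed = checker.data  # dict with u"metadata", u"suites", u"tests"
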
class InputData:
    """Input data

    The data is extracted from output.xml files generated by Jenkins jobs and
    stored in pandas' DataFrames.

    The data structure:
    - job name
      - build number
        - metadata
          (as described in ExecutionChecker documentation)
        - suites
          (as described in ExecutionChecker documentation)
        - tests
          (as described in ExecutionChecker documentation)
    """

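    # Access pattern served by the getters below (job/build values are
    # hypothetical): self.data[u"csit-vpp-perf-report"][u"42"][u"tests"]
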
    def __init__(self, spec, for_output):
        """Initialization.

        :param spec: Specification.
        :param for_output: Output to be generated from downloaded data.
        :type spec: Specification
        :type for_output: str
        """

        # Specification:
        self._cfg = spec

        self._for_output = for_output

        # Data store:
        self._input_data = pd.Series()

    @property
    def data(self):
        """Getter - Input data.

        :returns: Input data
        :rtype: pandas.Series
        """
        return self._input_data

    def metadata(self, job, build):
        """Getter - metadata

        :param job: Job whose metadata we want.
        :param build: Build whose metadata we want.
        :type job: str
        :type build: str
        :returns: Metadata
        :rtype: pandas.Series
        """
        return self.data[job][build][u"metadata"]

    def suites(self, job, build):
        """Getter - suites

        :param job: Job whose suites we want.
        :param build: Build whose suites we want.
        :type job: str
        :type build: str
        :returns: Suites.
        :rtype: pandas.Series
        """
        return self.data[job][str(build)][u"suites"]

    def tests(self, job, build):
        """Getter - tests

        :param job: Job whose tests we want.
        :param build: Build whose tests we want.
        :type job: str
        :type build: str
        :returns: Tests.
        :rtype: pandas.Series
        """
        return self.data[job][build][u"tests"]

    def _parse_tests(self, job, build):
        """Process data from robot output.xml file and return JSON structured
        data.

        :param job: The name of the job whose build output data will be
            processed.
        :param build: The build whose output data will be processed.
        :type job: str
        :type build: dict
        :returns: JSON data structure.
        :rtype: dict
        """

        metadata = {
            u"job": job,
            u"build": build
        }

        with open(build[u"file-name"], u'r') as data_file:
            try:
                result = ExecutionResult(data_file)
            except errors.DataError as err:
                logging.error(
                    f"Error occurred while parsing output.xml: {repr(err)}"
                )
                return None
        checker = ExecutionChecker(
            metadata, self._cfg.mapping, self._cfg.ignore, self._for_output
        )
        result.visit(checker)

        return checker.data

    def _download_and_parse_build(self, job, build, repeat, pid=10000):
        """Download and parse the input data file.

        :param job: Name of the Jenkins job which generated the processed input
            file.
        :param build: Information about the Jenkins build which generated the
            processed input file.
        :param repeat: Repeat the download specified number of times if not
            successful.
        :param pid: PID of the process executing this method.
        :type job: str
        :type build: dict
        :type repeat: int
        :type pid: int
        """

        logging.info(f"Processing the job/build: {job}: {build[u'build']}")

        state = u"failed"
        success = False
        data = None
        do_repeat = repeat
        while do_repeat:
            success = download_and_unzip_data_file(self._cfg, job, build, pid)
            if success:
                break
            do_repeat -= 1
        if not success:
            logging.error(
                f"It is not possible to download the input data file from the "
                f"job {job}, build {build[u'build']}, or it is damaged. "
                f"Skipped."
            )
        if success:
            logging.info(f"  Processing data from build {build[u'build']}")
            data = self._parse_tests(job, build)
            if data is None:
                logging.error(
                    f"Input data file from the job {job}, build "
                    f"{build[u'build']} is damaged. Skipped."
                )
            else:
                state = u"processed"

            try:
                remove(build[u"file-name"])
            except OSError as err:
                logging.error(
                    f"Cannot remove the file {build[u'file-name']}: {repr(err)}"
                )

        # If the time-period is defined in the specification file, remove all
        # files which are outside the time period.
        is_last = False
        timeperiod = self._cfg.environment.get(u"time-period", None)
        if timeperiod and data:
            now = dt.utcnow()
            timeperiod = timedelta(int(timeperiod))
            metadata = data.get(u"metadata", None)
            if metadata:
                generated = metadata.get(u"generated", None)
                if generated:
                    generated = dt.strptime(generated, u"%Y%m%d %H:%M")
                    if (now - generated) > timeperiod:
                        # Remove the data and the file:
                        state = u"removed"
                        data = None
                        is_last = True
                        logging.info(
                            f"  The build {job}/{build[u'build']} is "
                            f"outdated, will be removed."
                        )
        return {
            u"data": data,
            u"state": state,
            u"job": job,
            u"build": build,
            u"last": is_last
        }

    def download_and_parse_data(self, repeat=1):
        """Download the input data files, parse the input data from the input
        files and store it in pandas' Series.

        :param repeat: Repeat the download the specified number of times if it
            is not successful.
        :type repeat: int
        """

        logging.info(u"Downloading and parsing input files ...")

        for job, builds in self._cfg.input.items():
            for build in builds:

                result = self._download_and_parse_build(job, build, repeat)
                if result[u"last"]:
                    break
                build_nr = result[u"build"][u"build"]

                if result[u"data"]:
                    data = result[u"data"]
                    build_data = pd.Series({
                        u"metadata": pd.Series(
                            list(data[u"metadata"].values()),
                            index=list(data[u"metadata"].keys())
                        ),
                        u"suites": pd.Series(
                            list(data[u"suites"].values()),
                            index=list(data[u"suites"].keys())
                        ),
                        u"tests": pd.Series(
                            list(data[u"tests"].values()),
                            index=list(data[u"tests"].keys())
                        )
                    })

                    if self._input_data.get(job, None) is None:
                        self._input_data[job] = pd.Series()
                    self._input_data[job][str(build_nr)] = build_data
                    self._cfg.set_input_file_name(
                        job, build_nr, result[u"build"][u"file-name"]
                    )
                self._cfg.set_input_state(job, build_nr, result[u"state"])

                mem_alloc = \
                    resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000
                logging.info(f"Memory allocation: {mem_alloc:.0f}MB")

        logging.info(u"Done.")

        msg = u"Successful downloads from the sources:\n"
        for source in self._cfg.environment[u"data-sources"]:
            if source[u"successful-downloads"]:
                msg += (
                    f"{source[u'url']}/{source[u'path']}/"
                    f"{source[u'file-name']}: "
                    f"{source[u'successful-downloads']}\n"
                )
        logging.info(msg)

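    # Hedged usage sketch: assuming ``in_data`` is an InputData instance
    # created from the specification (the constructor is outside this
    # excerpt), a run with one retry per failed download would be:
    #
    #     in_data.download_and_parse_data(repeat=2)
    #     print(list(in_data.data.keys()))  # one entry per configured job
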
    def process_local_file(self, local_file, job=u"local", build_nr=1,
                           replace=True):
        """Process a local XML file given as a command-line parameter.

        :param local_file: The file to process.
        :param job: Job name.
        :param build_nr: Build number.
        :param replace: If True, the information about jobs and builds is
            replaced by the new one, otherwise the new jobs and builds are
            added.
        :type local_file: str
        :type job: str
        :type build_nr: int
        :type replace: bool
        :raises: PresentationError if an error occurs.
        """
        if not isfile(local_file):
            raise PresentationError(f"The file {local_file} does not exist.")

        # Try to derive the build number from the file name (e.g. "7.xml");
        # keep the given build_nr if the name is not a number.
        try:
            build_nr = int(local_file.split(u"/")[-1].split(u".")[0])
        except (IndexError, ValueError):
            pass

        build = {
            u"build": build_nr,
            u"status": u"failed",
            u"file-name": local_file
        }
        if replace:
            self._cfg.input = dict()
        self._cfg.add_build(job, build)

        logging.info(f"Processing {job}: {build_nr:2d}: {local_file}")
        data = self._parse_tests(job, build)
        if data is None:
            raise PresentationError(
                f"Error occurred while parsing the file {local_file}"
            )

        build_data = pd.Series({
            u"metadata": pd.Series(
                list(data[u"metadata"].values()),
                index=list(data[u"metadata"].keys())
            ),
            u"suites": pd.Series(
                list(data[u"suites"].values()),
                index=list(data[u"suites"].keys())
            ),
            u"tests": pd.Series(
                list(data[u"tests"].values()),
                index=list(data[u"tests"].keys())
            )
        })

        if self._input_data.get(job, None) is None:
            self._input_data[job] = pd.Series()
        self._input_data[job][str(build_nr)] = build_data

        self._cfg.set_input_state(job, build_nr, u"processed")

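    # Hedged example: the build number is derived from the file name when it
    # parses as an integer, so the (hypothetical) file below is stored as
    # build 7 of the job "local":
    #
    #     in_data.process_local_file(u"/tmp/7.xml")
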
    def process_local_directory(self, local_dir, replace=True):
        """Process a local directory with XML file(s). The directory is
        processed as a 'job' and the XML files in it as its builds.
        If the given directory contains only sub-directories, these
        sub-directories are processed as jobs and the XML files in them as
        builds of their job.

        :param local_dir: Local directory to process.
        :param replace: If True, the information about jobs and builds is
            replaced by the new one, otherwise the new jobs and builds are
            added.
        :type local_dir: str
        :type replace: bool
        :raises: PresentationError if the directory does not exist, is empty,
            or mixes files with sub-directories.
        """
        if not isdir(local_dir):
            raise PresentationError(
                f"The directory {local_dir} does not exist."
            )

        # Check if the given directory includes only files, or only
        # directories.
        _, dirnames, filenames = next(walk(local_dir))

        if filenames and not dirnames:
            filenames.sort()
            # local_builds:
            # key: dir (job) name, value: list of file names (builds)
            local_builds = {
                local_dir: [join(local_dir, name) for name in filenames]
            }

        elif dirnames and not filenames:
            dirnames.sort()
            # local_builds:
            # key: dir (job) name, value: list of file names (builds)
            local_builds = dict()
            for dirname in dirnames:
                builds = [
                    join(local_dir, dirname, name)
                    for name in listdir(join(local_dir, dirname))
                    if isfile(join(local_dir, dirname, name))
                ]
                if builds:
                    local_builds[dirname] = sorted(builds)

        elif not filenames and not dirnames:
            raise PresentationError(f"The directory {local_dir} is empty.")
        else:
            raise PresentationError(
                f"The directory {local_dir} can include only files or only "
                f"directories, not both.\nThe directory {local_dir} includes "
                f"file(s):\n{filenames}\nand directories:\n{dirnames}"
            )

        if replace:
            self._cfg.input = dict()

        for job, files in local_builds.items():
            for idx, local_file in enumerate(files):
                self.process_local_file(local_file, job, idx + 1, replace=False)

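    # The two accepted directory layouts, sketched with hypothetical names.
    # Flat - the directory itself is the job and its files are the builds:
    #
    #     local/
    #         1.xml  2.xml
    #
    # Nested - each sub-directory is a job and its files are that job's
    # builds:
    #
    #     local/
    #         job-a/  1.xml  2.xml
    #         job-b/  1.xml
    #
    # Mixing files and sub-directories raises PresentationError.
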
    @staticmethod
    def _end_of_tag(tag_filter, start=0, closer=u"'"):
        """Return the index of the character which closes the tag in the
        given string.

        :param tag_filter: The string where the end of the tag is searched.
        :param start: The index where the search is started.
        :param closer: The character which closes the tag.
        :type tag_filter: str
        :type start: int
        :type closer: str
        :returns: The index of the tag closer, or None if not found.
        :rtype: int
        """
        try:
            idx_opener = tag_filter.index(closer, start)
            return tag_filter.index(closer, idx_opener + 1)
        except ValueError:
            return None

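    # Worked example for _end_of_tag: in "'NDRPDR' and '1C'" the apostrophe
    # at index 0 opens the first tag and the one at index 7 closes it;
    # searching from index 8 finds the second pair:
    #
    #     InputData._end_of_tag(u"'NDRPDR' and '1C'")      # -> 7
    #     InputData._end_of_tag(u"'NDRPDR' and '1C'", 8)   # -> 16
    #     InputData._end_of_tag(u"no tags here")           # -> None
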
    @staticmethod
    def _condition(tag_filter):
        """Create a conditional statement from the given tag filter.

        :param tag_filter: Filter based on tags from the element specification.
        :type tag_filter: str
        :returns: Conditional statement which can be evaluated.
        :rtype: str
        """
        index = 0
        while True:
            index = InputData._end_of_tag(tag_filter, index)
            if index is None:
                return tag_filter
            index += 1
            tag_filter = tag_filter[:index] + u" in tags" + tag_filter[index:]

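    # Worked example for _condition: " in tags" is appended right after each
    # closing apostrophe, turning the tag filter into an expression which
    # filter_data later evaluates against each test's tag list:
    #
    #     InputData._condition(u"'NDRPDR' and not '1C'")
    #     # -> "'NDRPDR' in tags and not '1C' in tags"
    #     eval(u"'NDRPDR' in tags and not '1C' in tags",
    #          {u"tags": [u"NDRPDR", u"2C"]})  # -> True
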
    def filter_data(self, element, params=None, data=None, data_set=u"tests",
                    continue_on_error=False):
        """Filter required data from the given jobs and builds.

        The output data structure is:
        - job 1
          - build 1
            - test (or suite) 1 ID:
              - param 1
              - param 2
              ...
              - param n
            ...
            - test (or suite) n ID:
            ...
          ...
          - build n
        ...
        - job n

        :param element: Element which will use the filtered data.
        :param params: Parameters which will be included in the output. If
            None, all parameters are included.
        :param data: If not None, this data is used instead of the data
            specified in the element.
        :param data_set: The set of data to be filtered: tests, suites,
            metadata.
        :param continue_on_error: Continue if there is an error while reading
            the data. The item will be empty then.
        :type element: pandas.Series
        :type params: list
        :type data: dict
        :type data_set: str
        :type continue_on_error: bool
        :returns: Filtered data.
        :rtype: pandas.Series
        """

        try:
            if data_set == u"suites":
                cond = u"True"
            elif element[u"filter"] in (u"all", u"template"):
                cond = u"True"
            else:
                cond = InputData._condition(element[u"filter"])
            logging.debug(f"   Filter: {cond}")
        except KeyError:
            logging.error(u"  No filter defined.")
            return None

        if params is None:
            params = element.get(u"parameters", None)
            if params:
                params.extend((u"type", u"status"))

        data_to_filter = data if data else element[u"data"]
        data = pd.Series()
        try:
            for job, builds in data_to_filter.items():
                data[job] = pd.Series()
                for build in builds:
                    data[job][str(build)] = pd.Series()
                    try:
                        data_dict = dict(
                            self.data[job][str(build)][data_set].items())
                    except KeyError:
                        if continue_on_error:
                            continue
                        return None

                    for test_id, test_data in data_dict.items():
                        if eval(cond, {u"tags": test_data.get(u"tags", u"")}):
                            data[job][str(build)][test_id] = pd.Series()
                            if params is None:
                                for param, val in test_data.items():
                                    data[job][str(build)][test_id][param] = val
                            else:
                                for param in params:
                                    try:
                                        data[job][str(build)][test_id][param] =\
                                            test_data[param]
                                    except KeyError:
                                        data[job][str(build)][test_id][param] =\
                                            u"No Data"
            return data

        except (KeyError, IndexError, ValueError) as err:
            logging.error(
                f"Missing mandatory parameter in the element specification: "
                f"{repr(err)}"
            )
            return None
        except AttributeError as err:
            logging.error(repr(err))
            return None
        except SyntaxError as err:
            logging.error(
                f"The filter {cond} is not correct. Check if all tags are "
                f"enclosed by apostrophes.\n{repr(err)}"
            )
            return None

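    # Hedged sketch of a filter_data call: the element must carry "filter"
    # and "data"; the job name, build numbers and parameter below are
    # illustrative assumptions:
    #
    #     element = pd.Series({
    #         u"filter": u"'NDRPDR' and '64B'",
    #         u"data": {u"csit-vpp-perf-report": [41, 42]},
    #         u"parameters": [u"throughput"]
    #     })
    #     filtered = in_data.filter_data(element)
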
    def filter_tests_by_name(self, element, params=None, data_set=u"tests",
                             continue_on_error=False):
        """Filter required data from the given jobs and builds.

        The output data structure is:
        - job 1
          - build 1
            - test (or suite) 1 ID:
              - param 1
              - param 2
              ...
              - param n
            ...
            - test (or suite) n ID:
            ...
          ...
          - build n
        ...
        - job n

        :param element: Element which will use the filtered data.
        :param params: Parameters which will be included in the output. If
            None, all parameters are included.
        :param data_set: The set of data to be filtered: tests, suites,
            metadata.
        :param continue_on_error: Continue if there is an error while reading
            the data. The item will be empty then.
        :type element: pandas.Series
        :type params: list
        :type data_set: str
        :type continue_on_error: bool
        :returns: Filtered data.
        :rtype: pandas.Series
        """

        include = element.get(u"include", None)
        if not include:
            logging.warning(u"No tests to include, skipping the element.")
            return None

        if params is None:
            params = element.get(u"parameters", None)
            if params and u"type" not in params:
                params.append(u"type")

        cores = element.get(u"core", None)
        if cores:
            tests = list()
            for core in cores:
                for test in include:
                    tests.append(test.format(core=core))
        else:
            tests = include

        data = pd.Series()
        try:
            for job, builds in element[u"data"].items():
                data[job] = pd.Series()
                for build in builds:
                    data[job][str(build)] = pd.Series()
                    for test in tests:
                        try:
                            reg_ex = re.compile(str(test).lower())
                            for test_id in self.data[job][
                                    str(build)][data_set].keys():
                                if re.match(reg_ex, str(test_id).lower()):
                                    test_data = self.data[job][
                                        str(build)][data_set][test_id]
                                    data[job][str(build)][test_id] = pd.Series()
                                    if params is None:
                                        for param, val in test_data.items():
                                            data[job][str(build)][test_id]\
                                                [param] = val
                                    else:
                                        for param in params:
                                            try:
                                                data[job][str(build)][
                                                    test_id][param] = \
                                                    test_data[param]
                                            except KeyError:
                                                data[job][str(build)][
                                                    test_id][param] = u"No Data"
                        except KeyError as err:
                            if continue_on_error:
                                logging.debug(repr(err))
                                continue
                            logging.error(repr(err))
                            return None
            return data

        except (KeyError, IndexError, ValueError) as err:
            logging.error(
                f"Missing mandatory parameter in the element "
                f"specification: {repr(err)}"
            )
            return None
        except AttributeError as err:
            logging.error(repr(err))
            return None

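    # Hedged sketch of the "include"/"core" expansion above: each include
    # pattern is formatted once per core value and then compiled as a regex,
    # so the (hypothetical) element below matches both the 1c and 2c variant
    # of the test name:
    #
    #     element[u"include"] = [u".*64b-{core}-ethip4-ip4base-ndrpdr"]
    #     element[u"core"] = [u"1c", u"2c"]
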
    @staticmethod
    def merge_data(data):
        """Merge data from multiple jobs and builds into a simple data
        structure.

        The output data structure is:

        - test (suite) 1 ID:
          - param 1
          - param 2
          ...
          - param n
        ...
        - test (suite) n ID:
        ...

        :param data: Data to merge.
        :type data: pandas.Series
        :returns: Merged data.
        :rtype: pandas.Series
        """

        logging.info(u"    Merging data ...")

        merged_data = pd.Series()
        for builds in data.values:
            for item in builds.values:
                for item_id, item_data in item.items():
                    merged_data[item_id] = item_data
        return merged_data

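    # Usage sketch: merge_data flattens the job/build levels produced by
    # filter_data; when the same item ID appears in several builds, the last
    # occurrence wins:
    #
    #     merged = InputData.merge_data(filtered)  # filtered: see above
    #     print(list(merged.keys())[:5])
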
    def print_all_oper_data(self):
        """Print all operational data to the console."""

        for job in self._input_data.values:
            for build in job.values:
                for test_id, test_data in build[u"tests"].items():
                    print(f"{test_id}")
                    if test_data.get(u"show-run", None) is None:
                        continue
                    for dut_name, data in test_data[u"show-run"].items():
                        if data.get(u"runtime", None) is None:
                            continue
                        runtime = loads(data[u"runtime"])
                        try:
                            threads_nr = len(runtime[0][u"clocks"])
                        except (IndexError, KeyError):
                            continue
                        threads = OrderedDict(
                            {idx: list() for idx in range(threads_nr)})
                        for item in runtime:
                            for idx in range(threads_nr):
                                if item[u"vectors"][idx] > 0:
                                    clocks = item[u"clocks"][idx] / \
                                             item[u"vectors"][idx]
                                elif item[u"calls"][idx] > 0:
                                    clocks = item[u"clocks"][idx] / \
                                             item[u"calls"][idx]
                                elif item[u"suspends"][idx] > 0:
                                    clocks = item[u"clocks"][idx] / \
                                             item[u"suspends"][idx]
                                else:
                                    clocks = 0.0

                                if item[u"calls"][idx] > 0:
                                    vectors_call = item[u"vectors"][idx] / \
                                                   item[u"calls"][idx]
                                else:
                                    vectors_call = 0.0

                                if int(item[u"calls"][idx]) + int(
                                        item[u"vectors"][idx]) + \
                                        int(item[u"suspends"][idx]):
                                    threads[idx].append([
                                        item[u"name"],
                                        item[u"calls"][idx],
                                        item[u"vectors"][idx],
                                        item[u"suspends"][idx],
                                        clocks,
                                        vectors_call
                                    ])

                        print(f"Host IP: {data.get(u'host', '')}, "
                              f"Socket: {data.get(u'socket', '')}")
                        for thread_nr, thread in threads.items():
                            # Column order matches the rows appended above:
                            # name, calls, vectors, suspends, clocks per
                            # packet, vectors per call.
                            txt_table = prettytable.PrettyTable(
                                (
                                    u"Name",
                                    u"Nr of Calls",
                                    u"Nr of Vectors",
                                    u"Suspends",
                                    u"Cycles per Packet",
                                    u"Average Vector Size"
                                )
                            )
                            avg = 0.0
                            for row in thread:
                                txt_table.add_row(row)
                                avg += row[-1]
                            if len(thread) == 0:
                                avg = u""
                            else:
                                avg = f", Average Vector Size per Node: " \
                                      f"{(avg / len(thread)):.2f}"
                            th_name = u"main" if thread_nr == 0 \
                                else f"worker_{thread_nr}"
                            print(f"{dut_name}, {th_name}{avg}")
                            txt_table.float_format = u".2"
                            txt_table.align = u"r"
                            txt_table.align[u"Name"] = u"l"
                            print(f"{txt_table.get_string()}\n")