# Copyright (c) 2021 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

14 """Data pre-processing
15
16 - extract data from output.xml files generated by Jenkins jobs and store in
17   pandas' Series,
18 - provide access to the data.
19 - filter the data using tags,
20 """
import re
import copy
import resource
import logging

from collections import OrderedDict
from os import remove, walk, listdir
from os.path import isfile, isdir, join
from datetime import datetime as dt
from datetime import timedelta
from json import loads
from json.decoder import JSONDecodeError

import hdrh.histogram
import hdrh.codec
import prettytable
import pandas as pd

from robot.api import ExecutionResult, ResultVisitor
from robot import errors

from resources.libraries.python import jumpavg
from input_data_files import download_and_unzip_data_file
from pal_errors import PresentationError


# Separator used in file names
SEPARATOR = u"__"


class ExecutionChecker(ResultVisitor):
    """Class to traverse through the test suite structure.

    The functionality implemented in this class generates a json structure:

    Performance tests:

    {
        "metadata": {
            "generated": "Timestamp",
            "version": "SUT version",
            "job": "Jenkins job name",
            "build": "Information about the build"
        },
        "suites": {
            "Suite long name 1": {
                "name": "Suite name",
                "doc": "Suite 1 documentation",
                "parent": "Suite 1 parent",
                "level": "Level of the suite in the suite hierarchy"
            },
            "Suite long name N": {
                "name": "Suite name",
                "doc": "Suite N documentation",
                "parent": "Suite N parent",
                "level": "Level of the suite in the suite hierarchy"
            }
        },
        "tests": {
            # NDRPDR tests:
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "conf-history": "DUT1 and DUT2 PAPI command history",
                "show-run": "Show Run",
                "tags": ["tag 1", "tag 2", "tag n"],
                "type": "NDRPDR",
                "status": "PASS" | "FAIL",
                "throughput": {
                    "NDR": {
                        "LOWER": float,
                        "UPPER": float
                    },
                    "PDR": {
                        "LOWER": float,
                        "UPPER": float
                    }
                },
                "latency": {
                    "NDR": {
                        "direction1": {
                            "min": float,
                            "avg": float,
                            "max": float,
                            "hdrh": str
                        },
                        "direction2": {
                            "min": float,
                            "avg": float,
                            "max": float,
                            "hdrh": str
                        }
                    },
                    "PDR": {
                        "direction1": {
                            "min": float,
                            "avg": float,
                            "max": float,
                            "hdrh": str
                        },
                        "direction2": {
                            "min": float,
                            "avg": float,
                            "max": float,
                            "hdrh": str
                        }
                    }
                }
            },

            # TCP tests:
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "tags": ["tag 1", "tag 2", "tag n"],
                "type": "TCP",
                "status": "PASS" | "FAIL",
                "result": int
            },

            # MRR, BMRR tests:
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "tags": ["tag 1", "tag 2", "tag n"],
                "type": "MRR" | "BMRR",
                "status": "PASS" | "FAIL",
                "result": {
                    "receive-rate": float,
                    # Average of a list, computed using AvgStdevStats.
                    # In CSIT-1180, replace with List[float].
                }
            },

            "ID": {
                # next test
            }
        }
    }


    Functional tests:

    {
        "metadata": {  # Optional
            "version": "VPP version",
            "job": "Jenkins job name",
            "build": "Information about the build"
        },
        "suites": {
            "Suite name 1": {
                "doc": "Suite 1 documentation",
                "parent": "Suite 1 parent",
                "level": "Level of the suite in the suite hierarchy"
            },
            "Suite name N": {
                "doc": "Suite N documentation",
                "parent": "Suite N parent",
                "level": "Level of the suite in the suite hierarchy"
            }
        },
        "tests": {
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "tags": ["tag 1", "tag 2", "tag n"],
                "conf-history": "DUT1 and DUT2 PAPI command history",
                "show-run": "Show Run",
                "status": "PASS" | "FAIL"
            },
            "ID": {
                # next test
            }
        }
    }

    .. note:: ID is the lowercase full path to the test.
    """
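    # A minimal usage sketch (job/build values invented); this mirrors what
    # InputData._parse_tests() below does with a downloaded output.xml:
    #
    #     result = ExecutionResult(u"output.xml")
    #     checker = ExecutionChecker(
    #         metadata={u"job": u"example-job", u"build": u"1"},
    #         mapping=dict(),
    #         ignore=list()
    #     )
    #     result.visit(checker)
    #     data = checker.data  # dict with u"metadata", u"suites", u"tests"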
    REGEX_PLR_RATE = re.compile(
        r'PLRsearch lower bound::?\s(\d+.\d+).*\n'
        r'PLRsearch upper bound::?\s(\d+.\d+)'
    )
    REGEX_NDRPDR_RATE = re.compile(
        r'NDR_LOWER:\s(\d+.\d+).*\n.*\n'
        r'NDR_UPPER:\s(\d+.\d+).*\n'
        r'PDR_LOWER:\s(\d+.\d+).*\n.*\n'
        r'PDR_UPPER:\s(\d+.\d+)'
    )
    REGEX_NDRPDR_GBPS = re.compile(
        r'NDR_LOWER:.*,\s(\d+.\d+).*\n.*\n'
        r'NDR_UPPER:.*,\s(\d+.\d+).*\n'
        r'PDR_LOWER:.*,\s(\d+.\d+).*\n.*\n'
        r'PDR_UPPER:.*,\s(\d+.\d+)'
    )
    REGEX_PERF_MSG_INFO = re.compile(
        r'NDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*\n'
        r'PDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*\n'
        r'Latency at 90% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
        r'Latency at 50% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
        r'Latency at 10% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
    )
    REGEX_CPS_MSG_INFO = re.compile(
        r'NDR_LOWER:\s(\d+.\d+)\s.*\s.*\n.*\n.*\n'
        r'PDR_LOWER:\s(\d+.\d+)\s.*\s.*\n.*\n.*'
    )
    REGEX_PPS_MSG_INFO = re.compile(
        r'NDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*\n'
        r'PDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*'
    )
    REGEX_MRR_MSG_INFO = re.compile(r'.*\[(.*)\]')

    REGEX_VSAP_MSG_INFO = re.compile(
        r'Transfer Rate: (\d*.\d*).*\n'
        r'Latency: (\d*.\d*).*\n'
        r'Completed requests: (\d*).*\n'
        r'Failed requests: (\d*).*\n'
        r'Total data transferred: (\d*).*\n'
        r'Connection [cr]ps rate:\s*(\d*.\d*)'
    )

    # Needed for CPS and PPS tests
    REGEX_NDRPDR_LAT_BASE = re.compile(
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]'
    )
    REGEX_NDRPDR_LAT = re.compile(
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]'
    )

    REGEX_VERSION_VPP = re.compile(
        r"(return STDOUT Version:\s*|"
        r"VPP Version:\s*|VPP version:\s*)(.*)"
    )
    REGEX_VERSION_DPDK = re.compile(
        r"(DPDK version:\s*|DPDK Version:\s*)(.*)"
    )
    REGEX_TCP = re.compile(
        r'Total\s(rps|cps|throughput):\s(\d*).*$'
    )
    REGEX_MRR = re.compile(
        r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s'
        r'tx\s(\d*),\srx\s(\d*)'
    )
    REGEX_BMRR = re.compile(
        r'.*trial results.*: \[(.*)\]'
    )
    REGEX_RECONF_LOSS = re.compile(
        r'Packets lost due to reconfig: (\d*)'
    )
    REGEX_RECONF_TIME = re.compile(
        r'Implied time lost: (\d*.[\de-]*)'
    )
    REGEX_TC_TAG = re.compile(r'\d+[tT]\d+[cC]')

    REGEX_TC_NAME_OLD = re.compile(r'-\d+[tT]\d+[cC]-')

    REGEX_TC_NAME_NEW = re.compile(r'-\d+[cC]-')

    REGEX_TC_NUMBER = re.compile(r'tc\d{2}-')

    REGEX_TC_PAPI_CLI = re.compile(r'.*\((\d+.\d+.\d+.\d+.) - (.*)\)')

    def __init__(self, metadata, mapping, ignore):
        """Initialisation.

        :param metadata: Key-value pairs to be included in "metadata" part of
            JSON structure.
        :param mapping: Mapping of the old names of test cases to the new
            (actual) ones.
        :param ignore: List of TCs to be ignored.
        :type metadata: dict
        :type mapping: dict
        :type ignore: list
        """

        # Type of message to parse out from the test messages
        self._msg_type = None

        # VPP version
        self._version = None

        # Timestamp
        self._timestamp = None

        # Testbed. The testbed is identified by TG node IP address.
        self._testbed = None

        # Mapping of TCs long names
        self._mapping = mapping

        # Ignore list
        self._ignore = ignore

        # Number of PAPI History messages found:
        # 0 - no message
        # 1 - PAPI History of DUT1
        # 2 - PAPI History of DUT2
        self._conf_history_lookup_nr = 0

        self._sh_run_counter = 0

        # Test ID of currently processed test - the lowercase full path to
        # the test
        self._test_id = None

        # The main data structure
        self._data = {
            u"metadata": OrderedDict(),
            u"suites": OrderedDict(),
            u"tests": OrderedDict()
        }

        # Save the provided metadata
        for key, val in metadata.items():
            self._data[u"metadata"][key] = val

        # Dictionary defining the methods used to parse different types of
        # messages
        self.parse_msg = {
            u"timestamp": self._get_timestamp,
            u"vpp-version": self._get_vpp_version,
            u"dpdk-version": self._get_dpdk_version,
            u"teardown-papi-history": self._get_papi_history,
            u"test-show-runtime": self._get_show_run,
            u"testbed": self._get_testbed
        }
    @property
    def data(self):
        """Getter - Data parsed from the XML file.

        :returns: Data parsed from the XML file.
        :rtype: dict
        """
        return self._data

    def _get_data_from_mrr_test_msg(self, msg):
        """Get info from message of MRR performance tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or "Test Failed." if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_MRR_MSG_INFO, msg)
        if not groups or groups.lastindex != 1:
            return u"Test Failed."

        try:
            data = groups.group(1).split(u", ")
        except (AttributeError, IndexError, ValueError, KeyError):
            return u"Test Failed."

        out_str = u"["
        try:
            for item in data:
                out_str += f"{(float(item) / 1e6):.2f}, "
            return out_str[:-2] + u"]"
        except (AttributeError, IndexError, ValueError, KeyError):
            return u"Test Failed."

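    # A minimal doctest-style sketch of the transformation above (message
    # text and rates invented):
    #
    #     >>> checker = ExecutionChecker(dict(), dict(), list())
    #     >>> checker._get_data_from_mrr_test_msg(
    #     ...     u"Maximum Receive Rate trial results [10000000.0, 10500000.0]"
    #     ... )
    #     '[10.00, 10.50]'
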
    def _get_data_from_cps_test_msg(self, msg):
        """Get info from message of NDRPDR CPS tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or "Test Failed." if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_CPS_MSG_INFO, msg)
        if not groups or groups.lastindex != 2:
            return u"Test Failed."

        try:
            return (
                f"1. {(float(groups.group(1)) / 1e6):5.2f}\n"
                f"2. {(float(groups.group(2)) / 1e6):5.2f}"
            )
        except (AttributeError, IndexError, ValueError, KeyError):
            return u"Test Failed."

    def _get_data_from_pps_test_msg(self, msg):
        """Get info from message of NDRPDR PPS tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or "Test Failed." if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_PPS_MSG_INFO, msg)
        if not groups or groups.lastindex != 4:
            return u"Test Failed."

        try:
            return (
                f"1. {(float(groups.group(1)) / 1e6):5.2f}      "
                f"{float(groups.group(2)):5.2f}\n"
                f"2. {(float(groups.group(3)) / 1e6):5.2f}      "
                f"{float(groups.group(4)):5.2f}"
            )
        except (AttributeError, IndexError, ValueError, KeyError):
            return u"Test Failed."

    def _get_data_from_perf_test_msg(self, msg):
        """Get info from message of NDRPDR performance tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or "Test Failed." if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_PERF_MSG_INFO, msg)
        if not groups or groups.lastindex != 10:
            return u"Test Failed."

        try:
            data = {
                u"ndr_low": float(groups.group(1)),
                u"ndr_low_b": float(groups.group(2)),
                u"pdr_low": float(groups.group(3)),
                u"pdr_low_b": float(groups.group(4)),
                u"pdr_lat_90_1": groups.group(5),
                u"pdr_lat_90_2": groups.group(6),
                u"pdr_lat_50_1": groups.group(7),
                u"pdr_lat_50_2": groups.group(8),
                u"pdr_lat_10_1": groups.group(9),
                u"pdr_lat_10_2": groups.group(10),
            }
        except (AttributeError, IndexError, ValueError, KeyError):
            return u"Test Failed."

        def _process_lat(in_str_1, in_str_2):
            """Extract latency percentiles from latency strings.

            :param in_str_1: Latency string for one direction produced by
                robot framework.
            :param in_str_2: Latency string for second direction produced by
                robot framework.
            :type in_str_1: str
            :type in_str_2: str
            :returns: Tuple of latency percentile values (P50/P90/P99 for
                each direction) or None if a problem occurs.
            :rtype: tuple
            """
            in_list_1 = in_str_1.split('/', 3)
            in_list_2 = in_str_2.split('/', 3)

            if len(in_list_1) != 4 and len(in_list_2) != 4:
                return None

            in_list_1[3] += u"=" * (len(in_list_1[3]) % 4)
            try:
                hdr_lat_1 = hdrh.histogram.HdrHistogram.decode(in_list_1[3])
            except hdrh.codec.HdrLengthException:
                return None

            in_list_2[3] += u"=" * (len(in_list_2[3]) % 4)
            try:
                hdr_lat_2 = hdrh.histogram.HdrHistogram.decode(in_list_2[3])
            except hdrh.codec.HdrLengthException:
                return None

            if hdr_lat_1 and hdr_lat_2:
                hdr_lat = (
                    hdr_lat_1.get_value_at_percentile(50.0),
                    hdr_lat_1.get_value_at_percentile(90.0),
                    hdr_lat_1.get_value_at_percentile(99.0),
                    hdr_lat_2.get_value_at_percentile(50.0),
                    hdr_lat_2.get_value_at_percentile(90.0),
                    hdr_lat_2.get_value_at_percentile(99.0)
                )

                if all(hdr_lat):
                    return hdr_lat

            return None

        try:
            out_msg = (
                f"1. {(data[u'ndr_low'] / 1e6):5.2f}      "
                f"{data[u'ndr_low_b']:5.2f}"
                f"\n2. {(data[u'pdr_low'] / 1e6):5.2f}      "
                f"{data[u'pdr_low_b']:5.2f}"
            )
            latency = (
                _process_lat(data[u'pdr_lat_10_1'], data[u'pdr_lat_10_2']),
                _process_lat(data[u'pdr_lat_50_1'], data[u'pdr_lat_50_2']),
                _process_lat(data[u'pdr_lat_90_1'], data[u'pdr_lat_90_2'])
            )
            if all(latency):
                max_len = len(str(max((max(item) for item in latency))))
                max_len = 4 if max_len < 4 else max_len

                for idx, lat in enumerate(latency):
                    if not idx:
                        out_msg += u"\n"
                    out_msg += (
                        f"\n{idx + 3}. "
                        f"{lat[0]:{max_len}d} "
                        f"{lat[1]:{max_len}d} "
                        f"{lat[2]:{max_len}d}      "
                        f"{lat[3]:{max_len}d} "
                        f"{lat[4]:{max_len}d} "
                        f"{lat[5]:{max_len}d} "
                    )

            return out_msg

        except (AttributeError, IndexError, ValueError, KeyError):
            return u"Test Failed."

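    # The latency strings handed to _process_lat() are expected in
    # "min/avg/max/hdrh" form, e.g. (invented values) u"1/2/4/HISTFAA...",
    # where the fourth field is a base64-encoded HDR histogram that is
    # "="-padded before decoding; the 50th/90th/99th percentiles are then
    # read per direction.
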
    def _get_testbed(self, msg):
        """Called when extraction of testbed IP is required.
        The testbed is identified by TG node IP address.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count(u"Setup of TG node") or \
                msg.message.count(u"Setup of node TG host"):
            reg_tg_ip = re.compile(
                r'.*TG .* (\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}).*')
            try:
                self._testbed = str(re.search(reg_tg_ip, msg.message).group(1))
            except (KeyError, ValueError, IndexError, AttributeError):
                pass
            finally:
                self._data[u"metadata"][u"testbed"] = self._testbed
                self._msg_type = None

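    # For illustration: a message like u"Setup of TG node 10.30.51.16 done"
    # (invented address) sets self._testbed to u"10.30.51.16".
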
    def _get_vpp_version(self, msg):
        """Called when extraction of VPP version is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count(u"return STDOUT Version:") or \
                msg.message.count(u"VPP Version:") or \
                msg.message.count(u"VPP version:"):
            self._version = str(
                re.search(self.REGEX_VERSION_VPP, msg.message).group(2)
            )
            self._data[u"metadata"][u"version"] = self._version
            self._msg_type = None

    def _get_dpdk_version(self, msg):
        """Called when extraction of DPDK version is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count(u"DPDK Version:"):
            try:
                self._version = str(re.search(
                    self.REGEX_VERSION_DPDK, msg.message).group(2))
                self._data[u"metadata"][u"version"] = self._version
            except IndexError:
                pass
            finally:
                self._msg_type = None

    def _get_timestamp(self, msg):
        """Called when extraction of timestamp is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        self._timestamp = msg.timestamp[:14]
        self._data[u"metadata"][u"generated"] = self._timestamp
        self._msg_type = None

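    # msg.timestamp is expected as u"YYYYMMDD HH:MM:SS.mmm"; the [:14] slice
    # keeps u"YYYYMMDD HH:MM", the format parsed back by
    # dt.strptime(generated, u"%Y%m%d %H:%M") in InputData below.
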
    def _get_papi_history(self, msg):
        """Called when extraction of PAPI command history is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if msg.message.count(u"PAPI command history:"):
            self._conf_history_lookup_nr += 1
            if self._conf_history_lookup_nr == 1:
                self._data[u"tests"][self._test_id][u"conf-history"] = str()
            else:
                self._msg_type = None
            text = re.sub(
                r"\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3} PAPI command history:",
                u"",
                msg.message,
                count=1
            ).replace(u'"', u"'")
            self._data[u"tests"][self._test_id][u"conf-history"] += (
                f"**DUT{str(self._conf_history_lookup_nr)}:** {text}"
            )

    def _get_show_run(self, msg):
        """Called when extraction of VPP operational data (output of CLI
        command Show Runtime) is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if not msg.message.count(u"stats runtime"):
            return

        # Temporary solution
        if self._sh_run_counter > 1:
            return

        if u"show-run" not in self._data[u"tests"][self._test_id].keys():
            self._data[u"tests"][self._test_id][u"show-run"] = dict()

        groups = re.search(self.REGEX_TC_PAPI_CLI, msg.message)
        if not groups:
            return
        try:
            host = groups.group(1)
        except (AttributeError, IndexError):
            host = u""
        try:
            sock = groups.group(2)
        except (AttributeError, IndexError):
            sock = u""

        runtime = loads(str(msg.message).replace(u' ', u'').replace(u'\n', u'').
                        replace(u"'", u'"').replace(u'b"', u'"').
                        replace(u'u"', u'"').split(u":", 1)[1])

        dut = u"dut{nr}".format(
            nr=len(self._data[u'tests'][self._test_id][u'show-run'].keys()) + 1)

        self._data[u'tests'][self._test_id][u'show-run'][dut] = \
            copy.copy(
                {
                    u"host": host,
                    u"socket": sock,
                    u"runtime": runtime,
                }
            )

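    # Sketch of the message shape consumed above (host and socket invented):
    # u"stats runtime (10.0.0.1 - /run/vpp/api.sock): [{...}]".
    # REGEX_TC_PAPI_CLI yields the host/socket pair; everything after the
    # first u":" is normalised and parsed as JSON.
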
    def _get_ndrpdr_throughput(self, msg):
        """Get NDR_LOWER, NDR_UPPER, PDR_LOWER and PDR_UPPER from the test
        message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        throughput = {
            u"NDR": {u"LOWER": -1.0, u"UPPER": -1.0},
            u"PDR": {u"LOWER": -1.0, u"UPPER": -1.0}
        }
        status = u"FAIL"
        groups = re.search(self.REGEX_NDRPDR_RATE, msg)

        if groups is not None:
            try:
                throughput[u"NDR"][u"LOWER"] = float(groups.group(1))
                throughput[u"NDR"][u"UPPER"] = float(groups.group(2))
                throughput[u"PDR"][u"LOWER"] = float(groups.group(3))
                throughput[u"PDR"][u"UPPER"] = float(groups.group(4))
                status = u"PASS"
            except (IndexError, ValueError):
                pass

        return throughput, status

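    # Illustrative fragment of a message REGEX_NDRPDR_RATE matches (numbers
    # invented); each *_LOWER line is followed by one line that is skipped:
    #
    #     NDR_LOWER: 1000000.0 pps
    #     ...
    #     NDR_UPPER: 1100000.0 pps
    #     PDR_LOWER: 1200000.0 pps
    #     ...
    #     PDR_UPPER: 1300000.0 pps
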
    def _get_ndrpdr_throughput_gbps(self, msg):
        """Get NDR_LOWER, NDR_UPPER, PDR_LOWER and PDR_UPPER in Gbps from the
        test message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        gbps = {
            u"NDR": {u"LOWER": -1.0, u"UPPER": -1.0},
            u"PDR": {u"LOWER": -1.0, u"UPPER": -1.0}
        }
        status = u"FAIL"
        groups = re.search(self.REGEX_NDRPDR_GBPS, msg)

        if groups is not None:
            try:
                gbps[u"NDR"][u"LOWER"] = float(groups.group(1))
                gbps[u"NDR"][u"UPPER"] = float(groups.group(2))
                gbps[u"PDR"][u"LOWER"] = float(groups.group(3))
                gbps[u"PDR"][u"UPPER"] = float(groups.group(4))
                status = u"PASS"
            except (IndexError, ValueError):
                pass

        return gbps, status

    def _get_plr_throughput(self, msg):
        """Get PLRsearch lower bound and PLRsearch upper bound from the test
        message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        throughput = {
            u"LOWER": -1.0,
            u"UPPER": -1.0
        }
        status = u"FAIL"
        groups = re.search(self.REGEX_PLR_RATE, msg)

        if groups is not None:
            try:
                throughput[u"LOWER"] = float(groups.group(1))
                throughput[u"UPPER"] = float(groups.group(2))
                status = u"PASS"
            except (IndexError, ValueError):
                pass

        return throughput, status

    def _get_ndrpdr_latency(self, msg):
        """Get LATENCY from the test message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """
        latency_default = {
            u"min": -1.0,
            u"avg": -1.0,
            u"max": -1.0,
            u"hdrh": u""
        }
        latency = {
            u"NDR": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"PDR": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"LAT0": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"PDR10": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"PDR50": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"PDR90": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
        }

        groups = re.search(self.REGEX_NDRPDR_LAT, msg)
        if groups is None:
            groups = re.search(self.REGEX_NDRPDR_LAT_BASE, msg)
        if groups is None:
            return latency, u"FAIL"

        def process_latency(in_str):
            """Return object with parsed latency values.

            TODO: Define class for the return type.

            :param in_str: Input string, min/avg/max/hdrh format.
            :type in_str: str
            :returns: Dict with corresponding keys, except hdrh float values.
            :rtype: dict
            :throws IndexError: If in_str does not have enough substrings.
            :throws ValueError: If a substring does not convert to float.
            """
            in_list = in_str.split('/', 3)

            rval = {
                u"min": float(in_list[0]),
                u"avg": float(in_list[1]),
                u"max": float(in_list[2]),
                u"hdrh": u""
            }

            if len(in_list) == 4:
                rval[u"hdrh"] = str(in_list[3])

            return rval

        try:
            latency[u"NDR"][u"direction1"] = process_latency(groups.group(1))
            latency[u"NDR"][u"direction2"] = process_latency(groups.group(2))
            latency[u"PDR"][u"direction1"] = process_latency(groups.group(3))
            latency[u"PDR"][u"direction2"] = process_latency(groups.group(4))
            if groups.lastindex == 4:
                return latency, u"PASS"
        except (IndexError, ValueError):
            pass

        try:
            latency[u"PDR90"][u"direction1"] = process_latency(groups.group(5))
            latency[u"PDR90"][u"direction2"] = process_latency(groups.group(6))
            latency[u"PDR50"][u"direction1"] = process_latency(groups.group(7))
            latency[u"PDR50"][u"direction2"] = process_latency(groups.group(8))
            latency[u"PDR10"][u"direction1"] = process_latency(groups.group(9))
            latency[u"PDR10"][u"direction2"] = process_latency(groups.group(10))
            latency[u"LAT0"][u"direction1"] = process_latency(groups.group(11))
            latency[u"LAT0"][u"direction2"] = process_latency(groups.group(12))
            if groups.lastindex == 12:
                return latency, u"PASS"
        except (IndexError, ValueError):
            pass

        return latency, u"FAIL"

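    # process_latency() above turns one direction string into a dict, e.g.
    # (invented values) u"1/2/4/HISTFAA..." becomes
    # {u"min": 1.0, u"avg": 2.0, u"max": 4.0, u"hdrh": u"HISTFAA..."}.
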
    @staticmethod
    def _get_hoststack_data(msg, tags):
        """Get data from the hoststack test message.

        :param msg: The test message to be parsed.
        :param tags: Test tags.
        :type msg: str
        :type tags: list
        :returns: Parsed data as a JSON dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """
        result = dict()
        status = u"FAIL"

        msg = msg.replace(u"'", u'"').replace(u" ", u"")
        if u"LDPRELOAD" in tags:
            try:
                result = loads(msg)
                status = u"PASS"
            except JSONDecodeError:
                pass
        elif u"VPPECHO" in tags:
            try:
                msg_lst = msg.replace(u"}{", u"} {").split(u" ")
                result = dict(
                    client=loads(msg_lst[0]),
                    server=loads(msg_lst[1])
                )
                status = u"PASS"
            except (JSONDecodeError, IndexError):
                pass

        return result, status

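    # A hedged doctest-style example of the LDPRELOAD branch (the payload key
    # is invented; only the normalisation and loads() call come from above):
    #
    #     >>> ExecutionChecker._get_hoststack_data(
    #     ...     u"{'bits_per_second': 9.88e9}", [u"LDPRELOAD"]
    #     ... )
    #     ({'bits_per_second': 9880000000.0}, 'PASS')
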
    def _get_vsap_data(self, msg, tags):
        """Get data from the vsap test message.

        :param msg: The test message to be parsed.
        :param tags: Test tags.
        :type msg: str
        :type tags: list
        :returns: Parsed data as a JSON dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """
        result = dict()
        status = u"FAIL"

        groups = re.search(self.REGEX_VSAP_MSG_INFO, msg)
        if groups is not None:
            try:
                result[u"transfer-rate"] = float(groups.group(1)) * 1e3
                result[u"latency"] = float(groups.group(2))
                result[u"completed-requests"] = int(groups.group(3))
                result[u"failed-requests"] = int(groups.group(4))
                result[u"bytes-transferred"] = int(groups.group(5))
                if u"TCP_CPS" in tags:
                    result[u"cps"] = float(groups.group(6))
                elif u"TCP_RPS" in tags:
                    result[u"rps"] = float(groups.group(6))
                else:
                    return result, status
                status = u"PASS"
            except (IndexError, ValueError):
                pass

        return result, status

    def visit_suite(self, suite):
        """Implements traversing through the suite and its direct children.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """
        if self.start_suite(suite) is not False:
            suite.suites.visit(self)
            suite.tests.visit(self)
            self.end_suite(suite)

    def start_suite(self, suite):
        """Called when suite starts.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """

        try:
            parent_name = suite.parent.name
        except AttributeError:
            return

        self._data[u"suites"][suite.longname.lower().
                              replace(u'"', u"'").
                              replace(u" ", u"_")] = {
                                  u"name": suite.name.lower(),
                                  u"doc": suite.doc,
                                  u"parent": parent_name,
                                  u"level": len(suite.longname.split(u"."))
                              }

        suite.keywords.visit(self)

    def end_suite(self, suite):
        """Called when suite ends.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """

    def visit_test(self, test):
        """Implements traversing through the test.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """
        if self.start_test(test) is not False:
            test.keywords.visit(self)
            self.end_test(test)

    def start_test(self, test):
        """Called when test starts.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """

        self._sh_run_counter = 0

        longname_orig = test.longname.lower()

        # Check the ignore list
        if longname_orig in self._ignore:
            return

        tags = [str(tag) for tag in test.tags]
        test_result = dict()

        # Change the TC long name and name if defined in the mapping table
        longname = self._mapping.get(longname_orig, None)
        if longname is not None:
            name = longname.split(u'.')[-1]
            logging.debug(
                f"{self._data[u'metadata']}\n{longname_orig}\n{longname}\n"
                f"{name}"
            )
        else:
            longname = longname_orig
            name = test.name.lower()

        # Remove TC number from the TC long name (backward compatibility):
        self._test_id = re.sub(self.REGEX_TC_NUMBER, u"", longname)
        # Remove TC number from the TC name (not needed):
        test_result[u"name"] = re.sub(self.REGEX_TC_NUMBER, u"", name)

        test_result[u"parent"] = test.parent.name.lower()
        test_result[u"tags"] = tags
        test_result[u"doc"] = test.doc
        test_result[u"type"] = u""
        test_result[u"status"] = test.status
        test_result[u"starttime"] = test.starttime
        test_result[u"endtime"] = test.endtime

        if test.status == u"PASS":
            if u"NDRPDR" in tags:
                if u"TCP_PPS" in tags or u"UDP_PPS" in tags:
                    test_result[u"msg"] = self._get_data_from_pps_test_msg(
                        test.message)
                elif u"TCP_CPS" in tags or u"UDP_CPS" in tags:
                    test_result[u"msg"] = self._get_data_from_cps_test_msg(
                        test.message)
                else:
                    test_result[u"msg"] = self._get_data_from_perf_test_msg(
                        test.message)
            elif u"MRR" in tags or u"FRMOBL" in tags or u"BMRR" in tags:
                test_result[u"msg"] = self._get_data_from_mrr_test_msg(
                    test.message)
            else:
                test_result[u"msg"] = test.message
        else:
            test_result[u"msg"] = test.message

        if u"PERFTEST" in tags:
            # Replace info about cores (e.g. -1c-) with the info about threads
            # and cores (e.g. -1t1c-) in the long test case names and in the
            # test case names if necessary.
            groups = re.search(self.REGEX_TC_NAME_OLD, self._test_id)
            if not groups:
                tag_count = 0
                tag_tc = str()
                for tag in test_result[u"tags"]:
                    groups = re.search(self.REGEX_TC_TAG, tag)
                    if groups:
                        tag_count += 1
                        tag_tc = tag

                if tag_count == 1:
                    self._test_id = re.sub(
                        self.REGEX_TC_NAME_NEW, f"-{tag_tc.lower()}-",
                        self._test_id, count=1
                    )
                    test_result[u"name"] = re.sub(
                        self.REGEX_TC_NAME_NEW, f"-{tag_tc.lower()}-",
                        test_result[u"name"], count=1
                    )
                else:
                    test_result[u"status"] = u"FAIL"
                    self._data[u"tests"][self._test_id] = test_result
                    logging.debug(
                        f"The test {self._test_id} has none or more than one "
                        f"multi-threading tag.\n"
                        f"Tags: {test_result[u'tags']}"
                    )
                    return

        if u"DEVICETEST" in tags:
            test_result[u"type"] = u"DEVICETEST"
        elif u"NDRPDR" in tags:
            if u"TCP_CPS" in tags or u"UDP_CPS" in tags:
                test_result[u"type"] = u"CPS"
            else:
                test_result[u"type"] = u"NDRPDR"
            if test.status == u"PASS":
                test_result[u"throughput"], test_result[u"status"] = \
                    self._get_ndrpdr_throughput(test.message)
                test_result[u"gbps"], test_result[u"status"] = \
                    self._get_ndrpdr_throughput_gbps(test.message)
                test_result[u"latency"], test_result[u"status"] = \
                    self._get_ndrpdr_latency(test.message)
        elif u"MRR" in tags or u"FRMOBL" in tags or u"BMRR" in tags:
            if u"MRR" in tags:
                test_result[u"type"] = u"MRR"
            else:
                test_result[u"type"] = u"BMRR"
            if test.status == u"PASS":
                test_result[u"result"] = dict()
                groups = re.search(self.REGEX_BMRR, test.message)
                if groups is not None:
                    items_str = groups.group(1)
                    items_float = [
                        float(item.strip().replace(u"'", u""))
                        for item in items_str.split(",")
                    ]
                    # Use whole list in CSIT-1180.
                    stats = jumpavg.AvgStdevStats.for_runs(items_float)
                    test_result[u"result"][u"samples"] = items_float
                    test_result[u"result"][u"receive-rate"] = stats.avg
                    test_result[u"result"][u"receive-stdev"] = stats.stdev
                else:
                    groups = re.search(self.REGEX_MRR, test.message)
                    test_result[u"result"][u"receive-rate"] = \
                        float(groups.group(3)) / float(groups.group(1))
        elif u"SOAK" in tags:
            test_result[u"type"] = u"SOAK"
            if test.status == u"PASS":
                test_result[u"throughput"], test_result[u"status"] = \
                    self._get_plr_throughput(test.message)
        elif u"HOSTSTACK" in tags:
            test_result[u"type"] = u"HOSTSTACK"
            if test.status == u"PASS":
                test_result[u"result"], test_result[u"status"] = \
                    self._get_hoststack_data(test.message, tags)
        elif u"LDP_NGINX" in tags:
            test_result[u"type"] = u"LDP_NGINX"
            test_result[u"result"], test_result[u"status"] = \
                self._get_vsap_data(test.message, tags)
        # elif u"TCP" in tags:  # This might not be used
        #     test_result[u"type"] = u"TCP"
        #     if test.status == u"PASS":
        #         groups = re.search(self.REGEX_TCP, test.message)
        #         test_result[u"result"] = int(groups.group(2))
        elif u"RECONF" in tags:
            test_result[u"type"] = u"RECONF"
            if test.status == u"PASS":
                test_result[u"result"] = None
                try:
                    grps_loss = re.search(self.REGEX_RECONF_LOSS, test.message)
                    grps_time = re.search(self.REGEX_RECONF_TIME, test.message)
                    test_result[u"result"] = {
                        u"loss": int(grps_loss.group(1)),
                        u"time": float(grps_time.group(1))
                    }
                except (AttributeError, IndexError, ValueError, TypeError):
                    test_result[u"status"] = u"FAIL"
        else:
            test_result[u"status"] = u"FAIL"

        self._data[u"tests"][self._test_id] = test_result

    def end_test(self, test):
        """Called when test ends.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """

    def visit_keyword(self, keyword):
        """Implements traversing through the keyword and its child keywords.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """
        if self.start_keyword(keyword) is not False:
            self.end_keyword(keyword)

    def start_keyword(self, keyword):
        """Called when keyword starts. Dispatches to the setup, teardown or
        test keyword visitor according to the keyword type.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """
        try:
            if keyword.type == u"setup":
                self.visit_setup_kw(keyword)
            elif keyword.type == u"teardown":
                self.visit_teardown_kw(keyword)
            else:
                self.visit_test_kw(keyword)
        except AttributeError:
            pass

    def end_keyword(self, keyword):
        """Called when keyword ends. Default implementation does nothing.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """

    def visit_test_kw(self, test_kw):
        """Implements traversing through the test keyword and its child
        keywords.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """
        for keyword in test_kw.keywords:
            if self.start_test_kw(keyword) is not False:
                self.visit_test_kw(keyword)
                self.end_test_kw(keyword)

    def start_test_kw(self, test_kw):
        """Called when test keyword starts. Sets the message type for
        keywords whose messages carry VPP runtime data; does nothing
        otherwise.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """
        if test_kw.name.count(u"Show Runtime On All Duts") or \
                test_kw.name.count(u"Show Runtime Counters On All Duts") or \
                test_kw.name.count(u"Vpp Show Runtime On All Duts"):
            self._msg_type = u"test-show-runtime"
            self._sh_run_counter += 1
        else:
            return
        test_kw.messages.visit(self)

    def end_test_kw(self, test_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """

    def visit_setup_kw(self, setup_kw):
        """Implements traversing through the setup keyword and its child
        keywords.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """
        for keyword in setup_kw.keywords:
            if self.start_setup_kw(keyword) is not False:
                self.visit_setup_kw(keyword)
                self.end_setup_kw(keyword)

    def start_setup_kw(self, setup_kw):
        """Called when setup keyword starts. Sets the message type for
        keywords whose messages carry version, timestamp or testbed data;
        does nothing otherwise.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """
        if setup_kw.name.count(u"Show Vpp Version On All Duts") \
                and not self._version:
            self._msg_type = u"vpp-version"
        elif setup_kw.name.count(u"Install Dpdk Framework On All Duts") and \
                not self._version:
            self._msg_type = u"dpdk-version"
        elif setup_kw.name.count(u"Set Global Variable") \
                and not self._timestamp:
            self._msg_type = u"timestamp"
        elif setup_kw.name.count(u"Setup Framework") and not self._testbed:
            self._msg_type = u"testbed"
        else:
            return
        setup_kw.messages.visit(self)

    def end_setup_kw(self, setup_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """

    def visit_teardown_kw(self, teardown_kw):
        """Implements traversing through the teardown keyword and its child
        keywords.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """
        for keyword in teardown_kw.keywords:
            if self.start_teardown_kw(keyword) is not False:
                self.visit_teardown_kw(keyword)
                self.end_teardown_kw(keyword)

    def start_teardown_kw(self, teardown_kw):
        """Called when teardown keyword starts.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """
        if teardown_kw.name.count(u"Show Papi History On All Duts"):
            self._conf_history_lookup_nr = 0
            self._msg_type = u"teardown-papi-history"
            teardown_kw.messages.visit(self)

    def end_teardown_kw(self, teardown_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """

    def visit_message(self, msg):
        """Implements visiting the message.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if self.start_message(msg) is not False:
            self.end_message(msg)

    def start_message(self, msg):
        """Called when message starts. Gets the required information from
        messages, e.g. the VPP version.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if self._msg_type:
            self.parse_msg[self._msg_type](msg)

    def end_message(self, msg):
        """Called when message ends. Default implementation does nothing.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """


class InputData:
    """Input data

    The data is extracted from output.xml files generated by Jenkins jobs and
    stored in pandas' DataFrames.

    The data structure:
    - job name
      - build number
        - metadata
          (as described in ExecutionChecker documentation)
        - suites
          (as described in ExecutionChecker documentation)
        - tests
          (as described in ExecutionChecker documentation)
    """

    def __init__(self, spec):
        """Initialization.

        :param spec: Specification.
        :type spec: Specification
        """

        # Specification:
        self._cfg = spec

        # Data store:
        self._input_data = pd.Series()

    @property
    def data(self):
        """Getter - Input data.

        :returns: Input data
        :rtype: pandas.Series
        """
        return self._input_data

    def metadata(self, job, build):
        """Getter - metadata

        :param job: Job which metadata we want.
        :param build: Build which metadata we want.
        :type job: str
        :type build: str
        :returns: Metadata
        :rtype: pandas.Series
        """
        return self.data[job][build][u"metadata"]

    def suites(self, job, build):
        """Getter - suites

        :param job: Job which suites we want.
        :param build: Build which suites we want.
        :type job: str
        :type build: str
        :returns: Suites.
        :rtype: pandas.Series
        """
        return self.data[job][str(build)][u"suites"]

    def tests(self, job, build):
        """Getter - tests

        :param job: Job which tests we want.
        :param build: Build which tests we want.
        :type job: str
        :type build: str
        :returns: Tests.
        :rtype: pandas.Series
        """
        return self.data[job][build][u"tests"]

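    # Illustrative access pattern (job and build names invented); the getters
    # above index the nested pandas Series built by download_and_parse_data():
    #
    #     in_data = InputData(spec)
    #     in_data.download_and_parse_data(repeat=2)
    #     tests = in_data.tests(u"csit-vpp-perf-example", u"1")
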
1427     def _parse_tests(self, job, build):
1428         """Process data from robot output.xml file and return JSON structured
1429         data.
1430
1431         :param job: The name of job which build output data will be processed.
1432         :param build: The build which output data will be processed.
1433         :type job: str
1434         :type build: dict
1435         :returns: JSON data structure.
1436         :rtype: dict
1437         """
1438
1439         metadata = {
1440             u"job": job,
1441             u"build": build
1442         }
1443
1444         with open(build[u"file-name"], u'r') as data_file:
1445             try:
1446                 result = ExecutionResult(data_file)
1447             except errors.DataError as err:
1448                 logging.error(
1449                     f"Error occurred while parsing output.xml: {repr(err)}"
1450                 )
1451                 return None
1452         checker = ExecutionChecker(
1453             metadata, self._cfg.mapping, self._cfg.ignore
1454         )
1455         result.visit(checker)
1456
1457         return checker.data
1458
1459     def _download_and_parse_build(self, job, build, repeat, pid=10000):
1460         """Download and parse the input data file.
1461
1462         :param pid: PID of the process executing this method.
1463         :param job: Name of the Jenkins job which generated the processed input
1464             file.
1465         :param build: Information about the Jenkins build which generated the
1466             processed input file.
1467         :param repeat: Repeat the download specified number of times if not
1468             successful.
1469         :type pid: int
1470         :type job: str
1471         :type build: dict
1472         :type repeat: int
1473         """
1474
1475         logging.info(f"Processing the job/build: {job}: {build[u'build']}")
1476
1477         state = u"failed"
1478         success = False
1479         data = None
1480         do_repeat = repeat
1481         while do_repeat:
1482             success = download_and_unzip_data_file(self._cfg, job, build, pid)
1483             if success:
1484                 break
1485             do_repeat -= 1
1486         if not success:
1487             logging.error(
1488                 f"It is not possible to download the input data file from the "
1489                 f"job {job}, build {build[u'build']}, or it is damaged. "
1490                 f"Skipped."
1491             )
1492         if success:
1493             logging.info(f"  Processing data from build {build[u'build']}")
1494             data = self._parse_tests(job, build)
1495             if data is None:
1496                 logging.error(
1497                     f"Input data file from the job {job}, build "
1498                     f"{build[u'build']} is damaged. Skipped."
1499                 )
1500             else:
1501                 state = u"processed"
1502
1503             try:
1504                 remove(build[u"file-name"])
1505             except OSError as err:
1506                 logging.error(
1507                     f"Cannot remove the file {build[u'file-name']}: {repr(err)}"
1508                 )
1509
        # If the time period is defined in the specification file, mark the
        # builds whose data was generated outside of it as removed and drop
        # their data.
1512         is_last = False
1513         timeperiod = self._cfg.environment.get(u"time-period", None)
1514         if timeperiod and data:
1515             now = dt.utcnow()
1516             timeperiod = timedelta(int(timeperiod))
1517             metadata = data.get(u"metadata", None)
1518             if metadata:
1519                 generated = metadata.get(u"generated", None)
1520                 if generated:
1521                     generated = dt.strptime(generated, u"%Y%m%d %H:%M")
1522                     if (now - generated) > timeperiod:
1523                         # Remove the data and the file:
1524                         state = u"removed"
1525                         data = None
1526                         is_last = True
1527                         logging.info(
1528                             f"  The build {job}/{build[u'build']} is "
                            f"outdated and will be removed."
1530                         )
1531         return {
1532             u"data": data,
1533             u"state": state,
1534             u"job": job,
1535             u"build": build,
1536             u"last": is_last
1537         }
1538
1539     def download_and_parse_data(self, repeat=1):
1540         """Download the input data files, parse input data from input files and
1541         store in pandas' Series.
1542
        :param repeat: Number of times the download is repeated if it is not
            successful.
1545         :type repeat: int
1546         """
1547
1548         logging.info(u"Downloading and parsing input files ...")
1549
1550         for job, builds in self._cfg.input.items():
1551             for build in builds:
1552
1553                 result = self._download_and_parse_build(job, build, repeat)
1554                 if result[u"last"]:
1555                     break
1556                 build_nr = result[u"build"][u"build"]
1557
1558                 if result[u"data"]:
1559                     data = result[u"data"]
1560                     build_data = pd.Series({
1561                         u"metadata": pd.Series(
1562                             list(data[u"metadata"].values()),
1563                             index=list(data[u"metadata"].keys())
1564                         ),
1565                         u"suites": pd.Series(
1566                             list(data[u"suites"].values()),
1567                             index=list(data[u"suites"].keys())
1568                         ),
1569                         u"tests": pd.Series(
1570                             list(data[u"tests"].values()),
1571                             index=list(data[u"tests"].keys())
1572                         )
1573                     })
1574
1575                     if self._input_data.get(job, None) is None:
1576                         self._input_data[job] = pd.Series()
1577                     self._input_data[job][str(build_nr)] = build_data
1578                     self._cfg.set_input_file_name(
1579                         job, build_nr, result[u"build"][u"file-name"]
1580                     )
1581                 self._cfg.set_input_state(job, build_nr, result[u"state"])
1582
1583                 mem_alloc = \
1584                     resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000
1585                 logging.info(f"Memory allocation: {mem_alloc:.0f}MB")
1586
1587         logging.info(u"Done.")
1588
        msg = u"Successful downloads from the sources:\n"
1590         for source in self._cfg.environment[u"data-sources"]:
1591             if source[u"successful-downloads"]:
1592                 msg += (
1593                     f"{source[u'url']}/{source[u'path']}/"
1594                     f"{source[u'file-name']}: "
1595                     f"{source[u'successful-downloads']}\n"
1596                 )
1597         logging.info(msg)
1598
1599     def process_local_file(self, local_file, job=u"local", build_nr=1,
1600                            replace=True):
1601         """Process local XML file given as a command-line parameter.
1602
1603         :param local_file: The file to process.
1604         :param job: Job name.
1605         :param build_nr: Build number.
1606         :param replace: If True, the information about jobs and builds is
1607             replaced by the new one, otherwise the new jobs and builds are
1608             added.
1609         :type local_file: str
1610         :type job: str
1611         :type build_nr: int
1612         :type replace: bool
1613         :raises: PresentationError if an error occurs.
1614         """
1615         if not isfile(local_file):
1616             raise PresentationError(f"The file {local_file} does not exist.")
1617
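        # If the file name stem is a number (e.g. u"15.xml"), use it as the
        # build number; otherwise keep the build_nr argument.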
1618         try:
1619             build_nr = int(local_file.split(u"/")[-1].split(u".")[0])
1620         except (IndexError, ValueError):
1621             pass
1622
1623         build = {
1624             u"build": build_nr,
1625             u"status": u"failed",
1626             u"file-name": local_file
1627         }
1628         if replace:
1629             self._cfg.input = dict()
1630         self._cfg.add_build(job, build)
1631
1632         logging.info(f"Processing {job}: {build_nr:2d}: {local_file}")
1633         data = self._parse_tests(job, build)
1634         if data is None:
1635             raise PresentationError(
1636                 f"Error occurred while parsing the file {local_file}"
1637             )
1638
1639         build_data = pd.Series({
1640             u"metadata": pd.Series(
1641                 list(data[u"metadata"].values()),
1642                 index=list(data[u"metadata"].keys())
1643             ),
1644             u"suites": pd.Series(
1645                 list(data[u"suites"].values()),
1646                 index=list(data[u"suites"].keys())
1647             ),
1648             u"tests": pd.Series(
1649                 list(data[u"tests"].values()),
1650                 index=list(data[u"tests"].keys())
1651             )
1652         })
1653
1654         if self._input_data.get(job, None) is None:
1655             self._input_data[job] = pd.Series()
1656         self._input_data[job][str(build_nr)] = build_data
1657
1658         self._cfg.set_input_state(job, build_nr, u"processed")
1659
1660     def process_local_directory(self, local_dir, replace=True):
        """Process a local directory with XML file(s). The directory is
        processed as a 'job' and the XML files in it as its builds.
        If the given directory contains only sub-directories, these
        sub-directories are processed as jobs and the corresponding XML files
        as builds of their jobs.
1666
1667         :param local_dir: Local directory to process.
1668         :param replace: If True, the information about jobs and builds is
1669             replaced by the new one, otherwise the new jobs and builds are
1670             added.
1671         :type local_dir: str
        :type replace: bool
        :raises: PresentationError if the directory does not exist, is empty,
            or contains both files and directories.
        """
1674         if not isdir(local_dir):
1675             raise PresentationError(
1676                 f"The directory {local_dir} does not exist."
1677             )
1678
1679         # Check if the given directory includes only files, or only directories
1680         _, dirnames, filenames = next(walk(local_dir))
1681
1682         if filenames and not dirnames:
1683             filenames.sort()
1684             # local_builds:
1685             # key: dir (job) name, value: list of file names (builds)
1686             local_builds = {
1687                 local_dir: [join(local_dir, name) for name in filenames]
1688             }
1689
1690         elif dirnames and not filenames:
1691             dirnames.sort()
1692             # local_builds:
1693             # key: dir (job) name, value: list of file names (builds)
1694             local_builds = dict()
1695             for dirname in dirnames:
1696                 builds = [
1697                     join(local_dir, dirname, name)
1698                     for name in listdir(join(local_dir, dirname))
1699                     if isfile(join(local_dir, dirname, name))
1700                 ]
1701                 if builds:
1702                     local_builds[dirname] = sorted(builds)
1703
1704         elif not filenames and not dirnames:
1705             raise PresentationError(f"The directory {local_dir} is empty.")
1706         else:
1707             raise PresentationError(
1708                 f"The directory {local_dir} can include only files or only "
1709                 f"directories, not both.\nThe directory {local_dir} includes "
1710                 f"file(s):\n{filenames}\nand directories:\n{dirnames}"
1711             )
1712
1713         if replace:
1714             self._cfg.input = dict()
1715
1716         for job, files in local_builds.items():
1717             for idx, local_file in enumerate(files):
1718                 self.process_local_file(local_file, job, idx + 1, replace=False)
1719
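    # Illustrative directory layouts accepted by process_local_directory();
    # all names are hypothetical:
    #
    #     local_dir/            local_dir/
    #         1.xml                 job_a/
    #         2.xml                     1.xml
    #                                   2.xml
    #                               job_b/
    #                                   1.xml
    #
    # The first layout makes local_dir a single job with two builds; the
    # second makes each sub-directory a job with its XML files as builds.
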
1720     @staticmethod
1721     def _end_of_tag(tag_filter, start=0, closer=u"'"):
        """Return the index of the character which closes the tag.

        :param tag_filter: The string in which the end of the tag is searched.
        :param start: The index at which the search is started.
        :param closer: The character which closes the tag.
        :type tag_filter: str
        :type start: int
        :type closer: str
        :returns: The index of the tag closer, or None if not found.
        :rtype: int
        """
1733         try:
1734             idx_opener = tag_filter.index(closer, start)
1735             return tag_filter.index(closer, idx_opener + 1)
1736         except ValueError:
1737             return None
1738
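    # Illustrative behaviour of _end_of_tag(); the sample filter string is
    # hypothetical:
    #
    #     InputData._end_of_tag(u"'NDRPDR' and '64B'")     # -> 7
    #     InputData._end_of_tag(u"'NDRPDR' and '64B'", 8)  # -> 17
    #     InputData._end_of_tag(u"no tags here")           # -> None
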
1739     @staticmethod
1740     def _condition(tag_filter):
1741         """Create a conditional statement from the given tag filter.
1742
1743         :param tag_filter: Filter based on tags from the element specification.
1744         :type tag_filter: str
1745         :returns: Conditional statement which can be evaluated.
1746         :rtype: str
1747         """
1748         index = 0
1749         while True:
1750             index = InputData._end_of_tag(tag_filter, index)
1751             if index is None:
1752                 return tag_filter
1753             index += 1
1754             tag_filter = tag_filter[:index] + u" in tags" + tag_filter[index:]
1755
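    # Illustrative transformation performed by _condition(); the filter
    # string is hypothetical:
    #
    #     InputData._condition(u"'NDRPDR' and '64B'")
    #     # -> "'NDRPDR' in tags and '64B' in tags"
    #
    # The returned expression is later eval()-ed with the test's tags bound
    # to the name u"tags".
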
1756     def filter_data(self, element, params=None, data=None, data_set=u"tests",
1757                     continue_on_error=False):
1758         """Filter required data from the given jobs and builds.
1759
1760         The output data structure is:
1761         - job 1
1762           - build 1
1763             - test (or suite) 1 ID:
1764               - param 1
1765               - param 2
1766               ...
1767               - param n
1768             ...
1769             - test (or suite) n ID:
1770             ...
1771           ...
1772           - build n
1773         ...
1774         - job n
1775
1776         :param element: Element which will use the filtered data.
1777         :param params: Parameters which will be included in the output. If None,
1778             all parameters are included.
1779         :param data: If not None, this data is used instead of data specified
1780             in the element.
1781         :param data_set: The set of data to be filtered: tests, suites,
1782             metadata.
        :param continue_on_error: Continue if there is an error while reading
            the data. The item will be empty then.
1785         :type element: pandas.Series
1786         :type params: list
1787         :type data: dict
1788         :type data_set: str
1789         :type continue_on_error: bool
1790         :returns: Filtered data.
        :rtype: pandas.Series
1792         """
1793
1794         try:
            if data_set == u"suites":
1796                 cond = u"True"
1797             elif element[u"filter"] in (u"all", u"template"):
1798                 cond = u"True"
1799             else:
1800                 cond = InputData._condition(element[u"filter"])
1801             logging.debug(f"   Filter: {cond}")
1802         except KeyError:
1803             logging.error(u"  No filter defined.")
1804             return None
1805
1806         if params is None:
1807             params = element.get(u"parameters", None)
1808             if params:
1809                 params.extend((u"type", u"status"))
1810
1811         data_to_filter = data if data else element[u"data"]
1812         data = pd.Series()
1813         try:
1814             for job, builds in data_to_filter.items():
1815                 data[job] = pd.Series()
1816                 for build in builds:
1817                     data[job][str(build)] = pd.Series()
1818                     try:
1819                         data_dict = dict(
1820                             self.data[job][str(build)][data_set].items())
1821                     except KeyError:
1822                         if continue_on_error:
1823                             continue
1824                         return None
1825
1826                     for test_id, test_data in data_dict.items():
1827                         if eval(cond, {u"tags": test_data.get(u"tags", u"")}):
1828                             data[job][str(build)][test_id] = pd.Series()
1829                             if params is None:
1830                                 for param, val in test_data.items():
1831                                     data[job][str(build)][test_id][param] = val
1832                             else:
1833                                 for param in params:
1834                                     try:
1835                                         data[job][str(build)][test_id][param] =\
1836                                             test_data[param]
1837                                     except KeyError:
1838                                         data[job][str(build)][test_id][param] =\
1839                                             u"No Data"
1840             return data
1841
1842         except (KeyError, IndexError, ValueError) as err:
1843             logging.error(
1844                 f"Missing mandatory parameter in the element specification: "
1845                 f"{repr(err)}"
1846             )
1847             return None
1848         except AttributeError as err:
1849             logging.error(repr(err))
1850             return None
1851         except SyntaxError as err:
1852             logging.error(
1853                 f"The filter {cond} is not correct. Check if all tags are "
1854                 f"enclosed by apostrophes.\n{repr(err)}"
1855             )
1856             return None
1857
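    # Illustrative element specification consumed by filter_data(); all
    # values are hypothetical:
    #
    #     element = {
    #         u"filter": u"'NDRPDR' and '64B'",
    #         u"parameters": [u"name", u"throughput"],
    #         u"data": {u"csit-vpp-perf-report-iterative": [1, 2]}
    #     }
    #
    # The filter is rewritten by _condition() and eval()-ed against each
    # test's tags; u"type" and u"status" are appended to the parameters
    # taken from the element.
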
1858     def filter_tests_by_name(self, element, params=None, data_set=u"tests",
1859                              continue_on_error=False):
1860         """Filter required data from the given jobs and builds.
1861
1862         The output data structure is:
1863         - job 1
1864           - build 1
1865             - test (or suite) 1 ID:
1866               - param 1
1867               - param 2
1868               ...
1869               - param n
1870             ...
1871             - test (or suite) n ID:
1872             ...
1873           ...
1874           - build n
1875         ...
1876         - job n
1877
1878         :param element: Element which will use the filtered data.
        :param params: Parameters which will be included in the output. If
            None, all parameters are included.
        :param data_set: The set of data to be filtered: tests, suites,
            metadata.
        :param continue_on_error: Continue if there is an error while reading
            the data. The item will be empty then.
        :type element: pandas.Series
        :type params: list
        :type data_set: str
        :type continue_on_error: bool
        :returns: Filtered data.
        :rtype: pandas.Series
1891         """
1892
1893         include = element.get(u"include", None)
1894         if not include:
1895             logging.warning(u"No tests to include, skipping the element.")
1896             return None
1897
1898         if params is None:
1899             params = element.get(u"parameters", None)
1900             if params and u"type" not in params:
1901                 params.append(u"type")
1902
1903         cores = element.get(u"core", None)
1904         if cores:
1905             tests = list()
1906             for core in cores:
1907                 for test in include:
1908                     tests.append(test.format(core=core))
1909         else:
1910             tests = include
1911
1912         data = pd.Series()
1913         try:
1914             for job, builds in element[u"data"].items():
1915                 data[job] = pd.Series()
1916                 for build in builds:
1917                     data[job][str(build)] = pd.Series()
1918                     for test in tests:
1919                         try:
1920                             reg_ex = re.compile(str(test).lower())
1921                             for test_id in self.data[job][
1922                                     str(build)][data_set].keys():
1923                                 if re.match(reg_ex, str(test_id).lower()):
1924                                     test_data = self.data[job][
1925                                         str(build)][data_set][test_id]
1926                                     data[job][str(build)][test_id] = pd.Series()
1927                                     if params is None:
1928                                         for param, val in test_data.items():
1929                                             data[job][str(build)][test_id]\
1930                                                 [param] = val
1931                                     else:
1932                                         for param in params:
1933                                             try:
1934                                                 data[job][str(build)][
1935                                                     test_id][param] = \
1936                                                     test_data[param]
1937                                             except KeyError:
1938                                                 data[job][str(build)][
1939                                                     test_id][param] = u"No Data"
1940                         except KeyError as err:
1941                             if continue_on_error:
1942                                 logging.debug(repr(err))
1943                                 continue
1944                             logging.error(repr(err))
1945                             return None
1946             return data
1947
1948         except (KeyError, IndexError, ValueError) as err:
1949             logging.error(
1950                 f"Missing mandatory parameter in the element "
1951                 f"specification: {repr(err)}"
1952             )
1953             return None
1954         except AttributeError as err:
1955             logging.error(repr(err))
1956             return None
1957
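    # Illustrative u"include" expansion in filter_tests_by_name(); the test
    # name patterns and core counts are hypothetical:
    #
    #     element = {
    #         u"include": [u".*ethip4-ip4base-{core}-ndrpdr.*"],
    #         u"core": [u"1c", u"2c"]
    #     }
    #     # -> regexes u".*ethip4-ip4base-1c-ndrpdr.*" and
    #     #    u".*ethip4-ip4base-2c-ndrpdr.*", matched against lower-cased
    #     #    test IDs via re.match().
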
1958     @staticmethod
1959     def merge_data(data):
        """Merge data from multiple jobs and builds into one data structure.
1961
1962         The output data structure is:
1963
1964         - test (suite) 1 ID:
1965           - param 1
1966           - param 2
1967           ...
1968           - param n
1969         ...
1970         - test (suite) n ID:
1971         ...
1972
1973         :param data: Data to merge.
1974         :type data: pandas.Series
1975         :returns: Merged data.
1976         :rtype: pandas.Series
1977         """
1978
1979         logging.info(u"    Merging data ...")
1980
1981         merged_data = pd.Series()
1982         for builds in data.values:
1983             for item in builds.values:
1984                 for item_id, item_data in item.items():
1985                     merged_data[item_id] = item_data
1986         return merged_data
1987
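    # Note on merge_data(): if the same test (suite) ID occurs in several
    # jobs or builds, the record processed last overwrites the earlier ones:
    #
    #     merged = InputData.merge_data(filtered_data)  # names illustrative
    #     # merged[test_id] holds the last-seen record for test_id.
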
1988     def print_all_oper_data(self):
        """Print all operational data to the console."""
1991
1992         for job in self._input_data.values:
1993             for build in job.values:
1994                 for test_id, test_data in build[u"tests"].items():
1995                     print(f"{test_id}")
1996                     if test_data.get(u"show-run", None) is None:
1997                         continue
1998                     for dut_name, data in test_data[u"show-run"].items():
1999                         if data.get(u"runtime", None) is None:
2000                             continue
2001                         try:
2002                             threads_nr = len(data[u"runtime"][0][u"clocks"])
2003                         except (IndexError, KeyError):
2004                             continue
2005                         threads = OrderedDict(
2006                             {idx: list() for idx in range(threads_nr)})
2007                         for item in data[u"runtime"]:
2008                             for idx in range(threads_nr):
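                                # Normalise clocks to a per-unit value:
                                # prefer clocks per vector (packet), then per
                                # call, then per suspend; 0.0 if nothing ran.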
2009                                 if item[u"vectors"][idx] > 0:
2010                                     clocks = item[u"clocks"][idx] / \
2011                                              item[u"vectors"][idx]
2012                                 elif item[u"calls"][idx] > 0:
2013                                     clocks = item[u"clocks"][idx] / \
2014                                              item[u"calls"][idx]
2015                                 elif item[u"suspends"][idx] > 0:
2016                                     clocks = item[u"clocks"][idx] / \
2017                                              item[u"suspends"][idx]
2018                                 else:
2019                                     clocks = 0.0
2020
2021                                 if item[u"calls"][idx] > 0:
2022                                     vectors_call = item[u"vectors"][idx] / \
2023                                                    item[u"calls"][idx]
2024                                 else:
2025                                     vectors_call = 0.0
2026
                                if int(item[u"calls"][idx]) + \
                                        int(item[u"vectors"][idx]) + \
                                        int(item[u"suspends"][idx]):
2030                                     threads[idx].append([
2031                                         item[u"name"],
2032                                         item[u"calls"][idx],
2033                                         item[u"vectors"][idx],
2034                                         item[u"suspends"][idx],
2035                                         clocks,
2036                                         vectors_call
2037                                     ])
2038
2039                         print(f"Host IP: {data.get(u'host', '')}, "
2040                               f"Socket: {data.get(u'socket', '')}")
2041                         for thread_nr, thread in threads.items():
2042                             txt_table = prettytable.PrettyTable(
2043                                 (
2044                                     u"Name",
                                    u"Nr of Calls",
                                    u"Nr of Vectors",
2047                                     u"Suspends",
2048                                     u"Cycles per Packet",
2049                                     u"Average Vector Size"
2050                                 )
2051                             )
2052                             avg = 0.0
2053                             for row in thread:
2054                                 txt_table.add_row(row)
2055                                 avg += row[-1]
2056                             if len(thread) == 0:
2057                                 avg = u""
2058                             else:
2059                                 avg = f", Average Vector Size per Node: " \
2060                                       f"{(avg / len(thread)):.2f}"
2061                             th_name = u"main" if thread_nr == 0 \
2062                                 else f"worker_{thread_nr}"
2063                             print(f"{dut_name}, {th_name}{avg}")
2064                             txt_table.float_format = u".2"
2065                             txt_table.align = u"r"
2066                             txt_table.align[u"Name"] = u"l"
2067                             print(f"{txt_table.get_string()}\n")