PAL: Add HDRH data to detailed test results
resources/tools/presentation/input_data_parser.py
1 # Copyright (c) 2019 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
5 #
6 #     http://www.apache.org/licenses/LICENSE-2.0
7 #
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13
14 """Data pre-processing
15
16 - extract data from output.xml files generated by Jenkins jobs and store in
17   pandas' Series,
18 - provide access to the data,
19 - filter the data using tags.
20 """
21
22 import re
23 import copy
24 import resource
25 import logging
26
27 from collections import OrderedDict
28 from os import remove
29 from datetime import datetime as dt
30 from datetime import timedelta
31 from json import loads
32
33 import hdrh.histogram
34 import hdrh.codec
35 import prettytable
36 import pandas as pd
37
38 from robot.api import ExecutionResult, ResultVisitor
39 from robot import errors
40
41 from resources.libraries.python import jumpavg
42 from input_data_files import download_and_unzip_data_file
43
44
45 # Separator used in file names
46 SEPARATOR = u"__"
47
48
49 class ExecutionChecker(ResultVisitor):
50     """Class to traverse through the test suite structure.
51
52     The functionality implemented in this class generates a JSON structure:
53
54     Performance tests:
55
56     {
57         "metadata": {
58             "generated": "Timestamp",
59             "version": "SUT version",
60             "job": "Jenkins job name",
61             "build": "Information about the build"
62         },
63         "suites": {
64             "Suite long name 1": {
65                 "name": "Suite name",
66                 "doc": "Suite 1 documentation",
67                 "parent": "Suite 1 parent",
68                 "level": "Level of the suite in the suite hierarchy"
69             },
70             "Suite long name N": {
71                 "name": "Suite name",
72                 "doc": "Suite N documentation",
73                 "parent": "Suite N parent",
74                 "level": "Level of the suite in the suite hierarchy"
75             }
76         },
77         "tests": {
78             # NDRPDR tests:
79             "ID": {
80                 "name": "Test name",
81                 "parent": "Name of the parent of the test",
82                 "doc": "Test documentation",
83                 "msg": "Test message",
84                 "conf-history": "DUT1 and DUT2 VAT History",
85                 "show-run": "Show Run",
86                 "tags": ["tag 1", "tag 2", "tag n"],
87                 "type": "NDRPDR",
88                 "status": "PASS" | "FAIL",
89                 "throughput": {
90                     "NDR": {
91                         "LOWER": float,
92                         "UPPER": float
93                     },
94                     "PDR": {
95                         "LOWER": float,
96                         "UPPER": float
97                     }
98                 },
99                 "latency": {
100                     "NDR": {
101                         "direction1": {
102                             "min": float,
103                             "avg": float,
104                             "max": float,
105                             "hdrh": str
106                         },
107                         "direction2": {
108                             "min": float,
109                             "avg": float,
110                             "max": float,
111                             "hdrh": str
112                         }
113                     },
114                     "PDR": {
115                         "direction1": {
116                             "min": float,
117                             "avg": float,
118                             "max": float,
119                             "hdrh": str
120                         },
121                         "direction2": {
122                             "min": float,
123                             "avg": float,
124                             "max": float,
125                             "hdrh": str
126                         }
127                     }
128                 }
129             }
130
131             # TCP tests:
132             "ID": {
133                 "name": "Test name",
134                 "parent": "Name of the parent of the test",
135                 "doc": "Test documentation",
136                 "msg": "Test message",
137                 "tags": ["tag 1", "tag 2", "tag n"],
138                 "type": "TCP",
139                 "status": "PASS" | "FAIL",
140                 "result": int
141             }
142
143             # MRR, BMRR tests:
144             "ID": {
145                 "name": "Test name",
146                 "parent": "Name of the parent of the test",
147                 "doc": "Test documentation",
148                 "msg": "Test message",
149                 "tags": ["tag 1", "tag 2", "tag n"],
150                 "type": "MRR" | "BMRR",
151                 "status": "PASS" | "FAIL",
152                 "result": {
153                     "receive-rate": float,
154                     # Average of a list, computed using AvgStdevStats.
155                     # In CSIT-1180, replace with List[float].
156                 }
157             }
158
159             "ID": {
160                 # next test
161             }
162         }
163     }
164
165
166     Functional tests:
167
168     {
169         "metadata": {  # Optional
170             "version": "VPP version",
171             "job": "Jenkins job name",
172             "build": "Information about the build"
173         },
174         "suites": {
175             "Suite name 1": {
176                 "doc": "Suite 1 documentation",
177                 "parent": "Suite 1 parent",
178                 "level": "Level of the suite in the suite hierarchy"
179             },
180             "Suite name N": {
181                 "doc": "Suite N documentation",
182                 "parent": "Suite N parent",
183                 "level": "Level of the suite in the suite hierarchy"
184             }
185         },
186         "tests": {
187             "ID": {
188                 "name": "Test name",
189                 "parent": "Name of the parent of the test",
190                 "doc": "Test documentation",
191                 "msg": "Test message",
192                 "tags": ["tag 1", "tag 2", "tag n"],
193                 "conf-history": "DUT1 and DUT2 VAT History",
194                 "show-run": "Show Run",
195                 "status": "PASS" | "FAIL"
196             },
197             "ID": {
198                 # next test
199             }
200         }
201     }
202
203     .. note:: ID is the lowercase full path to the test.
204     """
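
    # Illustrative usage sketch (not part of this module; the file name and
    # metadata values below are hypothetical):
    #
    #     from robot.api import ExecutionResult
    #
    #     checker = ExecutionChecker(
    #         metadata={u"job": u"some-jenkins-job", u"build": u"1"},
    #         mapping=dict(),
    #         ignore=list()
    #     )
    #     result = ExecutionResult(u"output.xml")
    #     result.visit(checker)
    #     data = checker.data  # dict with u"metadata", u"suites", u"tests"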
205
206     REGEX_PLR_RATE = re.compile(
207         r'PLRsearch lower bound::?\s(\d+\.\d+).*\n'
208         r'PLRsearch upper bound::?\s(\d+\.\d+)'
209     )
210     REGEX_NDRPDR_RATE = re.compile(
211         r'NDR_LOWER:\s(\d+\.\d+).*\n.*\n'
212         r'NDR_UPPER:\s(\d+\.\d+).*\n'
213         r'PDR_LOWER:\s(\d+\.\d+).*\n.*\n'
214         r'PDR_UPPER:\s(\d+\.\d+)'
215     )
216     REGEX_PERF_MSG_INFO = re.compile(
217         r'NDR_LOWER:\s(\d+\.\d+)\s([a-zA-Z]*).*\s(\d+\.\d+)\s([a-zA-Z]*).*\n'
218         r'LATENCY.*\[\'(.*)\', \'(.*)\'\].*\n'
219         r'NDR_UPPER:\s(\d+\.\d+)\s([a-zA-Z]*).*\s(\d+\.\d+)\s([a-zA-Z]*).*\n'
220         r'PDR_LOWER:\s(\d+\.\d+)\s([a-zA-Z]*).*\s(\d+\.\d+)\s([a-zA-Z]*).*\n'
221         r'LATENCY.*\[\'(.*)\', \'(.*)\'\].*\n'
222         r'PDR_UPPER:\s(\d+\.\d+)\s([a-zA-Z]*).*\s(\d+\.\d+)\s([a-zA-Z]*)'
223     )
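    # REGEX_PERF_MSG_INFO is expected to match a test message of roughly this
    # shape (all values and the latency strings below are illustrative only):
    #
    #     NDR_LOWER: 12340000.00 pps, 56.78 Gbps
    #     LATENCY ... ['10/15/25/HISTF...', '10/16/27/HISTF...'] ...
    #     NDR_UPPER: 12560000.00 pps, 57.89 Gbps
    #     PDR_LOWER: 13340000.00 pps, 60.12 Gbps
    #     LATENCY ... ['12/18/30/HISTF...', '12/19/33/HISTF...'] ...
    #     PDR_UPPER: 13560000.00 pps, 61.23 Gbps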
224     # TODO: Remove when not needed
225     REGEX_NDRPDR_LAT_BASE = re.compile(
226         r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
227         r'LATENCY.*\[\'(.*)\', \'(.*)\'\]'
228     )
229     REGEX_NDRPDR_LAT = re.compile(
230         r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
231         r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n'
232         r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
233         r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
234         r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
235         r'Latency.*\[\'(.*)\', \'(.*)\'\]'
236     )
237     # TODO: Remove when not needed
238     REGEX_NDRPDR_LAT_LONG = re.compile(
239         r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
240         r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n'
241         r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
242         r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
243         r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
244         r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
245         r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
246         r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
247         r'Latency.*\[\'(.*)\', \'(.*)\'\]'
248     )
249     REGEX_VERSION_VPP = re.compile(
250         r"(return STDOUT Version:\s*|"
251         r"VPP Version:\s*|VPP version:\s*)(.*)"
252     )
253     REGEX_VERSION_DPDK = re.compile(
254         r"(DPDK version:\s*|DPDK Version:\s*)(.*)"
255     )
256     REGEX_TCP = re.compile(
257         r'Total\s(rps|cps|throughput):\s(\d*).*$'
258     )
259     REGEX_MRR = re.compile(
260         r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s'
261         r'tx\s(\d*),\srx\s(\d*)'
262     )
263     REGEX_BMRR = re.compile(
264         r'Maximum Receive Rate trial results'
265         r' in packets per second: \[(.*)\]'
266     )
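    # REGEX_BMRR matches a single-line message such as (illustrative values):
    #     Maximum Receive Rate trial results in packets per second:
    #     [11545544.0, 11570959.0, 11557564.0]
    # (the message is one line; it is wrapped here for readability).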
267     REGEX_RECONF_LOSS = re.compile(
268         r'Packets lost due to reconfig: (\d*)'
269     )
270     REGEX_RECONF_TIME = re.compile(
271         r'Implied time lost: (\d*\.[\de-]*)'
272     )
273     REGEX_TC_TAG = re.compile(r'\d+[tT]\d+[cC]')
274
275     REGEX_TC_NAME_OLD = re.compile(r'-\d+[tT]\d+[cC]-')
276
277     REGEX_TC_NAME_NEW = re.compile(r'-\d+[cC]-')
278
279     REGEX_TC_NUMBER = re.compile(r'tc\d{2}-')
280
281     REGEX_TC_PAPI_CLI = re.compile(r'.*\((\d+\.\d+\.\d+\.\d+.) - (.*)\)')
282
283     def __init__(self, metadata, mapping, ignore):
284         """Initialisation.
285
286         :param metadata: Key-value pairs to be included in "metadata" part of
287             JSON structure.
288         :param mapping: Mapping of the old names of test cases to the new
289             (actual) one.
290         :param ignore: List of TCs to be ignored.
291         :type metadata: dict
292         :type mapping: dict
293         :type ignore: list
294         """
295
296         # Type of message to parse out from the test messages
297         self._msg_type = None
298
299         # VPP version
300         self._version = None
301
302         # Timestamp
303         self._timestamp = None
304
305         # Testbed. The testbed is identified by TG node IP address.
306         self._testbed = None
307
308         # Mapping of TCs long names
309         self._mapping = mapping
310
311         # Ignore list
312         self._ignore = ignore
313
314         # Number of PAPI History messages found:
315         # 0 - no message
316         # 1 - PAPI History of DUT1
317         # 2 - PAPI History of DUT2
318         self._conf_history_lookup_nr = 0
319
320         # Test ID of the currently processed test - the lowercase full path
321         # to the test.
322         self._test_id = None
323
324         # The main data structure
325         self._data = {
326             u"metadata": OrderedDict(),
327             u"suites": OrderedDict(),
328             u"tests": OrderedDict()
329         }
330
331         # Save the provided metadata
332         for key, val in metadata.items():
333             self._data[u"metadata"][key] = val
334
335         # Dictionary defining the methods used to parse different types of
336         # messages
337         self.parse_msg = {
338             u"timestamp": self._get_timestamp,
339             u"vpp-version": self._get_vpp_version,
340             u"dpdk-version": self._get_dpdk_version,
341             # TODO: Remove when not needed:
342             u"teardown-vat-history": self._get_vat_history,
343             u"teardown-papi-history": self._get_papi_history,
344             u"test-show-runtime": self._get_show_run,
345             u"testbed": self._get_testbed
346         }
347
348     @property
349     def data(self):
350         """Getter - Data parsed from the XML file.
351
352         :returns: Data parsed from the XML file.
353         :rtype: dict
354         """
355         return self._data
356
357     def _get_data_from_perf_test_msg(self, msg):
358         """Get
359             - NDR_LOWER
360             - LATENCY
361             - NDR_UPPER
362             - PDR_LOWER
363             - LATENCY
364             - PDR_UPPER
365         from the message of NDRPDR performance tests.
366
367         :param msg: Message to be processed.
368         :type msg: str
369         :returns: Processed message or original message if a problem occurs.
370         :rtype: str
371         """
372
373         groups = re.search(self.REGEX_PERF_MSG_INFO, msg)
374         if not groups or groups.lastindex != 20:
375             return msg
376
377         try:
378             data = {
379                 u"ndr_low": float(groups.group(1)),
380                 u"ndr_low_unit": groups.group(2),
381                 u"ndr_low_b": float(groups.group(3)),
382                 u"ndr_low_b_unit": groups.group(4),
383                 u"ndr_lat_1": groups.group(5),
384                 u"ndr_lat_2": groups.group(6),
385                 u"ndr_up": float(groups.group(7)),
386                 u"ndr_up_unit": groups.group(8),
387                 u"ndr_up_b": float(groups.group(9)),
388                 u"ndr_up_b_unit": groups.group(10),
389                 u"pdr_low": float(groups.group(11)),
390                 u"pdr_low_unit": groups.group(12),
391                 u"pdr_low_b": float(groups.group(13)),
392                 u"pdr_low_b_unit": groups.group(14),
393                 u"pdr_lat_1": groups.group(15),
394                 u"pdr_lat_2": groups.group(16),
395                 u"pdr_up": float(groups.group(17)),
396                 u"pdr_up_unit": groups.group(18),
397                 u"pdr_up_b": float(groups.group(19)),
398                 u"pdr_up_b_unit": groups.group(20)
399             }
400         except (AttributeError, IndexError, ValueError, KeyError):
401             return msg
402
403         def _process_lat(in_str_1, in_str_2):
404             """Extract min, avg, max values from latency string.
405
406             :param in_str_1: Latency string for one direction produced by
407                 Robot Framework.
408             :param in_str_2: Latency string for the second direction
409                 produced by Robot Framework.
410             :type in_str_1: str
411             :type in_str_2: str
412             :returns: Processed latency string or original string if a problem
413                 occurs.
414             :rtype: tuple(str, str)
415             """
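            # An input string looks like u"10/15/25/HISTFAAAAE..." (values
            # illustrative): min/avg/max latency, optionally followed by a
            # base64-encoded HdrHistogram blob without padding.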
416             in_list_1 = in_str_1.split('/', 3)
417             if len(in_list_1) < 3:
418                 return u"Not Measured.", u"Not Measured."
419
420             in_list_2 = in_str_2.split('/', 3)
421             if len(in_list_2) < 3:
422                 return u"Not Measured.", u"Not Measured."
423
424             hdr_lat_1 = u""
425             if len(in_list_1) == 4:
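                # Pad the base64-encoded hdrh blob to a multiple of four
                # characters, as the base64 decoder requires.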
426                 in_list_1[3] += u"=" * ((4 - len(in_list_1[3]) % 4) % 4)
427                 try:
428                     hdr_lat_1 = hdrh.histogram.HdrHistogram.decode(in_list_1[3])
429                 except hdrh.codec.HdrLengthException:
430                     pass
431             hdr_lat_2 = u""
432             if len(in_list_2) == 4:
433                 in_list_2[3] += u"=" * ((4 - len(in_list_2[3]) % 4) % 4)
434                 try:
435                     hdr_lat_2 = hdrh.histogram.HdrHistogram.decode(in_list_2[3])
436                 except hdrh.codec.HdrLengthException:
437                     pass
438
439             hdr_lat = u"Not Measured."
440             if hdr_lat_1 and hdr_lat_2:
441                 hdr_lat = (
442                     f"50%/90%/99%/99.9%, "
443                     f"{hdr_lat_1.get_value_at_percentile(50.0)}/"
444                     f"{hdr_lat_1.get_value_at_percentile(90.0)}/"
445                     f"{hdr_lat_1.get_value_at_percentile(99.0)}/"
446                     f"{hdr_lat_1.get_value_at_percentile(99.9)}, "
447                     f"{hdr_lat_2.get_value_at_percentile(50.0)}/"
448                     f"{hdr_lat_2.get_value_at_percentile(90.0)}/"
449                     f"{hdr_lat_2.get_value_at_percentile(99.0)}/"
450                     f"{hdr_lat_2.get_value_at_percentile(99.9)} "
451                     f"uSec."
452                 )
453
454             return (
455                 f"Min/Avg/Max, "
456                 f"{in_list_1[0]}/{in_list_1[1]}/{in_list_1[2]}, "
457                 f"{in_list_2[0]}/{in_list_2[1]}/{in_list_2[2]} uSec.",
458                 hdr_lat
459             )
460
461         try:
462             pdr_lat = _process_lat(data[u'pdr_lat_1'], data[u'pdr_lat_2'])
463             ndr_lat = _process_lat(data[u'ndr_lat_1'], data[u'ndr_lat_2'])
464             return (
465                 f"NDR Throughput: {(data[u'ndr_low'] / 1e6):.2f} "
466                 f"M{data[u'ndr_low_unit']}, "
467                 f"{data[u'ndr_low_b']:.2f} {data[u'ndr_low_b_unit']}.\n"
468                 f"One-Way Latency at NDR: {ndr_lat[0]}\n"
469                 f"One-Way Latency at NDR by percentiles: {ndr_lat[1]}\n"
470                 f"PDR Throughput: {(data[u'pdr_low'] / 1e6):.2f} "
471                 f"M{data[u'pdr_low_unit']}, "
472                 f"{data[u'pdr_low_b']:.2f} {data[u'pdr_low_b_unit']}.\n"
473                 f"One-Way Latency at PDR: {pdr_lat[0]}\n"
474                 f"One-Way Latency at PDR by percentiles: {pdr_lat[1]}"
475             )
476         except (AttributeError, IndexError, ValueError, KeyError):
477             return msg
478
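    # For a message matching REGEX_PERF_MSG_INFO, the method above returns
    # text of roughly this shape (all values illustrative):
    #
    #     NDR Throughput: 12.34 Mpps, 56.78 Gbps.
    #     One-Way Latency at NDR: Min/Avg/Max, 10/15/25, 10/16/27 uSec.
    #     One-Way Latency at NDR by percentiles: 50%/90%/99%/99.9%, 12/18/30/45, 13/19/32/48 uSec.
    #     PDR Throughput: 13.34 Mpps, 60.12 Gbps.
    #     One-Way Latency at PDR: Min/Avg/Max, 12/18/30, 12/19/33 uSec.
    #     One-Way Latency at PDR by percentiles: 50%/90%/99%/99.9%, 14/20/34/50, 15/21/36/52 uSec.
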
479     def _get_testbed(self, msg):
480         """Called when extraction of testbed IP is required.
481         The testbed is identified by TG node IP address.
482
483         :param msg: Message to process.
484         :type msg: Message
485         :returns: Nothing.
486         """
487
488         if msg.message.count(u"Setup of TG node") or \
489                 msg.message.count(u"Setup of node TG host"):
490             reg_tg_ip = re.compile(
491                 r'.*TG .* (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).*')
492             try:
493                 self._testbed = str(re.search(reg_tg_ip, msg.message).group(1))
494             except (KeyError, ValueError, IndexError, AttributeError):
495                 pass
496             finally:
497                 self._data[u"metadata"][u"testbed"] = self._testbed
498                 self._msg_type = None
499
500     def _get_vpp_version(self, msg):
501         """Called when extraction of VPP version is required.
502
503         :param msg: Message to process.
504         :type msg: Message
505         :returns: Nothing.
506         """
507
508         if msg.message.count(u"return STDOUT Version:") or \
509             msg.message.count(u"VPP Version:") or \
510             msg.message.count(u"VPP version:"):
511             self._version = str(re.search(self.REGEX_VERSION_VPP, msg.message).
512                                 group(2))
513             self._data[u"metadata"][u"version"] = self._version
514             self._msg_type = None
515
516     def _get_dpdk_version(self, msg):
517         """Called when extraction of DPDK version is required.
518
519         :param msg: Message to process.
520         :type msg: Message
521         :returns: Nothing.
522         """
523
524         if msg.message.count(u"DPDK Version:"):
525             try:
526                 self._version = str(re.search(
527                     self.REGEX_VERSION_DPDK, msg.message).group(2))
528                 self._data[u"metadata"][u"version"] = self._version
529             except IndexError:
530                 pass
531             finally:
532                 self._msg_type = None
533
534     def _get_timestamp(self, msg):
535         """Called when extraction of timestamp is required.
536
537         :param msg: Message to process.
538         :type msg: Message
539         :returns: Nothing.
540         """
541
542         self._timestamp = msg.timestamp[:14]
543         self._data[u"metadata"][u"generated"] = self._timestamp
544         self._msg_type = None
545
546     def _get_vat_history(self, msg):
547         """Called when extraction of VAT command history is required.
548
549         TODO: Remove when not needed.
550
551         :param msg: Message to process.
552         :type msg: Message
553         :returns: Nothing.
554         """
555         if msg.message.count(u"VAT command history:"):
556             self._conf_history_lookup_nr += 1
557             if self._conf_history_lookup_nr == 1:
558                 self._data[u"tests"][self._test_id][u"conf-history"] = str()
559             else:
560                 self._msg_type = None
561             text = re.sub(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} "
562                           r"VAT command history:", u"",
563                           msg.message, count=1).replace(u'\n', u' |br| ').\
564                 replace(u'"', u"'")
565
566             self._data[u"tests"][self._test_id][u"conf-history"] += (
567                 f" |br| **DUT{str(self._conf_history_lookup_nr)}:** {text}"
568             )
569
570     def _get_papi_history(self, msg):
571         """Called when extraction of PAPI command history is required.
572
573         :param msg: Message to process.
574         :type msg: Message
575         :returns: Nothing.
576         """
577         if msg.message.count(u"PAPI command history:"):
578             self._conf_history_lookup_nr += 1
579             if self._conf_history_lookup_nr == 1:
580                 self._data[u"tests"][self._test_id][u"conf-history"] = str()
581             else:
582                 self._msg_type = None
583             text = re.sub(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} "
584                           r"PAPI command history:", u"",
585                           msg.message, count=1).replace(u'\n', u' |br| ').\
586                 replace(u'"', u"'")
587             self._data[u"tests"][self._test_id][u"conf-history"] += (
588                 f" |br| **DUT{str(self._conf_history_lookup_nr)}:** {text}"
589             )
590
591     def _get_show_run(self, msg):
592         """Called when extraction of VPP operational data (output of CLI command
593         Show Runtime) is required.
594
595         :param msg: Message to process.
596         :type msg: Message
597         :returns: Nothing.
598         """
599
600         if not msg.message.count(u"stats runtime"):
601             return
602
603         if u"show-run" not in self._data[u"tests"][self._test_id].keys():
604             self._data[u"tests"][self._test_id][u"show-run"] = dict()
605
606         groups = re.search(self.REGEX_TC_PAPI_CLI, msg.message)
607         if not groups:
608             return
609         try:
610             host = groups.group(1)
611         except (AttributeError, IndexError):
612             host = u""
613         try:
614             sock = groups.group(2)
615         except (AttributeError, IndexError):
616             sock = u""
617
618         runtime = loads(str(msg.message).replace(u' ', u'').replace(u'\n', u'').
619                         replace(u"'", u'"').replace(u'b"', u'"').
620                         replace(u'u"', u'"').split(u":", 1)[1])
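        # At this point "runtime" holds a list of per-graph-node dicts of
        # (illustrative) shape:
        #     {u"name": u"ip4-input", u"calls": [...], u"vectors": [...],
        #      u"suspends": [...], u"clocks": [...]}
        # where each inner list holds one value per thread.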
621
622         try:
623             threads_nr = len(runtime[0][u"clocks"])
624         except (IndexError, KeyError):
625             return
626
627         dut = u"DUT{nr}".format(
628             nr=len(self._data[u'tests'][self._test_id][u'show-run'].keys()) + 1)
629
630         oper = {
631             u"host": host,
632             u"socket": sock,
633             u"threads": OrderedDict({idx: list() for idx in range(threads_nr)})
634         }
635
636         for item in runtime:
637             for idx in range(threads_nr):
638                 if item[u"vectors"][idx] > 0:
639                     clocks = item[u"clocks"][idx] / item[u"vectors"][idx]
640                 elif item[u"calls"][idx] > 0:
641                     clocks = item[u"clocks"][idx] / item[u"calls"][idx]
642                 elif item[u"suspends"][idx] > 0:
643                     clocks = item[u"clocks"][idx] / item[u"suspends"][idx]
644                 else:
645                     clocks = 0.0
646
647                 if item[u"calls"][idx] > 0:
648                     vectors_call = item[u"vectors"][idx] / item[u"calls"][idx]
649                 else:
650                     vectors_call = 0.0
651
652                 if int(item[u"calls"][idx]) + int(item[u"vectors"][idx]) + \
653                         int(item[u"suspends"][idx]):
654                     oper[u"threads"][idx].append([
655                         item[u"name"],
656                         item[u"calls"][idx],
657                         item[u"vectors"][idx],
658                         item[u"suspends"][idx],
659                         clocks,
660                         vectors_call
661                     ])
662
663         self._data[u'tests'][self._test_id][u'show-run'][dut] = copy.copy(oper)
664
665     def _get_ndrpdr_throughput(self, msg):
666         """Get NDR_LOWER, NDR_UPPER, PDR_LOWER and PDR_UPPER from the test
667         message.
668
669         :param msg: The test message to be parsed.
670         :type msg: str
671         :returns: Parsed data as a dict and the status (PASS/FAIL).
672         :rtype: tuple(dict, str)
673         """
674
675         throughput = {
676             u"NDR": {u"LOWER": -1.0, u"UPPER": -1.0},
677             u"PDR": {u"LOWER": -1.0, u"UPPER": -1.0}
678         }
679         status = u"FAIL"
680         groups = re.search(self.REGEX_NDRPDR_RATE, msg)
681
682         if groups is not None:
683             try:
684                 throughput[u"NDR"][u"LOWER"] = float(groups.group(1))
685                 throughput[u"NDR"][u"UPPER"] = float(groups.group(2))
686                 throughput[u"PDR"][u"LOWER"] = float(groups.group(3))
687                 throughput[u"PDR"][u"UPPER"] = float(groups.group(4))
688                 status = u"PASS"
689             except (IndexError, ValueError):
690                 pass
691
692         return throughput, status
693
694     def _get_plr_throughput(self, msg):
695         """Get PLRsearch lower bound and PLRsearch upper bound from the test
696         message.
697
698         :param msg: The test message to be parsed.
699         :type msg: str
700         :returns: Parsed data as a dict and the status (PASS/FAIL).
701         :rtype: tuple(dict, str)
702         """
703
704         throughput = {
705             u"LOWER": -1.0,
706             u"UPPER": -1.0
707         }
708         status = u"FAIL"
709         groups = re.search(self.REGEX_PLR_RATE, msg)
710
711         if groups is not None:
712             try:
713                 throughput[u"LOWER"] = float(groups.group(1))
714                 throughput[u"UPPER"] = float(groups.group(2))
715                 status = u"PASS"
716             except (IndexError, ValueError):
717                 pass
718
719         return throughput, status
720
721     def _get_ndrpdr_latency(self, msg):
722         """Get LATENCY from the test message.
723
724         :param msg: The test message to be parsed.
725         :type msg: str
726         :returns: Parsed data as a dict and the status (PASS/FAIL).
727         :rtype: tuple(dict, str)
728         """
729         latency_default = {
730             u"min": -1.0,
731             u"avg": -1.0,
732             u"max": -1.0,
733             u"hdrh": u""
734         }
735         latency = {
736             u"NDR": {
737                 u"direction1": copy.copy(latency_default),
738                 u"direction2": copy.copy(latency_default)
739             },
740             u"PDR": {
741                 u"direction1": copy.copy(latency_default),
742                 u"direction2": copy.copy(latency_default)
743             },
744             u"LAT0": {
745                 u"direction1": copy.copy(latency_default),
746                 u"direction2": copy.copy(latency_default)
747             },
748             u"PDR10": {
749                 u"direction1": copy.copy(latency_default),
750                 u"direction2": copy.copy(latency_default)
751             },
752             u"PDR50": {
753                 u"direction1": copy.copy(latency_default),
754                 u"direction2": copy.copy(latency_default)
755             },
756             u"PDR90": {
757                 u"direction1": copy.copy(latency_default),
758                 u"direction2": copy.copy(latency_default)
759             },
760         }
761
762         # TODO: Rewrite when long and base are not needed
763         groups = re.search(self.REGEX_NDRPDR_LAT_LONG, msg)
764         if groups is None:
765             groups = re.search(self.REGEX_NDRPDR_LAT, msg)
766         if groups is None:
767             groups = re.search(self.REGEX_NDRPDR_LAT_BASE, msg)
768         if groups is None:
769             return latency, u"FAIL"
770
771         def process_latency(in_str):
772             """Return object with parsed latency values.
773
774             TODO: Define class for the return type.
775
776             :param in_str: Input string, min/avg/max/hdrh format.
777             :type in_str: str
778             :returns: Dict with float min/avg/max values and hdrh string.
779             :rtype: dict
780             :throws IndexError: If in_str does not have enough substrings.
781             :throws ValueError: If a substring does not convert to float.
782             """
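            # e.g. in_str = u"10/15/25/HISTFAAAAE..." (illustrative) yields:
            #     {u"min": 10.0, u"avg": 15.0, u"max": 25.0,
            #      u"hdrh": u"HISTFAAAAE..."}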
783             in_list = in_str.split('/', 3)
784
785             rval = {
786                 u"min": float(in_list[0]),
787                 u"avg": float(in_list[1]),
788                 u"max": float(in_list[2]),
789                 u"hdrh": u""
790             }
791
792             if len(in_list) == 4:
793                 rval[u"hdrh"] = str(in_list[3])
794
795             return rval
796
797         try:
798             latency[u"NDR"][u"direction1"] = process_latency(groups.group(1))
799             latency[u"NDR"][u"direction2"] = process_latency(groups.group(2))
800             latency[u"PDR"][u"direction1"] = process_latency(groups.group(3))
801             latency[u"PDR"][u"direction2"] = process_latency(groups.group(4))
802             if groups.lastindex == 4:
803                 return latency, u"PASS"
804         except (IndexError, ValueError):
805             pass
806
807         try:
808             latency[u"PDR90"][u"direction1"] = process_latency(groups.group(5))
809             latency[u"PDR90"][u"direction2"] = process_latency(groups.group(6))
810             latency[u"PDR50"][u"direction1"] = process_latency(groups.group(7))
811             latency[u"PDR50"][u"direction2"] = process_latency(groups.group(8))
812             latency[u"PDR10"][u"direction1"] = process_latency(groups.group(9))
813             latency[u"PDR10"][u"direction2"] = process_latency(groups.group(10))
814             latency[u"LAT0"][u"direction1"] = process_latency(groups.group(11))
815             latency[u"LAT0"][u"direction2"] = process_latency(groups.group(12))
816             if groups.lastindex == 12:
817                 return latency, u"PASS"
818         except (IndexError, ValueError):
819             pass
820
821         # TODO: Remove when not needed
822         latency[u"NDR10"] = {
823             u"direction1": copy.copy(latency_default),
824             u"direction2": copy.copy(latency_default)
825         }
826         latency[u"NDR50"] = {
827             u"direction1": copy.copy(latency_default),
828             u"direction2": copy.copy(latency_default)
829         }
830         latency[u"NDR90"] = {
831             u"direction1": copy.copy(latency_default),
832             u"direction2": copy.copy(latency_default)
833         }
834         try:
835             latency[u"LAT0"][u"direction1"] = process_latency(groups.group(5))
836             latency[u"LAT0"][u"direction2"] = process_latency(groups.group(6))
837             latency[u"NDR10"][u"direction1"] = process_latency(groups.group(7))
838             latency[u"NDR10"][u"direction2"] = process_latency(groups.group(8))
839             latency[u"NDR50"][u"direction1"] = process_latency(groups.group(9))
840             latency[u"NDR50"][u"direction2"] = process_latency(groups.group(10))
841             latency[u"NDR90"][u"direction1"] = process_latency(groups.group(11))
842             latency[u"NDR90"][u"direction2"] = process_latency(groups.group(12))
843             latency[u"PDR10"][u"direction1"] = process_latency(groups.group(13))
844             latency[u"PDR10"][u"direction2"] = process_latency(groups.group(14))
845             latency[u"PDR50"][u"direction1"] = process_latency(groups.group(15))
846             latency[u"PDR50"][u"direction2"] = process_latency(groups.group(16))
847             latency[u"PDR90"][u"direction1"] = process_latency(groups.group(17))
848             latency[u"PDR90"][u"direction2"] = process_latency(groups.group(18))
849             return latency, u"PASS"
850         except (IndexError, ValueError):
851             pass
852
853         return latency, u"FAIL"
854
855     def visit_suite(self, suite):
856         """Implements traversing through the suite and its direct children.
857
858         :param suite: Suite to process.
859         :type suite: Suite
860         :returns: Nothing.
861         """
862         if self.start_suite(suite) is not False:
863             suite.suites.visit(self)
864             suite.tests.visit(self)
865             self.end_suite(suite)
866
867     def start_suite(self, suite):
868         """Called when suite starts.
869
870         :param suite: Suite to process.
871         :type suite: Suite
872         :returns: Nothing.
873         """
874
875         try:
876             parent_name = suite.parent.name
877         except AttributeError:
878             return
879
880         doc_str = suite.doc.\
881             replace(u'"', u"'").\
882             replace(u'\n', u' ').\
883             replace(u'\r', u'').\
884             replace(u'*[', u' |br| *[').\
885             replace(u"*", u"**").\
886             replace(u' |br| *[', u'*[', 1)
887
888         self._data[u"suites"][suite.longname.lower().
889                               replace(u'"', u"'").
890                               replace(u" ", u"_")] = {
891                                   u"name": suite.name.lower(),
892                                   u"doc": doc_str,
893                                   u"parent": parent_name,
894                                   u"level": len(suite.longname.split(u"."))
895                               }
896
897         suite.keywords.visit(self)
898
899     def end_suite(self, suite):
900         """Called when suite ends.
901
902         :param suite: Suite to process.
903         :type suite: Suite
904         :returns: Nothing.
905         """
906
907     def visit_test(self, test):
908         """Implements traversing through the test.
909
910         :param test: Test to process.
911         :type test: Test
912         :returns: Nothing.
913         """
914         if self.start_test(test) is not False:
915             test.keywords.visit(self)
916             self.end_test(test)
917
918     def start_test(self, test):
919         """Called when test starts.
920
921         :param test: Test to process.
922         :type test: Test
923         :returns: Nothing.
924         """
925
926         longname_orig = test.longname.lower()
927
928         # Check the ignore list
929         if longname_orig in self._ignore:
930             return
931
932         tags = [str(tag) for tag in test.tags]
933         test_result = dict()
934
935         # Change the TC long name and name if defined in the mapping table
936         longname = self._mapping.get(longname_orig, None)
937         if longname is not None:
938             name = longname.split(u'.')[-1]
939             logging.debug(
940                 f"{self._data[u'metadata']}\n{longname_orig}\n{longname}\n"
941                 f"{name}"
942             )
943         else:
944             longname = longname_orig
945             name = test.name.lower()
946
947         # Remove TC number from the TC long name (backward compatibility):
948         self._test_id = re.sub(self.REGEX_TC_NUMBER, u"", longname)
949         # Remove TC number from the TC name (not needed):
950         test_result[u"name"] = re.sub(self.REGEX_TC_NUMBER, u"", name)
951
952         test_result[u"parent"] = test.parent.name.lower()
953         test_result[u"tags"] = tags
954         test_result[u"doc"] = test.doc.\
955             replace(u'"', u"'").\
956             replace(u'\n', u' ').\
957             replace(u'\r', u'').\
958             replace(u'[', u' |br| [').\
959             replace(u' |br| [', u'[', 1)
960         test_result[u"msg"] = self._get_data_from_perf_test_msg(test.message).\
961             replace(u'\n', u' |br| ').\
962             replace(u'\r', u'').\
963             replace(u'"', u"'")
964         test_result[u"type"] = u"FUNC"
965         test_result[u"status"] = test.status
966
967         if u"PERFTEST" in tags:
968             # Replace info about cores (e.g. -1c-) with the info about threads
969             # and cores (e.g. -1t1c-) in the long test case names and in the
970             # test case names if necessary.
971             groups = re.search(self.REGEX_TC_NAME_OLD, self._test_id)
972             if not groups:
973                 tag_count = 0
974                 tag_tc = str()
975                 for tag in test_result[u"tags"]:
976                     groups = re.search(self.REGEX_TC_TAG, tag)
977                     if groups:
978                         tag_count += 1
979                         tag_tc = tag
980
981                 if tag_count == 1:
982                     self._test_id = re.sub(self.REGEX_TC_NAME_NEW,
983                                            f"-{tag_tc.lower()}-",
984                                            self._test_id,
985                                            count=1)
986                     test_result[u"name"] = re.sub(self.REGEX_TC_NAME_NEW,
987                                                   f"-{tag_tc.lower()}-",
988                                                   test_result["name"],
989                                                   count=1)
990                 else:
991                     test_result[u"status"] = u"FAIL"
992                     self._data[u"tests"][self._test_id] = test_result
993                     logging.debug(
994                         f"The test {self._test_id} has none or more than "
995                         f"one multi-threading tag.\n"
996                         f"Tags: {test_result[u'tags']}"
997                     )
998                     return
999
1000         if test.status == u"PASS":
1001             if u"NDRPDR" in tags:
1002                 test_result[u"type"] = u"NDRPDR"
1003                 test_result[u"throughput"], test_result[u"status"] = \
1004                     self._get_ndrpdr_throughput(test.message)
1005                 test_result[u"latency"], test_result[u"status"] = \
1006                     self._get_ndrpdr_latency(test.message)
1007             elif u"SOAK" in tags:
1008                 test_result[u"type"] = u"SOAK"
1009                 test_result[u"throughput"], test_result[u"status"] = \
1010                     self._get_plr_throughput(test.message)
1011             elif u"TCP" in tags:
1012                 test_result[u"type"] = u"TCP"
1013                 groups = re.search(self.REGEX_TCP, test.message)
1014                 test_result[u"result"] = int(groups.group(2))
1015             elif u"MRR" in tags or u"FRMOBL" in tags or u"BMRR" in tags:
1016                 if u"MRR" in tags:
1017                     test_result[u"type"] = u"MRR"
1018                 else:
1019                     test_result[u"type"] = u"BMRR"
1020
1021                 test_result[u"result"] = dict()
1022                 groups = re.search(self.REGEX_BMRR, test.message)
1023                 if groups is not None:
1024                     items_str = groups.group(1)
1025                     items_float = [float(item.strip()) for item
1026                                    in items_str.split(",")]
1027                     # Use whole list in CSIT-1180.
1028                     stats = jumpavg.AvgStdevStats.for_runs(items_float)
1029                     test_result[u"result"][u"receive-rate"] = stats.avg
1030                 else:
1031                     groups = re.search(self.REGEX_MRR, test.message)
1032                     test_result[u"result"][u"receive-rate"] = \
1033                         float(groups.group(3)) / float(groups.group(1))
1034             elif u"RECONF" in tags:
1035                 test_result[u"type"] = u"RECONF"
1036                 test_result[u"result"] = None
1037                 try:
1038                     grps_loss = re.search(self.REGEX_RECONF_LOSS, test.message)
1039                     grps_time = re.search(self.REGEX_RECONF_TIME, test.message)
1040                     test_result[u"result"] = {
1041                         u"loss": int(grps_loss.group(1)),
1042                         u"time": float(grps_time.group(1))
1043                     }
1044                 except (AttributeError, IndexError, ValueError, TypeError):
1045                     test_result[u"status"] = u"FAIL"
1046             else:
1047                 test_result[u"status"] = u"FAIL"
1048                 self._data[u"tests"][self._test_id] = test_result
1049                 return
1050
1051         self._data[u"tests"][self._test_id] = test_result
1052
1053     def end_test(self, test):
1054         """Called when test ends.
1055
1056         :param test: Test to process.
1057         :type test: Test
1058         :returns: Nothing.
1059         """
1060
1061     def visit_keyword(self, keyword):
1062         """Implements traversing through the keyword and its child keywords.
1063
1064         :param keyword: Keyword to process.
1065         :type keyword: Keyword
1066         :returns: Nothing.
1067         """
1068         if self.start_keyword(keyword) is not False:
1069             self.end_keyword(keyword)
1070
1071     def start_keyword(self, keyword):
1072         """Called when keyword starts. Default implementation does nothing.
1073
1074         :param keyword: Keyword to process.
1075         :type keyword: Keyword
1076         :returns: Nothing.
1077         """
1078         try:
1079             if keyword.type == u"setup":
1080                 self.visit_setup_kw(keyword)
1081             elif keyword.type == u"teardown":
1082                 self.visit_teardown_kw(keyword)
1083             else:
1084                 self.visit_test_kw(keyword)
1085         except AttributeError:
1086             pass
1087
1088     def end_keyword(self, keyword):
1089         """Called when keyword ends. Default implementation does nothing.
1090
1091         :param keyword: Keyword to process.
1092         :type keyword: Keyword
1093         :returns: Nothing.
1094         """
1095
1096     def visit_test_kw(self, test_kw):
1097         """Implements traversing through the test keyword and its child
1098         keywords.
1099
1100         :param test_kw: Keyword to process.
1101         :type test_kw: Keyword
1102         :returns: Nothing.
1103         """
1104         for keyword in test_kw.keywords:
1105             if self.start_test_kw(keyword) is not False:
1106                 self.visit_test_kw(keyword)
1107                 self.end_test_kw(keyword)
1108
1109     def start_test_kw(self, test_kw):
1110         """Called when test keyword starts. Default implementation does
1111         nothing.
1112
1113         :param test_kw: Keyword to process.
1114         :type test_kw: Keyword
1115         :returns: Nothing.
1116         """
1117         if test_kw.name.count(u"Show Runtime On All Duts") or \
1118                 test_kw.name.count(u"Show Runtime Counters On All Duts"):
1119             self._msg_type = u"test-show-runtime"
1120         elif test_kw.name.count(u"Install Dpdk Test") and not self._version:
1121             self._msg_type = u"dpdk-version"
1122         else:
1123             return
1124         test_kw.messages.visit(self)
1125
1126     def end_test_kw(self, test_kw):
1127         """Called when keyword ends. Default implementation does nothing.
1128
1129         :param test_kw: Keyword to process.
1130         :type test_kw: Keyword
1131         :returns: Nothing.
1132         """
1133
1134     def visit_setup_kw(self, setup_kw):
1135         """Implements traversing through the setup keyword and its child
1136         keywords.
1137
1138         :param setup_kw: Keyword to process.
1139         :type setup_kw: Keyword
1140         :returns: Nothing.
1141         """
1142         for keyword in setup_kw.keywords:
1143             if self.start_setup_kw(keyword) is not False:
1144                 self.visit_setup_kw(keyword)
1145                 self.end_setup_kw(keyword)
1146
1147     def start_setup_kw(self, setup_kw):
1148         """Called when setup keyword starts. Default implementation does
1149         nothing.
1150
1151         :param setup_kw: Keyword to process.
1152         :type setup_kw: Keyword
1153         :returns: Nothing.
1154         """
1155         if setup_kw.name.count(u"Show Vpp Version On All Duts") \
1156                 and not self._version:
1157             self._msg_type = u"vpp-version"
1158         elif setup_kw.name.count(u"Set Global Variable") \
1159                 and not self._timestamp:
1160             self._msg_type = u"timestamp"
1161         elif setup_kw.name.count(u"Setup Framework") and not self._testbed:
1162             self._msg_type = u"testbed"
1163         else:
1164             return
1165         setup_kw.messages.visit(self)
1166
1167     def end_setup_kw(self, setup_kw):
1168         """Called when keyword ends. Default implementation does nothing.
1169
1170         :param setup_kw: Keyword to process.
1171         :type setup_kw: Keyword
1172         :returns: Nothing.
1173         """
1174
1175     def visit_teardown_kw(self, teardown_kw):
1176         """Implements traversing through the teardown keyword and its child
1177         keywords.
1178
1179         :param teardown_kw: Keyword to process.
1180         :type teardown_kw: Keyword
1181         :returns: Nothing.
1182         """
1183         for keyword in teardown_kw.keywords:
1184             if self.start_teardown_kw(keyword) is not False:
1185                 self.visit_teardown_kw(keyword)
1186                 self.end_teardown_kw(keyword)
1187
1188     def start_teardown_kw(self, teardown_kw):
1189         """Called when teardown keyword starts.
1190
1191         :param teardown_kw: Keyword to process.
1192         :type teardown_kw: Keyword
1193         :returns: Nothing.
1194         """
1195
1196         if teardown_kw.name.count(u"Show Vat History On All Duts"):
1197             # TODO: Remove when not needed:
1198             self._conf_history_lookup_nr = 0
1199             self._msg_type = u"teardown-vat-history"
1200             teardown_kw.messages.visit(self)
1201         elif teardown_kw.name.count(u"Show Papi History On All Duts"):
1202             self._conf_history_lookup_nr = 0
1203             self._msg_type = u"teardown-papi-history"
1204             teardown_kw.messages.visit(self)
1205
1206     def end_teardown_kw(self, teardown_kw):
1207         """Called when keyword ends. Default implementation does nothing.
1208
1209         :param teardown_kw: Keyword to process.
1210         :type teardown_kw: Keyword
1211         :returns: Nothing.
1212         """
1213
1214     def visit_message(self, msg):
1215         """Implements visiting the message.
1216
1217         :param msg: Message to process.
1218         :type msg: Message
1219         :returns: Nothing.
1220         """
1221         if self.start_message(msg) is not False:
1222             self.end_message(msg)
1223
1224     def start_message(self, msg):
1225         """Called when message starts. Get required information from messages:
1226         - VPP version.
1227
1228         :param msg: Message to process.
1229         :type msg: Message
1230         :returns: Nothing.
1231         """
1232
1233         if self._msg_type:
1234             self.parse_msg[self._msg_type](msg)
1235
1236     def end_message(self, msg):
1237         """Called when message ends. Default implementation does nothing.
1238
1239         :param msg: Message to process.
1240         :type msg: Message
1241         :returns: Nothing.
1242         """
1243
1244
1245 class InputData:
1246     """Input data
1247
1248     The data is extracted from output.xml files generated by Jenkins jobs and
1249     stored in pandas' DataFrames.
1250
1251     The data structure:
1252     - job name
1253       - build number
1254         - metadata
1255           (as described in ExecutionChecker documentation)
1256         - suites
1257           (as described in ExecutionChecker documentation)
1258         - tests
1259           (as described in ExecutionChecker documentation)
1260     """
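
    # Illustrative usage sketch ("spec" stands for an already parsed
    # Specification object; the job name and build number are hypothetical):
    #
    #     input_data = InputData(spec)
    #     input_data.download_and_parse_data(repeat=2)
    #     tests = input_data.tests(u"csit-vpp-perf-mrr-daily-master", u"1")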
1261
1262     def __init__(self, spec):
1263         """Initialization.
1264
1265         :param spec: Specification.
1266         :type spec: Specification
1267         """
1268
1269         # Specification:
1270         self._cfg = spec
1271
1272         # Data store:
1273         self._input_data = pd.Series()
1274
1275     @property
1276     def data(self):
1277         """Getter - Input data.
1278
1279         :returns: Input data
1280         :rtype: pandas.Series
1281         """
1282         return self._input_data
1283
1284     def metadata(self, job, build):
1285         """Getter - metadata
1286
1287         :param job: Job whose metadata we want.
1288         :param build: Build whose metadata we want.
1289         :type job: str
1290         :type build: str
1291         :returns: Metadata
1292         :rtype: pandas.Series
1293         """
1294
1295         return self.data[job][build][u"metadata"]
1296
1297     def suites(self, job, build):
1298         """Getter - suites
1299
1300         :param job: Job whose suites we want.
1301         :param build: Build whose suites we want.
1302         :type job: str
1303         :type build: str
1304         :returns: Suites.
1305         :rtype: pandas.Series
1306         """
1307
1308         return self.data[job][str(build)][u"suites"]
1309
1310     def tests(self, job, build):
1311         """Getter - tests
1312
1313         :param job: Job whose tests we want.
1314         :param build: Build whose tests we want.
1315         :type job: str
1316         :type build: str
1317         :returns: Tests.
1318         :rtype: pandas.Series
1319         """
1320
1321         return self.data[job][build][u"tests"]
1322
1323     def _parse_tests(self, job, build, log):
1324         """Process data from robot output.xml file and return JSON structured
1325         data.
1326
1327         :param job: The name of the job whose build output data will be processed.
1328         :param build: The build whose output data will be processed.
1329         :param log: List of log messages.
1330         :type job: str
1331         :type build: dict
1332         :type log: list of tuples (severity, msg)
1333         :returns: JSON data structure.
1334         :rtype: dict
1335         """
1336
1337         metadata = {
1338             u"job": job,
1339             u"build": build
1340         }
1341
1342         with open(build[u"file-name"], u'r') as data_file:
1343             try:
1344                 result = ExecutionResult(data_file)
1345             except errors.DataError as err:
1346                 log.append(
1347                     (u"ERROR", f"Error occurred while parsing output.xml: "
1348                                f"{repr(err)}")
1349                 )
1350                 return None
1351         checker = ExecutionChecker(metadata, self._cfg.mapping,
1352                                    self._cfg.ignore)
1353         result.visit(checker)
1354
1355         return checker.data
1356
1357     def _download_and_parse_build(self, job, build, repeat, pid=10000):
1358         """Download and parse the input data file.
1359
1360         :param pid: PID of the process executing this method.
1361         :param job: Name of the Jenkins job which generated the processed input
1362             file.
1363         :param build: Information about the Jenkins build which generated the
1364             processed input file.
1365         :param repeat: Repeat the download the specified number of times if not
1366             successful.
1367         :type pid: int
1368         :type job: str
1369         :type build: dict
1370         :type repeat: int
1371         """
1372
1373         logs = list()
1374
1375         logs.append(
1376             (u"INFO", f"  Processing the job/build: {job}: {build[u'build']}")
1377         )
1378
1379         state = u"failed"
1380         success = False
1381         data = None
1382         do_repeat = repeat
1383         while do_repeat:
1384             success = download_and_unzip_data_file(self._cfg, job, build, pid,
1385                                                    logs)
1386             if success:
1387                 break
1388             do_repeat -= 1
1389         if not success:
1390             logs.append(
1391                 (u"ERROR",
1392                  f"It is not possible to download the input data file from the "
1393                  f"job {job}, build {build[u'build']}, or it is damaged. "
1394                  f"Skipped.")
1395             )
1396         if success:
1397             logs.append(
1398                 (u"INFO",
1399                  f"    Processing data from the build {build[u'build']} ...")
1400             )
1401             data = self._parse_tests(job, build, logs)
1402             if data is None:
1403                 logs.append(
1404                     (u"ERROR",
1405                      f"Input data file from the job {job}, build "
1406                      f"{build[u'build']} is damaged. Skipped.")
1407                 )
1408             else:
1409                 state = u"processed"
1410
1411             try:
1412                 remove(build[u"file-name"])
1413             except OSError as err:
1414                 logs.append(
1415                     ("ERROR", f"Cannot remove the file {build[u'file-name']}: "
1416                               f"{repr(err)}")
1417                 )
1418
1419         # If the time-period is defined in the specification file, remove
1420         # all data which is outside the time period.
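        # E.g. u"time-period": 90 in the specification keeps only builds
        # generated within the last 90 days (timedelta() below counts days).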
        timeperiod = self._cfg.input.get(u"time-period", None)
        if timeperiod and data:
            now = dt.utcnow()
            timeperiod = timedelta(int(timeperiod))
            metadata = data.get(u"metadata", None)
            if metadata:
                generated = metadata.get(u"generated", None)
                if generated:
                    generated = dt.strptime(generated, u"%Y%m%d %H:%M")
                    if (now - generated) > timeperiod:
                        # Remove the data and the file:
                        state = u"removed"
                        data = None
                        logs.append(
                            (u"INFO",
                             f"    The build {job}/{build[u'build']} is "
                             f"outdated, will be removed.")
                        )
        logs.append((u"INFO", u"  Done."))

        for level, line in logs:
            if level == u"INFO":
                logging.info(line)
            elif level == u"ERROR":
                logging.error(line)
            elif level == u"DEBUG":
                logging.debug(line)
            elif level == u"CRITICAL":
                logging.critical(line)
            elif level == u"WARNING":
                logging.warning(line)

        return {u"data": data, u"state": state, u"job": job, u"build": build}

    def download_and_parse_data(self, repeat=1):
        """Download the input data files, parse the data from them and store
        it in pandas' Series.

        :param repeat: Repeat the download the specified number of times if
            not successful.
        :type repeat: int
        """

        logging.info(u"Downloading and parsing input files ...")

        for job, builds in self._cfg.builds.items():
            for build in builds:

                result = self._download_and_parse_build(job, build, repeat)
                build_nr = result[u"build"][u"build"]

                if result[u"data"]:
                    data = result[u"data"]
                    build_data = pd.Series({
                        u"metadata": pd.Series(
                            list(data[u"metadata"].values()),
                            index=list(data[u"metadata"].keys())
                        ),
                        u"suites": pd.Series(
                            list(data[u"suites"].values()),
                            index=list(data[u"suites"].keys())
                        ),
                        u"tests": pd.Series(
                            list(data[u"tests"].values()),
                            index=list(data[u"tests"].keys())
                        )
                    })

                    if self._input_data.get(job, None) is None:
                        self._input_data[job] = pd.Series()
                    self._input_data[job][str(build_nr)] = build_data
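                    # The parsed data is later addressable as
                    # self.data[job][str(build)][u"tests"][test_id].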

                    self._cfg.set_input_file_name(
                        job, build_nr, result[u"build"][u"file-name"])

                self._cfg.set_input_state(job, build_nr, result[u"state"])

                mem_alloc = \
                    resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000
                logging.info(f"Memory allocation: {mem_alloc:.0f}MB")

        logging.info(u"Done.")

    @staticmethod
    def _end_of_tag(tag_filter, start=0, closer=u"'"):
        """Return the index of the character which closes the tag.

        :param tag_filter: The string in which the tag closer is searched.
        :param start: The index where the search starts.
        :param closer: The character which closes the tag.
        :type tag_filter: str
        :type start: int
        :type closer: str
        :returns: The index of the tag closer, or None if not found.
        :rtype: int
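
        Example (the closer of the first tag u"'64B'" is the apostrophe at
        index 4):

        >>> InputData._end_of_tag(u"'64B' and '1T1C'")
        4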
        """

        try:
            idx_opener = tag_filter.index(closer, start)
            return tag_filter.index(closer, idx_opener + 1)
        except ValueError:
            return None

    @staticmethod
    def _condition(tag_filter):
        """Create a conditional statement from the given tag filter.

        :param tag_filter: Filter based on tags from the element specification.
        :type tag_filter: str
        :returns: Conditional statement which can be evaluated.
        :rtype: str
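
        Example (each tag is suffixed with u" in tags" so that the result
        can be evaluated against a set of tags):

        >>> InputData._condition(u"'64B' and not '2T2C'")
        "'64B' in tags and not '2T2C' in tags"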
        """

        index = 0
        while True:
            index = InputData._end_of_tag(tag_filter, index)
            if index is None:
                return tag_filter
            index += 1
            tag_filter = tag_filter[:index] + u" in tags" + tag_filter[index:]

    def filter_data(self, element, params=None, data=None, data_set=u"tests",
                    continue_on_error=False):
        """Filter required data from the given jobs and builds.

        The output data structure is:

        - job 1
          - build 1
            - test (or suite) 1 ID:
              - param 1
              - param 2
              ...
              - param n
            ...
            - test (or suite) n ID:
            ...
          ...
          - build n
        ...
        - job n

        :param element: Element which will use the filtered data.
        :param params: Parameters which will be included in the output. If None,
            all parameters are included.
        :param data: If not None, this data is used instead of the data
            specified in the element.
        :param data_set: The set of data to be filtered: tests, suites,
            metadata.
        :param continue_on_error: Continue if there is an error while reading
            the data. The item will be empty then.
        :type element: pandas.Series
        :type params: list
        :type data: dict
        :type data_set: str
        :type continue_on_error: bool
        :returns: Filtered data.
        :rtype: pandas.Series
        """

        try:
            if data_set == u"suites":
                cond = u"True"
            elif element[u"filter"] in (u"all", u"template"):
                cond = u"True"
            else:
                cond = InputData._condition(element[u"filter"])
            logging.debug(f"   Filter: {cond}")
        except KeyError:
            logging.error(u"  No filter defined.")
            return None

        if params is None:
            params = element.get(u"parameters", None)
            if params:
                params.append(u"type")

        data_to_filter = data if data else element[u"data"]
        data = pd.Series()
        try:
            for job, builds in data_to_filter.items():
                data[job] = pd.Series()
                for build in builds:
                    data[job][str(build)] = pd.Series()
                    try:
                        data_dict = dict(
                            self.data[job][str(build)][data_set].items())
                    except KeyError:
                        if continue_on_error:
                            continue
                        return None

                    for test_id, test_data in data_dict.items():
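                        # The condition built by _condition() is evaluated
                        # against the test's tags, e.g.
                        # eval("'64B' in tags", {u"tags": {u"64B", u"NDRPDR"}})
                        # evaluates to True.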
                        if eval(cond, {u"tags": test_data.get(u"tags", u"")}):
                            data[job][str(build)][test_id] = pd.Series()
                            if params is None:
                                for param, val in test_data.items():
                                    data[job][str(build)][test_id][param] = val
                            else:
                                for param in params:
                                    try:
                                        data[job][str(build)][test_id][param] =\
                                            test_data[param]
                                    except KeyError:
                                        data[job][str(build)][test_id][param] =\
                                            u"No Data"
            return data

        except (KeyError, IndexError, ValueError) as err:
            logging.error(
                f"Missing mandatory parameter in the element specification: "
                f"{repr(err)}"
            )
            return None
        except AttributeError as err:
            logging.error(repr(err))
            return None
        except SyntaxError as err:
            logging.error(
                f"The filter {cond} is not correct. Check if all tags are "
                f"enclosed by apostrophes.\n{repr(err)}"
            )
            return None

    def filter_tests_by_name(self, element, params=None, data_set=u"tests",
                             continue_on_error=False):
        """Filter required data from the given jobs and builds. The tests are
        selected by matching their IDs against the regular expressions listed
        in the element's "include" section.

        The output data structure is:

        - job 1
          - build 1
            - test (or suite) 1 ID:
              - param 1
              - param 2
              ...
              - param n
            ...
            - test (or suite) n ID:
            ...
          ...
          - build n
        ...
        - job n

        :param element: Element which will use the filtered data.
        :param params: Parameters which will be included in the output. If None,
            all parameters are included.
        :param data_set: The set of data to be filtered: tests, suites,
            metadata.
        :param continue_on_error: Continue if there is an error while reading
            the data. The item will be empty then.
        :type element: pandas.Series
        :type params: list
        :type data_set: str
        :type continue_on_error: bool
        :returns: Filtered data.
        :rtype: pandas.Series
        """

        include = element.get(u"include", None)
        if not include:
            logging.warning(u"No tests to include, skipping the element.")
            return None

        if params is None:
            params = element.get(u"parameters", None)
            if params:
                params.append(u"type")

        data = pd.Series()
        try:
            for job, builds in element[u"data"].items():
                data[job] = pd.Series()
                for build in builds:
                    data[job][str(build)] = pd.Series()
                    for test in include:
                        try:
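                            # re.match() anchors the pattern at the beginning
                            # of the string, so each "include" entry matches
                            # test IDs by prefix (case-insensitively, as both
                            # sides are lower-cased).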
                            reg_ex = re.compile(str(test).lower())
                            for test_id in self.data[job][
                                    str(build)][data_set].keys():
                                if re.match(reg_ex, str(test_id).lower()):
                                    test_data = self.data[job][
                                        str(build)][data_set][test_id]
                                    data[job][str(build)][test_id] = pd.Series()
                                    if params is None:
                                        for param, val in test_data.items():
                                            data[job][str(build)][test_id]\
                                                [param] = val
                                    else:
                                        for param in params:
                                            try:
                                                data[job][str(build)][
                                                    test_id][param] = \
                                                    test_data[param]
                                            except KeyError:
                                                data[job][str(build)][
                                                    test_id][param] = u"No Data"
                        except KeyError as err:
                            logging.error(repr(err))
                            if continue_on_error:
                                continue
                            return None
            return data

        except (KeyError, IndexError, ValueError) as err:
            logging.error(
                f"Missing mandatory parameter in the element "
                f"specification: {repr(err)}"
            )
            return None
        except AttributeError as err:
            logging.error(repr(err))
            return None

    @staticmethod
    def merge_data(data):
        """Merge data from multiple jobs and builds into a simple data
        structure.

        The output data structure is:

        - test (suite) 1 ID:
          - param 1
          - param 2
          ...
          - param n
        ...
        - test (suite) n ID:
        ...

        :param data: Data to merge.
        :type data: pandas.Series
        :returns: Merged data.
        :rtype: pandas.Series
        """

        logging.info(u"    Merging data ...")

        merged_data = pd.Series()
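        # If the same item ID occurs in more than one build, the item from
        # the build processed last wins.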
        for builds in data.values:
            for item in builds.values:
                for item_id, item_data in item.items():
                    merged_data[item_id] = item_data

        return merged_data

    def print_all_oper_data(self):
        """Print all operational data to the console."""

        tbl_hdr = (
            u"Name",
            u"Nr of Vectors",
            u"Nr of Packets",
            u"Suspends",
            u"Cycles per Packet",
            u"Average Vector Size"
        )

        for job in self._input_data.values:
            for build in job.values:
                for test_id, test_data in build[u"tests"].items():
                    print(f"{test_id}")
                    if test_data.get(u"show-run", None) is None:
                        continue
                    for dut_name, data in test_data[u"show-run"].items():
                        if data.get(u"threads", None) is None:
                            continue
                        print(f"Host IP: {data.get(u'host', '')}, "
                              f"Socket: {data.get(u'socket', '')}")
                        for thread_nr, thread in data[u"threads"].items():
                            txt_table = prettytable.PrettyTable(tbl_hdr)
                            avg = 0.0
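                            # The last column of each row holds the average
                            # vector size of the graph node.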
                            for row in thread:
                                txt_table.add_row(row)
                                avg += row[-1]
                            if len(thread) == 0:
                                avg = u""
                            else:
                                avg = f", Average Vector Size per Node: " \
                                      f"{(avg / len(thread)):.2f}"
                            th_name = u"main" if thread_nr == 0 \
                                else f"worker_{thread_nr}"
                            print(f"{dut_name}, {th_name}{avg}")
                            txt_table.float_format = u".2"
                            txt_table.align = u"r"
                            txt_table.align[u"Name"] = u"l"
                            print(f"{txt_table.get_string()}\n")