csit.git / resources/tools/presentation/input_data_parser.py
# Copyright (c) 2019 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

14 """Data pre-processing
15
16 - extract data from output.xml files generated by Jenkins jobs and store in
17   pandas' Series,
18 - provide access to the data.
19 - filter the data using tags,
20 """

import re
import copy
import resource
import logging

from collections import OrderedDict
from os import remove
from datetime import datetime as dt
from datetime import timedelta
from json import loads

import hdrh.histogram
import hdrh.codec
import prettytable
import pandas as pd

from robot.api import ExecutionResult, ResultVisitor
from robot import errors

from resources.libraries.python import jumpavg
from input_data_files import download_and_unzip_data_file


# Separator used in file names
SEPARATOR = u"__"


class ExecutionChecker(ResultVisitor):
    """Class to traverse through the test suite structure.

    The functionality implemented in this class generates a json structure:

    Performance tests:

    {
        "metadata": {
            "generated": "Timestamp",
            "version": "SUT version",
            "job": "Jenkins job name",
            "build": "Information about the build"
        },
        "suites": {
            "Suite long name 1": {
                "name": "Suite name",
                "doc": "Suite 1 documentation",
                "parent": "Suite 1 parent",
                "level": "Level of the suite in the suite hierarchy"
            },
            "Suite long name N": {
                "name": "Suite name",
                "doc": "Suite N documentation",
                "parent": "Suite N parent",
                "level": "Level of the suite in the suite hierarchy"
            }
        },
77         "tests": {
78             # NDRPDR tests:
79             "ID": {
80                 "name": "Test name",
81                 "parent": "Name of the parent of the test",
82                 "doc": "Test documentation",
83                 "msg": "Test message",
84                 "conf-history": "DUT1 and DUT2 VAT History",
85                 "show-run": "Show Run",
86                 "tags": ["tag 1", "tag 2", "tag n"],
87                 "type": "NDRPDR",
88                 "status": "PASS" | "FAIL",
89                 "throughput": {
90                     "NDR": {
91                         "LOWER": float,
92                         "UPPER": float
93                     },
94                     "PDR": {
95                         "LOWER": float,
96                         "UPPER": float
97                     }
98                 },
99                 "latency": {
100                     "NDR": {
101                         "direction1": {
102                             "min": float,
103                             "avg": float,
104                             "max": float,
105                             "hdrh": str
106                         },
107                         "direction2": {
108                             "min": float,
109                             "avg": float,
110                             "max": float,
111                             "hdrh": str
112                         }
113                     },
114                     "PDR": {
115                         "direction1": {
116                             "min": float,
117                             "avg": float,
118                             "max": float,
119                             "hdrh": str
120                         },
121                         "direction2": {
122                             "min": float,
123                             "avg": float,
124                             "max": float,
125                             "hdrh": str
126                         }
127                     }
128                 }
129             }
130
131             # TCP tests:
132             "ID": {
133                 "name": "Test name",
134                 "parent": "Name of the parent of the test",
135                 "doc": "Test documentation",
136                 "msg": "Test message",
137                 "tags": ["tag 1", "tag 2", "tag n"],
138                 "type": "TCP",
139                 "status": "PASS" | "FAIL",
140                 "result": int
141             }
142
143             # MRR, BMRR tests:
144             "ID": {
145                 "name": "Test name",
146                 "parent": "Name of the parent of the test",
147                 "doc": "Test documentation",
148                 "msg": "Test message",
149                 "tags": ["tag 1", "tag 2", "tag n"],
150                 "type": "MRR" | "BMRR",
151                 "status": "PASS" | "FAIL",
152                 "result": {
153                     "receive-rate": float,
154                     # Average of a list, computed using AvgStdevStats.
155                     # In CSIT-1180, replace with List[float].
156                 }
157             }
158
            "ID": {
                # next test
            }
        }
    }


    Functional tests:

    {
        "metadata": {  # Optional
            "version": "VPP version",
            "job": "Jenkins job name",
            "build": "Information about the build"
        },
        "suites": {
            "Suite name 1": {
                "doc": "Suite 1 documentation",
                "parent": "Suite 1 parent",
                "level": "Level of the suite in the suite hierarchy"
            },
            "Suite name N": {
                "doc": "Suite N documentation",
                "parent": "Suite N parent",
                "level": "Level of the suite in the suite hierarchy"
            }
        },
        "tests": {
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "tags": ["tag 1", "tag 2", "tag n"],
                "conf-history": "DUT1 and DUT2 VAT History",
                "show-run": "Show Run",
                "status": "PASS" | "FAIL"
            },
            "ID": {
                # next test
            }
        }
    }

    .. note:: ID is the lowercase full path to the test.
    """

    REGEX_PLR_RATE = re.compile(
        r'PLRsearch lower bound::?\s(\d+.\d+).*\n'
        r'PLRsearch upper bound::?\s(\d+.\d+)'
    )
    REGEX_NDRPDR_RATE = re.compile(
        r'NDR_LOWER:\s(\d+.\d+).*\n.*\n'
        r'NDR_UPPER:\s(\d+.\d+).*\n'
        r'PDR_LOWER:\s(\d+.\d+).*\n.*\n'
        r'PDR_UPPER:\s(\d+.\d+)'
    )
    REGEX_PERF_MSG_INFO = re.compile(
        r'NDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*\n'
        r'PDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*\n'
        r'Latency at 90% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
        r'Latency at 50% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
        r'Latency at 10% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
    )
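    # Illustrative shape of a message matched by REGEX_PERF_MSG_INFO (the
    # values and elided parts are hypothetical, not from the original source):
    #
    #   NDR_LOWER: 12345678.9 pps ... 10.5 Gbps ...
    #   <two more lines>
    #   PDR_LOWER: 23456789.0 pps ... 12.3 Gbps ...
    #   <two more lines>
    #   Latency at 90% PDR: ['1/2/4/hdrh-data', '1/3/5/hdrh-data'] ...
    #   Latency at 50% PDR: ['1/2/4/hdrh-data', '1/3/5/hdrh-data'] ...
    #   Latency at 10% PDR: ['1/2/4/hdrh-data', '1/3/5/hdrh-data'] ...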
    REGEX_MRR_MSG_INFO = re.compile(r'.*\[(.*)\]')

    # TODO: Remove when not needed
    REGEX_NDRPDR_LAT_BASE = re.compile(
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]'
    )
    REGEX_NDRPDR_LAT = re.compile(
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]'
    )
    # TODO: Remove when not needed
    REGEX_NDRPDR_LAT_LONG = re.compile(
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]'
    )
    REGEX_VERSION_VPP = re.compile(
        r"(return STDOUT Version:\s*|"
        r"VPP Version:\s*|VPP version:\s*)(.*)"
    )
    REGEX_VERSION_DPDK = re.compile(
        r"(DPDK version:\s*|DPDK Version:\s*)(.*)"
    )
    REGEX_TCP = re.compile(
        r'Total\s(rps|cps|throughput):\s(\d*).*$'
    )
    REGEX_MRR = re.compile(
        r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s'
        r'tx\s(\d*),\srx\s(\d*)'
    )
    REGEX_BMRR = re.compile(
        r'Maximum Receive Rate trial results'
        r' in packets per second: \[(.*)\]'
    )
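    # Illustrative (not from the original source): REGEX_BMRR captures the
    # comma-separated trial list from a message such as "Maximum Receive Rate
    # trial results in packets per second: [11871234.5, 11892345.6]".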
    REGEX_RECONF_LOSS = re.compile(
        r'Packets lost due to reconfig: (\d*)'
    )
    REGEX_RECONF_TIME = re.compile(
        r'Implied time lost: (\d*.[\de-]*)'
    )
    REGEX_TC_TAG = re.compile(r'\d+[tT]\d+[cC]')

    REGEX_TC_NAME_OLD = re.compile(r'-\d+[tT]\d+[cC]-')

    REGEX_TC_NAME_NEW = re.compile(r'-\d+[cC]-')

    REGEX_TC_NUMBER = re.compile(r'tc\d{2}-')

    REGEX_TC_PAPI_CLI = re.compile(r'.*\((\d+.\d+.\d+.\d+.) - (.*)\)')

    def __init__(self, metadata, mapping, ignore):
        """Initialisation.

        :param metadata: Key-value pairs to be included in "metadata" part of
            JSON structure.
        :param mapping: Mapping of the old names of test cases to the new
            (actual) ones.
        :param ignore: List of TCs to be ignored.
        :type metadata: dict
        :type mapping: dict
        :type ignore: list
        """

        # Type of message to parse out from the test messages
        self._msg_type = None

        # VPP version
        self._version = None

        # Timestamp
        self._timestamp = None

        # Testbed. The testbed is identified by TG node IP address.
        self._testbed = None

        # Mapping of TCs long names
        self._mapping = mapping

        # Ignore list
        self._ignore = ignore

        # Number of PAPI History messages found:
        # 0 - no message
        # 1 - PAPI History of DUT1
        # 2 - PAPI History of DUT2
        self._conf_history_lookup_nr = 0

        self._sh_run_counter = 0

        # Test ID of the currently processed test - the lowercase full path
        # to the test
        self._test_id = None

        # The main data structure
        self._data = {
            u"metadata": OrderedDict(),
            u"suites": OrderedDict(),
            u"tests": OrderedDict()
        }

        # Save the provided metadata
        for key, val in metadata.items():
            self._data[u"metadata"][key] = val

        # Dictionary defining the methods used to parse different types of
        # messages
        self.parse_msg = {
            u"timestamp": self._get_timestamp,
            u"vpp-version": self._get_vpp_version,
            u"dpdk-version": self._get_dpdk_version,
            # TODO: Remove when not needed:
            u"teardown-vat-history": self._get_vat_history,
            u"teardown-papi-history": self._get_papi_history,
            u"test-show-runtime": self._get_show_run,
            u"testbed": self._get_testbed
        }

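    # Note (editorial): the parsers above are dispatched lazily. A start_*_kw()
    # method sets self._msg_type according to the keyword name, and
    # start_message() then calls self.parse_msg[self._msg_type](msg) for every
    # message visited under that keyword.
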
    @property
    def data(self):
        """Getter - Data parsed from the XML file.

        :returns: Data parsed from the XML file.
        :rtype: dict
        """
        return self._data

    def _get_data_from_mrr_test_msg(self, msg):
        """Get info from message of MRR performance tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or original message if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_MRR_MSG_INFO, msg)
        if not groups or groups.lastindex != 1:
            return msg

        try:
            data = groups.group(1).split(u", ")
        except (AttributeError, IndexError, ValueError, KeyError):
            return msg

        out_str = u"["
        try:
            for item in data:
                out_str += f"{(float(item) / 1e6):.2f}, "
            return out_str[:-2] + u"]"
        except (AttributeError, IndexError, ValueError, KeyError):
            return msg

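    # Illustrative example (not from the original source): for a message
    # containing "[11871234.5, 11892345.6]" this method returns
    # "[11.87, 11.89]", i.e. the trial rates rescaled from pps to Mpps.
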
    def _get_data_from_perf_test_msg(self, msg):
        """Get info from message of NDRPDR performance tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or original message if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_PERF_MSG_INFO, msg)
        if not groups or groups.lastindex != 10:
            return msg

        try:
            data = {
                u"ndr_low": float(groups.group(1)),
                u"ndr_low_b": float(groups.group(2)),
                u"pdr_low": float(groups.group(3)),
                u"pdr_low_b": float(groups.group(4)),
                u"pdr_lat_90_1": groups.group(5),
                u"pdr_lat_90_2": groups.group(6),
                u"pdr_lat_50_1": groups.group(7),
                u"pdr_lat_50_2": groups.group(8),
                u"pdr_lat_10_1": groups.group(9),
                u"pdr_lat_10_2": groups.group(10),
            }
        except (AttributeError, IndexError, ValueError, KeyError):
            return msg

        def _process_lat(in_str_1, in_str_2):
            """Extract min, avg, max values from latency string.

            :param in_str_1: Latency string for one direction produced by robot
                framework.
            :param in_str_2: Latency string for second direction produced by
                robot framework.
            :type in_str_1: str
            :type in_str_2: str
            :returns: Processed latency string or empty string if a problem
                occurs.
            :rtype: str
            """
            in_list_1 = in_str_1.split('/', 3)
            in_list_2 = in_str_2.split('/', 3)

            # Bail out if either direction lacks the hdrh part.
            if len(in_list_1) != 4 or len(in_list_2) != 4:
                return u""

            # Pad the base64-encoded hdrh data to a multiple of four
            # characters before decoding.
            in_list_1[3] += u"=" * ((4 - len(in_list_1[3]) % 4) % 4)
            try:
                hdr_lat_1 = hdrh.histogram.HdrHistogram.decode(in_list_1[3])
            except hdrh.codec.HdrLengthException:
                return u""

            in_list_2[3] += u"=" * ((4 - len(in_list_2[3]) % 4) % 4)
            try:
                hdr_lat_2 = hdrh.histogram.HdrHistogram.decode(in_list_2[3])
            except hdrh.codec.HdrLengthException:
                return u""

            if hdr_lat_1 and hdr_lat_2:
                hdr_lat_1_50 = hdr_lat_1.get_value_at_percentile(50.0)
                hdr_lat_1_90 = hdr_lat_1.get_value_at_percentile(90.0)
                hdr_lat_1_99 = hdr_lat_1.get_value_at_percentile(99.0)
                hdr_lat_2_50 = hdr_lat_2.get_value_at_percentile(50.0)
                hdr_lat_2_90 = hdr_lat_2.get_value_at_percentile(90.0)
                hdr_lat_2_99 = hdr_lat_2.get_value_at_percentile(99.0)

                if (hdr_lat_1_50 + hdr_lat_1_90 + hdr_lat_1_99 +
                        hdr_lat_2_50 + hdr_lat_2_90 + hdr_lat_2_99):
                    return (
                        f"{hdr_lat_1_50} {hdr_lat_1_90} {hdr_lat_1_99} , "
                        f"{hdr_lat_2_50} {hdr_lat_2_90} {hdr_lat_2_99}"
                    )

            return u""

        try:
            pdr_lat_10 = _process_lat(data[u'pdr_lat_10_1'],
                                      data[u'pdr_lat_10_2'])
            pdr_lat_50 = _process_lat(data[u'pdr_lat_50_1'],
                                      data[u'pdr_lat_50_2'])
            pdr_lat_90 = _process_lat(data[u'pdr_lat_90_1'],
                                      data[u'pdr_lat_90_2'])
            pdr_lat_10 = f"\n3. {pdr_lat_10}" if pdr_lat_10 else u""
            pdr_lat_50 = f"\n4. {pdr_lat_50}" if pdr_lat_50 else u""
            pdr_lat_90 = f"\n5. {pdr_lat_90}" if pdr_lat_90 else u""

            return (
                f"1. {(data[u'ndr_low'] / 1e6):.2f} {data[u'ndr_low_b']:.2f}"
                f"\n2. {(data[u'pdr_low'] / 1e6):.2f} {data[u'pdr_low_b']:.2f}"
                f"{pdr_lat_10}"
                f"{pdr_lat_50}"
                f"{pdr_lat_90}"
            )
        except (AttributeError, IndexError, ValueError, KeyError):
            return msg

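    # Summary format produced above (editorial note, derived from the code):
    # line 1 is the NDR lower bound (Mpps, Gbps), line 2 the PDR lower bound
    # (Mpps, Gbps); optional lines 3-5 carry the 50/90/99 percentile latencies
    # for both directions at 10%, 50% and 90% of PDR load.
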
    def _get_testbed(self, msg):
        """Called when extraction of testbed IP is required.
        The testbed is identified by TG node IP address.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count(u"Setup of TG node") or \
                msg.message.count(u"Setup of node TG host"):
            reg_tg_ip = re.compile(
                r'.*TG .* (\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}).*')
            try:
                self._testbed = str(re.search(reg_tg_ip, msg.message).group(1))
            except (KeyError, ValueError, IndexError, AttributeError):
                pass
            finally:
                self._data[u"metadata"][u"testbed"] = self._testbed
                self._msg_type = None

    def _get_vpp_version(self, msg):
        """Called when extraction of VPP version is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count(u"return STDOUT Version:") or \
            msg.message.count(u"VPP Version:") or \
            msg.message.count(u"VPP version:"):
            self._version = str(re.search(self.REGEX_VERSION_VPP, msg.message).
                                group(2))
            self._data[u"metadata"][u"version"] = self._version
            self._msg_type = None

    def _get_dpdk_version(self, msg):
        """Called when extraction of DPDK version is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count(u"DPDK Version:"):
            try:
                self._version = str(re.search(
                    self.REGEX_VERSION_DPDK, msg.message).group(2))
                self._data[u"metadata"][u"version"] = self._version
            except IndexError:
                pass
            finally:
                self._msg_type = None

    def _get_timestamp(self, msg):
        """Called when extraction of timestamp is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        self._timestamp = msg.timestamp[:14]
        self._data[u"metadata"][u"generated"] = self._timestamp
        self._msg_type = None

    def _get_vat_history(self, msg):
        """Called when extraction of VAT command history is required.

        TODO: Remove when not needed.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if msg.message.count(u"VAT command history:"):
            self._conf_history_lookup_nr += 1
            if self._conf_history_lookup_nr == 1:
                self._data[u"tests"][self._test_id][u"conf-history"] = str()
            else:
                self._msg_type = None
            text = re.sub(r"\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3} "
                          r"VAT command history:", u"",
                          msg.message, count=1).replace(u'\n', u' |br| ').\
                replace(u'"', u"'")

            self._data[u"tests"][self._test_id][u"conf-history"] += (
                f" |br| **DUT{str(self._conf_history_lookup_nr)}:** {text}"
            )

    def _get_papi_history(self, msg):
        """Called when extraction of PAPI command history is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if msg.message.count(u"PAPI command history:"):
            self._conf_history_lookup_nr += 1
            if self._conf_history_lookup_nr == 1:
                self._data[u"tests"][self._test_id][u"conf-history"] = str()
            else:
                self._msg_type = None
            text = re.sub(r"\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3} "
                          r"PAPI command history:", u"",
                          msg.message, count=1).replace(u'\n', u' |br| ').\
                replace(u'"', u"'")
            self._data[u"tests"][self._test_id][u"conf-history"] += (
                f" |br| **DUT{str(self._conf_history_lookup_nr)}:** {text}"
            )

    def _get_show_run(self, msg):
        """Called when extraction of VPP operational data (output of CLI command
        Show Runtime) is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if not msg.message.count(u"stats runtime"):
            return

        # Temporary solution
        if self._sh_run_counter > 1:
            return

        if u"show-run" not in self._data[u"tests"][self._test_id].keys():
            self._data[u"tests"][self._test_id][u"show-run"] = dict()

        groups = re.search(self.REGEX_TC_PAPI_CLI, msg.message)
        if not groups:
            return
        try:
            host = groups.group(1)
        except (AttributeError, IndexError):
            host = u""
        try:
            sock = groups.group(2)
        except (AttributeError, IndexError):
            sock = u""

        runtime = loads(str(msg.message).replace(u' ', u'').replace(u'\n', u'').
                        replace(u"'", u'"').replace(u'b"', u'"').
                        replace(u'u"', u'"').split(u":", 1)[1])

        try:
            threads_nr = len(runtime[0][u"clocks"])
        except (IndexError, KeyError):
            return

        dut = u"DUT{nr}".format(
            nr=len(self._data[u'tests'][self._test_id][u'show-run'].keys()) + 1)

        oper = {
            u"host": host,
            u"socket": sock,
            u"threads": OrderedDict({idx: list() for idx in range(threads_nr)})
        }

        for item in runtime:
            for idx in range(threads_nr):
                if item[u"vectors"][idx] > 0:
                    clocks = item[u"clocks"][idx] / item[u"vectors"][idx]
                elif item[u"calls"][idx] > 0:
                    clocks = item[u"clocks"][idx] / item[u"calls"][idx]
                elif item[u"suspends"][idx] > 0:
                    clocks = item[u"clocks"][idx] / item[u"suspends"][idx]
                else:
                    clocks = 0.0

                if item[u"calls"][idx] > 0:
                    vectors_call = item[u"vectors"][idx] / item[u"calls"][idx]
                else:
                    vectors_call = 0.0

                if int(item[u"calls"][idx]) + int(item[u"vectors"][idx]) + \
                        int(item[u"suspends"][idx]):
                    oper[u"threads"][idx].append([
                        item[u"name"],
                        item[u"calls"][idx],
                        item[u"vectors"][idx],
                        item[u"suspends"][idx],
                        clocks,
                        vectors_call
                    ])

        self._data[u'tests'][self._test_id][u'show-run'][dut] = copy.copy(oper)

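    # Editorial note, derived from the code above: each per-thread row stored
    # in "threads" is [node name, calls, vectors, suspends, clocks per unit of
    # work, vectors per call], where clocks are averaged over vectors, calls
    # or suspends, whichever is non-zero first.
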
    def _get_ndrpdr_throughput(self, msg):
        """Get NDR_LOWER, NDR_UPPER, PDR_LOWER and PDR_UPPER from the test
        message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        throughput = {
            u"NDR": {u"LOWER": -1.0, u"UPPER": -1.0},
            u"PDR": {u"LOWER": -1.0, u"UPPER": -1.0}
        }
        status = u"FAIL"
        groups = re.search(self.REGEX_NDRPDR_RATE, msg)

        if groups is not None:
            try:
                throughput[u"NDR"][u"LOWER"] = float(groups.group(1))
                throughput[u"NDR"][u"UPPER"] = float(groups.group(2))
                throughput[u"PDR"][u"LOWER"] = float(groups.group(3))
                throughput[u"PDR"][u"UPPER"] = float(groups.group(4))
                status = u"PASS"
            except (IndexError, ValueError):
                pass

        return throughput, status

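    # Illustrative example (not from the original source): a message containing
    # "NDR_LOWER: 12000000.0 ...", "NDR_UPPER: 12100000.0 ...",
    # "PDR_LOWER: 12900000.0 ..." and "PDR_UPPER: 13000000.0 ..." (with the
    # intermediate lines expected by REGEX_NDRPDR_RATE) yields
    # ({u"NDR": {u"LOWER": 12000000.0, u"UPPER": 12100000.0},
    #   u"PDR": {u"LOWER": 12900000.0, u"UPPER": 13000000.0}}, u"PASS").
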
    def _get_plr_throughput(self, msg):
        """Get PLRsearch lower bound and PLRsearch upper bound from the test
        message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        throughput = {
            u"LOWER": -1.0,
            u"UPPER": -1.0
        }
        status = u"FAIL"
        groups = re.search(self.REGEX_PLR_RATE, msg)

        if groups is not None:
            try:
                throughput[u"LOWER"] = float(groups.group(1))
                throughput[u"UPPER"] = float(groups.group(2))
                status = u"PASS"
            except (IndexError, ValueError):
                pass

        return throughput, status

    def _get_ndrpdr_latency(self, msg):
        """Get LATENCY from the test message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """
        latency_default = {
            u"min": -1.0,
            u"avg": -1.0,
            u"max": -1.0,
            u"hdrh": u""
        }
        latency = {
            u"NDR": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"PDR": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"LAT0": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"PDR10": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"PDR50": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
            u"PDR90": {
                u"direction1": copy.copy(latency_default),
                u"direction2": copy.copy(latency_default)
            },
        }

        # TODO: Rewrite when long and base are not needed
        groups = re.search(self.REGEX_NDRPDR_LAT_LONG, msg)
        if groups is None:
            groups = re.search(self.REGEX_NDRPDR_LAT, msg)
        if groups is None:
            groups = re.search(self.REGEX_NDRPDR_LAT_BASE, msg)
        if groups is None:
            return latency, u"FAIL"

        def process_latency(in_str):
            """Return object with parsed latency values.

            TODO: Define class for the return type.

            :param in_str: Input string, min/avg/max/hdrh format.
            :type in_str: str
            :returns: Dict with min/avg/max as floats and hdrh as a string.
            :rtype: dict
            :raises IndexError: If in_str does not have enough substrings.
            :raises ValueError: If a substring does not convert to float.
            """
            in_list = in_str.split('/', 3)

            rval = {
                u"min": float(in_list[0]),
                u"avg": float(in_list[1]),
                u"max": float(in_list[2]),
                u"hdrh": u""
            }

            if len(in_list) == 4:
                rval[u"hdrh"] = str(in_list[3])

            return rval

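        # Illustrative example (not from the original source):
        # process_latency(u"1/2/4/HISTFAA...") would return
        # {u"min": 1.0, u"avg": 2.0, u"max": 4.0, u"hdrh": u"HISTFAA..."}.
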
        try:
            latency[u"NDR"][u"direction1"] = process_latency(groups.group(1))
            latency[u"NDR"][u"direction2"] = process_latency(groups.group(2))
            latency[u"PDR"][u"direction1"] = process_latency(groups.group(3))
            latency[u"PDR"][u"direction2"] = process_latency(groups.group(4))
            if groups.lastindex == 4:
                return latency, u"PASS"
        except (IndexError, ValueError):
            pass

        try:
            latency[u"PDR90"][u"direction1"] = process_latency(groups.group(5))
            latency[u"PDR90"][u"direction2"] = process_latency(groups.group(6))
            latency[u"PDR50"][u"direction1"] = process_latency(groups.group(7))
            latency[u"PDR50"][u"direction2"] = process_latency(groups.group(8))
            latency[u"PDR10"][u"direction1"] = process_latency(groups.group(9))
            latency[u"PDR10"][u"direction2"] = process_latency(groups.group(10))
            latency[u"LAT0"][u"direction1"] = process_latency(groups.group(11))
            latency[u"LAT0"][u"direction2"] = process_latency(groups.group(12))
            if groups.lastindex == 12:
                return latency, u"PASS"
        except (IndexError, ValueError):
            pass

        # TODO: Remove when not needed
        latency[u"NDR10"] = {
            u"direction1": copy.copy(latency_default),
            u"direction2": copy.copy(latency_default)
        }
        latency[u"NDR50"] = {
            u"direction1": copy.copy(latency_default),
            u"direction2": copy.copy(latency_default)
        }
        latency[u"NDR90"] = {
            u"direction1": copy.copy(latency_default),
            u"direction2": copy.copy(latency_default)
        }
        try:
            latency[u"LAT0"][u"direction1"] = process_latency(groups.group(5))
            latency[u"LAT0"][u"direction2"] = process_latency(groups.group(6))
            latency[u"NDR10"][u"direction1"] = process_latency(groups.group(7))
            latency[u"NDR10"][u"direction2"] = process_latency(groups.group(8))
            latency[u"NDR50"][u"direction1"] = process_latency(groups.group(9))
            latency[u"NDR50"][u"direction2"] = process_latency(groups.group(10))
            latency[u"NDR90"][u"direction1"] = process_latency(groups.group(11))
            latency[u"NDR90"][u"direction2"] = process_latency(groups.group(12))
            latency[u"PDR10"][u"direction1"] = process_latency(groups.group(13))
            latency[u"PDR10"][u"direction2"] = process_latency(groups.group(14))
            latency[u"PDR50"][u"direction1"] = process_latency(groups.group(15))
            latency[u"PDR50"][u"direction2"] = process_latency(groups.group(16))
            latency[u"PDR90"][u"direction1"] = process_latency(groups.group(17))
            latency[u"PDR90"][u"direction2"] = process_latency(groups.group(18))
            return latency, u"PASS"
        except (IndexError, ValueError):
            pass

        return latency, u"FAIL"

    def visit_suite(self, suite):
        """Implements traversing through the suite and its direct children.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """
        if self.start_suite(suite) is not False:
            suite.suites.visit(self)
            suite.tests.visit(self)
            self.end_suite(suite)

    def start_suite(self, suite):
        """Called when suite starts.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """

        try:
            parent_name = suite.parent.name
        except AttributeError:
            return

        doc_str = suite.doc.\
            replace(u'"', u"'").\
            replace(u'\n', u' ').\
            replace(u'\r', u'').\
            replace(u'*[', u' |br| *[').\
            replace(u"*", u"**").\
            replace(u' |br| *[', u'*[', 1)

        self._data[u"suites"][suite.longname.lower().
                              replace(u'"', u"'").
                              replace(u" ", u"_")] = {
                                  u"name": suite.name.lower(),
                                  u"doc": doc_str,
                                  u"parent": parent_name,
                                  u"level": len(suite.longname.split(u"."))
                              }

        suite.keywords.visit(self)

    def end_suite(self, suite):
        """Called when suite ends.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """

    def visit_test(self, test):
        """Implements traversing through the test.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """
        if self.start_test(test) is not False:
            test.keywords.visit(self)
            self.end_test(test)

    def start_test(self, test):
        """Called when test starts.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """

        self._sh_run_counter = 0

        longname_orig = test.longname.lower()

        # Check the ignore list
        if longname_orig in self._ignore:
            return

        tags = [str(tag) for tag in test.tags]
        test_result = dict()

        # Change the TC long name and name if defined in the mapping table
        longname = self._mapping.get(longname_orig, None)
        if longname is not None:
            name = longname.split(u'.')[-1]
            logging.debug(
                f"{self._data[u'metadata']}\n{longname_orig}\n{longname}\n"
                f"{name}"
            )
        else:
            longname = longname_orig
            name = test.name.lower()

        # Remove TC number from the TC long name (backward compatibility):
        self._test_id = re.sub(self.REGEX_TC_NUMBER, u"", longname)
        # Remove TC number from the TC name (not needed):
        test_result[u"name"] = re.sub(self.REGEX_TC_NUMBER, u"", name)

        test_result[u"parent"] = test.parent.name.lower()
        test_result[u"tags"] = tags
        test_result[u"doc"] = test.doc.\
            replace(u'"', u"'").\
            replace(u'\n', u' ').\
            replace(u'\r', u'').\
            replace(u'[', u' |br| [').\
            replace(u' |br| [', u'[', 1)
        test_result[u"msg"] = test.message.\
            replace(u'\n', u' |br| ').\
            replace(u'\r', u'').\
            replace(u'"', u"'")
        test_result[u"type"] = u"FUNC"
        test_result[u"status"] = test.status

978         if u"PERFTEST" in tags:
979             # Replace info about cores (e.g. -1c-) with the info about threads
980             # and cores (e.g. -1t1c-) in the long test case names and in the
981             # test case names if necessary.
982             groups = re.search(self.REGEX_TC_NAME_OLD, self._test_id)
983             if not groups:
984                 tag_count = 0
985                 tag_tc = str()
986                 for tag in test_result[u"tags"]:
987                     groups = re.search(self.REGEX_TC_TAG, tag)
988                     if groups:
989                         tag_count += 1
990                         tag_tc = tag
991
992                 if tag_count == 1:
993                     self._test_id = re.sub(self.REGEX_TC_NAME_NEW,
994                                            f"-{tag_tc.lower()}-",
995                                            self._test_id,
996                                            count=1)
997                     test_result[u"name"] = re.sub(self.REGEX_TC_NAME_NEW,
998                                                   f"-{tag_tc.lower()}-",
999                                                   test_result["name"],
1000                                                   count=1)
1001                 else:
1002                     test_result[u"status"] = u"FAIL"
1003                     self._data[u"tests"][self._test_id] = test_result
1004                     logging.debug(
1005                         f"The test {self._test_id} has no or more than one "
1006                         f"multi-threading tags.\n"
1007                         f"Tags: {test_result[u'tags']}"
1008                     )
1009                     return
1010
        if test.status == u"PASS":
            if u"NDRPDR" in tags:
                test_result[u"msg"] = self._get_data_from_perf_test_msg(
                    test.message). \
                    replace(u'\n', u' |br| '). \
                    replace(u'\r', u''). \
                    replace(u'"', u"'")
                test_result[u"type"] = u"NDRPDR"
                test_result[u"throughput"], test_result[u"status"] = \
                    self._get_ndrpdr_throughput(test.message)
                test_result[u"latency"], test_result[u"status"] = \
                    self._get_ndrpdr_latency(test.message)
            elif u"SOAK" in tags:
                test_result[u"type"] = u"SOAK"
                test_result[u"throughput"], test_result[u"status"] = \
                    self._get_plr_throughput(test.message)
            elif u"TCP" in tags:
                test_result[u"type"] = u"TCP"
                groups = re.search(self.REGEX_TCP, test.message)
                test_result[u"result"] = int(groups.group(2))
            elif u"MRR" in tags or u"FRMOBL" in tags or u"BMRR" in tags:
                test_result[u"msg"] = self._get_data_from_mrr_test_msg(
                    test.message). \
                    replace(u'\n', u' |br| '). \
                    replace(u'\r', u''). \
                    replace(u'"', u"'")
                if u"MRR" in tags:
                    test_result[u"type"] = u"MRR"
                else:
                    test_result[u"type"] = u"BMRR"

                test_result[u"result"] = dict()
                groups = re.search(self.REGEX_BMRR, test.message)
                if groups is not None:
                    items_str = groups.group(1)
                    items_float = [float(item.strip()) for item
                                   in items_str.split(",")]
                    # Use whole list in CSIT-1180.
                    stats = jumpavg.AvgStdevStats.for_runs(items_float)
                    test_result[u"result"][u"receive-rate"] = stats.avg
                else:
                    groups = re.search(self.REGEX_MRR, test.message)
                    test_result[u"result"][u"receive-rate"] = \
                        float(groups.group(3)) / float(groups.group(1))
            elif u"RECONF" in tags:
                test_result[u"type"] = u"RECONF"
                test_result[u"result"] = None
                try:
                    grps_loss = re.search(self.REGEX_RECONF_LOSS, test.message)
                    grps_time = re.search(self.REGEX_RECONF_TIME, test.message)
                    test_result[u"result"] = {
                        u"loss": int(grps_loss.group(1)),
                        u"time": float(grps_time.group(1))
                    }
                except (AttributeError, IndexError, ValueError, TypeError):
                    test_result[u"status"] = u"FAIL"
            elif u"DEVICETEST" in tags:
                test_result[u"type"] = u"DEVICETEST"
            else:
                test_result[u"status"] = u"FAIL"
                self._data[u"tests"][self._test_id] = test_result
                return

        self._data[u"tests"][self._test_id] = test_result

    def end_test(self, test):
        """Called when test ends.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """

    def visit_keyword(self, keyword):
        """Implements traversing through the keyword and its child keywords.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """
        if self.start_keyword(keyword) is not False:
            self.end_keyword(keyword)

    def start_keyword(self, keyword):
        """Called when keyword starts. Dispatches the keyword to the matching
        visit method based on its type (setup, teardown or test).

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """
        try:
            if keyword.type == u"setup":
                self.visit_setup_kw(keyword)
            elif keyword.type == u"teardown":
                self.visit_teardown_kw(keyword)
            else:
                self.visit_test_kw(keyword)
        except AttributeError:
            pass

    def end_keyword(self, keyword):
        """Called when keyword ends. Default implementation does nothing.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """

    def visit_test_kw(self, test_kw):
        """Implements traversing through the test keyword and its child
        keywords.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """
        for keyword in test_kw.keywords:
            if self.start_test_kw(keyword) is not False:
                self.visit_test_kw(keyword)
                self.end_test_kw(keyword)

    def start_test_kw(self, test_kw):
        """Called when test keyword starts. Sets the message type to parse
        if the keyword is of interest, then visits its messages.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """
        if test_kw.name.count(u"Show Runtime On All Duts") or \
                test_kw.name.count(u"Show Runtime Counters On All Duts"):
            self._msg_type = u"test-show-runtime"
            self._sh_run_counter += 1
        elif test_kw.name.count(u"Install Dpdk Test") and not self._version:
            self._msg_type = u"dpdk-version"
        else:
            return
        test_kw.messages.visit(self)

    def end_test_kw(self, test_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """

    def visit_setup_kw(self, setup_kw):
        """Implements traversing through the setup keyword and its child
        keywords.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """
        for keyword in setup_kw.keywords:
            if self.start_setup_kw(keyword) is not False:
                self.visit_setup_kw(keyword)
                self.end_setup_kw(keyword)

    def start_setup_kw(self, setup_kw):
        """Called when setup keyword starts. Sets the message type to parse
        if the keyword is of interest, then visits its messages.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """
        if setup_kw.name.count(u"Show Vpp Version On All Duts") \
                and not self._version:
            self._msg_type = u"vpp-version"
        elif setup_kw.name.count(u"Set Global Variable") \
                and not self._timestamp:
            self._msg_type = u"timestamp"
        elif setup_kw.name.count(u"Setup Framework") and not self._testbed:
            self._msg_type = u"testbed"
        else:
            return
        setup_kw.messages.visit(self)

    def end_setup_kw(self, setup_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """

    def visit_teardown_kw(self, teardown_kw):
        """Implements traversing through the teardown keyword and its child
        keywords.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """
        for keyword in teardown_kw.keywords:
            if self.start_teardown_kw(keyword) is not False:
                self.visit_teardown_kw(keyword)
                self.end_teardown_kw(keyword)

    def start_teardown_kw(self, teardown_kw):
        """Called when teardown keyword starts.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """

        if teardown_kw.name.count(u"Show Vat History On All Duts"):
            # TODO: Remove when not needed:
            self._conf_history_lookup_nr = 0
            self._msg_type = u"teardown-vat-history"
            teardown_kw.messages.visit(self)
        elif teardown_kw.name.count(u"Show Papi History On All Duts"):
            self._conf_history_lookup_nr = 0
            self._msg_type = u"teardown-papi-history"
            teardown_kw.messages.visit(self)

    def end_teardown_kw(self, teardown_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """

    def visit_message(self, msg):
        """Implements visiting the message.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if self.start_message(msg) is not False:
            self.end_message(msg)

    def start_message(self, msg):
        """Called when message starts. Dispatches the message to the parser
        selected by the current message type (e.g. VPP version extraction).

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if self._msg_type:
            self.parse_msg[self._msg_type](msg)

    def end_message(self, msg):
        """Called when message ends. Default implementation does nothing.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """


class InputData:
    """Input data

    The data is extracted from output.xml files generated by Jenkins jobs and
    stored in pandas' DataFrames.

    The data structure:
    - job name
      - build number
        - metadata
          (as described in ExecutionChecker documentation)
        - suites
          (as described in ExecutionChecker documentation)
        - tests
          (as described in ExecutionChecker documentation)
    """

    def __init__(self, spec):
        """Initialization.

        :param spec: Specification.
        :type spec: Specification
        """

        # Specification:
        self._cfg = spec

        # Data store:
        self._input_data = pd.Series()

    @property
    def data(self):
        """Getter - Input data.

        :returns: Input data
        :rtype: pandas.Series
        """
        return self._input_data

    def metadata(self, job, build):
        """Getter - metadata

        :param job: Job whose metadata we want.
        :param build: Build whose metadata we want.
        :type job: str
        :type build: str
        :returns: Metadata
        :rtype: pandas.Series
        """

        return self.data[job][build][u"metadata"]

    def suites(self, job, build):
        """Getter - suites

        :param job: Job whose suites we want.
        :param build: Build whose suites we want.
        :type job: str
        :type build: str
        :returns: Suites.
        :rtype: pandas.Series
        """

        return self.data[job][str(build)][u"suites"]

    def tests(self, job, build):
        """Getter - tests

        :param job: Job whose tests we want.
        :param build: Build whose tests we want.
        :type job: str
        :type build: str
        :returns: Tests.
        :rtype: pandas.Series
        """

        return self.data[job][build][u"tests"]

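    # Illustrative usage (not from the original source; the job name and build
    # number are hypothetical):
    #
    #     tests = input_data.tests(u"csit-vpp-perf-mrr-daily-master", u"123")
    #     for test_id, test in tests.items():
    #         print(test_id, test[u"status"])
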
    def _parse_tests(self, job, build, log):
        """Process data from robot output.xml file and return JSON structured
        data.

        :param job: The name of the job whose build output data will be
            processed.
        :param build: The build whose output data will be processed.
        :param log: List of log messages.
        :type job: str
        :type build: dict
        :type log: list of tuples (severity, msg)
        :returns: JSON data structure.
        :rtype: dict
        """

        metadata = {
            u"job": job,
            u"build": build
        }

        with open(build[u"file-name"], u'r') as data_file:
            try:
                result = ExecutionResult(data_file)
            except errors.DataError as err:
                log.append(
                    (u"ERROR", f"Error occurred while parsing output.xml: "
                               f"{repr(err)}")
                )
                return None
        checker = ExecutionChecker(metadata, self._cfg.mapping,
                                   self._cfg.ignore)
        result.visit(checker)

        return checker.data

    def _download_and_parse_build(self, job, build, repeat, pid=10000):
        """Download and parse the input data file.

        :param job: Name of the Jenkins job which generated the processed input
            file.
        :param build: Information about the Jenkins build which generated the
            processed input file.
        :param repeat: Repeat the download specified number of times if not
            successful.
        :param pid: PID of the process executing this method.
        :type job: str
        :type build: dict
        :type repeat: int
        :type pid: int
        """

        logs = list()

        logs.append(
            (u"INFO", f"  Processing the job/build: {job}: {build[u'build']}")
        )

        state = u"failed"
        success = False
        data = None
        do_repeat = repeat
        while do_repeat:
            success = download_and_unzip_data_file(self._cfg, job, build, pid,
                                                   logs)
            if success:
                break
            do_repeat -= 1
        if not success:
            logs.append(
                (u"ERROR",
                 f"It is not possible to download the input data file from the "
                 f"job {job}, build {build[u'build']}, or it is damaged. "
                 f"Skipped.")
            )
        if success:
            logs.append(
                (u"INFO",
                 f"    Processing data from the build {build[u'build']} ...")
            )
            data = self._parse_tests(job, build, logs)
            if data is None:
                logs.append(
                    (u"ERROR",
                     f"Input data file from the job {job}, build "
                     f"{build[u'build']} is damaged. Skipped.")
                )
            else:
                state = u"processed"

            try:
                remove(build[u"file-name"])
            except OSError as err:
                logs.append(
                    (u"ERROR", f"Cannot remove the file {build[u'file-name']}: "
                               f"{repr(err)}")
                )

1443         # If the time-period is defined in the specification file, remove all
1444         # files which are outside the time period.
1445         timeperiod = self._cfg.input.get(u"time-period", None)
1446         if timeperiod and data:
1447             now = dt.utcnow()
1448             timeperiod = timedelta(int(timeperiod))
1449             metadata = data.get(u"metadata", None)
1450             if metadata:
1451                 generated = metadata.get(u"generated", None)
1452                 if generated:
1453                     generated = dt.strptime(generated, u"%Y%m%d %H:%M")
1454                     if (now - generated) > timeperiod:
1455                         # Remove the data and the file:
1456                         state = u"removed"
1457                         data = None
1458                         logs.append(
1459                             (u"INFO",
1460                              f"    The build {job}/{build[u'build']} is "
1461                              f"outdated, will be removed.")
1462                         )
1463         logs.append((u"INFO", u"  Done."))
1464
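             # Replay the buffered log entries through the standard logging
             # module.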
1465         for level, line in logs:
1466             if level == u"INFO":
1467                 logging.info(line)
1468             elif level == u"ERROR":
1469                 logging.error(line)
1470             elif level == u"DEBUG":
1471                 logging.debug(line)
1472             elif level == u"CRITICAL":
1473                 logging.critical(line)
1474             elif level == u"WARNING":
1475                 logging.warning(line)
1476
1477         return {u"data": data, u"state": state, u"job": job, u"build": build}
1478
1479     def download_and_parse_data(self, repeat=1):
1480         """Download the input data files, parse input data from input files and
1481         store in pandas' Series.
1482
1483         :param repeat: Repeat the download the specified number of times if it
1484             is not successful.
1485         :type repeat: int
1486         """
1487
1488         logging.info(u"Downloading and parsing input files ...")
1489
1490         for job, builds in self._cfg.builds.items():
1491             for build in builds:
1492
1493                 result = self._download_and_parse_build(job, build, repeat)
1494                 build_nr = result[u"build"][u"build"]
1495
1496                 if result[u"data"]:
1497                     data = result[u"data"]
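                         # Wrap the parsed dictionaries in pandas Series so the
                         # data can later be accessed, e.g. as
                         # self.data[job][build][u"tests"][test_id].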
1498                     build_data = pd.Series({
1499                         u"metadata": pd.Series(
1500                             list(data[u"metadata"].values()),
1501                             index=list(data[u"metadata"].keys())
1502                         ),
1503                         u"suites": pd.Series(
1504                             list(data[u"suites"].values()),
1505                             index=list(data[u"suites"].keys())
1506                         ),
1507                         u"tests": pd.Series(
1508                             list(data[u"tests"].values()),
1509                             index=list(data[u"tests"].keys())
1510                         )
1511                     })
1512
1513                     if self._input_data.get(job, None) is None:
1514                         self._input_data[job] = pd.Series()
1515                     self._input_data[job][str(build_nr)] = build_data
1516
1517                     self._cfg.set_input_file_name(
1518                         job, build_nr, result[u"build"][u"file-name"])
1519
1520                 self._cfg.set_input_state(job, build_nr, result[u"state"])
1521
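                     # On Linux, ru_maxrss is reported in kilobytes, so the
                     # division by 1000 yields approximately megabytes.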
1522                 mem_alloc = \
1523                     resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000
1524                 logging.info(f"Memory allocation: {mem_alloc:.0f}MB")
1525
1526         logging.info(u"Done.")
1527
1528     @staticmethod
1529     def _end_of_tag(tag_filter, start=0, closer=u"'"):
1530         """Return the index of the character which ends the tag in the string.
1531
1532         :param tag_filter: The string searched for the end of the tag.
1533         :param start: The index at which the search starts.
1534         :param closer: The character which closes the tag.
1535         :type tag_filter: str
1536         :type start: int
1537         :type closer: str
1538         :returns: The index of the tag closer, or None if it is not found.
1539         :rtype: int
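
             For example, for tag_filter "'NDRPDR' and '1T1C'" and start 0,
             the returned index is that of the apostrophe closing 'NDRPDR'.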
1540         """
1541
1542         try:
1543             idx_opener = tag_filter.index(closer, start)
1544             return tag_filter.index(closer, idx_opener + 1)
1545         except ValueError:
1546             return None
1547
1548     @staticmethod
1549     def _condition(tag_filter):
1550         """Create a conditional statement from the given tag filter.
1551
1552         :param tag_filter: Filter based on tags from the element specification.
1553         :type tag_filter: str
1554         :returns: Conditional statement which can be evaluated.
1555         :rtype: str
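
             For example, the filter "'NDRPDR' and not '2T2C'" is turned into
             the condition "'NDRPDR' in tags and not '2T2C' in tags".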
1556         """
1557
1558         index = 0
1559         while True:
1560             index = InputData._end_of_tag(tag_filter, index)
1561             if index is None:
1562                 return tag_filter
1563             index += 1
1564             tag_filter = tag_filter[:index] + u" in tags" + tag_filter[index:]
1565
1566     def filter_data(self, element, params=None, data=None, data_set=u"tests",
1567                     continue_on_error=False):
1568         """Filter required data from the given jobs and builds.
1569
1570         The output data structure is:
1571
1572         - job 1
1573           - build 1
1574             - test (or suite) 1 ID:
1575               - param 1
1576               - param 2
1577               ...
1578               - param n
1579             ...
1580             - test (or suite) n ID:
1581             ...
1582           ...
1583           - build n
1584         ...
1585         - job n
1586
1587         :param element: Element which will use the filtered data.
1588         :param params: Parameters which will be included in the output. If None,
1589             all parameters are included.
1590         :param data: If not None, this data is used instead of data specified
1591             in the element.
1592         :param data_set: The set of data to be filtered: tests, suites,
1593             metadata.
1594         :param continue_on_error: Continue if there is an error while reading
1595             the data. The item will be empty then.
1596         :type element: pandas.Series
1597         :type params: list
1598         :type data: dict
1599         :type data_set: str
1600         :type continue_on_error: bool
1601         :returns: Filtered data.
1602         :rtype: pandas.Series
1603         """
1604
1605         try:
1606             if data_set == u"suites":
1607                 cond = u"True"
1608             elif element[u"filter"] in (u"all", u"template"):
1609                 cond = u"True"
1610             else:
1611                 cond = InputData._condition(element[u"filter"])
1612             logging.debug(f"   Filter: {cond}")
1613         except KeyError:
1614             logging.error(u"  No filter defined.")
1615             return None
1616
1617         if params is None:
1618             params = element.get(u"parameters", None)
1619             if params:
1620                 params.append(u"type")
1621
1622         data_to_filter = data if data else element[u"data"]
1623         data = pd.Series()
1624         try:
1625             for job, builds in data_to_filter.items():
1626                 data[job] = pd.Series()
1627                 for build in builds:
1628                     data[job][str(build)] = pd.Series()
1629                     try:
1630                         data_dict = dict(
1631                             self.data[job][str(build)][data_set].items())
1632                     except KeyError:
1633                         if continue_on_error:
1634                             continue
1635                         return None
1636
1637                     for test_id, test_data in data_dict.items():
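                             # Evaluate the prepared condition with the test's
                             # tags bound to the name u"tags", e.g.
                             # "'NDRPDR' in tags".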
1638                         if eval(cond, {u"tags": test_data.get(u"tags", u"")}):
1639                             data[job][str(build)][test_id] = pd.Series()
1640                             if params is None:
1641                                 for param, val in test_data.items():
1642                                     data[job][str(build)][test_id][param] = val
1643                             else:
1644                                 for param in params:
1645                                     try:
1646                                         data[job][str(build)][test_id][param] =\
1647                                             test_data[param]
1648                                     except KeyError:
1649                                         data[job][str(build)][test_id][param] =\
1650                                             u"No Data"
1651             return data
1652
1653         except (KeyError, IndexError, ValueError) as err:
1654             logging.error(
1655                 f"Missing mandatory parameter in the element specification: "
1656                 f"{repr(err)}"
1657             )
1658             return None
1659         except AttributeError as err:
1660             logging.error(repr(err))
1661             return None
1662         except SyntaxError as err:
1663             logging.error(
1664                 f"The filter {cond} is not correct. Check if all tags are "
1665                 f"enclosed by apostrophes.\n{repr(err)}"
1666             )
1667             return None
1668
1669     def filter_tests_by_name(self, element, params=None, data_set=u"tests",
1670                              continue_on_error=False):
1671         """Filter required data from the given jobs and builds.
1672
1673         The output data structure is:
1674
1675         - job 1
1676           - build 1
1677             - test (or suite) 1 ID:
1678               - param 1
1679               - param 2
1680               ...
1681               - param n
1682             ...
1683             - test (or suite) n ID:
1684             ...
1685           ...
1686           - build n
1687         ...
1688         - job n
1689
1690         :param element: Element which will use the filtered data.
1691         :param params: Parameters which will be included in the output. If None,
1692             all parameters are included.
1693         :param data_set: The set of data to be filtered: tests, suites,
1694             metadata.
1695         :param continue_on_error: Continue if there is an error while reading
1696             the data. The item will be empty then.
1697         :type element: pandas.Series
1698         :type params: list
1699         :type data_set: str
1700         :type continue_on_error: bool
1701         :returns: Filtered data.
1702         :rtype: pandas.Series
1703         """
1704
1705         include = element.get(u"include", None)
1706         if not include:
1707             logging.warning(u"No tests to include, skipping the element.")
1708             return None
1709
1710         if params is None:
1711             params = element.get(u"parameters", None)
1712             if params:
1713                 params.append(u"type")
1714
1715         data = pd.Series()
1716         try:
1717             for job, builds in element[u"data"].items():
1718                 data[job] = pd.Series()
1719                 for build in builds:
1720                     data[job][str(build)] = pd.Series()
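                         # Each entry in u"include" is treated as a regular
                         # expression matched against lower-cased test IDs.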
1721                     for test in include:
1722                         try:
1723                             reg_ex = re.compile(str(test).lower())
1724                             for test_id in self.data[job][
1725                                     str(build)][data_set].keys():
1726                                 if re.match(reg_ex, str(test_id).lower()):
1727                                     test_data = self.data[job][
1728                                         str(build)][data_set][test_id]
1729                                     data[job][str(build)][test_id] = pd.Series()
1730                                     if params is None:
1731                                         for param, val in test_data.items():
1732                                             data[job][str(build)][test_id]\
1733                                                 [param] = val
1734                                     else:
1735                                         for param in params:
1736                                             try:
1737                                                 data[job][str(build)][
1738                                                     test_id][param] = \
1739                                                     test_data[param]
1740                                             except KeyError:
1741                                                 data[job][str(build)][
1742                                                     test_id][param] = u"No Data"
1743                         except KeyError as err:
1744                             logging.error(repr(err))
1745                             if continue_on_error:
1746                                 continue
1747                             return None
1748             return data
1749
1750         except (KeyError, IndexError, ValueError) as err:
1751             logging.error(
1752                 f"Missing mandatory parameter in the element "
1753                 f"specification: {repr(err)}"
1754             )
1755             return None
1756         except AttributeError as err:
1757             logging.error(repr(err))
1758             return None
1759
1760     @staticmethod
1761     def merge_data(data):
1762         """Merge data from multiple jobs and builds into one data structure.
1763
1764         The output data structure is:
1765
1766         - test (suite) 1 ID:
1767           - param 1
1768           - param 2
1769           ...
1770           - param n
1771         ...
1772         - test (suite) n ID:
1773         ...
1774
1775         :param data: Data to merge.
1776         :type data: pandas.Series
1777         :returns: Merged data.
1778         :rtype: pandas.Series
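
             If the same item ID occurs in more than one build, the occurrence
             processed last overwrites the previous ones in the merged data.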
1779         """
1780
1781         logging.info(u"    Merging data ...")
1782
1783         merged_data = pd.Series()
1784         for builds in data.values:
1785             for item in builds.values:
1786                 for item_id, item_data in item.items():
1787                     merged_data[item_id] = item_data
1788
1789         return merged_data
1790
1791     def print_all_oper_data(self):
1792         """Print all operational data to the console.
1793         """
1794
1795         tbl_hdr = (
1796             u"Name",
1797             u"Nr of Vectors",
1798             u"Nr of Packets",
1799             u"Suspends",
1800             u"Cycles per Packet",
1801             u"Average Vector Size"
1802         )
1803
1804         for job in self._input_data.values:
1805             for build in job.values:
1806                 for test_id, test_data in build[u"tests"].items():
1807                     print(f"{test_id}")
1808                     if test_data.get(u"show-run", None) is None:
1809                         continue
1810                     for dut_name, data in test_data[u"show-run"].items():
1811                         if data.get(u"threads", None) is None:
1812                             continue
1813                         print(f"Host IP: {data.get(u'host', '')}, "
1814                               f"Socket: {data.get(u'socket', '')}")
1815                         for thread_nr, thread in data[u"threads"].items():
1816                             txt_table = prettytable.PrettyTable(tbl_hdr)
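                                 # The last column of a row is the average
                                 # vector size; sum it to compute the per-node
                                 # average.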
1817                             avg = 0.0
1818                             for row in thread:
1819                                 txt_table.add_row(row)
1820                                 avg += row[-1]
1821                             if len(thread) == 0:
1822                                 avg = u""
1823                             else:
1824                                 avg = f", Average Vector Size per Node: " \
1825                                       f"{(avg / len(thread)):.2f}"
1826                             th_name = u"main" if thread_nr == 0 \
1827                                 else f"worker_{thread_nr}"
1828                             print(f"{dut_name}, {th_name}{avg}")
1829                             txt_table.float_format = u".2"
1830                             txt_table.align = u"r"
1831                             txt_table.align[u"Name"] = u"l"
1832                             print(f"{txt_table.get_string()}\n")