# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Data pre-processing

- extract data from output.xml files generated by Jenkins jobs and store in
  pandas' Series,
- provide access to the data,
- filter the data using tags.
"""
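
# Typical usage, a minimal sketch (assumes a prepared Specification object
# `spec`; the job name and build number below are placeholders):
#
#     input_data = InputData(spec, for_output="report")
#     input_data.download_and_parse_data(repeat=2)
#     tests = input_data.tests("csit-vpp-perf-report-iterative", "42")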

import re
import copy
import resource
import logging

from collections import OrderedDict
from os import remove, walk, listdir
from os.path import isfile, isdir, join
from datetime import datetime as dt
from datetime import timedelta
from json import loads
from json.decoder import JSONDecodeError

import hdrh.histogram
import hdrh.codec
import prettytable
import pandas as pd

from robot.api import ExecutionResult, ResultVisitor
from robot import errors

from resources.libraries.python import jumpavg
from input_data_files import download_and_unzip_data_file
from pal_errors import PresentationError


# Separator used in file names
SEPARATOR = "__"


class ExecutionChecker(ResultVisitor):
    """Class to traverse through the test suite structure.
    """

    REGEX_PLR_RATE = re.compile(
        r'PLRsearch lower bound::?\s(\d+.\d+).*\n'
        r'PLRsearch upper bound::?\s(\d+.\d+)'
    )
    REGEX_NDRPDR_RATE = re.compile(
        r'NDR_LOWER:\s(\d+.\d+).*\n.*\n'
        r'NDR_UPPER:\s(\d+.\d+).*\n'
        r'PDR_LOWER:\s(\d+.\d+).*\n.*\n'
        r'PDR_UPPER:\s(\d+.\d+)'
    )
    REGEX_NDRPDR_GBPS = re.compile(
        r'NDR_LOWER:.*,\s(\d+.\d+).*\n.*\n'
        r'NDR_UPPER:.*,\s(\d+.\d+).*\n'
        r'PDR_LOWER:.*,\s(\d+.\d+).*\n.*\n'
        r'PDR_UPPER:.*,\s(\d+.\d+)'
    )
    REGEX_PERF_MSG_INFO = re.compile(
        r'NDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*\n'
        r'PDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*\n'
        r'Latency at 90% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
        r'Latency at 50% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
        r'Latency at 10% PDR:.*\[\'(.*)\', \'(.*)\'\].*\n'
    )
    REGEX_CPS_MSG_INFO = re.compile(
        r'NDR_LOWER:\s(\d+.\d+)\s.*\s.*\n.*\n.*\n'
        r'PDR_LOWER:\s(\d+.\d+)\s.*\s.*\n.*\n.*'
    )
    REGEX_PPS_MSG_INFO = re.compile(
        r'NDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*\n'
        r'PDR_LOWER:\s(\d+.\d+)\s.*\s(\d+.\d+)\s.*\n.*\n.*'
    )
    REGEX_MRR_MSG_INFO = re.compile(r'.*\[(.*)\]')

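    # REGEX_VSAP_MSG_INFO matches vsap test output of roughly this shape
    # (illustrative, reconstructed from the pattern itself):
    #   Transfer Rate: 123.45 ...
    #   Latency: 0.95 ...
    #   Connection cps rate: 10000 ...
    #   Total data transferred: 4096 ...
    #   Completed requests: 100 ...
    #   Failed requests: 0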
    REGEX_VSAP_MSG_INFO = re.compile(
        r'Transfer Rate: (\d*.\d*).*\n'
        r'Latency: (\d*.\d*).*\n'
        r'Connection [cr]ps rate: (\d*).*\n'
        r'Total data transferred: (\d*).*\n'
        r'Completed requests: (\d*).*\n'
        r'Failed requests:\s*(\d*.\d*)'
    )

    # Needed for CPS and PPS tests
    REGEX_NDRPDR_LAT_BASE = re.compile(
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]'
    )
    REGEX_NDRPDR_LAT = re.compile(
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
        r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]\s\n'
        r'Latency.*\[\'(.*)\', \'(.*)\'\]'
    )

    REGEX_VERSION_VPP = re.compile(
        r"(VPP Version:\s*|VPP version:\s*)(.*)"
    )
    REGEX_VERSION_DPDK = re.compile(
        r"(DPDK version:\s*|DPDK Version:\s*)(.*)"
    )
    REGEX_TCP = re.compile(
        r'Total\s(rps|cps|throughput):\s(\d*).*$'
    )
    REGEX_MRR = re.compile(
        r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s'
        r'tx\s(\d*),\srx\s(\d*)'
    )
    REGEX_BMRR = re.compile(
        r'.*trial results.*: \[(.*)\]'
    )
    REGEX_RECONF_LOSS = re.compile(
        r'Packets lost due to reconfig: (\d*)'
    )
    REGEX_RECONF_TIME = re.compile(
        r'Implied time lost: (\d*.[\de-]*)'
    )
    REGEX_TC_TAG = re.compile(r'\d+[tT]\d+[cC]')

    REGEX_TC_NAME_NEW = re.compile(r'-\d+[cC]-')

    REGEX_TC_NUMBER = re.compile(r'tc\d{2}-')

    REGEX_TC_PAPI_CLI = re.compile(r'.*\((\d+.\d+.\d+.\d+.) - (.*)\)')

    REGEX_SH_RUN_HOST = re.compile(
        r'hostname=\"(\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3})\",hook=\"(.*)\"'
    )

    def __init__(self, metadata, mapping, ignore, process_oper):
        """Initialisation.

        :param metadata: Key-value pairs to be included in "metadata" part of
            JSON structure.
        :param mapping: Mapping of the old names of test cases to the new
            (actual) ones.
        :param ignore: List of TCs to be ignored.
        :param process_oper: If True, operational data (show run, telemetry) is
            processed.
        :type metadata: dict
        :type mapping: dict
        :type ignore: list
        :type process_oper: bool
        """

        # Mapping of TCs long names
        self._mapping = mapping

        # Ignore list
        self._ignore = ignore

        # Process operational data
        self._process_oper = process_oper

        # Name of currently processed keyword
        self._kw_name = None

        # VPP version
        self._version = None

        # Timestamp
        self._timestamp = None

        # Testbed. The testbed is identified by TG node IP address.
        self._testbed = None

        # Number of PAPI History messages found:
        # 0 - no message
        # 1 - PAPI History of DUT1
        # 2 - PAPI History of DUT2
        self._conf_history_lookup_nr = 0

        self._sh_run_counter = 0
        self._telemetry_kw_counter = 0
        self._telemetry_msg_counter = 0

        # Test ID of the currently processed test - the lowercase full path
        # to the test
        self._test_id = None

        # The main data structure
        self._data = {
            "metadata": dict(),
            "suites": dict(),
            "tests": dict()
        }

        # Save the provided metadata
        for key, val in metadata.items():
            self._data["metadata"][key] = val

    @property
    def data(self):
        """Getter - Data parsed from the XML file.

        :returns: Data parsed from the XML file.
        :rtype: dict
        """
        return self._data

    def _get_data_from_mrr_test_msg(self, msg):
        """Get info from message of MRR performance tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or "Test Failed." if a problem occurs.
        :rtype: str
        """

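        # Example (illustrative): a message containing "[10000000.0,
        # 10000000.0]" is rendered as "[10.00, 10.00]", i.e. the trial
        # receive rates scaled to millions of packets per second.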
        groups = re.search(self.REGEX_MRR_MSG_INFO, msg)
        if not groups or groups.lastindex != 1:
            return "Test Failed."

        try:
            data = groups.group(1).split(", ")
        except (AttributeError, IndexError, ValueError, KeyError):
            return "Test Failed."

        out_str = "["
        try:
            for item in data:
                out_str += f"{(float(item) / 1e6):.2f}, "
            return out_str[:-2] + "]"
        except (AttributeError, IndexError, ValueError, KeyError):
            return "Test Failed."

    def _get_data_from_cps_test_msg(self, msg):
        """Get info from message of NDRPDR CPS tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or "Test Failed." if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_CPS_MSG_INFO, msg)
        if not groups or groups.lastindex != 2:
            return "Test Failed."

        try:
            return (
                f"1. {(float(groups.group(1)) / 1e6):5.2f}\n"
                f"2. {(float(groups.group(2)) / 1e6):5.2f}"
            )
        except (AttributeError, IndexError, ValueError, KeyError):
            return "Test Failed."

    def _get_data_from_pps_test_msg(self, msg):
        """Get info from message of NDRPDR PPS tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or "Test Failed." if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_PPS_MSG_INFO, msg)
        if not groups or groups.lastindex != 4:
            return "Test Failed."

        try:
            return (
                f"1. {(float(groups.group(1)) / 1e6):5.2f}      "
                f"{float(groups.group(2)):5.2f}\n"
                f"2. {(float(groups.group(3)) / 1e6):5.2f}      "
                f"{float(groups.group(4)):5.2f}"
            )
        except (AttributeError, IndexError, ValueError, KeyError):
            return "Test Failed."

    def _get_data_from_perf_test_msg(self, msg):
        """Get info from message of NDRPDR performance tests.

        :param msg: Message to be processed.
        :type msg: str
        :returns: Processed message or "Test Failed." if a problem occurs.
        :rtype: str
        """

        groups = re.search(self.REGEX_PERF_MSG_INFO, msg)
        if not groups or groups.lastindex != 10:
            return "Test Failed."

        try:
            data = {
                "ndr_low": float(groups.group(1)),
                "ndr_low_b": float(groups.group(2)),
                "pdr_low": float(groups.group(3)),
                "pdr_low_b": float(groups.group(4)),
                "pdr_lat_90_1": groups.group(5),
                "pdr_lat_90_2": groups.group(6),
                "pdr_lat_50_1": groups.group(7),
                "pdr_lat_50_2": groups.group(8),
                "pdr_lat_10_1": groups.group(9),
                "pdr_lat_10_2": groups.group(10),
            }
        except (AttributeError, IndexError, ValueError, KeyError):
            return "Test Failed."

        def _process_lat(in_str_1, in_str_2):
            """Extract P50, P90 and P99 latencies or min, avg, max values from
            latency string.

            :param in_str_1: Latency string for one direction produced by robot
                framework.
            :param in_str_2: Latency string for second direction produced by
                robot framework.
            :type in_str_1: str
            :type in_str_2: str
            :returns: Processed latency string or None if a problem occurs.
            :rtype: tuple
            """
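            # Example input (illustrative): "10/15/25/HISTEwAAA..." in
            # min/avg/max/hdrh order; the fourth field is a base64-encoded
            # HDR histogram, decoded below via the hdrh library.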
            in_list_1 = in_str_1.split('/', 3)
            in_list_2 = in_str_2.split('/', 3)

            if len(in_list_1) != 4 or len(in_list_2) != 4:
                return None

            # Pad the base64-encoded hdrh data to a multiple of four
            # characters before decoding.
            in_list_1[3] += "=" * ((4 - len(in_list_1[3]) % 4) % 4)
            try:
                hdr_lat_1 = hdrh.histogram.HdrHistogram.decode(in_list_1[3])
            except hdrh.codec.HdrLengthException:
                hdr_lat_1 = None

            in_list_2[3] += "=" * ((4 - len(in_list_2[3]) % 4) % 4)
            try:
                hdr_lat_2 = hdrh.histogram.HdrHistogram.decode(in_list_2[3])
            except hdrh.codec.HdrLengthException:
                hdr_lat_2 = None

            if hdr_lat_1 and hdr_lat_2:
                hdr_lat = (
                    hdr_lat_1.get_value_at_percentile(50.0),
                    hdr_lat_1.get_value_at_percentile(90.0),
                    hdr_lat_1.get_value_at_percentile(99.0),
                    hdr_lat_2.get_value_at_percentile(50.0),
                    hdr_lat_2.get_value_at_percentile(90.0),
                    hdr_lat_2.get_value_at_percentile(99.0)
                )
                if all(hdr_lat):
                    return hdr_lat

            hdr_lat = (
                int(in_list_1[0]), int(in_list_1[1]), int(in_list_1[2]),
                int(in_list_2[0]), int(in_list_2[1]), int(in_list_2[2])
            )
            for item in hdr_lat:
                if item in (-1, 4294967295, 0):
                    return None
            return hdr_lat

        try:
            out_msg = (
                f"1. {(data['ndr_low'] / 1e6):5.2f}      "
                f"{data['ndr_low_b']:5.2f}"
                f"\n2. {(data['pdr_low'] / 1e6):5.2f}      "
                f"{data['pdr_low_b']:5.2f}"
            )
            latency = (
                _process_lat(data['pdr_lat_10_1'], data['pdr_lat_10_2']),
                _process_lat(data['pdr_lat_50_1'], data['pdr_lat_50_2']),
                _process_lat(data['pdr_lat_90_1'], data['pdr_lat_90_2'])
            )
            if all(latency):
                max_len = len(str(max((max(item) for item in latency))))
                max_len = 4 if max_len < 4 else max_len

                for idx, lat in enumerate(latency):
                    if not idx:
                        out_msg += "\n"
                    out_msg += (
                        f"\n{idx + 3}. "
                        f"{lat[0]:{max_len}d} "
                        f"{lat[1]:{max_len}d} "
                        f"{lat[2]:{max_len}d}      "
                        f"{lat[3]:{max_len}d} "
                        f"{lat[4]:{max_len}d} "
                        f"{lat[5]:{max_len}d} "
                    )

            return out_msg

        except (AttributeError, IndexError, ValueError, KeyError):
            return "Test Failed."

    def _get_testbed(self, msg):
        """Called when extraction of testbed IP is required.
        The testbed is identified by TG node IP address.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count("Setup of TG node") or \
                msg.message.count("Setup of node TG host"):
            reg_tg_ip = re.compile(
                r'.*TG .* (\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}).*')
            try:
                self._testbed = str(re.search(reg_tg_ip, msg.message).group(1))
            except (KeyError, ValueError, IndexError, AttributeError):
                pass
            finally:
                self._data["metadata"]["testbed"] = self._testbed

    def _get_vpp_version(self, msg):
        """Called when extraction of VPP version is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count("VPP version:") or \
                msg.message.count("VPP Version:"):
            self._version = str(
                re.search(self.REGEX_VERSION_VPP, msg.message).group(2)
            )
            self._data["metadata"]["version"] = self._version

    def _get_dpdk_version(self, msg):
        """Called when extraction of DPDK version is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count("DPDK Version:"):
            try:
                self._version = str(re.search(
                    self.REGEX_VERSION_DPDK, msg.message).group(2))
                self._data["metadata"]["version"] = self._version
            except IndexError:
                pass

    def _get_papi_history(self, msg):
        """Called when extraction of PAPI command history is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if msg.message.count("PAPI command history:"):
            self._conf_history_lookup_nr += 1
            if self._conf_history_lookup_nr == 1:
                self._data["tests"][self._test_id]["conf-history"] = str()
            text = re.sub(
                r"\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3} PAPI command history:",
                "",
                msg.message,
                count=1
            ).replace('"', "'")
            self._data["tests"][self._test_id]["conf-history"] += \
                f"**DUT{str(self._conf_history_lookup_nr)}:** {text}"

    def _get_show_run(self, msg):
        """Called when extraction of VPP operational data (output of CLI command
        Show Runtime) is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if not msg.message.count("stats runtime"):
            return

        # Temporary solution
        if self._sh_run_counter > 1:
            return

        if "show-run" not in self._data["tests"][self._test_id].keys():
            self._data["tests"][self._test_id]["show-run"] = dict()

        groups = re.search(self.REGEX_TC_PAPI_CLI, msg.message)
        if not groups:
            return
        try:
            host = groups.group(1)
        except (AttributeError, IndexError):
            host = ""
        try:
            sock = groups.group(2)
        except (AttributeError, IndexError):
            sock = ""

        dut = "dut{nr}".format(
            nr=len(self._data['tests'][self._test_id]['show-run'].keys()) + 1)

        self._data['tests'][self._test_id]['show-run'][dut] = \
            copy.copy(
                {
                    "host": host,
                    "socket": sock,
                    "runtime": str(msg.message).replace(' ', '').
                                replace('\n', '').replace("'", '"').
                                replace('b"', '"').replace('u"', '"').
                                split(":", 1)[1]
                }
            )

    def _get_telemetry(self, msg):
        """Called when extraction of VPP telemetry data is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if self._telemetry_kw_counter > 1:
            return
        if not msg.message.count("# TYPE vpp_runtime_calls"):
            return

        if "telemetry-show-run" not in \
                self._data["tests"][self._test_id].keys():
            self._data["tests"][self._test_id]["telemetry-show-run"] = dict()

        self._telemetry_msg_counter += 1
        groups = re.search(self.REGEX_SH_RUN_HOST, msg.message)
        if not groups:
            return
        try:
            host = groups.group(1)
        except (AttributeError, IndexError):
            host = ""
        try:
            sock = groups.group(2)
        except (AttributeError, IndexError):
            sock = ""
        runtime = {
            "source_type": "node",
            "source_id": host,
            "msg_type": "metric",
            "log_level": "INFO",
            "timestamp": msg.timestamp,
            "msg": "show_runtime",
            "host": host,
            "socket": sock,
            "data": list()
        }
        for line in msg.message.splitlines():
            if not line.startswith("vpp_runtime_"):
                continue
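            # Each remaining line is expected in a Prometheus-style form,
            # e.g. (illustrative):
            #   vpp_runtime_calls{name="ip4-lookup",state="active",thread_id="0"} 123 1234567890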
            try:
                params, value, timestamp = line.rsplit(" ", maxsplit=2)
                cut = params.index("{")
                name = params[:cut].split("_", maxsplit=2)[-1]
                labels = eval(
                    "dict" + params[cut:].replace('{', '(').replace('}', ')')
                )
                labels["graph_node"] = labels.pop("name")
                runtime["data"].append(
                    {
                        "name": name,
                        "value": value,
                        "timestamp": timestamp,
                        "labels": labels
                    }
                )
            except (TypeError, ValueError, IndexError):
                continue
        self._data['tests'][self._test_id]['telemetry-show-run']\
            [f"dut{self._telemetry_msg_counter}"] = copy.copy(
                {
                    "host": host,
                    "socket": sock,
                    "runtime": runtime
                }
            )

    def _get_ndrpdr_throughput(self, msg):
        """Get NDR_LOWER, NDR_UPPER, PDR_LOWER and PDR_UPPER from the test
        message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        throughput = {
            "NDR": {"LOWER": -1.0, "UPPER": -1.0},
            "PDR": {"LOWER": -1.0, "UPPER": -1.0}
        }
        status = "FAIL"
        groups = re.search(self.REGEX_NDRPDR_RATE, msg)

        if groups is not None:
            try:
                throughput["NDR"]["LOWER"] = float(groups.group(1))
                throughput["NDR"]["UPPER"] = float(groups.group(2))
                throughput["PDR"]["LOWER"] = float(groups.group(3))
                throughput["PDR"]["UPPER"] = float(groups.group(4))
                status = "PASS"
            except (IndexError, ValueError):
                pass

        return throughput, status

    def _get_ndrpdr_throughput_gbps(self, msg):
        """Get NDR_LOWER, NDR_UPPER, PDR_LOWER and PDR_UPPER in Gbps from the
        test message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        gbps = {
            "NDR": {"LOWER": -1.0, "UPPER": -1.0},
            "PDR": {"LOWER": -1.0, "UPPER": -1.0}
        }
        status = "FAIL"
        groups = re.search(self.REGEX_NDRPDR_GBPS, msg)

        if groups is not None:
            try:
                gbps["NDR"]["LOWER"] = float(groups.group(1))
                gbps["NDR"]["UPPER"] = float(groups.group(2))
                gbps["PDR"]["LOWER"] = float(groups.group(3))
                gbps["PDR"]["UPPER"] = float(groups.group(4))
                status = "PASS"
            except (IndexError, ValueError):
                pass

        return gbps, status

    def _get_plr_throughput(self, msg):
        """Get PLRsearch lower bound and PLRsearch upper bound from the test
        message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        throughput = {
            "LOWER": -1.0,
            "UPPER": -1.0
        }
        status = "FAIL"
        groups = re.search(self.REGEX_PLR_RATE, msg)

        if groups is not None:
            try:
                throughput["LOWER"] = float(groups.group(1))
                throughput["UPPER"] = float(groups.group(2))
                status = "PASS"
            except (IndexError, ValueError):
                pass

        return throughput, status

    def _get_ndrpdr_latency(self, msg):
        """Get LATENCY from the test message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """
        latency_default = {
            "min": -1.0,
            "avg": -1.0,
            "max": -1.0,
            "hdrh": ""
        }
        latency = {
            "NDR": {
                "direction1": copy.copy(latency_default),
                "direction2": copy.copy(latency_default)
            },
            "PDR": {
                "direction1": copy.copy(latency_default),
                "direction2": copy.copy(latency_default)
            },
            "LAT0": {
                "direction1": copy.copy(latency_default),
                "direction2": copy.copy(latency_default)
            },
            "PDR10": {
                "direction1": copy.copy(latency_default),
                "direction2": copy.copy(latency_default)
            },
            "PDR50": {
                "direction1": copy.copy(latency_default),
                "direction2": copy.copy(latency_default)
            },
            "PDR90": {
                "direction1": copy.copy(latency_default),
                "direction2": copy.copy(latency_default)
            },
        }

        groups = re.search(self.REGEX_NDRPDR_LAT, msg)
        if groups is None:
            groups = re.search(self.REGEX_NDRPDR_LAT_BASE, msg)
        if groups is None:
            return latency, "FAIL"

        def process_latency(in_str):
            """Return object with parsed latency values.

            TODO: Define class for the return type.

            :param in_str: Input string, min/avg/max/hdrh format.
            :type in_str: str
            :returns: Dict with float values for min/avg/max and a string for
                hdrh.
            :rtype: dict
            :throws IndexError: If in_str does not have enough substrings.
            :throws ValueError: If a substring does not convert to float.
            """
            in_list = in_str.split('/', 3)

            rval = {
                "min": float(in_list[0]),
                "avg": float(in_list[1]),
                "max": float(in_list[2]),
                "hdrh": ""
            }

            if len(in_list) == 4:
                rval["hdrh"] = str(in_list[3])

            return rval

        try:
            latency["NDR"]["direction1"] = process_latency(groups.group(1))
            latency["NDR"]["direction2"] = process_latency(groups.group(2))
            latency["PDR"]["direction1"] = process_latency(groups.group(3))
            latency["PDR"]["direction2"] = process_latency(groups.group(4))
            if groups.lastindex == 4:
                return latency, "PASS"
        except (IndexError, ValueError):
            pass

        try:
            latency["PDR90"]["direction1"] = process_latency(groups.group(5))
            latency["PDR90"]["direction2"] = process_latency(groups.group(6))
            latency["PDR50"]["direction1"] = process_latency(groups.group(7))
            latency["PDR50"]["direction2"] = process_latency(groups.group(8))
            latency["PDR10"]["direction1"] = process_latency(groups.group(9))
            latency["PDR10"]["direction2"] = process_latency(groups.group(10))
            latency["LAT0"]["direction1"] = process_latency(groups.group(11))
            latency["LAT0"]["direction2"] = process_latency(groups.group(12))
            if groups.lastindex == 12:
                return latency, "PASS"
        except (IndexError, ValueError):
            pass

        return latency, "FAIL"

    @staticmethod
    def _get_hoststack_data(msg, tags):
        """Get data from the hoststack test message.

        :param msg: The test message to be parsed.
        :param tags: Test tags.
        :type msg: str
        :type tags: list
        :returns: Parsed data as a JSON dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """
        result = dict()
        status = "FAIL"

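        # The message body is expected to be either a single JSON object
        # (LDPRELOAD tests) or two concatenated JSON objects, client then
        # server (VPPECHO tests), e.g. '{"time":"3.0",...}{"time":"3.1",...}'
        # (illustrative).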
        msg = msg.replace("'", '"').replace(" ", "")
        if "LDPRELOAD" in tags:
            try:
                result = loads(msg)
                status = "PASS"
            except JSONDecodeError:
                pass
        elif "VPPECHO" in tags:
            try:
                msg_lst = msg.replace("}{", "} {").split(" ")
                result = dict(
                    client=loads(msg_lst[0]),
                    server=loads(msg_lst[1])
                )
                status = "PASS"
            except (JSONDecodeError, IndexError):
                pass

        return result, status

    def _get_vsap_data(self, msg, tags):
        """Get data from the vsap test message.

        :param msg: The test message to be parsed.
        :param tags: Test tags.
        :type msg: str
        :type tags: list
        :returns: Parsed data as a JSON dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """
        result = dict()
        status = "FAIL"

        groups = re.search(self.REGEX_VSAP_MSG_INFO, msg)
        if groups is not None:
            try:
                result["transfer-rate"] = float(groups.group(1)) * 1e3
                result["latency"] = float(groups.group(2))
                result["completed-requests"] = int(groups.group(5))
                result["failed-requests"] = int(groups.group(6))
                result["bytes-transferred"] = int(groups.group(4))
                if "TCP_CPS" in tags:
                    result["cps"] = float(groups.group(3))
                elif "TCP_RPS" in tags:
                    result["rps"] = float(groups.group(3))
                else:
                    return result, status
                status = "PASS"
            except (IndexError, ValueError) as err:
                logging.warning(err)
        return result, status

    def visit_suite(self, suite):
        """Implements traversing through the suite and its direct children.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """
        if self.start_suite(suite) is not False:
            suite.setup.visit(self)
            suite.suites.visit(self)
            suite.tests.visit(self)
            suite.teardown.visit(self)
            self.end_suite(suite)

    def start_suite(self, suite):
        """Called when suite starts.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """
        try:
            parent_name = suite.parent.name
        except AttributeError:
            return

        self._data["suites"][suite.longname.lower().replace('"', "'").\
            replace(" ", "_")] = {
                "name": suite.name.lower(),
                "doc": suite.doc,
                "parent": parent_name,
                "level": len(suite.longname.split("."))
            }

    def visit_test(self, test):
        """Implements traversing through the test.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """
        if self.start_test(test) is not False:
            test.setup.visit(self)
            test.body.visit(self)
            test.teardown.visit(self)
            self.end_test(test)

    def start_test(self, test):
        """Called when test starts.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """

        self._sh_run_counter = 0
        self._telemetry_kw_counter = 0
        self._telemetry_msg_counter = 0

        longname_orig = test.longname.lower()

        # Check the ignore list
        if longname_orig in self._ignore:
            return

        tags = [str(tag) for tag in test.tags]
        test_result = dict()

        # Change the TC long name and name if defined in the mapping table
        longname = self._mapping.get(longname_orig, None)
        if longname is not None:
            name = longname.split('.')[-1]
        else:
            longname = longname_orig
            name = test.name.lower()

        # Remove TC number from the TC long name (backward compatibility):
        self._test_id = re.sub(self.REGEX_TC_NUMBER, "", longname)
        # Remove TC number from the TC name (not needed):
        test_result["name"] = re.sub(self.REGEX_TC_NUMBER, "", name)

        test_result["parent"] = test.parent.name.lower()
        test_result["tags"] = tags
        test_result["doc"] = test.doc
        test_result["type"] = ""
        test_result["status"] = test.status
        test_result["starttime"] = test.starttime
        test_result["endtime"] = test.endtime

        if test.status == "PASS":
            if "NDRPDR" in tags:
                if "TCP_PPS" in tags or "UDP_PPS" in tags:
                    test_result["msg"] = self._get_data_from_pps_test_msg(
                        test.message)
                elif "TCP_CPS" in tags or "UDP_CPS" in tags:
                    test_result["msg"] = self._get_data_from_cps_test_msg(
                        test.message)
                else:
                    test_result["msg"] = self._get_data_from_perf_test_msg(
                        test.message)
            elif "MRR" in tags or "FRMOBL" in tags or "BMRR" in tags:
                test_result["msg"] = self._get_data_from_mrr_test_msg(
                    test.message)
            else:
                test_result["msg"] = test.message
        else:
            test_result["msg"] = test.message

        if "PERFTEST" in tags and "TREX" not in tags:
            # Replace info about cores (e.g. -1c-) with the info about threads
            # and cores (e.g. -1t1c-) in the long test case names and in the
            # test case names if necessary.
            tag_count = 0
            tag_tc = str()
            for tag in test_result["tags"]:
                groups = re.search(self.REGEX_TC_TAG, tag)
                if groups:
                    tag_count += 1
                    tag_tc = tag

            if tag_count == 1:
                self._test_id = re.sub(
                    self.REGEX_TC_NAME_NEW, f"-{tag_tc.lower()}-",
                    self._test_id, count=1
                )
                test_result["name"] = re.sub(
                    self.REGEX_TC_NAME_NEW, f"-{tag_tc.lower()}-",
                    test_result["name"], count=1
                )
            else:
                test_result["status"] = "FAIL"
                self._data["tests"][self._test_id] = test_result
                logging.debug(
                    f"The test {self._test_id} has either no multi-threading "
                    f"tag or more than one.\n"
                    f"Tags: {test_result['tags']}"
                )
                return

        if "DEVICETEST" in tags:
            test_result["type"] = "DEVICETEST"
        elif "NDRPDR" in tags:
            if "TCP_CPS" in tags or "UDP_CPS" in tags:
                test_result["type"] = "CPS"
            else:
                test_result["type"] = "NDRPDR"
            if test.status == "PASS":
                test_result["throughput"], test_result["status"] = \
                    self._get_ndrpdr_throughput(test.message)
                test_result["gbps"], test_result["status"] = \
                    self._get_ndrpdr_throughput_gbps(test.message)
                test_result["latency"], test_result["status"] = \
                    self._get_ndrpdr_latency(test.message)
        elif "MRR" in tags or "FRMOBL" in tags or "BMRR" in tags:
            if "MRR" in tags:
                test_result["type"] = "MRR"
            else:
                test_result["type"] = "BMRR"
            if test.status == "PASS":
                test_result["result"] = dict()
                groups = re.search(self.REGEX_BMRR, test.message)
                if groups is not None:
                    items_str = groups.group(1)
                    items_float = [
                        float(item.strip().replace("'", ""))
                        for item in items_str.split(",")
                    ]
                    # Use whole list in CSIT-1180.
                    stats = jumpavg.AvgStdevStats.for_runs(items_float)
                    test_result["result"]["samples"] = items_float
                    test_result["result"]["receive-rate"] = stats.avg
                    test_result["result"]["receive-stdev"] = stats.stdev
                else:
                    groups = re.search(self.REGEX_MRR, test.message)
                    test_result["result"]["receive-rate"] = \
                        float(groups.group(3)) / float(groups.group(1))
        elif "SOAK" in tags:
            test_result["type"] = "SOAK"
            if test.status == "PASS":
                test_result["throughput"], test_result["status"] = \
                    self._get_plr_throughput(test.message)
        elif "LDP_NGINX" in tags:
            test_result["type"] = "LDP_NGINX"
            test_result["result"], test_result["status"] = \
                self._get_vsap_data(test.message, tags)
        elif "HOSTSTACK" in tags:
            test_result["type"] = "HOSTSTACK"
            if test.status == "PASS":
                test_result["result"], test_result["status"] = \
                    self._get_hoststack_data(test.message, tags)
        elif "RECONF" in tags:
            test_result["type"] = "RECONF"
            if test.status == "PASS":
                test_result["result"] = None
                try:
                    grps_loss = re.search(self.REGEX_RECONF_LOSS, test.message)
                    grps_time = re.search(self.REGEX_RECONF_TIME, test.message)
                    test_result["result"] = {
                        "loss": int(grps_loss.group(1)),
                        "time": float(grps_time.group(1))
                    }
                except (AttributeError, IndexError, ValueError, TypeError):
                    test_result["status"] = "FAIL"
        else:
            test_result["status"] = "FAIL"

        self._data["tests"][self._test_id] = test_result

    def visit_keyword(self, kw):
        """Implements traversing through the keyword and its child keywords.

        :param kw: Keyword to process.
        :type kw: Keyword
        :returns: Nothing.
        """
        if self.start_keyword(kw) is not False:
            if hasattr(kw, "body"):
                kw.body.visit(self)
            kw.teardown.visit(self)
            self.end_keyword(kw)

    def start_keyword(self, keyword):
        """Called when keyword starts. Stores the name of the keyword.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """
        self._kw_name = keyword.name

    def end_keyword(self, keyword):
        """Called when keyword ends. Clears the stored keyword name.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """
        _ = keyword
        self._kw_name = None

    def visit_message(self, msg):
        """Implements visiting the message.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if self.start_message(msg) is not False:
            self.end_message(msg)

    def start_message(self, msg):
        """Called when message starts. Get required information from messages,
        e.g. VPP/DPDK version, operational data, or the testbed identity.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if self._kw_name is None:
            return
        elif self._kw_name.count("Run Telemetry On All Duts"):
            if self._process_oper:
                self._telemetry_kw_counter += 1
                self._get_telemetry(msg)
        elif self._kw_name.count("Show Runtime On All Duts"):
            if self._process_oper:
                self._sh_run_counter += 1
                self._get_show_run(msg)
        elif self._kw_name.count("Show Vpp Version On All Duts"):
            if not self._version:
                self._get_vpp_version(msg)
        elif self._kw_name.count("Install Dpdk Framework On All Duts"):
            if not self._version:
                self._get_dpdk_version(msg)
        elif self._kw_name.count("Setup Framework"):
            if not self._testbed:
                self._get_testbed(msg)
        elif self._kw_name.count("Show Papi History On All Duts"):
            self._conf_history_lookup_nr = 0
            self._get_papi_history(msg)

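# A minimal sketch of driving ExecutionChecker directly (assumes a local
# output.xml produced by a CSIT job; the metadata, mapping and ignore
# arguments below are placeholders):
#
#     result = ExecutionResult("output.xml")
#     checker = ExecutionChecker(
#         metadata=dict(), mapping=dict(), ignore=list(), process_oper=False
#     )
#     result.visit(checker)
#     version = checker.data["metadata"].get("version")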

class InputData:
    """Input data

    The data is extracted from output.xml files generated by Jenkins jobs and
    stored in pandas' Series.

    The data structure:
    - job name
      - build number
        - metadata
          (as described in ExecutionChecker documentation)
        - suites
          (as described in ExecutionChecker documentation)
        - tests
          (as described in ExecutionChecker documentation)
    """

    def __init__(self, spec, for_output):
        """Initialization.

        :param spec: Specification.
        :param for_output: Output to be generated from downloaded data.
        :type spec: Specification
        :type for_output: str
        """

        # Specification:
        self._cfg = spec

        self._for_output = for_output

        # Data store:
        self._input_data = pd.Series(dtype="float64")

    @property
    def data(self):
        """Getter - Input data.

        :returns: Input data
        :rtype: pandas.Series
        """
        return self._input_data

    def metadata(self, job, build):
        """Getter - metadata

        :param job: Job whose metadata we want.
        :param build: Build whose metadata we want.
        :type job: str
        :type build: str
        :returns: Metadata.
        :rtype: pandas.Series
        """
        return self.data[job][build]["metadata"]

    def suites(self, job, build):
        """Getter - suites

        :param job: Job whose suites we want.
        :param build: Build whose suites we want.
        :type job: str
        :type build: str
        :returns: Suites.
        :rtype: pandas.Series
        """
        return self.data[job][str(build)]["suites"]

    def tests(self, job, build):
        """Getter - tests

        :param job: Job whose tests we want.
        :param build: Build whose tests we want.
        :type job: str
        :type build: str
        :returns: Tests.
        :rtype: pandas.Series
        """
        return self.data[job][build]["tests"]

    def _parse_tests(self, job, build):
        """Process data from robot output.xml file and return JSON structured
        data.

        :param job: The name of the job whose build output data will be
            processed.
        :param build: The build whose output data will be processed.
        :type job: str
        :type build: dict
        :returns: JSON data structure.
        :rtype: dict
        """

        metadata = {
            "job": job,
            "build": build
        }

        with open(build["file-name"], 'r') as data_file:
            try:
                result = ExecutionResult(data_file)
            except errors.DataError as err:
                logging.error(
                    f"Error occurred while parsing output.xml: {repr(err)}"
                )
                return None

        process_oper = False
        if "-vpp-perf-report-coverage-" in job:
            process_oper = True
        # elif "-vpp-perf-report-iterative-" in job:
        #     # Exceptions for TBs where we do not have coverage data:
        #     for item in ("-2n-icx", ):
        #         if item in job:
        #             process_oper = True
        #             break
        checker = ExecutionChecker(
            metadata, self._cfg.mapping, self._cfg.ignore, process_oper
        )
        result.visit(checker)

        checker.data["metadata"]["tests_total"] = \
            result.statistics.total.total
        checker.data["metadata"]["tests_passed"] = \
            result.statistics.total.passed
        checker.data["metadata"]["tests_failed"] = \
            result.statistics.total.failed
        checker.data["metadata"]["elapsedtime"] = result.suite.elapsedtime
        checker.data["metadata"]["generated"] = result.suite.endtime[:14]

        return checker.data

    def _download_and_parse_build(self, job, build, repeat, pid=10000):
        """Download and parse the input data file.

        :param pid: PID of the process executing this method.
        :param job: Name of the Jenkins job which generated the processed input
            file.
        :param build: Information about the Jenkins build which generated the
            processed input file.
        :param repeat: Repeat the download specified number of times if not
            successful.
        :type pid: int
        :type job: str
        :type build: dict
        :type repeat: int
        """

        logging.info(f"Processing the job/build: {job}: {build['build']}")

        state = "failed"
        success = False
        data = None
        do_repeat = repeat
        while do_repeat:
            success = download_and_unzip_data_file(self._cfg, job, build, pid)
            if success:
                break
            do_repeat -= 1
        if not success:
            logging.error(
                f"It is not possible to download the input data file from the "
                f"job {job}, build {build['build']}, or it is damaged. "
                f"Skipped."
            )
        if success:
            logging.info(f"  Processing data from build {build['build']}")
            data = self._parse_tests(job, build)
            if data is None:
                logging.error(
                    f"Input data file from the job {job}, build "
                    f"{build['build']} is damaged. Skipped."
                )
            else:
                state = "processed"

            try:
                remove(build["file-name"])
            except OSError as err:
                logging.error(
                    f"Cannot remove the file {build['file-name']}: {repr(err)}"
                )

        # If the time-period is defined in the specification file, remove all
        # files which are outside the time period.
        is_last = False
        timeperiod = self._cfg.environment.get("time-period", None)
        if timeperiod and data:
            now = dt.utcnow()
            timeperiod = timedelta(int(timeperiod))
            metadata = data.get("metadata", None)
            if metadata:
                generated = metadata.get("generated", None)
                if generated:
                    generated = dt.strptime(generated, "%Y%m%d %H:%M")
                    if (now - generated) > timeperiod:
                        # Remove the data and the file:
                        state = "removed"
                        data = None
                        is_last = True
                        logging.info(
                            f"  The build {job}/{build['build']} is "
                            f"outdated, will be removed."
                        )
        return {
            "data": data,
            "state": state,
            "job": job,
            "build": build,
            "last": is_last
        }

    def download_and_parse_data(self, repeat=1):
        """Download the input data files, parse input data from input files and
        store in pandas' Series.

        :param repeat: Repeat the download specified number of times if not
            successful.
        :type repeat: int
        """

        logging.info("Downloading and parsing input files ...")

        for job, builds in self._cfg.input.items():
            for build in builds:

                result = self._download_and_parse_build(job, build, repeat)
                if result["last"]:
                    break
                build_nr = result["build"]["build"]

                if result["data"]:
                    data = result["data"]
                    build_data = pd.Series({
                        "metadata": pd.Series(
                            list(data["metadata"].values()),
                            index=list(data["metadata"].keys())
                        ),
                        "suites": pd.Series(
                            list(data["suites"].values()),
                            index=list(data["suites"].keys())
                        ),
                        "tests": pd.Series(
                            list(data["tests"].values()),
                            index=list(data["tests"].keys())
                        )
                    })

                    if self._input_data.get(job, None) is None:
                        self._input_data[job] = pd.Series(dtype="float64")
                    self._input_data[job][str(build_nr)] = build_data
                    self._cfg.set_input_file_name(
                        job, build_nr, result["build"]["file-name"]
                    )
                self._cfg.set_input_state(job, build_nr, result["state"])

                mem_alloc = \
                    resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000
                logging.info(f"Memory allocation: {mem_alloc:.0f}MB")

        logging.info("Done.")

        msg = f"Successful downloads from the sources:\n"
        for source in self._cfg.environment["data-sources"]:
            if source["successful-downloads"]:
                msg += (
                    f"{source['url']}/{source['path']}/"
                    f"{source['file-name']}: "
                    f"{source['successful-downloads']}\n"
                )
        logging.info(msg)

    def process_local_file(self, local_file, job="local", build_nr=1,
                           replace=True):
        """Process local XML file given as a command-line parameter.

        :param local_file: The file to process.
        :param job: Job name.
        :param build_nr: Build number.
        :param replace: If True, the information about jobs and builds is
            replaced by the new one, otherwise the new jobs and builds are
            added.
        :type local_file: str
        :type job: str
        :type build_nr: int
        :type replace: bool
        :raises: PresentationError if an error occurs.
        """
        if not isfile(local_file):
            raise PresentationError(f"The file {local_file} does not exist.")

        try:
            build_nr = int(local_file.split("/")[-1].split(".")[0])
        except (IndexError, ValueError):
            pass

        build = {
            "build": build_nr,
            "status": "failed",
            "file-name": local_file
        }
        if replace:
            self._cfg.input = dict()
        self._cfg.add_build(job, build)

        logging.info(f"Processing {job}: {build_nr:2d}: {local_file}")
        data = self._parse_tests(job, build)
        if data is None:
            raise PresentationError(
                f"Error occurred while parsing the file {local_file}"
            )

        build_data = pd.Series({
            "metadata": pd.Series(
                list(data["metadata"].values()),
                index=list(data["metadata"].keys())
            ),
            "suites": pd.Series(
                list(data["suites"].values()),
                index=list(data["suites"].keys())
            ),
            "tests": pd.Series(
                list(data["tests"].values()),
                index=list(data["tests"].keys())
            )
        })

        if self._input_data.get(job, None) is None:
            self._input_data[job] = pd.Series(dtype="float64")
        self._input_data[job][str(build_nr)] = build_data

        self._cfg.set_input_state(job, build_nr, "processed")

    def process_local_directory(self, local_dir, replace=True):
        """Process a local directory with XML file(s). The directory is
        processed as a 'job' and the XML files in it as its builds.
        If the given directory contains only sub-directories, these
        sub-directories are processed as jobs and the corresponding XML files
        as builds of their jobs.

        :param local_dir: Local directory to process.
        :param replace: If True, the information about jobs and builds is
            replaced by the new one, otherwise the new jobs and builds are
            added.
        :type local_dir: str
        :type replace: bool
        :raises: PresentationError if an error occurs.
        """
1466         if not isdir(local_dir):
1467             raise PresentationError(
1468                 f"The directory {local_dir} does not exist."
1469             )
1470
1471         # Check if the given directory includes only files, or only directories
1472         _, dirnames, filenames = next(walk(local_dir))
1473
1474         if filenames and not dirnames:
1475             filenames.sort()
1476             # local_builds:
1477             # key: dir (job) name, value: list of file names (builds)
1478             local_builds = {
1479                 local_dir: [join(local_dir, name) for name in filenames]
1480             }
1481
1482         elif dirnames and not filenames:
1483             dirnames.sort()
1484             # local_builds:
1485             # key: dir (job) name, value: list of file names (builds)
1486             local_builds = dict()
1487             for dirname in dirnames:
1488                 builds = [
1489                     join(local_dir, dirname, name)
1490                     for name in listdir(join(local_dir, dirname))
1491                     if isfile(join(local_dir, dirname, name))
1492                 ]
1493                 if builds:
1494                     local_builds[dirname] = sorted(builds)
1495
1496         elif not filenames and not dirnames:
1497             raise PresentationError(f"The directory {local_dir} is empty.")
1498         else:
1499             raise PresentationError(
1500                 f"The directory {local_dir} can include only files or only "
1501                 f"directories, not both.\nThe directory {local_dir} includes "
1502                 f"file(s):\n{filenames}\nand directories:\n{dirnames}"
1503             )
1504
1505         if replace:
1506             self._cfg.input = dict()
1507
1508         for job, files in local_builds.items():
1509             for idx, local_file in enumerate(files):
1510                 self.process_local_file(local_file, job, idx + 1, replace=False)
1511
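    # Illustrative directory layouts accepted above (a sketch; the paths are
    # hypothetical):
    #
    #     results/output1.xml, results/output2.xml
    #         -> one job "results" with builds 1 and 2
    #     results/job_a/output.xml, results/job_b/output.xml
    #         -> jobs "job_a" and "job_b", each with build 1
    #
    # A directory mixing files and sub-directories raises PresentationError.
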
    @staticmethod
    def _end_of_tag(tag_filter, start=0, closer="'"):
        """Return the index of the character which closes the first tag found
        in the string.

        :param tag_filter: The string where the end of tag is searched for.
        :param start: The index where the searching is started.
        :param closer: The character which closes the tag.
        :type tag_filter: str
        :type start: int
        :type closer: str
        :returns: The index of the tag closer, or None if no complete tag is
            found.
        :rtype: int
        """
        try:
            idx_opener = tag_filter.index(closer, start)
            return tag_filter.index(closer, idx_opener + 1)
        except ValueError:
            return None

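    # Illustrative call (a sketch): in "'NDRPDR' and '1T1C'" the first tag is
    # closed by the apostrophe at index 7, so
    #
    #     InputData._end_of_tag("'NDRPDR' and '1T1C'")  # -> 7
    #
    # None is returned when no complete pair of apostrophes remains.
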
    @staticmethod
    def _condition(tag_filter):
        """Create a conditional statement from the given tag filter.

        :param tag_filter: Filter based on tags from the element specification.
        :type tag_filter: str
        :returns: Conditional statement which can be evaluated.
        :rtype: str
        """
        index = 0
        while True:
            index = InputData._end_of_tag(tag_filter, index)
            if index is None:
                return tag_filter
            index += 1
            tag_filter = tag_filter[:index] + " in tags" + tag_filter[index:]

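    # Illustrative transformation (a sketch): each quoted tag is turned into a
    # membership test, so
    #
    #     InputData._condition("'NDRPDR' and '1T1C'")
    #
    # returns "'NDRPDR' in tags and '1T1C' in tags", which can be evaluated
    # against a test's list of tags.
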
    def filter_data(self, element, params=None, data=None, data_set="tests",
                    continue_on_error=False):
        """Filter required data from the given jobs and builds.

        The output data structure is:
        - job 1
          - build 1
            - test (or suite) 1 ID:
              - param 1
              - param 2
              ...
              - param n
            ...
            - test (or suite) n ID:
            ...
          ...
          - build n
        ...
        - job n

        :param element: Element which will use the filtered data.
        :param params: Parameters which will be included in the output. If
            None, all parameters are included.
        :param data: If not None, this data is used instead of the data
            specified in the element.
        :param data_set: The set of data to be filtered: tests, suites,
            metadata.
        :param continue_on_error: Continue if there is an error while reading
            the data. The item will be empty then.
        :type element: pandas.Series
        :type params: list
        :type data: dict
        :type data_set: str
        :type continue_on_error: bool
        :returns: Filtered data.
        :rtype: pandas.Series
        """

        try:
            if data_set == "suites":
                cond = "True"
            elif element["filter"] in ("all", "template"):
                cond = "True"
            else:
                cond = InputData._condition(element["filter"])
            logging.debug(f"   Filter: {cond}")
        except KeyError:
            logging.error("  No filter defined.")
            return None

        if params is None:
            params = element.get("parameters", None)
            if params:
                params.extend(("type", "status"))

        data_to_filter = data if data else element["data"]
        data = pd.Series(dtype="float64")
        try:
            for job, builds in data_to_filter.items():
                data[job] = pd.Series(dtype="float64")
                for build in builds:
                    data[job][str(build)] = pd.Series(dtype="float64")
                    try:
                        data_dict = dict(
                            self.data[job][str(build)][data_set].items())
                    except KeyError:
                        if continue_on_error:
                            continue
                        return None

                    for test_id, test_data in data_dict.items():
                        # Include the test only if its tags satisfy the filter
                        # condition.
                        if eval(cond, {"tags": test_data.get("tags", "")}):
                            data[job][str(build)][test_id] = \
                                pd.Series(dtype="float64")
                            if params is None:
                                for param, val in test_data.items():
                                    data[job][str(build)][test_id][param] = val
                            else:
                                for param in params:
                                    try:
                                        data[job][str(build)][test_id][param] =\
                                            test_data[param]
                                    except KeyError:
                                        data[job][str(build)][test_id][param] =\
                                            "No Data"
            return data

        except (KeyError, IndexError, ValueError) as err:
            logging.error(
                f"Missing mandatory parameter in the element specification: "
                f"{repr(err)}"
            )
            return None
        except AttributeError as err:
            logging.error(repr(err))
            return None
        except SyntaxError as err:
            logging.error(
                f"The filter {cond} is not correct. Check if all tags are "
                f"enclosed by apostrophes.\n{repr(err)}"
            )
            return None

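    # Illustrative filter evaluation (a sketch with hypothetical tags): the
    # condition built by _condition() is evaluated once per test, e.g.
    #
    #     cond = "'NDRPDR' in tags and '1T1C' in tags"
    #     tags = ["NDRPDR", "1T1C", "64B"]
    #     eval(cond, {"tags": tags})  # -> True, the test is included
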
    def filter_tests_by_name(self, element, params=None, data_set="tests",
                             continue_on_error=False):
        """Filter required data from the given jobs and builds.

        The output data structure is:
        - job 1
          - build 1
            - test (or suite) 1 ID:
              - param 1
              - param 2
              ...
              - param n
            ...
            - test (or suite) n ID:
            ...
          ...
          - build n
        ...
        - job n

        :param element: Element which will use the filtered data.
        :param params: Parameters which will be included in the output. If
            None, all parameters are included.
        :param data_set: The set of data to be filtered: tests, suites,
            metadata.
        :param continue_on_error: Continue if there is an error while reading
            the data. The item will be empty then.
        :type element: pandas.Series
        :type params: list
        :type data_set: str
        :type continue_on_error: bool
        :returns: Filtered data.
        :rtype: pandas.Series
        """

        include = element.get("include", None)
        if not include:
            logging.warning("No tests to include, skipping the element.")
            return None

        if params is None:
            params = element.get("parameters", None)
            if params and "type" not in params:
                params.append("type")

        # Expand the test name patterns for each core configuration, see the
        # sketch after this method.
        cores = element.get("core", None)
        if cores:
            tests = list()
            for core in cores:
                for test in include:
                    tests.append(test.format(core=core))
        else:
            tests = include

        data = pd.Series(dtype="float64")
        try:
            for job, builds in element["data"].items():
                data[job] = pd.Series(dtype="float64")
                for build in builds:
                    data[job][str(build)] = pd.Series(dtype="float64")
                    for test in tests:
                        try:
                            reg_ex = re.compile(str(test).lower())
                            for test_id in self.data[job][
                                    str(build)][data_set].keys():
                                if reg_ex.match(str(test_id).lower()):
                                    test_data = self.data[job][
                                        str(build)][data_set][test_id]
                                    data[job][str(build)][test_id] = \
                                        pd.Series(dtype="float64")
                                    if params is None:
                                        for param, val in test_data.items():
                                            data[job][str(build)][test_id]\
                                                [param] = val
                                    else:
                                        for param in params:
                                            try:
                                                data[job][str(build)][
                                                    test_id][param] = \
                                                    test_data[param]
                                            except KeyError:
                                                data[job][str(build)][
                                                    test_id][param] = "No Data"
                        except KeyError as err:
                            if continue_on_error:
                                logging.debug(repr(err))
                                continue
                            logging.error(repr(err))
                            return None
            return data

        except (KeyError, IndexError, ValueError) as err:
            logging.error(
                f"Missing mandatory parameter in the element "
                f"specification: {repr(err)}"
            )
            return None
        except AttributeError as err:
            logging.error(repr(err))
            return None

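    # Illustrative "include" expansion (a sketch with hypothetical patterns):
    # with
    #
    #     include = [".*-{core}-ethip4-ip4base-ndrpdr"]
    #     cores = ["1c", "2c"]
    #
    # the patterns become ".*-1c-ethip4-ip4base-ndrpdr" and
    # ".*-2c-ethip4-ip4base-ndrpdr"; each pattern and each test ID is
    # lower-cased before the regex match.
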
    @staticmethod
    def merge_data(data):
        """Merge data from multiple jobs and builds into a simple data
        structure.

        The output data structure is:

        - test (suite) 1 ID:
          - param 1
          - param 2
          ...
          - param n
        ...
        - test (suite) n ID:
        ...

        :param data: Data to merge.
        :type data: pandas.Series
        :returns: Merged data.
        :rtype: pandas.Series
        """

        logging.info("    Merging data ...")

        merged_data = pd.Series(dtype="float64")
        for builds in data.values:
            for item in builds.values:
                for item_id, item_data in item.items():
                    merged_data[item_id] = item_data
        return merged_data

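    # Illustrative merge (a sketch): items are written in iteration order, so
    # if the same test ID appears in several builds, the entry processed last
    # overwrites the earlier ones:
    #
    #     merged = InputData.merge_data(filtered)  # filtered: filter_data()
    #     test = merged["some-test-id"]            # hypothetical test ID
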
    def print_all_oper_data(self):
        """Print all operational data to the console.
        """

        for job in self._input_data.values:
            for build in job.values:
                for test_id, test_data in build["tests"].items():
                    print(f"{test_id}")
                    if test_data.get("show-run", None) is None:
                        continue
                    for dut_name, data in test_data["show-run"].items():
                        if data.get("runtime", None) is None:
                            continue
                        runtime = loads(data["runtime"])
                        try:
                            threads_nr = len(runtime[0]["clocks"])
                        except (IndexError, KeyError):
                            continue
                        threads = OrderedDict(
                            {idx: list() for idx in range(threads_nr)})
                        for item in runtime:
                            for idx in range(threads_nr):
                                # Clocks per unit of work: prefer vectors,
                                # then calls, then suspends as the divisor.
                                if item["vectors"][idx] > 0:
                                    clocks = item["clocks"][idx] / \
                                             item["vectors"][idx]
                                elif item["calls"][idx] > 0:
                                    clocks = item["clocks"][idx] / \
                                             item["calls"][idx]
                                elif item["suspends"][idx] > 0:
                                    clocks = item["clocks"][idx] / \
                                             item["suspends"][idx]
                                else:
                                    clocks = 0.0

                                if item["calls"][idx] > 0:
                                    vectors_call = item["vectors"][idx] / \
                                                   item["calls"][idx]
                                else:
                                    vectors_call = 0.0

                                # Skip nodes which did no work on this thread.
                                if int(item["calls"][idx]) + int(
                                        item["vectors"][idx]) + \
                                        int(item["suspends"][idx]):
                                    threads[idx].append([
                                        item["name"],
                                        item["calls"][idx],
                                        item["vectors"][idx],
                                        item["suspends"][idx],
                                        clocks,
                                        vectors_call
                                    ])

                        print(f"Host IP: {data.get('host', '')}, "
                              f"Socket: {data.get('socket', '')}")
                        for thread_nr, thread in threads.items():
                            # Column order matches the rows appended above:
                            # name, calls, vectors (packets), suspends,
                            # clocks per packet, vectors per call.
                            txt_table = prettytable.PrettyTable(
                                (
                                    "Name",
                                    "Nr of Calls",
                                    "Nr of Packets",
                                    "Suspends",
                                    "Cycles per Packet",
                                    "Average Vector Size"
                                )
                            )
                            avg = 0.0
                            for row in thread:
                                txt_table.add_row(row)
                                avg += row[-1]
                            if len(thread) == 0:
                                avg = ""
                            else:
                                avg = f", Average Vector Size per Node: " \
                                      f"{(avg / len(thread)):.2f}"
                            th_name = "main" if thread_nr == 0 \
                                else f"worker_{thread_nr}"
                            print(f"{dut_name}, {th_name}{avg}")
                            txt_table.float_format = ".2"
                            txt_table.align = "r"
                            txt_table.align["Name"] = "l"
                            print(f"{txt_table.get_string()}\n")
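        # Worked example for the derived columns above (hypothetical counter
        # values): a node with clocks=1.2e6, vectors=4000, calls=1000 and
        # suspends=0 is shown with
        #     Cycles per Packet   = 1.2e6 / 4000 = 300.00
        #     Average Vector Size = 4000 / 1000 = 4.00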