resources/tools/presentation/input_data_parser.py
# Copyright (c) 2019 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Data pre-processing

- extract data from output.xml files generated by Jenkins jobs and store it in
  pandas' Series,
- provide access to the data,
- filter the data using tags.
"""

import re
import resource
import pandas as pd
import logging
import prettytable

from robot.api import ExecutionResult, ResultVisitor
from robot import errors
from collections import OrderedDict
from string import replace
from os import remove
from datetime import datetime as dt
from datetime import timedelta
from json import loads
from jumpavg.AvgStdevMetadataFactory import AvgStdevMetadataFactory

from input_data_files import download_and_unzip_data_file


# Separator used in file names
SEPARATOR = "__"


class ExecutionChecker(ResultVisitor):
    """Class to traverse through the test suite structure.

    The functionality implemented in this class generates a json structure:

    Performance tests:

    {
        "metadata": {
            "generated": "Timestamp",
            "version": "SUT version",
            "job": "Jenkins job name",
            "build": "Information about the build"
        },
        "suites": {
            "Suite long name 1": {
                "name": "Suite name",
                "doc": "Suite 1 documentation",
                "parent": "Suite 1 parent",
                "level": "Level of the suite in the suite hierarchy"
            }
            "Suite long name N": {
                "name": "Suite name",
                "doc": "Suite N documentation",
                "parent": "Suite N parent",
                "level": "Level of the suite in the suite hierarchy"
            }
        }
        "tests": {
            # NDRPDR tests:
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "conf-history": "DUT1 and DUT2 VAT History",
                "show-run": "Show Run",
                "tags": ["tag 1", "tag 2", "tag n"],
                "type": "NDRPDR",
                "status": "PASS" | "FAIL",
                "throughput": {
                    "NDR": {
                        "LOWER": float,
                        "UPPER": float
                    },
                    "PDR": {
                        "LOWER": float,
                        "UPPER": float
                    }
                },
                "latency": {
                    "NDR": {
                        "direction1": {
                            "min": float,
                            "avg": float,
                            "max": float
                        },
                        "direction2": {
                            "min": float,
                            "avg": float,
                            "max": float
                        }
                    },
                    "PDR": {
                        "direction1": {
                            "min": float,
                            "avg": float,
                            "max": float
                        },
                        "direction2": {
                            "min": float,
                            "avg": float,
                            "max": float
                        }
                    }
                }
            }

            # TCP tests:
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "tags": ["tag 1", "tag 2", "tag n"],
                "type": "TCP",
                "status": "PASS" | "FAIL",
                "result": int
            }

            # MRR, BMRR tests:
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "tags": ["tag 1", "tag 2", "tag n"],
                "type": "MRR" | "BMRR",
                "status": "PASS" | "FAIL",
                "result": {
                    "receive-rate": AvgStdevMetadata,
                }
            }

            # TODO: Remove when definitely no NDRPDRDISC tests are used:
            # NDRPDRDISC tests:
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "tags": ["tag 1", "tag 2", "tag n"],
                "type": "PDR" | "NDR",
                "status": "PASS" | "FAIL",
                "throughput": {  # Only type: "PDR" | "NDR"
                    "value": int,
                    "unit": "pps" | "bps" | "percentage"
                },
                "latency": {  # Only type: "PDR" | "NDR"
                    "direction1": {
                        "100": {
                            "min": int,
                            "avg": int,
                            "max": int
                        },
                        "50": {  # Only for NDR
                            "min": int,
                            "avg": int,
                            "max": int
                        },
                        "10": {  # Only for NDR
                            "min": int,
                            "avg": int,
                            "max": int
                        }
                    },
                    "direction2": {
                        "100": {
                            "min": int,
                            "avg": int,
                            "max": int
                        },
                        "50": {  # Only for NDR
                            "min": int,
                            "avg": int,
                            "max": int
                        },
                        "10": {  # Only for NDR
                            "min": int,
                            "avg": int,
                            "max": int
                        }
                    }
                },
                "lossTolerance": "lossTolerance",  # Only type: "PDR"
                "conf-history": "DUT1 and DUT2 VAT History",
                "show-run": "Show Run"
            },
            "ID": {
                # next test
            }
        }
    }


    Functional tests:

    {
        "metadata": {  # Optional
            "version": "VPP version",
            "job": "Jenkins job name",
            "build": "Information about the build"
        },
        "suites": {
            "Suite name 1": {
                "doc": "Suite 1 documentation",
                "parent": "Suite 1 parent",
                "level": "Level of the suite in the suite hierarchy"
            }
            "Suite name N": {
                "doc": "Suite N documentation",
                "parent": "Suite N parent",
                "level": "Level of the suite in the suite hierarchy"
            }
        }
        "tests": {
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "tags": ["tag 1", "tag 2", "tag n"],
                "conf-history": "DUT1 and DUT2 VAT History",
                "show-run": "Show Run",
                "status": "PASS" | "FAIL"
            },
            "ID": {
                # next test
            }
        }
    }

    .. note:: ID is the lowercase full path to the test.
    """

    # TODO: Remove when definitely no NDRPDRDISC tests are used:
    REGEX_RATE = re.compile(r'^[\D\d]*FINAL_RATE:\s(\d+\.\d+)\s(\w+)')

    REGEX_PLR_RATE = re.compile(r'PLRsearch lower bound::?\s(\d+.\d+).*\n'
                                r'PLRsearch upper bound::?\s(\d+.\d+)')

    REGEX_NDRPDR_RATE = re.compile(r'NDR_LOWER:\s(\d+.\d+).*\n.*\n'
                                   r'NDR_UPPER:\s(\d+.\d+).*\n'
                                   r'PDR_LOWER:\s(\d+.\d+).*\n.*\n'
                                   r'PDR_UPPER:\s(\d+.\d+)')

    # TODO: Remove when definitely no NDRPDRDISC tests are used:
    REGEX_LAT_NDR = re.compile(r'^[\D\d]*'
                               r'LAT_\d+%NDR:\s\[\'(-?\d+/-?\d+/-?\d+)\','
                               r'\s\'(-?\d+/-?\d+/-?\d+)\'\]\s\n'
                               r'LAT_\d+%NDR:\s\[\'(-?\d+/-?\d+/-?\d+)\','
                               r'\s\'(-?\d+/-?\d+/-?\d+)\'\]\s\n'
                               r'LAT_\d+%NDR:\s\[\'(-?\d+/-?\d+/-?\d+)\','
                               r'\s\'(-?\d+/-?\d+/-?\d+)\'\]')

    REGEX_LAT_PDR = re.compile(r'^[\D\d]*'
                               r'LAT_\d+%PDR:\s\[\'(-?\d+/-?\d+/-?\d+)\','
                               r'\s\'(-?\d+/-?\d+/-?\d+)\'\][\D\d]*')

    REGEX_NDRPDR_LAT = re.compile(r'LATENCY.*\[\'(.*)\', \'(.*)\'\]\s\n.*\n.*\n'
                                  r'LATENCY.*\[\'(.*)\', \'(.*)\'\]')

    REGEX_TOLERANCE = re.compile(r'^[\D\d]*LOSS_ACCEPTANCE:\s(\d*\.\d*)\s'
                                 r'[\D\d]*')

    REGEX_VERSION_VPP = re.compile(r"(return STDOUT Version:\s*|"
                                   r"VPP Version:\s*|VPP version:\s*)(.*)")

    REGEX_VERSION_DPDK = re.compile(r"(DPDK version:\s*|DPDK Version:\s*)(.*)")

    REGEX_TCP = re.compile(r'Total\s(rps|cps|throughput):\s([0-9]*).*$')

    REGEX_MRR = re.compile(r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s'
                           r'tx\s(\d*),\srx\s(\d*)')

    REGEX_BMRR = re.compile(r'Maximum Receive Rate trial results'
                            r' in packets per second: \[(.*)\]')

    REGEX_RECONF_LOSS = re.compile(r'Packets lost due to reconfig: (\d*)')
    REGEX_RECONF_TIME = re.compile(r'Implied time lost: (\d*.\d*)')

    REGEX_TC_TAG = re.compile(r'\d+[tT]\d+[cC]')

    REGEX_TC_NAME_OLD = re.compile(r'-\d+[tT]\d+[cC]-')

    REGEX_TC_NAME_NEW = re.compile(r'-\d+[cC]-')

    REGEX_TC_NUMBER = re.compile(r'tc[0-9]{2}-')

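    # Illustrative examples (not executed; the sample strings are made up)
    # of what the test-name regexes above are meant to match:
    #
    #     REGEX_TC_TAG matches e.g. "2T2C" in a tag list,
    #     REGEX_TC_NAME_OLD matches e.g. "-2t2c-" in a test name,
    #     REGEX_TC_NAME_NEW matches e.g. "-2c-" in a test name,
    #     REGEX_TC_NUMBER matches e.g. "tc01-" in a test name.
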
    def __init__(self, metadata, mapping, ignore):
        """Initialisation.

        :param metadata: Key-value pairs to be included in "metadata" part of
            JSON structure.
        :param mapping: Mapping of the old names of test cases to the new
            (actual) one.
        :param ignore: List of TCs to be ignored.
        :type metadata: dict
        :type mapping: dict
        :type ignore: list
        """

        # Type of message to parse out from the test messages
        self._msg_type = None

        # VPP version
        self._version = None

        # Timestamp
        self._timestamp = None

        # Testbed. The testbed is identified by TG node IP address.
        self._testbed = None

        # Mapping of TCs long names
        self._mapping = mapping

        # Ignore list
        self._ignore = ignore

        # Number of VAT History messages found:
        # 0 - no message
        # 1 - VAT History of DUT1
        # 2 - VAT History of DUT2
        self._lookup_kw_nr = 0
        self._conf_history_lookup_nr = 0

        # Number of Show Running messages found
        # 0 - no message
        # 1 - Show run message found
        self._show_run_lookup_nr = 0

        # Test ID of the currently processed test - the lowercase full path to
        # the test
        self._test_ID = None

        # The main data structure
        self._data = {
            "metadata": OrderedDict(),
            "suites": OrderedDict(),
            "tests": OrderedDict()
        }

        # Save the provided metadata
        for key, val in metadata.items():
            self._data["metadata"][key] = val

        # Dictionary defining the methods used to parse different types of
        # messages
        self.parse_msg = {
            "timestamp": self._get_timestamp,
            "vpp-version": self._get_vpp_version,
            "dpdk-version": self._get_dpdk_version,
            "teardown-vat-history": self._get_vat_history,
            "teardown-papi-history": self._get_papi_history,
            "test-show-runtime": self._get_show_run,
            "testbed": self._get_testbed
        }

    @property
    def data(self):
        """Getter - Data parsed from the XML file.

        :returns: Data parsed from the XML file.
        :rtype: dict
        """
        return self._data

    def _get_testbed(self, msg):
        """Called when extraction of testbed IP is required.
        The testbed is identified by TG node IP address.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count("Setup of TG node"):
            reg_tg_ip = re.compile(
                r'Setup of TG node (\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}) done')
            try:
                self._testbed = str(re.search(reg_tg_ip, msg.message).group(1))
            except (KeyError, ValueError, IndexError, AttributeError):
                pass
            finally:
                self._data["metadata"]["testbed"] = self._testbed
                self._msg_type = None

    def _get_vpp_version(self, msg):
        """Called when extraction of VPP version is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count("return STDOUT Version:") or \
            msg.message.count("VPP Version:") or \
            msg.message.count("VPP version:"):
            self._version = str(re.search(self.REGEX_VERSION_VPP, msg.message).
                                group(2))
            self._data["metadata"]["version"] = self._version
            self._msg_type = None

    def _get_dpdk_version(self, msg):
        """Called when extraction of DPDK version is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if msg.message.count("DPDK Version:"):
            try:
                self._version = str(re.search(
                    self.REGEX_VERSION_DPDK, msg.message).group(2))
                self._data["metadata"]["version"] = self._version
            except IndexError:
                pass
            finally:
                self._msg_type = None

    def _get_timestamp(self, msg):
        """Called when extraction of timestamp is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        self._timestamp = msg.timestamp[:14]
        self._data["metadata"]["generated"] = self._timestamp
        self._msg_type = None

    def _get_vat_history(self, msg):
        """Called when extraction of VAT command history is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if msg.message.count("VAT command history:"):
            self._conf_history_lookup_nr += 1
            if self._conf_history_lookup_nr == 1:
                self._data["tests"][self._test_ID]["conf-history"] = str()
            else:
                self._msg_type = None
            text = re.sub("[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3} "
                          "VAT command history:", "", msg.message, count=1). \
                replace("\n\n", "\n").replace('\n', ' |br| ').\
                replace('\r', '').replace('"', "'")

            self._data["tests"][self._test_ID]["conf-history"] += " |br| "
            self._data["tests"][self._test_ID]["conf-history"] += \
                "**DUT" + str(self._conf_history_lookup_nr) + ":** " + text

    def _get_papi_history(self, msg):
        """Called when extraction of PAPI command history is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if msg.message.count("PAPI command history:"):
            self._conf_history_lookup_nr += 1
            if self._conf_history_lookup_nr == 1:
                self._data["tests"][self._test_ID]["conf-history"] = str()
            else:
                self._msg_type = None
            text = re.sub("[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3} "
                          "PAPI command history:", "", msg.message, count=1). \
                replace("\n\n", "\n").replace('\n', ' |br| ').\
                replace('\r', '').replace('"', "'")

            self._data["tests"][self._test_ID]["conf-history"] += " |br| "
            self._data["tests"][self._test_ID]["conf-history"] += \
                "**DUT" + str(self._conf_history_lookup_nr) + ":** " + text

    def _get_show_run(self, msg):
        """Called when extraction of VPP operational data (output of CLI command
        Show Runtime) is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if msg.message.count("Runtime:"):
            self._show_run_lookup_nr += 1
            if self._lookup_kw_nr == 1 and self._show_run_lookup_nr == 1:
                self._data["tests"][self._test_ID]["show-run"] = str()
            if self._lookup_kw_nr > 1:
                self._msg_type = None
            if self._show_run_lookup_nr > 0:
                message = str(msg.message).replace(' ', '').replace('\n', '').\
                    replace("'", '"').replace('b"', '"').replace('u"', '"')[8:]
                runtime = loads(message)
                try:
                    threads_nr = len(runtime[0]["clocks"])
                except (IndexError, KeyError):
                    return
                tbl_hdr = ["Name", "Calls", "Vectors", "Suspends", "Clocks"]
                table = [[tbl_hdr, ] for _ in range(threads_nr)]
                for item in runtime:
                    for idx in range(threads_nr):
                        table[idx].append([
                            item["name"],
                            item["calls"][idx],
                            item["vectors"][idx],
                            item["suspends"][idx],
                            item["clocks"][idx]
                        ])
                text = ""
                for idx in range(threads_nr):
                    text += "Thread {idx} ".format(idx=idx)
                    text += "vpp_main\n" if idx == 0 else \
                        "vpp_wk_{idx}\n".format(idx=idx-1)
                    txt_table = None
                    for row in table[idx]:
                        if txt_table is None:
                            txt_table = prettytable.PrettyTable(row)
                        else:
                            if any(row[1:]):
                                txt_table.add_row(row)
                    txt_table.set_style(prettytable.MSWORD_FRIENDLY)
                    txt_table.align["Name"] = "l"
                    txt_table.align["Calls"] = "r"
                    txt_table.align["Vectors"] = "r"
                    txt_table.align["Suspends"] = "r"
                    txt_table.align["Clocks"] = "r"

                    text += txt_table.get_string(sortby="Name") + '\n'

                text = text.replace('\n', ' |br| ').replace('\r', '').\
                    replace('"', "'")
                try:
                    self._data["tests"][self._test_ID]["show-run"] += " |br| "
                    self._data["tests"][self._test_ID]["show-run"] += \
                        "**DUT" + str(self._show_run_lookup_nr) + ":** |br| " \
                        + text
                except KeyError:
                    pass

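    # Illustrative sketch (not executed) of the runtime structure
    # _get_show_run() expects after stripping the message header; the shape
    # is assumed from the keys read above, the names and values are made up:
    #
    #     [{"name": "ip4-lookup",
    #       "calls": [calls_per_thread, ...],
    #       "vectors": [vectors_per_thread, ...],
    #       "suspends": [suspends_per_thread, ...],
    #       "clocks": [clocks_per_thread, ...]},
    #      ...]
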
    # TODO: Remove when definitely no NDRPDRDISC tests are used:
    def _get_latency(self, msg, test_type):
        """Get the latency data from the test message.

        :param msg: Message to be parsed.
        :param test_type: Type of the test - NDR or PDR.
        :type msg: str
        :type test_type: str
        :returns: Latencies parsed from the message.
        :rtype: dict
        """

        if test_type == "NDR":
            groups = re.search(self.REGEX_LAT_NDR, msg)
            groups_range = range(1, 7)
        elif test_type == "PDR":
            groups = re.search(self.REGEX_LAT_PDR, msg)
            groups_range = range(1, 3)
        else:
            return {}

        latencies = list()
        for idx in groups_range:
            try:
                lat = [int(item) for item in str(groups.group(idx)).split('/')]
            except (AttributeError, ValueError):
                lat = [-1, -1, -1]
            latencies.append(lat)

        keys = ("min", "avg", "max")
        latency = {
            "direction1": {
            },
            "direction2": {
            }
        }

        latency["direction1"]["100"] = dict(zip(keys, latencies[0]))
        latency["direction2"]["100"] = dict(zip(keys, latencies[1]))
        if test_type == "NDR":
            latency["direction1"]["50"] = dict(zip(keys, latencies[2]))
            latency["direction2"]["50"] = dict(zip(keys, latencies[3]))
            latency["direction1"]["10"] = dict(zip(keys, latencies[4]))
            latency["direction2"]["10"] = dict(zip(keys, latencies[5]))

        return latency

    def _get_ndrpdr_throughput(self, msg):
        """Get NDR_LOWER, NDR_UPPER, PDR_LOWER and PDR_UPPER from the test
        message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        throughput = {
            "NDR": {"LOWER": -1.0, "UPPER": -1.0},
            "PDR": {"LOWER": -1.0, "UPPER": -1.0}
        }
        status = "FAIL"
        groups = re.search(self.REGEX_NDRPDR_RATE, msg)

        if groups is not None:
            try:
                throughput["NDR"]["LOWER"] = float(groups.group(1))
                throughput["NDR"]["UPPER"] = float(groups.group(2))
                throughput["PDR"]["LOWER"] = float(groups.group(3))
                throughput["PDR"]["UPPER"] = float(groups.group(4))
                status = "PASS"
            except (IndexError, ValueError):
                pass

        return throughput, status
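
    # Illustrative sketch (not executed): the message layout is assumed from
    # REGEX_NDRPDR_RATE above and the values are made up. A message such as
    #
    #     msg = ("NDR_LOWER: 12345678.0 pps (rate info)\n"
    #            "extra line\n"
    #            "NDR_UPPER: 12345900.0 pps\n"
    #            "PDR_LOWER: 23456789.0 pps (rate info)\n"
    #            "extra line\n"
    #            "PDR_UPPER: 23456900.0 pps")
    #
    # would make _get_ndrpdr_throughput(msg) return:
    #
    #     ({"NDR": {"LOWER": 12345678.0, "UPPER": 12345900.0},
    #       "PDR": {"LOWER": 23456789.0, "UPPER": 23456900.0}}, "PASS")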

    def _get_plr_throughput(self, msg):
        """Get PLRsearch lower bound and PLRsearch upper bound from the test
        message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        throughput = {
            "LOWER": -1.0,
            "UPPER": -1.0
        }
        status = "FAIL"
        groups = re.search(self.REGEX_PLR_RATE, msg)

        if groups is not None:
            try:
                throughput["LOWER"] = float(groups.group(1))
                throughput["UPPER"] = float(groups.group(2))
                status = "PASS"
            except (IndexError, ValueError):
                pass

        return throughput, status
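
    # Illustrative sketch (not executed; message layout assumed from
    # REGEX_PLR_RATE, values made up): a message containing
    #
    #     PLRsearch lower bound: 12345678.0 pps
    #     PLRsearch upper bound: 12345900.0 pps
    #
    # would yield ({"LOWER": 12345678.0, "UPPER": 12345900.0}, "PASS").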

    def _get_ndrpdr_latency(self, msg):
        """Get LATENCY from the test message.

        :param msg: The test message to be parsed.
        :type msg: str
        :returns: Parsed data as a dict and the status (PASS/FAIL).
        :rtype: tuple(dict, str)
        """

        latency = {
            "NDR": {
                "direction1": {"min": -1.0, "avg": -1.0, "max": -1.0},
                "direction2": {"min": -1.0, "avg": -1.0, "max": -1.0}
            },
            "PDR": {
                "direction1": {"min": -1.0, "avg": -1.0, "max": -1.0},
                "direction2": {"min": -1.0, "avg": -1.0, "max": -1.0}
            }
        }
        status = "FAIL"
        groups = re.search(self.REGEX_NDRPDR_LAT, msg)

        if groups is not None:
            keys = ("min", "avg", "max")
            try:
                latency["NDR"]["direction1"] = dict(
                    zip(keys, [float(l) for l in groups.group(1).split('/')]))
                latency["NDR"]["direction2"] = dict(
                    zip(keys, [float(l) for l in groups.group(2).split('/')]))
                latency["PDR"]["direction1"] = dict(
                    zip(keys, [float(l) for l in groups.group(3).split('/')]))
                latency["PDR"]["direction2"] = dict(
                    zip(keys, [float(l) for l in groups.group(4).split('/')]))
                status = "PASS"
            except (IndexError, ValueError):
                pass

        return latency, status
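
    # Illustrative sketch (not executed): each group captured by
    # REGEX_NDRPDR_LAT is a "min/avg/max" string, so a made-up
    # group(1) == "10/20/30" becomes
    # {"min": 10.0, "avg": 20.0, "max": 30.0} under
    # latency["NDR"]["direction1"].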

    def visit_suite(self, suite):
        """Implements traversing through the suite and its direct children.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """
        if self.start_suite(suite) is not False:
            suite.suites.visit(self)
            suite.tests.visit(self)
            self.end_suite(suite)

    def start_suite(self, suite):
        """Called when suite starts.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """

        try:
            parent_name = suite.parent.name
        except AttributeError:
            return

        doc_str = suite.doc.replace('"', "'").replace('\n', ' ').\
            replace('\r', '').replace('*[', ' |br| *[').replace("*", "**")
        doc_str = replace(doc_str, ' |br| *[', '*[', maxreplace=1)

        self._data["suites"][suite.longname.lower().replace('"', "'").
            replace(" ", "_")] = {
                "name": suite.name.lower(),
                "doc": doc_str,
                "parent": parent_name,
                "level": len(suite.longname.split("."))
            }

        suite.keywords.visit(self)

    def end_suite(self, suite):
        """Called when suite ends.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """
        pass

    def visit_test(self, test):
        """Implements traversing through the test.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """
        if self.start_test(test) is not False:
            test.keywords.visit(self)
            self.end_test(test)

    def start_test(self, test):
        """Called when test starts.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """

        longname_orig = test.longname.lower()

        # Check the ignore list
        if longname_orig in self._ignore:
            return

        tags = [str(tag) for tag in test.tags]
        test_result = dict()

        # Change the TC long name and name if defined in the mapping table
        longname = self._mapping.get(longname_orig, None)
        if longname is not None:
            name = longname.split('.')[-1]
            logging.debug("{0}\n{1}\n{2}\n{3}".format(
                self._data["metadata"], longname_orig, longname, name))
        else:
            longname = longname_orig
            name = test.name.lower()

        # Remove TC number from the TC long name (backward compatibility):
        self._test_ID = re.sub(self.REGEX_TC_NUMBER, "", longname)
        # Remove TC number from the TC name (not needed):
        test_result["name"] = re.sub(self.REGEX_TC_NUMBER, "", name)

        test_result["parent"] = test.parent.name.lower()
        test_result["tags"] = tags
        doc_str = test.doc.replace('"', "'").replace('\n', ' '). \
            replace('\r', '').replace('[', ' |br| [')
        test_result["doc"] = replace(doc_str, ' |br| [', '[', maxreplace=1)
        test_result["msg"] = test.message.replace('\n', ' |br| '). \
            replace('\r', '').replace('"', "'")
        test_result["type"] = "FUNC"
        test_result["status"] = test.status

        if "PERFTEST" in tags:
            # Replace info about cores (e.g. -1c-) with the info about threads
            # and cores (e.g. -1t1c-) in the long test case names and in the
            # test case names if necessary.
            groups = re.search(self.REGEX_TC_NAME_OLD, self._test_ID)
            if not groups:
                tag_count = 0
                tag_tc = str()
                for tag in test_result["tags"]:
                    groups = re.search(self.REGEX_TC_TAG, tag)
                    if groups:
                        tag_count += 1
                        tag_tc = tag

                if tag_count == 1:
                    self._test_ID = re.sub(self.REGEX_TC_NAME_NEW,
                                           "-{0}-".format(tag_tc.lower()),
                                           self._test_ID,
                                           count=1)
                    test_result["name"] = re.sub(self.REGEX_TC_NAME_NEW,
                                                 "-{0}-".format(tag_tc.lower()),
                                                 test_result["name"],
                                                 count=1)
                else:
                    test_result["status"] = "FAIL"
                    self._data["tests"][self._test_ID] = test_result
                    logging.debug("The test '{0}' has none or more than one "
                                  "multi-threading tag.".format(self._test_ID))
                    logging.debug("Tags: {0}".format(test_result["tags"]))
                    return

        if test.status == "PASS" and ("NDRPDRDISC" in tags or
                                      "NDRPDR" in tags or
                                      "SOAK" in tags or
                                      "TCP" in tags or
                                      "MRR" in tags or
                                      "BMRR" in tags or
                                      "RECONF" in tags):
            # TODO: Remove when definitely no NDRPDRDISC tests are used:
            if "NDRDISC" in tags:
                test_result["type"] = "NDR"
            # TODO: Remove when definitely no NDRPDRDISC tests are used:
            elif "PDRDISC" in tags:
                test_result["type"] = "PDR"
            elif "NDRPDR" in tags:
                test_result["type"] = "NDRPDR"
            elif "SOAK" in tags:
                test_result["type"] = "SOAK"
            elif "TCP" in tags:
                test_result["type"] = "TCP"
            elif "MRR" in tags:
                test_result["type"] = "MRR"
            elif "FRMOBL" in tags or "BMRR" in tags:
                test_result["type"] = "BMRR"
            elif "RECONF" in tags:
                test_result["type"] = "RECONF"
            else:
                test_result["status"] = "FAIL"
                self._data["tests"][self._test_ID] = test_result
                return

            # TODO: Remove when definitely no NDRPDRDISC tests are used:
            if test_result["type"] in ("NDR", "PDR"):
                try:
                    rate_value = str(re.search(
                        self.REGEX_RATE, test.message).group(1))
                except AttributeError:
                    rate_value = "-1"
                try:
                    rate_unit = str(re.search(
                        self.REGEX_RATE, test.message).group(2))
                except AttributeError:
                    rate_unit = "-1"

                test_result["throughput"] = dict()
                test_result["throughput"]["value"] = \
                    int(rate_value.split('.')[0])
                test_result["throughput"]["unit"] = rate_unit
                test_result["latency"] = \
                    self._get_latency(test.message, test_result["type"])
                if test_result["type"] == "PDR":
                    test_result["lossTolerance"] = str(re.search(
                        self.REGEX_TOLERANCE, test.message).group(1))

            elif test_result["type"] in ("NDRPDR", ):
                test_result["throughput"], test_result["status"] = \
                    self._get_ndrpdr_throughput(test.message)
                test_result["latency"], test_result["status"] = \
                    self._get_ndrpdr_latency(test.message)

            elif test_result["type"] in ("SOAK", ):
                test_result["throughput"], test_result["status"] = \
                    self._get_plr_throughput(test.message)

            elif test_result["type"] in ("TCP", ):
                groups = re.search(self.REGEX_TCP, test.message)
                test_result["result"] = int(groups.group(2))

            elif test_result["type"] in ("MRR", "BMRR"):
                test_result["result"] = dict()
                groups = re.search(self.REGEX_BMRR, test.message)
                if groups is not None:
                    items_str = groups.group(1)
                    items_float = [float(item.strip()) for item
                                   in items_str.split(",")]
                    metadata = AvgStdevMetadataFactory.from_data(items_float)
                    # Next two lines have been introduced in CSIT-1179,
                    # to be removed in CSIT-1180.
                    metadata.size = 1
                    metadata.stdev = 0.0
                    test_result["result"]["receive-rate"] = metadata
                else:
                    groups = re.search(self.REGEX_MRR, test.message)
                    test_result["result"]["receive-rate"] = \
                        AvgStdevMetadataFactory.from_data([
                            float(groups.group(3)) / float(groups.group(1)), ])

            elif test_result["type"] == "RECONF":
                test_result["result"] = None
                try:
                    grps_loss = re.search(self.REGEX_RECONF_LOSS, test.message)
                    grps_time = re.search(self.REGEX_RECONF_TIME, test.message)
                    test_result["result"] = {
                        "loss": int(grps_loss.group(1)),
                        "time": float(grps_time.group(1))
                    }
                except (AttributeError, IndexError, ValueError, TypeError):
                    test_result["status"] = "FAIL"

        self._data["tests"][self._test_ID] = test_result

    def end_test(self, test):
        """Called when test ends.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """
        pass

    def visit_keyword(self, keyword):
        """Implements traversing through the keyword and its child keywords.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """
        if self.start_keyword(keyword) is not False:
            self.end_keyword(keyword)

    def start_keyword(self, keyword):
        """Called when keyword starts. Default implementation does nothing.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """
        try:
            if keyword.type == "setup":
                self.visit_setup_kw(keyword)
            elif keyword.type == "teardown":
                self._lookup_kw_nr = 0
                self.visit_teardown_kw(keyword)
            else:
                self._lookup_kw_nr = 0
                self.visit_test_kw(keyword)
        except AttributeError:
            pass

    def end_keyword(self, keyword):
        """Called when keyword ends. Default implementation does nothing.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """
        pass

    def visit_test_kw(self, test_kw):
        """Implements traversing through the test keyword and its child
        keywords.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """
        for keyword in test_kw.keywords:
            if self.start_test_kw(keyword) is not False:
                self.visit_test_kw(keyword)
                self.end_test_kw(keyword)

    def start_test_kw(self, test_kw):
        """Called when test keyword starts. Default implementation does
        nothing.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """
        if test_kw.name.count("Show Runtime Counters On All Duts"):
            self._lookup_kw_nr += 1
            self._show_run_lookup_nr = 0
            self._msg_type = "test-show-runtime"
        elif test_kw.name.count("Install Dpdk Test") and not self._version:
            self._msg_type = "dpdk-version"
        else:
            return
        test_kw.messages.visit(self)

    def end_test_kw(self, test_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """
        pass

    def visit_setup_kw(self, setup_kw):
        """Implements traversing through the setup keyword and its child
        keywords.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """
        for keyword in setup_kw.keywords:
            if self.start_setup_kw(keyword) is not False:
                self.visit_setup_kw(keyword)
                self.end_setup_kw(keyword)

    def start_setup_kw(self, setup_kw):
        """Called when setup keyword starts. Default implementation does
        nothing.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """
        if setup_kw.name.count("Show Vpp Version On All Duts") \
                and not self._version:
            self._msg_type = "vpp-version"
        elif setup_kw.name.count("Set Global Variable") \
                and not self._timestamp:
            self._msg_type = "timestamp"
        elif setup_kw.name.count("Setup Framework") and not self._testbed:
            self._msg_type = "testbed"
        else:
            return
        setup_kw.messages.visit(self)

    def end_setup_kw(self, setup_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """
        pass

    def visit_teardown_kw(self, teardown_kw):
        """Implements traversing through the teardown keyword and its child
        keywords.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """
        for keyword in teardown_kw.keywords:
            if self.start_teardown_kw(keyword) is not False:
                self.visit_teardown_kw(keyword)
                self.end_teardown_kw(keyword)

    def start_teardown_kw(self, teardown_kw):
        """Called when teardown keyword starts. Default implementation does
        nothing.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """

        if teardown_kw.name.count("Show Vat History On All Duts"):
            self._conf_history_lookup_nr = 0
            self._msg_type = "teardown-vat-history"
            teardown_kw.messages.visit(self)
        elif teardown_kw.name.count("Show Papi History On All Duts"):
            self._conf_history_lookup_nr = 0
            self._msg_type = "teardown-papi-history"
            teardown_kw.messages.visit(self)

    def end_teardown_kw(self, teardown_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """
        pass

    def visit_message(self, msg):
        """Implements visiting the message.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if self.start_message(msg) is not False:
            self.end_message(msg)

    def start_message(self, msg):
        """Called when message starts. Gets the required information from the
        message, e.g. the VPP version.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """

        if self._msg_type:
            self.parse_msg[self._msg_type](msg)

    def end_message(self, msg):
        """Called when message ends. Default implementation does nothing.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        pass


class InputData(object):
    """Input data

    The data is extracted from output.xml files generated by Jenkins jobs and
    stored in pandas' DataFrames.

    The data structure:
    - job name
      - build number
        - metadata
          (as described in ExecutionChecker documentation)
        - suites
          (as described in ExecutionChecker documentation)
        - tests
          (as described in ExecutionChecker documentation)
    """
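
    # Illustrative usage sketch (not executed; the job name and build number
    # are made up):
    #
    #     in_data = InputData(spec)
    #     in_data.download_and_parse_data(repeat=2)
    #     tests = in_data.tests("csit-vpp-perf-mrr-daily-master", "123")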

    def __init__(self, spec):
        """Initialization.

        :param spec: Specification.
        :type spec: Specification
        """

        # Specification:
        self._cfg = spec

        # Data store:
        self._input_data = pd.Series()

    @property
    def data(self):
        """Getter - Input data.

        :returns: Input data
        :rtype: pandas.Series
        """
        return self._input_data

    def metadata(self, job, build):
        """Getter - metadata

        :param job: Job whose metadata we want.
        :param build: Build whose metadata we want.
        :type job: str
        :type build: str
        :returns: Metadata
        :rtype: pandas.Series
        """

        return self.data[job][build]["metadata"]

    def suites(self, job, build):
        """Getter - suites

        :param job: Job whose suites we want.
        :param build: Build whose suites we want.
        :type job: str
        :type build: str
        :returns: Suites.
        :rtype: pandas.Series
        """

        return self.data[job][str(build)]["suites"]

    def tests(self, job, build):
        """Getter - tests

        :param job: Job whose tests we want.
        :param build: Build whose tests we want.
        :type job: str
        :type build: str
        :returns: Tests.
        :rtype: pandas.Series
        """

        return self.data[job][build]["tests"]

    def _parse_tests(self, job, build, log):
        """Process data from robot output.xml file and return JSON structured
        data.

        :param job: The name of the job whose build output data will be
            processed.
        :param build: The build whose output data will be processed.
        :param log: List of log messages.
        :type job: str
        :type build: dict
        :type log: list of tuples (severity, msg)
        :returns: JSON data structure.
        :rtype: dict
        """

        metadata = {
            "job": job,
            "build": build
        }

        with open(build["file-name"], 'r') as data_file:
            try:
                result = ExecutionResult(data_file)
            except errors.DataError as err:
                log.append(("ERROR", "Error occurred while parsing output.xml: "
                                     "{0}".format(err)))
                return None
        checker = ExecutionChecker(metadata, self._cfg.mapping,
                                   self._cfg.ignore)
        result.visit(checker)

        return checker.data

    def _download_and_parse_build(self, job, build, repeat, pid=10000):
        """Download and parse the input data file.

        :param pid: PID of the process executing this method.
        :param job: Name of the Jenkins job which generated the processed input
            file.
        :param build: Information about the Jenkins build which generated the
            processed input file.
        :param repeat: Repeat the download specified number of times if not
            successful.
        :type pid: int
        :type job: str
        :type build: dict
        :type repeat: int
        """

        logs = list()

        logs.append(("INFO", "  Processing the job/build: {0}: {1}".
                     format(job, build["build"])))

        state = "failed"
        success = False
        data = None
        do_repeat = repeat
        while do_repeat:
            success = download_and_unzip_data_file(self._cfg, job, build, pid,
                                                   logs)
            if success:
                break
            do_repeat -= 1
        if not success:
            logs.append(("ERROR", "It is not possible to download the input "
                                  "data file from the job '{job}', build "
                                  "'{build}', or it is damaged. Skipped.".
                         format(job=job, build=build["build"])))
        if success:
            logs.append(("INFO", "    Processing data from the build '{0}' ...".
                         format(build["build"])))
            data = self._parse_tests(job, build, logs)
            if data is None:
                logs.append(("ERROR", "Input data file from the job '{job}', "
                                      "build '{build}' is damaged. Skipped.".
                             format(job=job, build=build["build"])))
            else:
                state = "processed"

            try:
                remove(build["file-name"])
            except OSError as err:
                logs.append(("ERROR", "Cannot remove the file '{0}': {1}".
                             format(build["file-name"], repr(err))))

        # If the time period is defined in the specification file, discard the
        # data of all builds which are outside the time period.
        timeperiod = self._cfg.input.get("time-period", None)
        if timeperiod and data:
            now = dt.utcnow()
            timeperiod = timedelta(int(timeperiod))
            metadata = data.get("metadata", None)
            if metadata:
                generated = metadata.get("generated", None)
                if generated:
                    generated = dt.strptime(generated, "%Y%m%d %H:%M")
                    if (now - generated) > timeperiod:
                        # Remove the data and the file:
                        state = "removed"
                        data = None
                        logs.append(
                            ("INFO",
                             "    The build {job}/{build} is outdated, will be "
                             "removed".format(job=job, build=build["build"])))
        logs.append(("INFO", "  Done."))

        for level, line in logs:
            if level == "INFO":
                logging.info(line)
            elif level == "ERROR":
                logging.error(line)
            elif level == "DEBUG":
                logging.debug(line)
            elif level == "CRITICAL":
                logging.critical(line)
            elif level == "WARNING":
                logging.warning(line)

        return {"data": data, "state": state, "job": job, "build": build}

    def download_and_parse_data(self, repeat=1):
        """Download the input data files, parse input data from input files and
        store in pandas' Series.

        :param repeat: Repeat the download specified number of times if not
            successful.
        :type repeat: int
        """

        logging.info("Downloading and parsing input files ...")

        for job, builds in self._cfg.builds.items():
            for build in builds:

                result = self._download_and_parse_build(job, build, repeat)
                build_nr = result["build"]["build"]

                if result["data"]:
                    data = result["data"]
                    build_data = pd.Series({
                        "metadata": pd.Series(
                            data["metadata"].values(),
                            index=data["metadata"].keys()),
                        "suites": pd.Series(data["suites"].values(),
                                            index=data["suites"].keys()),
                        "tests": pd.Series(data["tests"].values(),
                                           index=data["tests"].keys())})

                    if self._input_data.get(job, None) is None:
                        self._input_data[job] = pd.Series()
                    self._input_data[job][str(build_nr)] = build_data

                    self._cfg.set_input_file_name(
                        job, build_nr, result["build"]["file-name"])

                self._cfg.set_input_state(job, build_nr, result["state"])

                logging.info("Memory allocation: {0:,d}MB".format(
                    resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000))

        logging.info("Done.")

    @staticmethod
    def _end_of_tag(tag_filter, start=0, closer="'"):
        """Return the index of the character in the string which is the end of
        the tag.

        :param tag_filter: The string in which the end of the tag is searched.
        :param start: The index at which the search is started.
        :param closer: The character which closes the tag.
        :type tag_filter: str
        :type start: int
        :type closer: str
        :returns: The index of the tag closer.
        :rtype: int
        """

        try:
            idx_opener = tag_filter.index(closer, start)
            return tag_filter.index(closer, idx_opener + 1)
        except ValueError:
            return None

    @staticmethod
    def _condition(tag_filter):
        """Create a conditional statement from the given tag filter.

        :param tag_filter: Filter based on tags from the element specification.
        :type tag_filter: str
        :returns: Conditional statement which can be evaluated.
        :rtype: str
        """

        index = 0
        while True:
            index = InputData._end_of_tag(tag_filter, index)
            if index is None:
                return tag_filter
            index += 1
            tag_filter = tag_filter[:index] + " in tags" + tag_filter[index:]

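    # Illustrative sketch of _condition() (not executed; the filter string is
    # made up):
    #
    #     InputData._condition("'64B' and '1T1C'")
    #     -> "'64B' in tags and '1T1C' in tags"
    #
    # filter_data() below then eval()-uates the resulting statement against
    # each test's list of tags.
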
    def filter_data(self, element, params=None, data=None, data_set="tests",
                    continue_on_error=False):
        """Filter required data from the given jobs and builds.

        The output data structure is:

        - job 1
          - build 1
            - test (or suite) 1 ID:
              - param 1
              - param 2
              ...
              - param n
            ...
            - test (or suite) n ID:
            ...
          ...
          - build n
        ...
        - job n

        :param element: Element which will use the filtered data.
        :param params: Parameters which will be included in the output. If None,
            all parameters are included.
        :param data: If not None, this data is used instead of data specified
            in the element.
        :param data_set: The set of data to be filtered: tests, suites,
            metadata.
        :param continue_on_error: Continue if there is an error while reading
            the data. The item will be empty then.
        :type element: pandas.Series
        :type params: list
        :type data: dict
        :type data_set: str
        :returns: Filtered data.
        :rtype: pandas.Series
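
        A minimal, illustrative element specification fragment (the job name
        and parameter names are made up for this example):

            element = {
                "data": {"job-name": [100, 101]},
                "filter": "'ndrpdr' and '1t1c'",
                "parameters": ["throughput", "tags"]
            }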
        """

        try:
            if element["filter"] in ("all", "template"):
                cond = "True"
            else:
                cond = InputData._condition(element["filter"])
            logging.debug("   Filter: {0}".format(cond))
        except KeyError:
            logging.error("  No filter defined.")
            return None

        if params is None:
            params = element.get("parameters", None)
            if params:
                params.append("type")

        data_to_filter = data if data else element["data"]
        data = pd.Series()
        try:
            for job, builds in data_to_filter.items():
                data[job] = pd.Series()
                for build in builds:
                    data[job][str(build)] = pd.Series()
                    try:
                        data_iter = self.data[job][str(build)][data_set].\
                            iteritems()
                    except KeyError:
                        if continue_on_error:
                            continue
                        else:
                            return None
                    for test_ID, test_data in data_iter:
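                        # Evaluate the tag expression; "tags" holds the tags
                        # of the current test.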
                        if eval(cond, {"tags": test_data.get("tags", "")}):
                            data[job][str(build)][test_ID] = pd.Series()
                            if params is None:
                                for param, val in test_data.items():
                                    data[job][str(build)][test_ID][param] = val
                            else:
                                for param in params:
                                    try:
                                        data[job][str(build)][test_ID][param] =\
                                            test_data[param]
                                    except KeyError:
                                        data[job][str(build)][test_ID][param] =\
                                            "No Data"
            return data

        except (KeyError, IndexError, ValueError) as err:
            logging.error("   Missing mandatory parameter in the element "
                          "specification: {0}".format(err))
            return None
        except AttributeError:
            return None
        except SyntaxError:
            logging.error("   The filter '{0}' is not correct. Check if all "
                          "tags are enclosed by apostrophes.".format(cond))
            return None

    def filter_tests_by_name(self, element, params=None, data_set="tests",
                             continue_on_error=False):
        """Filter required data from the given jobs and builds, selecting
        tests by name.

        The output data structure is:

        - job 1
          - build 1
            - test (or suite) 1 ID:
              - param 1
              - param 2
              ...
              - param n
            ...
            - test (or suite) n ID:
            ...
          ...
          - build n
        ...
        - job n

        :param element: Element which will use the filtered data.
        :param params: Parameters which will be included in the output. If
            None, all parameters are included.
        :param data_set: The set of data to be filtered: tests, suites,
            metadata.
        :param continue_on_error: Continue if there is an error while reading
            the data. The item will be empty then.
        :type element: pandas.Series
        :type params: list
        :type data_set: str
        :type continue_on_error: bool
        :returns: Filtered data.
        :rtype: pandas.Series
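
        Each item in "include" is treated as a regular expression which is
        matched, from the beginning, against the lower-cased test ID. A
        minimal, illustrative element specification fragment (the names are
        made up for this example):

            element = {
                "data": {"job-name": [100, 101]},
                "include": ["tc01.*ndrpdr", "tc02.*ndrpdr"]
            }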
        """

        include = element.get("include", None)
        if not include:
            logging.warning("No tests to include, skipping the element.")
            return None

        if params is None:
            params = element.get("parameters", None)
            if params:
                params.append("type")

        data = pd.Series()
        try:
            for job, builds in element["data"].items():
                data[job] = pd.Series()
                for build in builds:
                    data[job][str(build)] = pd.Series()
                    for test in include:
                        try:
                            reg_ex = re.compile(str(test).lower())
                            for test_ID in self.data[job][str(build)]\
                                    [data_set].keys():
                                if re.match(reg_ex, str(test_ID).lower()):
                                    test_data = self.data[job][str(build)]\
                                        [data_set][test_ID]
                                    data[job][str(build)][test_ID] = pd.Series()
                                    if params is None:
                                        for param, val in test_data.items():
                                            data[job][str(build)][test_ID]\
                                                [param] = val
                                    else:
                                        for param in params:
                                            try:
                                                data[job][str(build)][test_ID]\
                                                    [param] = test_data[param]
                                            except KeyError:
                                                data[job][str(build)][test_ID]\
                                                    [param] = "No Data"
                        except KeyError as err:
                            logging.error("{err!r}".format(err=err))
                            if continue_on_error:
                                continue
                            else:
                                return None
            return data

        except (KeyError, IndexError, ValueError) as err:
            logging.error("Missing mandatory parameter in the element "
                          "specification: {err!r}".format(err=err))
            return None
        except AttributeError as err:
            logging.error("{err!r}".format(err=err))
            return None

    @staticmethod
    def merge_data(data):
        """Merge data from multiple jobs and builds into a simple data
        structure.

        The output data structure is:

        - test (suite) 1 ID:
          - param 1
          - param 2
          ...
          - param n
        ...
        - test (suite) n ID:
        ...

        :param data: Data to merge.
        :type data: pandas.Series
        :returns: Merged data.
        :rtype: pandas.Series
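
        Note: if the same test (suite) ID occurs in more than one build, the
        data from the build processed last overwrites the earlier entries.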
        """

        logging.info("    Merging data ...")

        merged_data = pd.Series()
        for _, builds in data.iteritems():
            for _, item in builds.iteritems():
                for ID, item_data in item.iteritems():
                    merged_data[ID] = item_data

        return merged_data