resources/tools/presentation/input_data_parser.py

   1 # Copyright (c) 2018 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Data pre-processing
  15
  16 - extract data from output.xml files generated by Jenkins jobs and store in
  17   pandas' Series,
  18 - provide access to the data.
  19 """
  20
  21 import re
  22 import pandas as pd
  23 import logging
  24 import xml.etree.ElementTree as ET
  25
  26 from robot.api import ExecutionResult, ResultVisitor
  27 from robot import errors
  28 from collections import OrderedDict
  29 from string import replace
  30 from os import remove
  31
  32 from input_data_files import download_and_unzip_data_file
  33
  34
  35 class ExecutionChecker(ResultVisitor):
  36     """Class to traverse through the test suite structure.
  37
  38     The functionality implemented in this class generates a json structure:
  39
  40     Performance tests:
  41
  42     {
  43         "metadata": {  # Optional
  44             "version": "VPP version",
  45             "job": "Jenkins job name",
  46             "build": "Information about the build"
  47         },
  48         "suites": {
  49             "Suite name 1": {
  50                 "doc": "Suite 1 documentation",
  51                 "parent": "Suite 1 parent",
  52                 "level": "Level of the suite in the suite hierarchy"
  53             }
  54             "Suite name N": {
  55                 "doc": "Suite N documentation",
  56                 "parent": "Suite 2 parent",
  57                 "level": "Level of the suite in the suite hierarchy"
  58             }
  59         }
  60         "tests": {
  61             "ID": {
  62                 "name": "Test name",
  63                 "parent": "Name of the parent of the test",
  64                 "doc": "Test documentation"
  65                 "msg": "Test message"
  66                 "tags": ["tag 1", "tag 2", "tag n"],
  67                 "type": "PDR" | "NDR",
  68                 "throughput": {
  69                     "value": int,
  70                     "unit": "pps" | "bps" | "percentage"
  71                 },
  72                 "latency": {
  73                     "direction1": {
  74                         "100": {
  75                             "min": int,
  76                             "avg": int,
  77                             "max": int
  78                         },
  79                         "50": {  # Only for NDR
  80                             "min": int,
  81                             "avg": int,
  82                             "max": int
  83                         },
  84                         "10": {  # Only for NDR
  85                             "min": int,
  86                             "avg": int,
  87                             "max": int
  88                         }
  89                     },
  90                     "direction2": {
  91                         "100": {
  92                             "min": int,
  93                             "avg": int,
  94                             "max": int
  95                         },
  96                         "50": {  # Only for NDR
  97                             "min": int,
  98                             "avg": int,
  99                             "max": int
 100                         },
 101                         "10": {  # Only for NDR
 102                             "min": int,
 103                             "avg": int,
 104                             "max": int
 105                         }
 106                     }
 107                 },
 108                 "lossTolerance": "lossTolerance",  # Only for PDR
                "vat-history": "DUT1 and DUT2 VAT History",
                "show-run": "Show Run"
 112             },
 113             "ID" {
 114                 # next test
 115             }
 116         }
 117     }
 118
 119     Functional tests:
 120
 121
 122     {
 123         "metadata": {  # Optional
 124             "version": "VPP version",
 125             "job": "Jenkins job name",
 126             "build": "Information about the build"
 127         },
 128         "suites": {
 129             "Suite name 1": {
 130                 "doc": "Suite 1 documentation",
 131                 "parent": "Suite 1 parent",
 132                 "level": "Level of the suite in the suite hierarchy"
 133             }
 134             "Suite name N": {
 135                 "doc": "Suite N documentation",
 136                 "parent": "Suite 2 parent",
 137                 "level": "Level of the suite in the suite hierarchy"
 138             }
 139         }
 140         "tests": {
 141             "ID": {
 142                 "name": "Test name",
 143                 "parent": "Name of the parent of the test",
 144                 "doc": "Test documentation"
 145                 "msg": "Test message"
 146                 "tags": ["tag 1", "tag 2", "tag n"],
 147                 "vat-history": "DUT1 and DUT2 VAT History"
 148                 "show-run": "Show Run"
 149                 "status": "PASS" | "FAIL"
 150             },
 151             "ID" {
 152                 # next test
 153             }
 154         }
 155     }
 156
 157     .. note:: ID is the lowercase full path to the test.
 158     """
 159
    # Throughput in the test message: group 1 is the decimal value and
    # group 2 the unit that follow "FINAL_RATE:". The leading "[\D\d]*"
    # matches any characters, newlines included.
    REGEX_RATE = re.compile(r'^[\D\d]*FINAL_RATE:\s(\d+\.\d+)\s(\w+)')

    # Latency of NDR tests: three consecutive "LAT_<n>%NDR:" lines, each
    # holding two quoted "a/b/c" triplets, i.e. six groups in total.
    # The "\/" in the first group is just an escaped "/".
    REGEX_LAT_NDR = re.compile(r'^[\D\d]*'
                               r'LAT_\d+%NDR:\s\[\'(-?\d+\/-?\d+/-?\d+)\','
                               r'\s\'(-?\d+/-?\d+/-?\d+)\'\]\s\n'
                               r'LAT_\d+%NDR:\s\[\'(-?\d+/-?\d+/-?\d+)\','
                               r'\s\'(-?\d+/-?\d+/-?\d+)\'\]\s\n'
                               r'LAT_\d+%NDR:\s\[\'(-?\d+/-?\d+/-?\d+)\','
                               r'\s\'(-?\d+/-?\d+/-?\d+)\'\]')

    # Latency of PDR tests: a single "LAT_<n>%PDR:" line with two quoted
    # "a/b/c" triplets (two groups).
    REGEX_LAT_PDR = re.compile(r'^[\D\d]*'
                               r'LAT_\d+%PDR:\s\[\'(-?\d+/-?\d+/-?\d+)\','
                               r'\s\'(-?\d+/-?\d+/-?\d+)\'\][\D\d]*')

    # Loss tolerance: group 1 is the decimal number after "LOSS_ACCEPTANCE:".
    REGEX_TOLERANCE = re.compile(r'^[\D\d]*LOSS_ACCEPTANCE:\s(\d*\.\d*)\s'
                                 r'[\D\d]*')

    # VPP version: group 2 is the rest of the line after
    # "return STDOUT Version:".
    REGEX_VERSION = re.compile(r"(return STDOUT Version:\s*)(.*)")

    # TCP tests: group 1 is the metric name (rps, cps or throughput),
    # group 2 the digits that follow it.
    REGEX_TCP = re.compile(r'Total\s(rps|cps|throughput):\s([0-9]*).*$')

    # MRR tests: group 1 is the number in "pkts/<n>sec", group 2 the tx
    # count and group 3 the rx count.
    REGEX_MRR = re.compile(r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s'
                           r'tx\s(\d*),\srx\s(\d*)')
 183
 184     def __init__(self, metadata):
 185         """Initialisation.
 186
 187         :param metadata: Key-value pairs to be included in "metadata" part of
 188         JSON structure.
 189         :type metadata: dict
 190         """
 191
 192         # Type of message to parse out from the test messages
 193         self._msg_type = None
 194
 195         # VPP version
 196         self._version = None
 197
 198         # Number of VAT History messages found:
 199         # 0 - no message
 200         # 1 - VAT History of DUT1
 201         # 2 - VAT History of DUT2
 202         self._lookup_kw_nr = 0
 203         self._vat_history_lookup_nr = 0
 204
 205         # Number of Show Running messages found
 206         # 0 - no message
 207         # 1 - Show run message found
 208         self._show_run_lookup_nr = 0
 209
 210         # Test ID of currently processed test- the lowercase full path to the
 211         # test
 212         self._test_ID = None
 213
 214         # The main data structure
 215         self._data = {
 216             "metadata": OrderedDict(),
 217             "suites": OrderedDict(),
 218             "tests": OrderedDict()
 219         }
 220
 221         # Save the provided metadata
 222         for key, val in metadata.items():
 223             self._data["metadata"][key] = val
 224
 225         # Dictionary defining the methods used to parse different types of
 226         # messages
 227         self.parse_msg = {
 228             "setup-version": self._get_version,
 229             "teardown-vat-history": self._get_vat_history,
 230             "test-show-runtime": self._get_show_run
 231         }
 232
 233     @property
 234     def data(self):
 235         """Getter - Data parsed from the XML file.
 236
 237         :returns: Data parsed from the XML file.
 238         :rtype: dict
 239         """
 240         return self._data
 241
 242     def _get_version(self, msg):
 243         """Called when extraction of VPP version is required.
 244
 245         :param msg: Message to process.
 246         :type msg: Message
 247         :returns: Nothing.
 248         """
 249
 250         if msg.message.count("return STDOUT Version:"):
 251             self._version = str(re.search(self.REGEX_VERSION, msg.message).
 252                                 group(2))
 253             self._data["metadata"]["version"] = self._version
 254             self._data["metadata"]["generated"] = msg.timestamp
 255             self._msg_type = None
 256
 257     def _get_vat_history(self, msg):
 258         """Called when extraction of VAT command history is required.
 259
 260         :param msg: Message to process.
 261         :type msg: Message
 262         :returns: Nothing.
 263         """
 264         if msg.message.count("VAT command history:"):
 265             self._vat_history_lookup_nr += 1
 266             if self._vat_history_lookup_nr == 1:
 267                 self._data["tests"][self._test_ID]["vat-history"] = str()
 268             else:
 269                 self._msg_type = None
 270             text = re.sub("[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3} "
 271                           "VAT command history:", "", msg.message, count=1). \
 272                 replace("\n\n", "\n").replace('\n', ' |br| ').\
 273                 replace('\r', '').replace('"', "'")
 274
 275             self._data["tests"][self._test_ID]["vat-history"] += " |br| "
 276             self._data["tests"][self._test_ID]["vat-history"] += \
 277                 "**DUT" + str(self._vat_history_lookup_nr) + ":** " + text
 278
 279     def _get_show_run(self, msg):
 280         """Called when extraction of VPP operational data (output of CLI command
 281         Show Runtime) is required.
 282
 283         :param msg: Message to process.
 284         :type msg: Message
 285         :returns: Nothing.
 286         """
 287         if msg.message.count("return STDOUT Thread "):
 288             self._show_run_lookup_nr += 1
 289             if self._lookup_kw_nr == 1 and self._show_run_lookup_nr == 1:
 290                 self._data["tests"][self._test_ID]["show-run"] = str()
 291             if self._lookup_kw_nr > 1:
 292                 self._msg_type = None
 293             if self._show_run_lookup_nr == 1:
 294                 text = msg.message.replace("vat# ", "").\
 295                     replace("return STDOUT ", "").replace("\n\n", "\n").\
 296                     replace('\n', ' |br| ').\
 297                     replace('\r', '').replace('"', "'")
 298                 try:
 299                     self._data["tests"][self._test_ID]["show-run"] += " |br| "
 300                     self._data["tests"][self._test_ID]["show-run"] += \
 301                         "**DUT" + str(self._lookup_kw_nr) + ":** |br| " + text
 302                 except KeyError:
 303                     pass
 304
 305     def _get_latency(self, msg, test_type):
 306         """Get the latency data from the test message.
 307
 308         :param msg: Message to be parsed.
 309         :param test_type: Type of the test - NDR or PDR.
 310         :type msg: str
 311         :type test_type: str
 312         :returns: Latencies parsed from the message.
 313         :rtype: dict
 314         """
 315
 316         if test_type == "NDR":
 317             groups = re.search(self.REGEX_LAT_NDR, msg)
 318             groups_range = range(1, 7)
 319         elif test_type == "PDR":
 320             groups = re.search(self.REGEX_LAT_PDR, msg)
 321             groups_range = range(1, 3)
 322         else:
 323             return {}
 324
 325         latencies = list()
 326         for idx in groups_range:
 327             try:
 328                 lat = [int(item) for item in str(groups.group(idx)).split('/')]
 329             except (AttributeError, ValueError):
 330                 lat = [-1, -1, -1]
 331             latencies.append(lat)
 332
 333         keys = ("min", "avg", "max")
 334         latency = {
 335             "direction1": {
 336             },
 337             "direction2": {
 338             }
 339         }
 340
 341         latency["direction1"]["100"] = dict(zip(keys, latencies[0]))
 342         latency["direction2"]["100"] = dict(zip(keys, latencies[1]))
 343         if test_type == "NDR":
 344             latency["direction1"]["50"] = dict(zip(keys, latencies[2]))
 345             latency["direction2"]["50"] = dict(zip(keys, latencies[3]))
 346             latency["direction1"]["10"] = dict(zip(keys, latencies[4]))
 347             latency["direction2"]["10"] = dict(zip(keys, latencies[5]))
 348
 349         return latency
 350
 351     def visit_suite(self, suite):
 352         """Implements traversing through the suite and its direct children.
 353
 354         :param suite: Suite to process.
 355         :type suite: Suite
 356         :returns: Nothing.
 357         """
 358         if self.start_suite(suite) is not False:
 359             suite.suites.visit(self)
 360             suite.tests.visit(self)
 361             self.end_suite(suite)
 362
 363     def start_suite(self, suite):
 364         """Called when suite starts.
 365
 366         :param suite: Suite to process.
 367         :type suite: Suite
 368         :returns: Nothing.
 369         """
 370
 371         try:
 372             parent_name = suite.parent.name
 373         except AttributeError:
 374             return
 375
 376         doc_str = suite.doc.replace('"', "'").replace('\n', ' ').\
 377             replace('\r', '').replace('*[', ' |br| *[').replace("*", "**")
 378         doc_str = replace(doc_str, ' |br| *[', '*[', maxreplace=1)
 379
 380         self._data["suites"][suite.longname.lower().replace('"', "'").
 381             replace(" ", "_")] = {
 382                 "name": suite.name.lower(),
 383                 "doc": doc_str,
 384                 "parent": parent_name,
 385                 "level": len(suite.longname.split("."))
 386             }
 387
 388         suite.keywords.visit(self)
 389
 390     def end_suite(self, suite):
 391         """Called when suite ends.
 392
 393         :param suite: Suite to process.
 394         :type suite: Suite
 395         :returns: Nothing.
 396         """
 397         pass
 398
 399     def visit_test(self, test):
 400         """Implements traversing through the test.
 401
 402         :param test: Test to process.
 403         :type test: Test
 404         :returns: Nothing.
 405         """
 406         if self.start_test(test) is not False:
 407             test.keywords.visit(self)
 408             self.end_test(test)
 409
 410     def start_test(self, test):
 411         """Called when test starts.
 412
 413         :param test: Test to process.
 414         :type test: Test
 415         :returns: Nothing.
 416         """
 417
 418         tags = [str(tag) for tag in test.tags]
 419         test_result = dict()
 420         test_result["name"] = test.name.lower()
 421         test_result["parent"] = test.parent.name.lower()
 422         test_result["tags"] = tags
 423         doc_str = test.doc.replace('"', "'").replace('\n', ' '). \
 424             replace('\r', '').replace('[', ' |br| [')
 425         test_result["doc"] = replace(doc_str, ' |br| [', '[', maxreplace=1)
 426         test_result["msg"] = test.message.replace('\n', ' |br| '). \
 427             replace('\r', '').replace('"', "'")
 428         if test.status == "PASS" and ("NDRPDRDISC" in tags or
 429                                       "TCP" in tags or
 430                                       "MRR" in tags):
 431             if "NDRDISC" in tags:
 432                 test_type = "NDR"
 433             elif "PDRDISC" in tags:
 434                 test_type = "PDR"
 435             elif "TCP" in tags:
 436                 test_type = "TCP"
 437             elif "MRR" in tags:
 438                 test_type = "MRR"
 439             else:
 440                 return
 441
 442             test_result["type"] = test_type
 443
 444             if test_type in ("NDR", "PDR"):
 445                 try:
 446                     rate_value = str(re.search(
 447                         self.REGEX_RATE, test.message).group(1))
 448                 except AttributeError:
 449                     rate_value = "-1"
 450                 try:
 451                     rate_unit = str(re.search(
 452                         self.REGEX_RATE, test.message).group(2))
 453                 except AttributeError:
 454                     rate_unit = "-1"
 455
 456                 test_result["throughput"] = dict()
 457                 test_result["throughput"]["value"] = \
 458                     int(rate_value.split('.')[0])
 459                 test_result["throughput"]["unit"] = rate_unit
 460                 test_result["latency"] = \
 461                     self._get_latency(test.message, test_type)
 462                 if test_type == "PDR":
 463                     test_result["lossTolerance"] = str(re.search(
 464                         self.REGEX_TOLERANCE, test.message).group(1))
 465
 466             elif test_type in ("TCP", ):
 467                 groups = re.search(self.REGEX_TCP, test.message)
 468                 test_result["result"] = dict()
 469                 test_result["result"]["value"] = int(groups.group(2))
 470                 test_result["result"]["unit"] = groups.group(1)
 471             elif test_type in ("MRR", ):
 472                 groups = re.search(self.REGEX_MRR, test.message)
 473                 test_result["result"] = dict()
 474                 test_result["result"]["duration"] = int(groups.group(1))
 475                 test_result["result"]["tx"] = int(groups.group(2))
 476                 test_result["result"]["rx"] = int(groups.group(3))
 477                 test_result["result"]["throughput"] = int(
 478                     test_result["result"]["rx"] /
 479                     test_result["result"]["duration"])
 480         else:
 481             test_result["status"] = test.status
 482
 483         self._test_ID = test.longname.lower()
 484         self._data["tests"][self._test_ID] = test_result
 485
 486     def end_test(self, test):
 487         """Called when test ends.
 488
 489         :param test: Test to process.
 490         :type test: Test
 491         :returns: Nothing.
 492         """
 493         pass
 494
 495     def visit_keyword(self, keyword):
 496         """Implements traversing through the keyword and its child keywords.
 497
 498         :param keyword: Keyword to process.
 499         :type keyword: Keyword
 500         :returns: Nothing.
 501         """
 502         if self.start_keyword(keyword) is not False:
 503             self.end_keyword(keyword)
 504
 505     def start_keyword(self, keyword):
 506         """Called when keyword starts. Default implementation does nothing.
 507
 508         :param keyword: Keyword to process.
 509         :type keyword: Keyword
 510         :returns: Nothing.
 511         """
 512         try:
 513             if keyword.type == "setup":
 514                 self.visit_setup_kw(keyword)
 515             elif keyword.type == "teardown":
 516                 self._lookup_kw_nr = 0
 517                 self.visit_teardown_kw(keyword)
 518             else:
 519                 self._lookup_kw_nr = 0
 520                 self.visit_test_kw(keyword)
 521         except AttributeError:
 522             pass
 523
 524     def end_keyword(self, keyword):
 525         """Called when keyword ends. Default implementation does nothing.
 526
 527         :param keyword: Keyword to process.
 528         :type keyword: Keyword
 529         :returns: Nothing.
 530         """
 531         pass
 532
 533     def visit_test_kw(self, test_kw):
 534         """Implements traversing through the test keyword and its child
 535         keywords.
 536
 537         :param test_kw: Keyword to process.
 538         :type test_kw: Keyword
 539         :returns: Nothing.
 540         """
 541         for keyword in test_kw.keywords:
 542             if self.start_test_kw(keyword) is not False:
 543                 self.visit_test_kw(keyword)
 544                 self.end_test_kw(keyword)
 545
 546     def start_test_kw(self, test_kw):
 547         """Called when test keyword starts. Default implementation does
 548         nothing.
 549
 550         :param test_kw: Keyword to process.
 551         :type test_kw: Keyword
 552         :returns: Nothing.
 553         """
 554         if test_kw.name.count("Show Runtime Counters On All Duts"):
 555             self._lookup_kw_nr += 1
 556             self._show_run_lookup_nr = 0
 557             self._msg_type = "test-show-runtime"
 558             test_kw.messages.visit(self)
 559
 560     def end_test_kw(self, test_kw):
 561         """Called when keyword ends. Default implementation does nothing.
 562
 563         :param test_kw: Keyword to process.
 564         :type test_kw: Keyword
 565         :returns: Nothing.
 566         """
 567         pass
 568
 569     def visit_setup_kw(self, setup_kw):
 570         """Implements traversing through the teardown keyword and its child
 571         keywords.
 572
 573         :param setup_kw: Keyword to process.
 574         :type setup_kw: Keyword
 575         :returns: Nothing.
 576         """
 577         for keyword in setup_kw.keywords:
 578             if self.start_setup_kw(keyword) is not False:
 579                 self.visit_setup_kw(keyword)
 580                 self.end_setup_kw(keyword)
 581
 582     def start_setup_kw(self, setup_kw):
 583         """Called when teardown keyword starts. Default implementation does
 584         nothing.
 585
 586         :param setup_kw: Keyword to process.
 587         :type setup_kw: Keyword
 588         :returns: Nothing.
 589         """
 590         if setup_kw.name.count("Show Vpp Version On All Duts") \
 591                 and not self._version:
 592             self._msg_type = "setup-version"
 593             setup_kw.messages.visit(self)
 594
 595     def end_setup_kw(self, setup_kw):
 596         """Called when keyword ends. Default implementation does nothing.
 597
 598         :param setup_kw: Keyword to process.
 599         :type setup_kw: Keyword
 600         :returns: Nothing.
 601         """
 602         pass
 603
 604     def visit_teardown_kw(self, teardown_kw):
 605         """Implements traversing through the teardown keyword and its child
 606         keywords.
 607
 608         :param teardown_kw: Keyword to process.
 609         :type teardown_kw: Keyword
 610         :returns: Nothing.
 611         """
 612         for keyword in teardown_kw.keywords:
 613             if self.start_teardown_kw(keyword) is not False:
 614                 self.visit_teardown_kw(keyword)
 615                 self.end_teardown_kw(keyword)
 616
 617     def start_teardown_kw(self, teardown_kw):
 618         """Called when teardown keyword starts. Default implementation does
 619         nothing.
 620
 621         :param teardown_kw: Keyword to process.
 622         :type teardown_kw: Keyword
 623         :returns: Nothing.
 624         """
 625
 626         if teardown_kw.name.count("Show Vat History On All Duts"):
 627             self._vat_history_lookup_nr = 0
 628             self._msg_type = "teardown-vat-history"
 629             teardown_kw.messages.visit(self)
 630
 631     def end_teardown_kw(self, teardown_kw):
 632         """Called when keyword ends. Default implementation does nothing.
 633
 634         :param teardown_kw: Keyword to process.
 635         :type teardown_kw: Keyword
 636         :returns: Nothing.
 637         """
 638         pass
 639
 640     def visit_message(self, msg):
 641         """Implements visiting the message.
 642
 643         :param msg: Message to process.
 644         :type msg: Message
 645         :returns: Nothing.
 646         """
 647         if self.start_message(msg) is not False:
 648             self.end_message(msg)
 649
 650     def start_message(self, msg):
 651         """Called when message starts. Get required information from messages:
 652         - VPP version.
 653
 654         :param msg: Message to process.
 655         :type msg: Message
 656         :returns: Nothing.
 657         """
 658
 659         if self._msg_type:
 660             self.parse_msg[self._msg_type](msg)
 661
 662     def end_message(self, msg):
 663         """Called when message ends. Default implementation does nothing.
 664
 665         :param msg: Message to process.
 666         :type msg: Message
 667         :returns: Nothing.
 668         """
 669         pass
 670
 671
 672 class InputData(object):
 673     """Input data
 674
 675     The data is extracted from output.xml files generated by Jenkins jobs and
 676     stored in pandas' DataFrames.
 677
 678     The data structure:
 679     - job name
 680       - build number
 681         - metadata
 682           - job
 683           - build
 684           - vpp version
 685         - suites
 686         - tests
 687           - ID: test data (as described in ExecutionChecker documentation)
 688     """
 689
 690     def __init__(self, spec):
 691         """Initialization.
 692
 693         :param spec: Specification.
 694         :type spec: Specification
 695         """
 696
 697         # Specification:
 698         self._cfg = spec
 699
 700         # Data store:
 701         self._input_data = None
 702
 703     @property
 704     def data(self):
 705         """Getter - Input data.
 706
 707         :returns: Input data
 708         :rtype: pandas.Series
 709         """
 710         return self._input_data
 711
 712     def metadata(self, job, build):
 713         """Getter - metadata
 714
 715         :param job: Job which metadata we want.
 716         :param build: Build which metadata we want.
 717         :type job: str
 718         :type build: str
 719         :returns: Metadata
 720         :rtype: pandas.Series
 721         """
 722
 723         return self.data[job][build]["metadata"]
 724
 725     def suites(self, job, build):
 726         """Getter - suites
 727
 728         :param job: Job which suites we want.
 729         :param build: Build which suites we want.
 730         :type job: str
 731         :type build: str
 732         :returns: Suites.
 733         :rtype: pandas.Series
 734         """
 735
 736         return self.data[job][str(build)]["suites"]
 737
 738     def tests(self, job, build):
 739         """Getter - tests
 740
 741         :param job: Job which tests we want.
 742         :param build: Build which tests we want.
 743         :type job: str
 744         :type build: str
 745         :returns: Tests.
 746         :rtype: pandas.Series
 747         """
 748
 749         return self.data[job][build]["tests"]
 750
 751     @staticmethod
 752     def _parse_tests(job, build):
 753         """Process data from robot output.xml file and return JSON structured
 754         data.
 755
 756         :param job: The name of job which build output data will be processed.
 757         :param build: The build which output data will be processed.
 758         :type job: str
 759         :type build: dict
 760         :returns: JSON data structure.
 761         :rtype: dict
 762         """
 763
 764         metadata = {
 765             "job": job,
 766             "build": build
 767         }
 768
 769         with open(build["file-name"], 'r') as data_file:
 770             try:
 771                 result = ExecutionResult(data_file)
 772             except errors.DataError as err:
 773                 logging.error("Error occurred while parsing output.xml: {0}".
 774                               format(err))
 775                 return None
 776         checker = ExecutionChecker(metadata)
 777         result.visit(checker)
 778
 779         return checker.data
 780
    def download_and_parse_data(self):
        """Download the input data files, parse input data from input files and
        store in pandas' Series.

        For every job and build listed in the specification the data file is
        downloaded, parsed and removed again. Builds which cannot be
        downloaded or parsed are logged and skipped. The result is stored in
        the instance as nested Series: job -> build (as a string) ->
        "metadata" / "suites" / "tests".

        :returns: Nothing.
        """

        logging.info("Downloading and parsing input files ...")

        job_data = dict()
        for job, builds in self._cfg.builds.items():
            logging.info("  Processing data from the job '{0}' ...".
                         format(job))
            builds_data = dict()
            for build in builds:
                logging.info("    Processing the build '{0}'".
                             format(build["build"]))
                # Mark the build as failed up front; the state is changed to
                # "processed" only after the file is downloaded and parsed.
                self._cfg.set_input_state(job, build["build"], "failed")
                if not download_and_unzip_data_file(self._cfg, job, build):
                    logging.error("It is not possible to download the input "
                                  "data file from the job '{job}', build "
                                  "'{build}', or it is damaged. Skipped.".
                                  format(job=job, build=build["build"]))
                    continue

                logging.info("      Processing data from the build '{0}' ...".
                             format(build["build"]))
                # _parse_tests returns None when the file cannot be parsed.
                data = InputData._parse_tests(job, build)
                if data is None:
                    logging.error("Input data file from the job '{job}', build "
                                  "'{build}' is damaged. Skipped.".
                                  format(job=job, build=build["build"]))
                    continue

                self._cfg.set_input_state(job, build["build"], "processed")

                # The file is not needed any more; failing to remove it is
                # only logged.
                try:
                    remove(build["file-name"])
                except OSError as err:
                    logging.error("Cannot remove the file '{0}': {1}".
                                  format(build["file-name"], err))

                build_data = pd.Series({
                    "metadata": pd.Series(data["metadata"].values(),
                                          index=data["metadata"].keys()),
                    "suites": pd.Series(data["suites"].values(),
                                        index=data["suites"].keys()),
                    "tests": pd.Series(data["tests"].values(),
                                       index=data["tests"].keys())})
                # Builds are keyed by the string form of the build number.
                builds_data[str(build["build"])] = build_data
                build["status"] = "processed"
                logging.info("    Done.")

            job_data[job] = pd.Series(builds_data.values(),
                                      index=builds_data.keys())
            logging.info("  Done.")

        self._input_data = pd.Series(job_data.values(), index=job_data.keys())
        logging.info("Done.")
 838
 839     @staticmethod
 840     def _end_of_tag(tag_filter, start=0, closer="'"):
 841         """Return the index of character in the string which is the end of tag.
 842
 843         :param tag_filter: The string where the end of tag is being searched.
 844         :param start: The index where the searching is stated.
 845         :param closer: The character which is the tag closer.
 846         :type tag_filter: str
 847         :type start: int
 848         :type closer: str
 849         :returns: The index of the tag closer.
 850         :rtype: int
 851         """
 852
 853         try:
 854             idx_opener = tag_filter.index(closer, start)
 855             return tag_filter.index(closer, idx_opener + 1)
 856         except ValueError:
 857             return None
 858
 859     @staticmethod
 860     def _condition(tag_filter):
 861         """Create a conditional statement from the given tag filter.
 862
 863         :param tag_filter: Filter based on tags from the element specification.
 864         :type tag_filter: str
 865         :returns: Conditional statement which can be evaluated.
 866         :rtype: str
 867         """
 868
 869         index = 0
 870         while True:
 871             index = InputData._end_of_tag(tag_filter, index)
 872             if index is None:
 873                 return tag_filter
 874             index += 1
 875             tag_filter = tag_filter[:index] + " in tags" + tag_filter[index:]
 876
    def filter_data(self, element, params=None, data_set="tests",
                    continue_on_error=False):
        """Filter required data from the given jobs and builds.

        The items of the chosen data set are selected by the tag filter
        defined in the element specification (element["filter"]). The filters
        "all" and "template" select every item; any other filter is converted
        to a Python expression and evaluated against the tags of each item.

        The output data structure is:

        - job 1
          - build 1
            - test (suite) 1 ID:
              - param 1
              - param 2
              ...
              - param n
            ...
            - test (suite) n ID:
            ...
          ...
          - build n
        ...
        - job n

        :param element: Element which will use the filtered data. The keys
        "filter" and "data" are read from it; "type" and "title" are used for
        logging and "parameters" as the fallback for params.
        :param params: Parameters which will be included in the output. If
        None, element["parameters"] is used; if that is not set either, all
        parameters are included. A requested parameter which is missing in an
        item is set to "No Data".
        :param data_set: The set of data to be filtered: tests, suites,
        metadata.
        :param continue_on_error: If True, a build whose data cannot be read
        is skipped and its item in the output stays empty. If False, None is
        returned in that case.
        :type element: pandas.Series
        :type params: list
        :type data_set: str
        :type continue_on_error: bool
        :returns: Filtered data, or None if the filter is not defined or not
        correct, if a mandatory parameter is missing in the element
        specification, or if the data cannot be read and continue_on_error is
        False.
        :rtype: pandas.Series
        """

        logging.info("    Creating the data set for the {0} '{1}'.".
                     format(element.get("type", ""), element.get("title", "")))

        # Build the conditional statement used to select the items.
        try:
            if element["filter"] in ("all", "template"):
                # No filtering, every item is selected.
                cond = "True"
            else:
                cond = InputData._condition(element["filter"])
            logging.debug("   Filter: {0}".format(cond))
        except KeyError:
            logging.error("  No filter defined.")
            return None

        # Explicitly given params take precedence over those defined in the
        # element specification.
        if params is None:
            params = element.get("parameters", None)

        data = pd.Series()
        try:
            for job, builds in element["data"].items():
                data[job] = pd.Series()
                for build in builds:
                    # The item for the build is created before the data is
                    # read, so a skipped build stays in the output as an empty
                    # Series.
                    data[job][str(build)] = pd.Series()
                    try:
                        # NOTE(review): Series.iteritems() was removed in
                        # pandas 2.0 - TODO switch to items() when pandas is
                        # upgraded.
                        data_iter = self.data[job][str(build)][data_set].\
                            iteritems()
                    except KeyError:
                        # The job, the build or the data set is not present in
                        # the input data.
                        if continue_on_error:
                            continue
                        else:
                            return None
                    for test_ID, test_data in data_iter:
                        # NOTE(review): eval() executes the expression built
                        # from the element specification; only the name "tags"
                        # is supplied explicitly, but the builtins stay
                        # reachable. This is safe only while the specification
                        # is a trusted input - confirm.
                        if eval(cond, {"tags": test_data.get("tags", "")}):
                            data[job][str(build)][test_ID] = pd.Series()
                            if params is None:
                                # No parameters requested, copy all of them.
                                for param, val in test_data.items():
                                    data[job][str(build)][test_ID][param] = val
                            else:
                                for param in params:
                                    try:
                                        data[job][str(build)][test_ID][param] =\
                                            test_data[param]
                                    except KeyError:
                                        # Placeholder for a requested parameter
                                        # which the item does not include.
                                        data[job][str(build)][test_ID][param] =\
                                            "No Data"
            return data

        except (KeyError, IndexError, ValueError) as err:
            logging.error("   Missing mandatory parameter in the element "
                          "specification: {0}".format(err))
            return None
        except AttributeError:
            # NOTE(review): swallowed without any log message; presumably
            # raised when an item of the data set does not provide get() or
            # items() - confirm and consider logging it.
            return None
        except SyntaxError:
            # Raised by eval() when the conditional statement is not valid.
            logging.error("   The filter '{0}' is not correct. Check if all "
                          "tags are enclosed by apostrophes.".format(cond))
            return None
 969
 970     @staticmethod
 971     def merge_data(data):
 972         """Merge data from more jobs and builds to a simple data structure.
 973
 974         The output data structure is:
 975
 976         - test (suite) 1 ID:
 977           - param 1
 978           - param 2
 979           ...
 980           - param n
 981         ...
 982         - test (suite) n ID:
 983         ...
 984
 985         :param data: Data to merge.
 986         :type data: pandas.Series
 987         :returns: Merged data.
 988         :rtype: pandas.Series
 989         """
 990
 991         logging.info("    Merging data ...")
 992
 993         merged_data = pd.Series()
 994         for _, builds in data.iteritems():
 995             for _, item in builds.iteritems():
 996                 for ID, item_data in item.iteritems():
 997                     merged_data[ID] = item_data
 998
 999         return merged_data