1 # Copyright (c) 2018 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
14 """Data pre-processing
16 - extract data from output.xml files generated by Jenkins jobs and store in
18 - provide access to the data.
24 import xml.etree.ElementTree as ET
26 from robot.api import ExecutionResult, ResultVisitor
27 from robot import errors
28 from collections import OrderedDict
29 from string import replace
32 from input_data_files import download_and_unzip_data_file
class ExecutionChecker(ResultVisitor):
    """Traverse the Robot Framework test-suite structure and extract data.

    Visiting suites, tests, keywords and messages of a parsed output.xml,
    this class builds a json-like structure with three top-level keys:

    - "metadata": e.g. VPP version, Jenkins job name, build information;
    - "suites": per-suite name, documentation, parent and level of the suite
      in the suite hierarchy;
    - "tests": per-test name, parent, documentation, message, tags and,
      depending on the test type ("NDR" | "PDR" | "TCP" | "MRR"):
      throughput (value + unit: "pps" | "bps" | "percentage"), latency
      ("direction1"/"direction2" with "100" sub-key, plus "50" and "10"
      for NDR only), "lossTolerance" (PDR only), "vat-history"
      (DUT1 and DUT2 VAT history), "show-run" and "status"
      ("PASS" | "FAIL").

    .. note:: ID is the lowercase full path to the test.
    """

    # FINAL_RATE line in the test message: captures value and unit.
    REGEX_RATE = re.compile(r'^[\D\d]*FINAL_RATE:\s(\d+\.\d+)\s(\w+)')

    # Three LAT_*%NDR lines, each a pair of 'min/avg/max' triplets
    # (one triplet per direction).
    REGEX_LAT_NDR = re.compile(r'^[\D\d]*'
                               r'LAT_\d+%NDR:\s\[\'(-?\d+\/-?\d+/-?\d+)\','
                               r'\s\'(-?\d+/-?\d+/-?\d+)\'\]\s\n'
                               r'LAT_\d+%NDR:\s\[\'(-?\d+/-?\d+/-?\d+)\','
                               r'\s\'(-?\d+/-?\d+/-?\d+)\'\]\s\n'
                               r'LAT_\d+%NDR:\s\[\'(-?\d+/-?\d+/-?\d+)\','
                               r'\s\'(-?\d+/-?\d+/-?\d+)\'\]')

    # One LAT_*%PDR line with a pair of 'min/avg/max' triplets.
    REGEX_LAT_PDR = re.compile(r'^[\D\d]*'
                               r'LAT_\d+%PDR:\s\[\'(-?\d+/-?\d+/-?\d+)\','
                               r'\s\'(-?\d+/-?\d+/-?\d+)\'\][\D\d]*')

    # Loss tolerance of PDR tests.
    # NOTE(review): this re.compile(...) call appears truncated in this
    # excerpt — the closing part of the pattern is not visible here;
    # confirm against the full source.
    REGEX_TOLERANCE = re.compile(r'^[\D\d]*LOSS_ACCEPTANCE:\s(\d*\.\d*)\s'

    # VPP version as reported on stdout; group 2 is the version text.
    REGEX_VERSION = re.compile(r"(return STDOUT Version:\s*)(.*)")

    # TCP test results: metric name (rps|cps|throughput) and its value.
    REGEX_TCP = re.compile(r'Total\s(rps|cps|throughput):\s([0-9]*).*$')

    # MRR results: measurement duration [s], tx and rx packet counts.
    REGEX_MRR = re.compile(r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s'
                           r'tx\s(\d*),\srx\s(\d*)')
    def __init__(self, metadata):
        """Initialisation.

        :param metadata: Key-value pairs to be included in the "metadata"
            part of the generated data structure.
        :type metadata: dict
        """

        # Type of message to parse out from the test messages
        self._msg_type = None

        # Number of VAT History messages found:
        # 1 - VAT History of DUT1
        # 2 - VAT History of DUT2
        self._lookup_kw_nr = 0
        self._vat_history_lookup_nr = 0

        # Number of Show Running messages found
        # 1 - Show run message found
        self._show_run_lookup_nr = 0

        # Test ID of currently processed test- the lowercase full path to the

        # The main data structure
        # NOTE(review): the "self._data = {" opener and the closing brace of
        # this literal are not visible in this excerpt; confirm against the
        # full source.
            "metadata": OrderedDict(),
            "suites": OrderedDict(),
            "tests": OrderedDict()

        # Save the provided metadata
        for key, val in metadata.items():
            self._data["metadata"][key] = val

        # Dictionary defining the methods used to parse different types of
        # messages; the keys match the self._msg_type values set by the
        # start_*_kw hooks below.
            "setup-version": self._get_version,
            "teardown-vat-history": self._get_vat_history,
            "test-show-runtime": self._get_show_run
235 """Getter - Data parsed from the XML file.
237 :returns: Data parsed from the XML file.
242 def _get_version(self, msg):
243 """Called when extraction of VPP version is required.
245 :param msg: Message to process.
250 if msg.message.count("return STDOUT Version:"):
251 self._version = str(re.search(self.REGEX_VERSION, msg.message).
253 self._data["metadata"]["version"] = self._version
254 self._msg_type = None
256 def _get_vat_history(self, msg):
257 """Called when extraction of VAT command history is required.
259 :param msg: Message to process.
263 if msg.message.count("VAT command history:"):
264 self._vat_history_lookup_nr += 1
265 if self._vat_history_lookup_nr == 1:
266 self._data["tests"][self._test_ID]["vat-history"] = str()
268 self._msg_type = None
269 text = re.sub("[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3} "
270 "VAT command history:", "", msg.message, count=1). \
271 replace("\n\n", "\n").replace('\n', ' |br| ').\
272 replace('\r', '').replace('"', "'")
274 self._data["tests"][self._test_ID]["vat-history"] += " |br| "
275 self._data["tests"][self._test_ID]["vat-history"] += \
276 "**DUT" + str(self._vat_history_lookup_nr) + ":** " + text
278 def _get_show_run(self, msg):
279 """Called when extraction of VPP operational data (output of CLI command
280 Show Runtime) is required.
282 :param msg: Message to process.
286 if msg.message.count("return STDOUT Thread "):
287 self._show_run_lookup_nr += 1
288 if self._lookup_kw_nr == 1 and self._show_run_lookup_nr == 1:
289 self._data["tests"][self._test_ID]["show-run"] = str()
290 if self._lookup_kw_nr > 1:
291 self._msg_type = None
292 if self._show_run_lookup_nr == 1:
293 text = msg.message.replace("vat# ", "").\
294 replace("return STDOUT ", "").replace("\n\n", "\n").\
295 replace('\n', ' |br| ').\
296 replace('\r', '').replace('"', "'")
298 self._data["tests"][self._test_ID]["show-run"] += " |br| "
299 self._data["tests"][self._test_ID]["show-run"] += \
300 "**DUT" + str(self._lookup_kw_nr) + ":** |br| " + text
    def _get_latency(self, msg, test_type):
        """Get the latency data from the test message.

        :param msg: Message to be parsed.
        :param test_type: Type of the test - NDR or PDR.
        :type msg: str
        :type test_type: str
        :returns: Latencies parsed from the message.
        :rtype: dict
        """

        if test_type == "NDR":
            groups = re.search(self.REGEX_LAT_NDR, msg)
            # NDR messages carry three min/avg/max pairs (100%, 50%, 10%).
            groups_range = range(1, 7)
        elif test_type == "PDR":
            groups = re.search(self.REGEX_LAT_PDR, msg)
            # PDR messages carry a single min/avg/max pair.
            groups_range = range(1, 3)

        # NOTE(review): lines are missing from this excerpt below — the
        # "latencies" list initialisation, the "try:" opener, the body of
        # the except branch and the "latency" dict initialisation are not
        # visible; confirm against the full source.
        for idx in groups_range:
                # Each group is a 'min/avg/max' triplet of ints.
                lat = [int(item) for item in str(groups.group(idx)).split('/')]
            except (AttributeError, ValueError):
            latencies.append(lat)

        keys = ("min", "avg", "max")
        latency["direction1"]["100"] = dict(zip(keys, latencies[0]))
        latency["direction2"]["100"] = dict(zip(keys, latencies[1]))
        if test_type == "NDR":
            latency["direction1"]["50"] = dict(zip(keys, latencies[2]))
            latency["direction2"]["50"] = dict(zip(keys, latencies[3]))
            latency["direction1"]["10"] = dict(zip(keys, latencies[4]))
            latency["direction2"]["10"] = dict(zip(keys, latencies[5]))
350 def visit_suite(self, suite):
351 """Implements traversing through the suite and its direct children.
353 :param suite: Suite to process.
357 if self.start_suite(suite) is not False:
358 suite.suites.visit(self)
359 suite.tests.visit(self)
360 self.end_suite(suite)
362 def start_suite(self, suite):
363 """Called when suite starts.
365 :param suite: Suite to process.
371 parent_name = suite.parent.name
372 except AttributeError:
375 doc_str = suite.doc.replace('"', "'").replace('\n', ' ').\
376 replace('\r', '').replace('*[', ' |br| *[').replace("*", "**")
377 doc_str = replace(doc_str, ' |br| *[', '*[', maxreplace=1)
379 self._data["suites"][suite.longname.lower().replace('"', "'").
380 replace(" ", "_")] = {
381 "name": suite.name.lower(),
383 "parent": parent_name,
384 "level": len(suite.longname.split("."))
387 suite.keywords.visit(self)
389 def end_suite(self, suite):
390 """Called when suite ends.
392 :param suite: Suite to process.
398 def visit_test(self, test):
399 """Implements traversing through the test.
401 :param test: Test to process.
405 if self.start_test(test) is not False:
406 test.keywords.visit(self)
409 def start_test(self, test):
410 """Called when test starts.
412 :param test: Test to process.
417 tags = [str(tag) for tag in test.tags]
419 test_result["name"] = test.name.lower()
420 test_result["parent"] = test.parent.name.lower()
421 test_result["tags"] = tags
422 doc_str = test.doc.replace('"', "'").replace('\n', ' '). \
423 replace('\r', '').replace('[', ' |br| [')
424 test_result["doc"] = replace(doc_str, ' |br| [', '[', maxreplace=1)
425 test_result["msg"] = test.message.replace('\n', ' |br| '). \
426 replace('\r', '').replace('"', "'")
427 if test.status == "PASS" and ("NDRPDRDISC" in tags or
430 if "NDRDISC" in tags:
432 elif "PDRDISC" in tags:
441 test_result["type"] = test_type
443 if test_type in ("NDR", "PDR"):
445 rate_value = str(re.search(
446 self.REGEX_RATE, test.message).group(1))
447 except AttributeError:
450 rate_unit = str(re.search(
451 self.REGEX_RATE, test.message).group(2))
452 except AttributeError:
455 test_result["throughput"] = dict()
456 test_result["throughput"]["value"] = \
457 int(rate_value.split('.')[0])
458 test_result["throughput"]["unit"] = rate_unit
459 test_result["latency"] = \
460 self._get_latency(test.message, test_type)
461 if test_type == "PDR":
462 test_result["lossTolerance"] = str(re.search(
463 self.REGEX_TOLERANCE, test.message).group(1))
465 elif test_type in ("TCP", ):
466 groups = re.search(self.REGEX_TCP, test.message)
467 test_result["result"] = dict()
468 test_result["result"]["value"] = int(groups.group(2))
469 test_result["result"]["unit"] = groups.group(1)
470 elif test_type in ("MRR", ):
471 groups = re.search(self.REGEX_MRR, test.message)
472 test_result["result"] = dict()
473 test_result["result"]["duration"] = int(groups.group(1))
474 test_result["result"]["tx"] = int(groups.group(2))
475 test_result["result"]["rx"] = int(groups.group(3))
476 test_result["result"]["throughput"] = int(
477 test_result["result"]["rx"] /
478 test_result["result"]["duration"])
480 test_result["status"] = test.status
482 self._test_ID = test.longname.lower()
483 self._data["tests"][self._test_ID] = test_result
485 def end_test(self, test):
486 """Called when test ends.
488 :param test: Test to process.
494 def visit_keyword(self, keyword):
495 """Implements traversing through the keyword and its child keywords.
497 :param keyword: Keyword to process.
498 :type keyword: Keyword
501 if self.start_keyword(keyword) is not False:
502 self.end_keyword(keyword)
504 def start_keyword(self, keyword):
505 """Called when keyword starts. Default implementation does nothing.
507 :param keyword: Keyword to process.
508 :type keyword: Keyword
512 if keyword.type == "setup":
513 self.visit_setup_kw(keyword)
514 elif keyword.type == "teardown":
515 self._lookup_kw_nr = 0
516 self.visit_teardown_kw(keyword)
518 self._lookup_kw_nr = 0
519 self.visit_test_kw(keyword)
520 except AttributeError:
523 def end_keyword(self, keyword):
524 """Called when keyword ends. Default implementation does nothing.
526 :param keyword: Keyword to process.
527 :type keyword: Keyword
532 def visit_test_kw(self, test_kw):
533 """Implements traversing through the test keyword and its child
536 :param test_kw: Keyword to process.
537 :type test_kw: Keyword
540 for keyword in test_kw.keywords:
541 if self.start_test_kw(keyword) is not False:
542 self.visit_test_kw(keyword)
543 self.end_test_kw(keyword)
545 def start_test_kw(self, test_kw):
546 """Called when test keyword starts. Default implementation does
549 :param test_kw: Keyword to process.
550 :type test_kw: Keyword
553 if test_kw.name.count("Show Runtime Counters On All Duts"):
554 self._lookup_kw_nr += 1
555 self._show_run_lookup_nr = 0
556 self._msg_type = "test-show-runtime"
557 test_kw.messages.visit(self)
559 def end_test_kw(self, test_kw):
560 """Called when keyword ends. Default implementation does nothing.
562 :param test_kw: Keyword to process.
563 :type test_kw: Keyword
568 def visit_setup_kw(self, setup_kw):
569 """Implements traversing through the teardown keyword and its child
572 :param setup_kw: Keyword to process.
573 :type setup_kw: Keyword
576 for keyword in setup_kw.keywords:
577 if self.start_setup_kw(keyword) is not False:
578 self.visit_setup_kw(keyword)
579 self.end_setup_kw(keyword)
581 def start_setup_kw(self, setup_kw):
582 """Called when teardown keyword starts. Default implementation does
585 :param setup_kw: Keyword to process.
586 :type setup_kw: Keyword
589 if setup_kw.name.count("Show Vpp Version On All Duts") \
590 and not self._version:
591 self._msg_type = "setup-version"
592 setup_kw.messages.visit(self)
594 def end_setup_kw(self, setup_kw):
595 """Called when keyword ends. Default implementation does nothing.
597 :param setup_kw: Keyword to process.
598 :type setup_kw: Keyword
603 def visit_teardown_kw(self, teardown_kw):
604 """Implements traversing through the teardown keyword and its child
607 :param teardown_kw: Keyword to process.
608 :type teardown_kw: Keyword
611 for keyword in teardown_kw.keywords:
612 if self.start_teardown_kw(keyword) is not False:
613 self.visit_teardown_kw(keyword)
614 self.end_teardown_kw(keyword)
616 def start_teardown_kw(self, teardown_kw):
617 """Called when teardown keyword starts. Default implementation does
620 :param teardown_kw: Keyword to process.
621 :type teardown_kw: Keyword
625 if teardown_kw.name.count("Show Vat History On All Duts"):
626 self._vat_history_lookup_nr = 0
627 self._msg_type = "teardown-vat-history"
628 teardown_kw.messages.visit(self)
630 def end_teardown_kw(self, teardown_kw):
631 """Called when keyword ends. Default implementation does nothing.
633 :param teardown_kw: Keyword to process.
634 :type teardown_kw: Keyword
639 def visit_message(self, msg):
640 """Implements visiting the message.
642 :param msg: Message to process.
646 if self.start_message(msg) is not False:
647 self.end_message(msg)
649 def start_message(self, msg):
650 """Called when message starts. Get required information from messages:
653 :param msg: Message to process.
659 self.parse_msg[self._msg_type](msg)
661 def end_message(self, msg):
662 """Called when message ends. Default implementation does nothing.
664 :param msg: Message to process.
class InputData(object):
    """Input data for the presentation layer.

    The data is extracted from output.xml files generated by Jenkins jobs and
    stored in pandas' DataFrames.

    The structure is a pandas.Series indexed by job, then by build, then by
    "metadata" / "suites" / "tests", where each test entry is keyed by:

    - ID: test data (as described in ExecutionChecker documentation)
    """
689 def __init__(self, spec):
692 :param spec: Specification.
693 :type spec: Specification
700 self._input_data = None
704 """Getter - Input data.
707 :rtype: pandas.Series
709 return self._input_data
711 def metadata(self, job, build):
714 :param job: Job which metadata we want.
715 :param build: Build which metadata we want.
719 :rtype: pandas.Series
722 return self.data[job][build]["metadata"]
724 def suites(self, job, build):
727 :param job: Job which suites we want.
728 :param build: Build which suites we want.
732 :rtype: pandas.Series
735 return self.data[job][str(build)]["suites"]
737 def tests(self, job, build):
740 :param job: Job which tests we want.
741 :param build: Build which tests we want.
745 :rtype: pandas.Series
748 return self.data[job][build]["tests"]
    def _parse_tests(job, build, get_timestamp=False):
        """Process data from robot output.xml file and return JSON structured
        data.

        :param job: The name of job which build output data will be processed.
        :param build: The build which output data will be processed.
        :param get_timestamp: If True, timestamp is read form the xml source
            file.
        :type job: str
        :type build: dict
        :type get_timestamp: bool
        :returns: JSON data structure.
        :rtype: dict
        """
        # NOTE(review): lines are missing from this excerpt — the metadata
        # dict initialisation and the conditional/try openers around the
        # blocks below are not visible; confirm against the full source.

        # Read the "generated" timestamp straight from the XML root element.
        tree = ET.parse(build["file-name"])
        root = tree.getroot()
        metadata["generated"] = root.attrib["generated"]

        with open(build["file-name"], 'r') as data_file:
                result = ExecutionResult(data_file)
            except errors.DataError as err:
                logging.error("Error occurred while parsing output.xml: {0}".
        # Traverse the parsed results and extract the data structure.
        checker = ExecutionChecker(metadata)
        result.visit(checker)
    def download_and_parse_data(self, get_timestamp=False):
        """Download the input data files, parse input data from input files and
        store in pandas' Series.

        :param get_timestamp: If True, timestamp is read form the xml source
            file.
        :type get_timestamp: bool
        """

        logging.info("Downloading and parsing input files ...")

        # NOTE(review): lines are missing from this excerpt — the job_data /
        # builds_data initialisation, the per-build loop header, "continue"
        # statements after the error branches and the try/except openers are
        # not visible; confirm against the full source.
        for job, builds in self._cfg.builds.items():
            logging.info("  Processing data from the job '{0}' ...'".
                logging.info("  Processing the build '{0}'".
                             format(build["build"]))
                # Mark the build as failed up front; it is flipped to
                # "processed" only after successful parsing.
                self._cfg.set_input_state(job, build["build"], "failed")
                if not download_and_unzip_data_file(self._cfg, job, build):
                    logging.error("It is not possible to download the input "
                                  "data file from the job '{job}', build "
                                  "'{build}', or it is damaged. Skipped.".
                                  format(job=job, build=build["build"]))
                logging.info("  Processing data from the build '{0}' ...".
                             format(build["build"]))
                data = InputData._parse_tests(job, build,
                                              get_timestamp=get_timestamp)
                logging.error("Input data file from the job '{job}', build "
                              "'{build}' is damaged. Skipped.".
                              format(job=job, build=build["build"]))
                self._cfg.set_input_state(job, build["build"], "processed")
                    # Best-effort cleanup of the downloaded file.
                    remove(build["file-name"])
                except OSError as err:
                    logging.error("Cannot remove the file '{0}': {1}".
                                  format(build["file-name"], err))
                # Wrap the parsed build data in pandas' Series, keyed by the
                # build number as a string.
                build_data = pd.Series({
                    "metadata": pd.Series(data["metadata"].values(),
                                          index=data["metadata"].keys()),
                    "suites": pd.Series(data["suites"].values(),
                                        index=data["suites"].keys()),
                    "tests": pd.Series(data["tests"].values(),
                                       index=data["tests"].keys())})
                builds_data[str(build["build"])] = build_data
                build["status"] = "processed"
                logging.info("  Done.")

            job_data[job] = pd.Series(builds_data.values(),
                                      index=builds_data.keys())
            logging.info("  Done.")

        self._input_data = pd.Series(job_data.values(), index=job_data.keys())
        logging.info("Done.")
851 def _end_of_tag(tag_filter, start=0, closer="'"):
852 """Return the index of character in the string which is the end of tag.
854 :param tag_filter: The string where the end of tag is being searched.
855 :param start: The index where the searching is stated.
856 :param closer: The character which is the tag closer.
857 :type tag_filter: str
860 :returns: The index of the tag closer.
865 idx_opener = tag_filter.index(closer, start)
866 return tag_filter.index(closer, idx_opener + 1)
    def _condition(tag_filter):
        """Create a conditional statement from the given tag filter.

        :param tag_filter: Filter based on tags from the element specification.
        :type tag_filter: str
        :returns: Conditional statement which can be evaluated.
        :rtype: str
        """
        # NOTE(review): lines are missing from this excerpt — the index
        # initialisation and the loop header/termination are not visible;
        # confirm against the full source.
            index = InputData._end_of_tag(tag_filter, index)
            # Turn each quoted tag into a membership test on "tags" so the
            # whole filter can be eval()-ed against a tag list.
            tag_filter = tag_filter[:index] + " in tags" + tag_filter[index:]
    def filter_data(self, element, params=None, data_set="tests",
                    continue_on_error=False):
        """Filter required data from the given jobs and builds.

        The output data structure is a pandas.Series indexed by job, then by
        build, then by test ID, holding the selected parameters of each test
        matching the tag filter.

        :param element: Element which will use the filtered data.
        :param params: Parameters which will be included in the output. If None,
            all parameters are included.
        :param data_set: The set of data to be filtered: tests, suites,
            metadata.
        :param continue_on_error: Continue if there is error while reading the
            data. The Item will be empty then
        :type element: pandas.Series
        :type params: list
        :type data_set: str
        :type continue_on_error: bool
        :returns: Filtered data.
        :rtype: pandas.Series
        """
        # NOTE(review): lines are missing from this excerpt — the "data"
        # initialisation, the per-build loop header, the else branches, the
        # parameter-selection branches and the return statements are not
        # visible; confirm against the full source.

        logging.info("    Creating the data set for the {0} '{1}'.".
                     format(element.get("type", ""), element.get("title", "")))
        if element["filter"] in ("all", "template"):
            cond = InputData._condition(element["filter"])
            logging.debug("   Filter: {0}".format(cond))
            logging.error("   No filter defined.")

        params = element.get("parameters", None)

        for job, builds in element["data"].items():
            data[job] = pd.Series()
            data[job][str(build)] = pd.Series()
            data_iter = self.data[job][str(build)][data_set].\
            if continue_on_error:
            # HACK: eval() on a specification-provided filter string. The
            # globals are restricted to "tags", but this still executes
            # arbitrary expressions from the specification file — only use
            # with trusted specifications.
            for test_ID, test_data in data_iter:
                if eval(cond, {"tags": test_data.get("tags", "")}):
                    data[job][str(build)][test_ID] = pd.Series()
                    for param, val in test_data.items():
                        data[job][str(build)][test_ID][param] = val
                        data[job][str(build)][test_ID][param] =\
                        data[job][str(build)][test_ID][param] =\
        except (KeyError, IndexError, ValueError) as err:
            logging.error("   Missing mandatory parameter in the element "
                          "specification: {0}".format(err))
        except AttributeError:
            logging.error("   The filter '{0}' is not correct. Check if all "
                          "tags are enclosed by apostrophes.".format(cond))
    def merge_data(data):
        """Merge data from more jobs and builds to a simple data structure.

        The output data structure is a flat pandas.Series keyed by test ID;
        when the same ID occurs in several jobs/builds, the last one seen
        wins.

        :param data: Data to merge.
        :type data: pandas.Series
        :returns: Merged data.
        :rtype: pandas.Series
        """

        logging.info("    Merging data ...")

        merged_data = pd.Series()
        # NOTE(review): the return statement of this method is past the end
        # of this excerpt; confirm against the full source.
        for _, builds in data.iteritems():
            for _, item in builds.iteritems():
                for ID, item_data in item.iteritems():
                    merged_data[ID] = item_data