1 # Copyright (c) 2018 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
"""Data pre-processing.

- extract data from output.xml files generated by Jenkins jobs and store in
  pandas' Series,
- provide access to the data.
"""
24 import xml.etree.ElementTree as ET
26 from robot.api import ExecutionResult, ResultVisitor
27 from robot import errors
28 from collections import OrderedDict
29 from string import replace
32 from input_data_files import download_and_unzip_data_file
class ExecutionChecker(ResultVisitor):
    """Class to traverse through the test suite structure.

    The functionality implemented in this class generates a json structure:

    Performance tests:

    {
        "metadata": {  # Optional
            "version": "VPP version",
            "job": "Jenkins job name",
            "build": "Information about the build"
        },
        "suites": {
            "Suite name 1": {
                "doc": "Suite 1 documentation",
                "parent": "Suite 1 parent",
                "level": "Level of the suite in the suite hierarchy"
            },
            "Suite name N": {
                "doc": "Suite N documentation",
                "parent": "Suite 2 parent",
                "level": "Level of the suite in the suite hierarchy"
            }
        },
        "tests": {
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "tags": ["tag 1", "tag 2", "tag n"],
                "type": "PDR" | "NDR",
                "throughput": {
                    "value": int,
                    "unit": "pps" | "bps" | "percentage"
                },
                "latency": {
                    "direction1": {
                        "100": {"min": int, "avg": int, "max": int},
                        "50": {"min": int, "avg": int, "max": int},  # NDR only
                        "10": {"min": int, "avg": int, "max": int}   # NDR only
                    },
                    "direction2": {
                        "100": {"min": int, "avg": int, "max": int},
                        "50": {"min": int, "avg": int, "max": int},  # NDR only
                        "10": {"min": int, "avg": int, "max": int}   # NDR only
                    }
                },
                "lossTolerance": "lossTolerance",  # Only for PDR
                "vat-history": "DUT1 and DUT2 VAT History",
                "show-run": "Show Run"
            }
        }
    }

    Functional tests:

    {
        "metadata": {  # Optional
            "version": "VPP version",
            "job": "Jenkins job name",
            "build": "Information about the build"
        },
        "suites": {
            "Suite name 1": {
                "doc": "Suite 1 documentation",
                "parent": "Suite 1 parent",
                "level": "Level of the suite in the suite hierarchy"
            },
            "Suite name N": {
                "doc": "Suite N documentation",
                "parent": "Suite 2 parent",
                "level": "Level of the suite in the suite hierarchy"
            }
        },
        "tests": {
            "ID": {
                "name": "Test name",
                "parent": "Name of the parent of the test",
                "doc": "Test documentation",
                "msg": "Test message",
                "tags": ["tag 1", "tag 2", "tag n"],
                "vat-history": "DUT1 and DUT2 VAT History",
                "show-run": "Show Run",
                "status": "PASS" | "FAIL"
            }
        }
    }

    .. note:: ID is the lowercase full path to the test.
    """

    # Patterns applied to every test message; compiled once at class-creation
    # time so the per-message matching stays cheap.
    REGEX_RATE = re.compile(r'^[\D\d]*FINAL_RATE:\s(\d+\.\d+)\s(\w+)')

    REGEX_LAT_NDR = re.compile(r'^[\D\d]*'
                               r'LAT_\d+%NDR:\s\[\'(-?\d+/-?\d+/-?\d+)\','
                               r'\s\'(-?\d+/-?\d+/-?\d+)\'\]\s\n'
                               r'LAT_\d+%NDR:\s\[\'(-?\d+/-?\d+/-?\d+)\','
                               r'\s\'(-?\d+/-?\d+/-?\d+)\'\]\s\n'
                               r'LAT_\d+%NDR:\s\[\'(-?\d+/-?\d+/-?\d+)\','
                               r'\s\'(-?\d+/-?\d+/-?\d+)\'\]')

    REGEX_LAT_PDR = re.compile(r'^[\D\d]*'
                               r'LAT_\d+%PDR:\s\[\'(-?\d+/-?\d+/-?\d+)\','
                               r'\s\'(-?\d+/-?\d+/-?\d+)\'\][\D\d]*')

    # Was truncated in the damaged source; the trailing catch-all keeps the
    # match anchored the same way as the other patterns.
    REGEX_TOLERANCE = re.compile(r'^[\D\d]*LOSS_ACCEPTANCE:\s(\d*\.\d*)\s'
                                 r'[\D\d]*')

    REGEX_VERSION = re.compile(r"(return STDOUT Version:\s*)(.*)")

    REGEX_TCP = re.compile(r'Total\s(rps|cps|throughput):\s([0-9]*).*$')

    REGEX_MRR = re.compile(r'MaxReceivedRate_Results\s\[pkts/(\d*)sec\]:\s'
                           r'tx\s(\d*),\srx\s(\d*)')
184 def __init__(self, metadata):
187 :param metadata: Key-value pairs to be included in "metadata" part of
192 # Type of message to parse out from the test messages
193 self._msg_type = None
198 # Number of VAT History messages found:
200 # 1 - VAT History of DUT1
201 # 2 - VAT History of DUT2
202 self._lookup_kw_nr = 0
203 self._vat_history_lookup_nr = 0
205 # Number of Show Running messages found
207 # 1 - Show run message found
208 self._show_run_lookup_nr = 0
210 # Test ID of currently processed test- the lowercase full path to the
214 # The main data structure
216 "metadata": OrderedDict(),
217 "suites": OrderedDict(),
218 "tests": OrderedDict()
221 # Save the provided metadata
222 for key, val in metadata.items():
223 self._data["metadata"][key] = val
225 # Dictionary defining the methods used to parse different types of
228 "setup-version": self._get_version,
229 "teardown-vat-history": self._get_vat_history,
230 "test-show-runtime": self._get_show_run
235 """Getter - Data parsed from the XML file.
237 :returns: Data parsed from the XML file.
242 def _get_version(self, msg):
243 """Called when extraction of VPP version is required.
245 :param msg: Message to process.
250 if msg.message.count("return STDOUT Version:"):
251 self._version = str(re.search(self.REGEX_VERSION, msg.message).
253 self._data["metadata"]["version"] = self._version
254 self._data["metadata"]["generated"] = msg.timestamp
255 self._msg_type = None
257 def _get_vat_history(self, msg):
258 """Called when extraction of VAT command history is required.
260 :param msg: Message to process.
264 if msg.message.count("VAT command history:"):
265 self._vat_history_lookup_nr += 1
266 if self._vat_history_lookup_nr == 1:
267 self._data["tests"][self._test_ID]["vat-history"] = str()
269 self._msg_type = None
270 text = re.sub("[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3} "
271 "VAT command history:", "", msg.message, count=1). \
272 replace("\n\n", "\n").replace('\n', ' |br| ').\
273 replace('\r', '').replace('"', "'")
275 self._data["tests"][self._test_ID]["vat-history"] += " |br| "
276 self._data["tests"][self._test_ID]["vat-history"] += \
277 "**DUT" + str(self._vat_history_lookup_nr) + ":** " + text
    def _get_show_run(self, msg):
        """Called when extraction of VPP operational data (output of CLI command
        Show Runtime) is required.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        # Only the first "Show Runtime" output per test is stored; once the
        # keyword has been seen more than once, message routing is switched
        # off (self._msg_type = None).
        if msg.message.count("return STDOUT Thread "):
            self._show_run_lookup_nr += 1
            if self._lookup_kw_nr == 1 and self._show_run_lookup_nr == 1:
                self._data["tests"][self._test_ID]["show-run"] = str()
            if self._lookup_kw_nr > 1:
                self._msg_type = None
            if self._show_run_lookup_nr == 1:
                # Normalise the CLI output for reST rendering; " |br| " is the
                # line-break placeholder used throughout this file.
                text = msg.message.replace("vat# ", "").\
                    replace("return STDOUT ", "").replace("\n\n", "\n").\
                    replace('\n', ' |br| ').\
                    replace('\r', '').replace('"', "'")
                self._data["tests"][self._test_ID]["show-run"] += " |br| "
                self._data["tests"][self._test_ID]["show-run"] += \
                    "**DUT" + str(self._lookup_kw_nr) + ":** |br| " + text
305 def _get_latency(self, msg, test_type):
306 """Get the latency data from the test message.
308 :param msg: Message to be parsed.
309 :param test_type: Type of the test - NDR or PDR.
312 :returns: Latencies parsed from the message.
316 if test_type == "NDR":
317 groups = re.search(self.REGEX_LAT_NDR, msg)
318 groups_range = range(1, 7)
319 elif test_type == "PDR":
320 groups = re.search(self.REGEX_LAT_PDR, msg)
321 groups_range = range(1, 3)
326 for idx in groups_range:
328 lat = [int(item) for item in str(groups.group(idx)).split('/')]
329 except (AttributeError, ValueError):
331 latencies.append(lat)
333 keys = ("min", "avg", "max")
341 latency["direction1"]["100"] = dict(zip(keys, latencies[0]))
342 latency["direction2"]["100"] = dict(zip(keys, latencies[1]))
343 if test_type == "NDR":
344 latency["direction1"]["50"] = dict(zip(keys, latencies[2]))
345 latency["direction2"]["50"] = dict(zip(keys, latencies[3]))
346 latency["direction1"]["10"] = dict(zip(keys, latencies[4]))
347 latency["direction2"]["10"] = dict(zip(keys, latencies[5]))
    def visit_suite(self, suite):
        """Implements traversing through the suite and its direct children.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """
        if self.start_suite(suite) is not False:
            suite.suites.visit(self)
            suite.tests.visit(self)
            self.end_suite(suite)
363 def start_suite(self, suite):
364 """Called when suite starts.
366 :param suite: Suite to process.
372 parent_name = suite.parent.name
373 except AttributeError:
376 doc_str = suite.doc.replace('"', "'").replace('\n', ' ').\
377 replace('\r', '').replace('*[', ' |br| *[').replace("*", "**")
378 doc_str = replace(doc_str, ' |br| *[', '*[', maxreplace=1)
380 self._data["suites"][suite.longname.lower().replace('"', "'").
381 replace(" ", "_")] = {
382 "name": suite.name.lower(),
384 "parent": parent_name,
385 "level": len(suite.longname.split("."))
388 suite.keywords.visit(self)
    def end_suite(self, suite):
        """Called when suite ends. Default implementation does nothing.

        :param suite: Suite to process.
        :type suite: Suite
        :returns: Nothing.
        """
399 def visit_test(self, test):
400 """Implements traversing through the test.
402 :param test: Test to process.
406 if self.start_test(test) is not False:
407 test.keywords.visit(self)
410 def start_test(self, test):
411 """Called when test starts.
413 :param test: Test to process.
418 tags = [str(tag) for tag in test.tags]
420 test_result["name"] = test.name.lower()
421 test_result["parent"] = test.parent.name.lower()
422 test_result["tags"] = tags
423 doc_str = test.doc.replace('"', "'").replace('\n', ' '). \
424 replace('\r', '').replace('[', ' |br| [')
425 test_result["doc"] = replace(doc_str, ' |br| [', '[', maxreplace=1)
426 test_result["msg"] = test.message.replace('\n', ' |br| '). \
427 replace('\r', '').replace('"', "'")
428 if test.status == "PASS" and ("NDRPDRDISC" in tags or
431 if "NDRDISC" in tags:
433 elif "PDRDISC" in tags:
442 test_result["type"] = test_type
444 if test_type in ("NDR", "PDR"):
446 rate_value = str(re.search(
447 self.REGEX_RATE, test.message).group(1))
448 except AttributeError:
451 rate_unit = str(re.search(
452 self.REGEX_RATE, test.message).group(2))
453 except AttributeError:
456 test_result["throughput"] = dict()
457 test_result["throughput"]["value"] = \
458 int(rate_value.split('.')[0])
459 test_result["throughput"]["unit"] = rate_unit
460 test_result["latency"] = \
461 self._get_latency(test.message, test_type)
462 if test_type == "PDR":
463 test_result["lossTolerance"] = str(re.search(
464 self.REGEX_TOLERANCE, test.message).group(1))
466 elif test_type in ("TCP", ):
467 groups = re.search(self.REGEX_TCP, test.message)
468 test_result["result"] = dict()
469 test_result["result"]["value"] = int(groups.group(2))
470 test_result["result"]["unit"] = groups.group(1)
471 elif test_type in ("MRR", ):
472 groups = re.search(self.REGEX_MRR, test.message)
473 test_result["result"] = dict()
474 test_result["result"]["duration"] = int(groups.group(1))
475 test_result["result"]["tx"] = int(groups.group(2))
476 test_result["result"]["rx"] = int(groups.group(3))
477 test_result["result"]["throughput"] = int(
478 test_result["result"]["rx"] /
479 test_result["result"]["duration"])
481 test_result["status"] = test.status
483 self._test_ID = test.longname.lower()
484 self._data["tests"][self._test_ID] = test_result
    def end_test(self, test):
        """Called when test ends. Default implementation does nothing.

        :param test: Test to process.
        :type test: Test
        :returns: Nothing.
        """
    def visit_keyword(self, keyword):
        """Implements traversing through the keyword and its child keywords.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """
        if self.start_keyword(keyword) is not False:
            self.end_keyword(keyword)
505 def start_keyword(self, keyword):
506 """Called when keyword starts. Default implementation does nothing.
508 :param keyword: Keyword to process.
509 :type keyword: Keyword
513 if keyword.type == "setup":
514 self.visit_setup_kw(keyword)
515 elif keyword.type == "teardown":
516 self._lookup_kw_nr = 0
517 self.visit_teardown_kw(keyword)
519 self._lookup_kw_nr = 0
520 self.visit_test_kw(keyword)
521 except AttributeError:
    def end_keyword(self, keyword):
        """Called when keyword ends. Default implementation does nothing.

        :param keyword: Keyword to process.
        :type keyword: Keyword
        :returns: Nothing.
        """
    def visit_test_kw(self, test_kw):
        """Implements traversing through the test keyword and its child
        keywords.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """
        for keyword in test_kw.keywords:
            if self.start_test_kw(keyword) is not False:
                self.visit_test_kw(keyword)
                self.end_test_kw(keyword)
    def start_test_kw(self, test_kw):
        """Called when test keyword starts. Default implementation does
        nothing.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """
        # "Show Runtime Counters On All Duts" marks the keyword whose
        # messages carry the "show runtime" output parsed by _get_show_run.
        if test_kw.name.count("Show Runtime Counters On All Duts"):
            self._lookup_kw_nr += 1
            self._show_run_lookup_nr = 0
            self._msg_type = "test-show-runtime"
            test_kw.messages.visit(self)
    def end_test_kw(self, test_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param test_kw: Keyword to process.
        :type test_kw: Keyword
        :returns: Nothing.
        """
    def visit_setup_kw(self, setup_kw):
        """Implements traversing through the setup keyword and its child
        keywords.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """
        for keyword in setup_kw.keywords:
            if self.start_setup_kw(keyword) is not False:
                self.visit_setup_kw(keyword)
                self.end_setup_kw(keyword)
    def start_setup_kw(self, setup_kw):
        """Called when setup keyword starts. Default implementation does
        nothing.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """
        # Parse the version only once per suite (self._version stays set
        # afterwards).
        if setup_kw.name.count("Show Vpp Version On All Duts") \
                and not self._version:
            self._msg_type = "setup-version"
            setup_kw.messages.visit(self)
    def end_setup_kw(self, setup_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param setup_kw: Keyword to process.
        :type setup_kw: Keyword
        :returns: Nothing.
        """
    def visit_teardown_kw(self, teardown_kw):
        """Implements traversing through the teardown keyword and its child
        keywords.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """
        for keyword in teardown_kw.keywords:
            if self.start_teardown_kw(keyword) is not False:
                self.visit_teardown_kw(keyword)
                self.end_teardown_kw(keyword)
    def start_teardown_kw(self, teardown_kw):
        """Called when teardown keyword starts. Default implementation does
        nothing.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """
        # Reset the per-suite DUT counter before parsing the VAT history
        # messages of this keyword.
        if teardown_kw.name.count("Show Vat History On All Duts"):
            self._vat_history_lookup_nr = 0
            self._msg_type = "teardown-vat-history"
            teardown_kw.messages.visit(self)
    def end_teardown_kw(self, teardown_kw):
        """Called when keyword ends. Default implementation does nothing.

        :param teardown_kw: Keyword to process.
        :type teardown_kw: Keyword
        :returns: Nothing.
        """
    def visit_message(self, msg):
        """Implements visiting the message.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
        if self.start_message(msg) is not False:
            self.end_message(msg)
650 def start_message(self, msg):
651 """Called when message starts. Get required information from messages:
654 :param msg: Message to process.
660 self.parse_msg[self._msg_type](msg)
    def end_message(self, msg):
        """Called when message ends. Default implementation does nothing.

        :param msg: Message to process.
        :type msg: Message
        :returns: Nothing.
        """
class InputData(object):
    """Input data.

    The data is extracted from output.xml files generated by Jenkins jobs and
    stored in pandas' DataFrames.

    The data structure:
    - job name
      - build number
        - metadata
        - suites
        - tests
          - ID: test data (as described in ExecutionChecker documentation)
    """

    def __init__(self, spec):
        """Initialisation.

        :param spec: Specification.
        :type spec: Specification
        """

        # Specification of jobs/builds to process (restored: self._cfg is
        # read throughout the class, e.g. in download_and_parse_data).
        self._cfg = spec

        # Data store, filled by download_and_parse_data().
        self._input_data = None

    @property
    def data(self):
        """Getter - Input data.

        :returns: Input data
        :rtype: pandas.Series
        """
        return self._input_data
    def metadata(self, job, build):
        """Getter - metadata of the given job and build.

        :param job: Job which metadata we want.
        :param build: Build which metadata we want.
        :type job: str
        :type build: str
        :returns: Metadata
        :rtype: pandas.Series
        """
        # NOTE(review): unlike suites(), build is not wrapped in str() here -
        # callers presumably pass it already as a string; verify.
        return self.data[job][build]["metadata"]
    def suites(self, job, build):
        """Getter - suites of the given job and build.

        :param job: Job which suites we want.
        :param build: Build which suites we want.
        :type job: str
        :type build: str
        :returns: Suites
        :rtype: pandas.Series
        """
        return self.data[job][str(build)]["suites"]
    def tests(self, job, build):
        """Getter - tests of the given job and build.

        :param job: Job which tests we want.
        :param build: Build which tests we want.
        :type job: str
        :type build: str
        :returns: Tests
        :rtype: pandas.Series
        """
        return self.data[job][build]["tests"]
752 def _parse_tests(job, build):
753 """Process data from robot output.xml file and return JSON structured
756 :param job: The name of job which build output data will be processed.
757 :param build: The build which output data will be processed.
760 :returns: JSON data structure.
769 with open(build["file-name"], 'r') as data_file:
771 result = ExecutionResult(data_file)
772 except errors.DataError as err:
773 logging.error("Error occurred while parsing output.xml: {0}".
776 checker = ExecutionChecker(metadata)
777 result.visit(checker)
781 def download_and_parse_data(self):
782 """Download the input data files, parse input data from input files and
783 store in pandas' Series.
786 logging.info("Downloading and parsing input files ...")
789 for job, builds in self._cfg.builds.items():
790 logging.info(" Processing data from the job '{0}' ...".
794 logging.info(" Processing the build '{0}'".
795 format(build["build"]))
796 self._cfg.set_input_state(job, build["build"], "failed")
797 if not download_and_unzip_data_file(self._cfg, job, build):
798 logging.error("It is not possible to download the input "
799 "data file from the job '{job}', build "
800 "'{build}', or it is damaged. Skipped.".
801 format(job=job, build=build["build"]))
804 logging.info(" Processing data from the build '{0}' ...".
805 format(build["build"]))
806 data = InputData._parse_tests(job, build)
808 logging.error("Input data file from the job '{job}', build "
809 "'{build}' is damaged. Skipped.".
810 format(job=job, build=build["build"]))
813 self._cfg.set_input_state(job, build["build"], "processed")
816 remove(build["file-name"])
817 except OSError as err:
818 logging.error("Cannot remove the file '{0}': {1}".
819 format(build["file-name"], err))
821 build_data = pd.Series({
822 "metadata": pd.Series(data["metadata"].values(),
823 index=data["metadata"].keys()),
824 "suites": pd.Series(data["suites"].values(),
825 index=data["suites"].keys()),
826 "tests": pd.Series(data["tests"].values(),
827 index=data["tests"].keys())})
828 builds_data[str(build["build"])] = build_data
829 build["status"] = "processed"
830 logging.info(" Done.")
832 job_data[job] = pd.Series(builds_data.values(),
833 index=builds_data.keys())
834 logging.info(" Done.")
836 self._input_data = pd.Series(job_data.values(), index=job_data.keys())
837 logging.info("Done.")
840 def _end_of_tag(tag_filter, start=0, closer="'"):
841 """Return the index of character in the string which is the end of tag.
843 :param tag_filter: The string where the end of tag is being searched.
844 :param start: The index where the searching is stated.
845 :param closer: The character which is the tag closer.
846 :type tag_filter: str
849 :returns: The index of the tag closer.
854 idx_opener = tag_filter.index(closer, start)
855 return tag_filter.index(closer, idx_opener + 1)
860 def _condition(tag_filter):
861 """Create a conditional statement from the given tag filter.
863 :param tag_filter: Filter based on tags from the element specification.
864 :type tag_filter: str
865 :returns: Conditional statement which can be evaluated.
871 index = InputData._end_of_tag(tag_filter, index)
875 tag_filter = tag_filter[:index] + " in tags" + tag_filter[index:]
877 def filter_data(self, element, params=None, data_set="tests",
878 continue_on_error=False):
879 """Filter required data from the given jobs and builds.
881 The output data structure is:
898 :param element: Element which will use the filtered data.
899 :param params: Parameters which will be included in the output. If None,
900 all parameters are included.
901 :param data_set: The set of data to be filtered: tests, suites,
903 :param continue_on_error: Continue if there is error while reading the
904 data. The Item will be empty then
905 :type element: pandas.Series
908 :type continue_on_error: bool
909 :returns: Filtered data.
913 logging.info(" Creating the data set for the {0} '{1}'.".
914 format(element.get("type", ""), element.get("title", "")))
917 if element["filter"] in ("all", "template"):
920 cond = InputData._condition(element["filter"])
921 logging.debug(" Filter: {0}".format(cond))
923 logging.error(" No filter defined.")
927 params = element.get("parameters", None)
931 for job, builds in element["data"].items():
932 data[job] = pd.Series()
934 data[job][str(build)] = pd.Series()
936 data_iter = self.data[job][str(build)][data_set].\
939 if continue_on_error:
943 for test_ID, test_data in data_iter:
944 if eval(cond, {"tags": test_data.get("tags", "")}):
945 data[job][str(build)][test_ID] = pd.Series()
947 for param, val in test_data.items():
948 data[job][str(build)][test_ID][param] = val
952 data[job][str(build)][test_ID][param] =\
955 data[job][str(build)][test_ID][param] =\
959 except (KeyError, IndexError, ValueError) as err:
960 logging.error(" Missing mandatory parameter in the element "
961 "specification: {0}".format(err))
963 except AttributeError:
966 logging.error(" The filter '{0}' is not correct. Check if all "
967 "tags are enclosed by apostrophes.".format(cond))
971 def merge_data(data):
972 """Merge data from more jobs and builds to a simple data structure.
974 The output data structure is:
985 :param data: Data to merge.
986 :type data: pandas.Series
987 :returns: Merged data.
988 :rtype: pandas.Series
991 logging.info(" Merging data ...")
993 merged_data = pd.Series()
994 for _, builds in data.iteritems():
995 for _, item in builds.iteritems():
996 for ID, item_data in item.iteritems():
997 merged_data[ID] = item_data