CSIT-1041: Trending dashboard
[csit.git] / resources / tools / presentation / generator_tables.py
1 # Copyright (c) 2017 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
5 #
6 #     http://www.apache.org/licenses/LICENSE-2.0
7 #
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13
14 """Algorithms to generate tables.
15 """
16
17
18 import logging
19 import csv
20 import prettytable
21 import pandas as pd
22
23 from string import replace
24 from math import isnan
25
26 from errors import PresentationError
27 from utils import mean, stdev, relative_change, remove_outliers, find_outliers
28
29
def generate_tables(spec, data):
    """Generate all tables specified in the specification file.

    For every table in the specification, the value of its "algorithm" item
    is resolved to a callable which is then called with the table
    specification and the input data. A table whose algorithm cannot be
    resolved is reported in the log and skipped.

    :param spec: Specification read from the specification file.
    :param data: Data to process.
    :type spec: Specification
    :type data: InputData
    """

    logging.info("Generating the tables ...")
    for table in spec.tables:
        # Only the lookup is guarded: a NameError raised while the algorithm
        # itself runs is a bug in that algorithm and must not be reported as
        # "the algorithm is not defined".
        # NOTE(review): eval() evaluates whatever expression the specification
        # contains, so the specification file must be trusted input.
        try:
            algorithm = eval(table["algorithm"])
        except NameError:
            logging.error("The algorithm '{0}' is not defined.".
                          format(table["algorithm"]))
            continue
        algorithm(table, data)
    logging.info("Done.")
47
48
def table_details(table, input_data):
    """Generate the table(s) with algorithm: table_detailed_test_results
    specified in the specification file.

    One csv file is written per suite which has at least one matching test.
    The file name is built from the items "output-file", the suite name and
    "output-file-ext" of the table specification.

    :param table: Table to generate.
    :param input_data: Data to process.
    :type table: pandas.Series
    :type input_data: InputData
    """

    logging.info("  Generating the table {0} ...".
                 format(table.get("title", "")))

    # Transform the data
    filtered = input_data.filter_data(table)

    # Header cells are quoted, embedded quotes are doubled
    header = ['"{0}"'.format(str(column["title"]).replace('"', '""'))
              for column in table["columns"]]

    # Only the first job and its first build from the specification are used
    job = list(table["data"].keys())[0]
    build = str(table["data"][job][0])
    try:
        suites = input_data.suites(job, build)
    except KeyError:
        logging.error("    No data available. The table will not be generated.")
        return

    for _, suite in suites.items():
        suite_name = suite["name"]
        tests = filtered[job][build]

        # Collect the rows of all tests whose parent matches this suite
        rows = list()
        for test_name in tests.keys():
            test = tests[test_name]
            if test["parent"] not in suite_name:
                continue
            row = list()
            for column in table["columns"]:
                try:
                    key = column["data"].split(" ")[1]
                    cell = str(test[key]).replace('"', '""')
                    if key in ("vat-history", "show-run"):
                        # Drop the first " |br| " and the last five
                        # characters, then wrap the text in the pre-formatted
                        # markers.
                        cell = cell.replace(" |br| ", "", 1)
                        cell = " |prein| {0} |preout| ".format(cell[:-5])
                    row.append('"{0}"'.format(cell))
                except KeyError:
                    row.append("No data")
            rows.append(row)

        # Suites without any matching test produce no file
        if not rows:
            continue

        file_name = "{0}_{1}{2}".format(table["output-file"], suite_name,
                                        table["output-file-ext"])
        logging.info("      Writing file: '{}'".format(file_name))
        with open(file_name, "w") as file_handler:
            file_handler.write(",".join(header) + "\n")
            for row in rows:
                file_handler.write(",".join(row) + "\n")

    logging.info("  Done.")
113
114
def table_merged_details(table, input_data):
    """Generate the table(s) with algorithm: table_merged_details
    specified in the specification file.

    The filtered tests and suites are merged by the input data object before
    the tables are built. One csv file is written per suite which has at
    least one matching test.

    :param table: Table to generate.
    :param input_data: Data to process.
    :type table: pandas.Series
    :type input_data: InputData
    """

    logging.info("  Generating the table {0} ...".
                 format(table.get("title", "")))

    # Transform the data
    tests = input_data.merge_data(input_data.filter_data(table))
    tests.sort_index(inplace=True)

    suites = input_data.merge_data(
        input_data.filter_data(table, data_set="suites"))

    # Header cells are quoted, embedded quotes are doubled
    header = ['"{0}"'.format(str(column["title"]).replace('"', '""'))
              for column in table["columns"]]

    for _, suite in suites.items():
        suite_name = suite["name"]

        # Collect the rows of all tests whose parent matches this suite
        rows = list()
        for test_name in tests.keys():
            test = tests[test_name]
            if test["parent"] not in suite_name:
                continue
            row = list()
            for column in table["columns"]:
                try:
                    key = column["data"].split(" ")[1]
                    cell = str(test[key]).replace('"', '""')
                    if key in ("vat-history", "show-run"):
                        # Drop the first " |br| " and the last five
                        # characters, then wrap the text in the pre-formatted
                        # markers.
                        cell = cell.replace(" |br| ", "", 1)
                        cell = " |prein| {0} |preout| ".format(cell[:-5])
                    row.append('"{0}"'.format(cell))
                except KeyError:
                    row.append("No data")
            rows.append(row)

        # Suites without any matching test produce no file
        if not rows:
            continue

        file_name = "{0}_{1}{2}".format(table["output-file"], suite_name,
                                        table["output-file-ext"])
        logging.info("      Writing file: '{}'".format(file_name))
        with open(file_name, "w") as file_handler:
            file_handler.write(",".join(header) + "\n")
            for row in rows:
                file_handler.write(",".join(row) + "\n")

    logging.info("  Done.")
174
175
def table_performance_improvements(table, input_data):
    """Generate the table(s) with algorithm: table_performance_improvements
    specified in the specification file.

    The rows are taken from the csv template named by the "template" item of
    the table specification. Each column of the specification is either
    copied from the template ("template"), computed from the input data
    ("data") or computed from previously generated columns ("operation").
    The result is written to four csv files: NDR and PDR tests, each split
    to rows with the value in the last column >= 10.0 ("top") and the rest
    ("low").

    :param table: Table to generate.
    :param input_data: Data to process.
    :type table: pandas.Series
    :type input_data: InputData
    :returns: Nothing. None is also returned early when the template is not
        defined, cannot be read, or when a column uses an unsupported command.
    """

    def _write_line_to_file(file_handler, data):
        """Write a line to the .csv file.

        Strings are written as they are (without the trailing "-?drdisc"),
        floats with one decimal place and None as an empty cell. Items of any
        other type are not written.

        :param file_handler: File handler for the csv file. It must be open for
         writing text.
        :param data: Item to be written to the file.
        :type file_handler: file
        :type data: list
        """

        line_lst = list()
        for item in data:
            if isinstance(item["data"], str):
                # Remove -?drdisc from the end
                if item["data"].endswith("drdisc"):
                    item["data"] = item["data"][:-8]
                line_lst.append(item["data"])
            elif isinstance(item["data"], float):
                line_lst.append("{:.1f}".format(item["data"]))
            elif item["data"] is None:
                line_lst.append("")
        file_handler.write(",".join(line_lst) + "\n")

    def _sort_key(row):
        """Return a sorting key which places rows without data (None) below
        all other rows, independently of the Python version.

        :param row: Row of the table.
        :type row: list
        :returns: Sorting key.
        :rtype: tuple
        """
        value = row[-1]["data"]
        return (value is not None, value)

    logging.info("  Generating the table {0} ...".
                 format(table.get("title", "")))

    # Read the template
    file_name = table.get("template", None)
    if not file_name:
        logging.error("The template is not defined. Skipping the table.")
        return None
    try:
        tmpl = _read_csv_template(file_name)
    except PresentationError:
        logging.error("  The template '{0}' does not exist. Skipping the "
                      "table.".format(file_name))
        return None

    # Transform the data
    data = input_data.filter_data(table)

    # Prepare the header of the tables
    header = [column["title"] for column in table["columns"]]

    # Generate the data for the table according to the model in the table
    # specification
    tbl_lst = list()
    for tmpl_item in tmpl:
        tbl_item = list()
        for column in table["columns"]:
            cmd = column["data"].split(" ")[0]
            args = column["data"].split(" ")[1:]
            if cmd == "template":
                # Numbers from the template are stored as floats, anything
                # else is kept as a string.
                try:
                    val = float(tmpl_item[int(args[0])])
                except ValueError:
                    val = tmpl_item[int(args[0])]
                tbl_item.append({"data": val})
            elif cmd == "data":
                jobs = args[0:-1]
                operation = args[-1]
                data_lst = list()
                for job in jobs:
                    for build in data[job]:
                        try:
                            data_lst.append(float(build[tmpl_item[0]]
                                                  ["throughput"]["value"]))
                        except (KeyError, TypeError):
                            # No data, ignore
                            continue
                if data_lst:
                    # NOTE(review): eval() evaluates the operation given in
                    # the specification; the specification must be trusted.
                    tbl_item.append({"data": (eval(operation)(data_lst)) /
                                             1000000})
                else:
                    tbl_item.append({"data": None})
            elif cmd == "operation":
                operation = args[0]
                try:
                    nr1 = float(tbl_item[int(args[1])]["data"])
                    nr2 = float(tbl_item[int(args[2])]["data"])
                    if nr1 and nr2:
                        tbl_item.append({"data": eval(operation)(nr1, nr2)})
                    else:
                        tbl_item.append({"data": None})
                except (IndexError, ValueError, TypeError):
                    logging.error("No data for {0}".format(tbl_item[0]["data"]))
                    tbl_item.append({"data": None})
            else:
                logging.error("Not supported command {0}. Skipping the table.".
                              format(cmd))
                return None
        tbl_lst.append(tbl_item)

    # Sort the table according to the relative change
    tbl_lst.sort(key=_sort_key, reverse=True)

    # Create the tables and write them to the files. The test type and the
    # top / low selection are kept next to the file name instead of being
    # parsed back from it, so the output path cannot influence which rows
    # are written to which file.
    outputs = (("ndr", True), ("pdr", True), ("ndr", False), ("pdr", False))
    for test_type, is_top in outputs:
        file_name = "{0}_{1}_{2}{3}".format(table["output-file"],
                                            test_type,
                                            "top" if is_top else "low",
                                            table["output-file-ext"])
        logging.info("    Writing the file '{0}'".format(file_name))
        with open(file_name, "w") as file_handler:
            file_handler.write(",".join(header) + "\n")
            for item in tbl_lst:
                if isinstance(item[-1]["data"], float):
                    rel_change = round(item[-1]["data"], 1)
                else:
                    rel_change = item[-1]["data"]
                # Rows without the relative change belong to the "low" files.
                row_is_top = rel_change is not None and rel_change >= 10.0
                if test_type in item[0]["data"] and row_is_top == is_top:
                    _write_line_to_file(file_handler, item)

    logging.info("  Done.")
321
322
323 def _read_csv_template(file_name):
324     """Read the template from a .csv file.
325
326     :param file_name: Name / full path / relative path of the file to read.
327     :type file_name: str
328     :returns: Data from the template as list (lines) of lists (items on line).
329     :rtype: list
330     :raises: PresentationError if it is not possible to read the file.
331     """
332
333     try:
334         with open(file_name, 'r') as csv_file:
335             tmpl_data = list()
336             for line in csv_file:
337                 tmpl_data.append(line[:-1].split(","))
338         return tmpl_data
339     except IOError as err:
340         raise PresentationError(str(err), level="ERROR")
341
342
def table_performance_comparison(table, input_data):
    """Generate the table(s) with algorithm: table_performance_comparison
    specified in the specification file.

    Throughput of the "reference" and "compare" data sets is compared test by
    test. For every combination of NDR / PDR and 1t1c / 2t2c / 4t4c a full
    table is written as csv and as pretty-printed txt. For NDR and PDR 1t1c
    also the "top" and "bottom" csv tables with at most "nr-of-tests-shown"
    rows are written.

    :param table: Table to generate.
    :param input_data: Data to process.
    :type table: pandas.Series
    :type input_data: InputData
    """

    logging.info("  Generating the table {0} ...".
                 format(table.get("title", "")))

    # Transform the data
    data = input_data.filter_data(table)

    # Prepare the header of the tables
    try:
        header = ["Test case",
                  "{0} Throughput [Mpps]".format(table["reference"]["title"]),
                  "{0} stdev [Mpps]".format(table["reference"]["title"]),
                  "{0} Throughput [Mpps]".format(table["compare"]["title"]),
                  "{0} stdev [Mpps]".format(table["compare"]["title"]),
                  "Change [%]"]
        header_str = ",".join(header) + "\n"
    except (AttributeError, KeyError) as err:
        logging.error("The model is invalid, missing parameter: {0}".
                      format(err))
        return

    # Prepare data to the table:
    tbl_dict = dict()
    for job, builds in table["reference"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].items():
                if tbl_dict.get(tst_name, None) is None:
                    name = "{0}-{1}".format(tst_data["parent"].split("-")[0],
                                            "-".join(tst_data["name"].
                                                     split("-")[1:]))
                    tbl_dict[tst_name] = {"name": name,
                                          "ref-data": list(),
                                          "cmp-data": list()}
                try:
                    tbl_dict[tst_name]["ref-data"].\
                        append(tst_data["throughput"]["value"])
                except TypeError:
                    pass  # No data in output.xml for this test

    for job, builds in table["compare"]["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].items():
                try:
                    tbl_dict[tst_name]["cmp-data"].\
                        append(tst_data["throughput"]["value"])
                except KeyError:
                    pass  # The test is not in the reference data
                except TypeError:
                    # No throughput for this test, remove it from the table
                    tbl_dict.pop(tst_name, None)

    tbl_lst = list()
    for tst_name in tbl_dict.keys():
        item = [tbl_dict[tst_name]["name"], ]
        for data_set in ("ref-data", "cmp-data"):
            if tbl_dict[tst_name][data_set]:
                data_t = remove_outliers(tbl_dict[tst_name][data_set],
                                         table["outlier-const"])
                item.append(round(mean(data_t) / 1000000, 2))
                item.append(round(stdev(data_t) / 1000000, 2))
            else:
                item.extend([None, None])
        # The relative change is not defined for a zero reference value.
        # NOTE(review): relative_change() presumably divides by its first
        # argument - confirm in utils.
        if item[1] is not None and item[3] is not None and item[1] != 0:
            item.append(int(relative_change(float(item[1]), float(item[3]))))
        # Only complete rows (with the relative change) are presented
        if len(item) == 6:
            tbl_lst.append(item)

    # Sort the table according to the relative change
    tbl_lst.sort(key=lambda rel: rel[-1], reverse=True)

    # Generate tables:
    # All tests in csv:
    variants = [(rate, cores)
                for rate in ("ndr", "pdr")
                for cores in ("1t1c", "2t2c", "4t4c")]
    tbl_names = ["{0}-{1}-{2}-full{3}".format(table["output-file"],
                                              rate,
                                              cores,
                                              table["output-file-ext"])
                 for rate, cores in variants]
    for file_name, (rate, cores) in zip(tbl_names, variants):
        logging.info("      Writing file: '{0}'".format(file_name))
        with open(file_name, "w") as file_handler:
            file_handler.write(header_str)
            for test in tbl_lst:
                if rate in test[0] and cores in test[0]:
                    # Remove the last part of the test name (test type)
                    test[0] = "-".join(test[0].split("-")[:-1])
                    file_handler.write(",".join([str(item) for item in test]) +
                                       "\n")

    # All tests in txt:
    tbl_names_txt = ["{0}-{1}-{2}-full.txt".format(table["output-file"],
                                                   rate,
                                                   cores)
                     for rate, cores in variants]

    for csv_name, txt_name in zip(tbl_names, tbl_names_txt):
        txt_table = None
        logging.info("      Writing file: '{0}'".format(txt_name))
        with open(csv_name, 'rb') as csv_file:
            csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
            for row in csv_content:
                # The first row is the header of the table
                if txt_table is None:
                    txt_table = prettytable.PrettyTable(row)
                else:
                    txt_table.add_row(row)
            txt_table.align["Test case"] = "l"
        with open(txt_name, "w") as txt_file:
            txt_file.write(str(txt_table))

    # Selected tests in csv:
    for rate in ("ndr", "pdr"):
        input_file = "{0}-{1}-1t1c-full{2}".format(table["output-file"],
                                                   rate,
                                                   table["output-file-ext"])
        with open(input_file, "r") as in_file:
            lines = list(in_file)

        # "top" takes the rows from the beginning of the sorted table,
        # "bottom" from its end in the reversed order; the header line is
        # skipped in both cases.
        for selection, rows in (("top", lines[1:]),
                                ("bottom", lines[-1:0:-1])):
            output_file = "{0}-{1}-1t1c-{2}{3}".format(
                table["output-file"], rate, selection,
                table["output-file-ext"])
            logging.info("      Writing file: '{0}'".format(output_file))
            with open(output_file, "w") as out_file:
                out_file.write(header_str)
                for i, line in enumerate(rows):
                    if i == table["nr-of-tests-shown"]:
                        break
                    out_file.write(line)
530
531
def table_performance_trending_dashboard(table, input_data):
    """Generate the table(s) with algorithm:
    table_performance_trending_dashboard specified in the specification file.

    For every test with more than two samples the throughput trend (rolling
    median), the change of the last sample against the trend and the
    classification of the last sample (outlier / regression / progression /
    normal) are computed. The table is written as csv and as pretty-printed
    txt.

    :param table: Table to generate.
    :param input_data: Data to process.
    :type table: pandas.Series
    :type input_data: InputData
    """

    logging.info("  Generating the table {0} ...".
                 format(table.get("title", "")))

    # Transform the data
    data = input_data.filter_data(table)

    # Prepare the header of the tables
    header = ["Test case",
              "Thput trend [Mpps]",
              "Change [Mpps]",
              "Change [%]",
              "Anomaly"]
    header_str = ",".join(header) + "\n"

    # Prepare data to the table:
    tbl_dict = dict()
    for job, builds in table["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].items():
                if tbl_dict.get(tst_name, None) is None:
                    name = "{0}-{1}".format(tst_data["parent"].split("-")[0],
                                            "-".join(tst_data["name"].
                                                     split("-")[1:]))
                    tbl_dict[tst_name] = {"name": name,
                                          "data": list()}
                try:
                    tbl_dict[tst_name]["data"]. \
                        append(tst_data["result"]["throughput"])
                except (TypeError, KeyError):
                    pass  # No data in output.xml for this test

    tbl_lst = list()
    for tst_name in tbl_dict.keys():
        # At least three samples are needed
        if len(tbl_dict[tst_name]["data"]) <= 2:
            continue

        pd_data = pd.Series(tbl_dict[tst_name]["data"])
        win_size = min(pd_data.size, table["window"])
        # Test name:
        name = tbl_dict[tst_name]["name"]
        # Throughput trend; the value for the last but one sample is used:
        trend = list(pd_data.rolling(window=win_size, min_periods=2).
                     median())[-2]
        # Anomaly:
        t_data, _ = find_outliers(pd_data)
        last = list(t_data)[-1]
        t_stdev = list(t_data.rolling(window=win_size, min_periods=2).
                       std())[-2]
        if isnan(last):
            # The last sample is missing in the data returned by
            # find_outliers(); use the original value for the change.
            anomaly = "outlier"
            last = list(pd_data)[-1]
        elif last < (trend - 3 * t_stdev):
            anomaly = "regression"
        elif last > (trend + 3 * t_stdev):
            anomaly = "progression"
        else:
            anomaly = "normal"

        if not isnan(last) and not isnan(trend) and trend != 0:
            # Change:
            change = round(float(last - trend) / 1000000, 2)
            # Relative change:
            rel_change = int(relative_change(float(trend), float(last)))

            tbl_lst.append([name,
                            round(float(trend) / 1000000, 2),
                            change,
                            rel_change,
                            anomaly])

    # Sort the table according to the relative change
    tbl_lst.sort(key=lambda rel: rel[-2], reverse=True)

    # The other tables in this module expect "output-file-ext" to include the
    # leading dot. Accept it with or without the dot so that the file is not
    # named "<output-file>..<ext>".
    file_name = "{0}.{1}".format(table["output-file"],
                                 table["output-file-ext"].lstrip("."))

    logging.info("      Writing file: '{0}'".format(file_name))
    with open(file_name, "w") as file_handler:
        file_handler.write(header_str)
        for test in tbl_lst:
            file_handler.write(",".join([str(item) for item in test]) + '\n')

    txt_file_name = "{0}.txt".format(table["output-file"])
    txt_table = None
    logging.info("      Writing file: '{0}'".format(txt_file_name))
    with open(file_name, 'rb') as csv_file:
        csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
        for row in csv_content:
            # The first row is the header of the table
            if txt_table is None:
                txt_table = prettytable.PrettyTable(row)
            else:
                txt_table.add_row(row)
        txt_table.align["Test case"] = "l"
    with open(txt_file_name, "w") as txt_file:
        txt_file.write(str(txt_table))