CSIT-932: Remove outliers from statistical calculations
[csit.git] / resources / tools / presentation / generator_tables.py
index 58d3b67..f55c41e 100644 (file)
@@ -22,7 +22,7 @@ import prettytable
 from string import replace
 
 from errors import PresentationError
-from utils import mean, stdev, relative_change
+from utils import mean, stdev, relative_change, remove_outliers
 
 
 def generate_tables(spec, data):
@@ -194,6 +194,9 @@ def table_performance_improvements(table, input_data):
         line_lst = list()
         for item in data:
             if isinstance(item["data"], str):
+                # Remove -?drdisc from the end
+                if item["data"].endswith("drdisc"):
+                    item["data"] = item["data"][:-8]
                 line_lst.append(item["data"])
             elif isinstance(item["data"], float):
                 line_lst.append("{:.1f}".format(item["data"]))
@@ -266,7 +269,7 @@ def table_performance_improvements(table, input_data):
                     else:
                         tbl_item.append({"data": None})
                 except (IndexError, ValueError, TypeError):
-                    logging.error("No data for {0}".format(tbl_item[1]["data"]))
+                    logging.error("No data for {0}".format(tbl_item[0]["data"]))
                     tbl_item.append({"data": None})
                     continue
             else:
@@ -296,19 +299,19 @@ def table_performance_improvements(table, input_data):
                 else:
                     rel_change = item[-1]["data"]
                 if "ndr_top" in file_name \
-                        and "ndr" in item[1]["data"] \
+                        and "ndr" in item[0]["data"] \
                         and rel_change >= 10.0:
                     _write_line_to_file(file_handler, item)
                 elif "pdr_top" in file_name \
-                        and "pdr" in item[1]["data"] \
+                        and "pdr" in item[0]["data"] \
                         and rel_change >= 10.0:
                     _write_line_to_file(file_handler, item)
                 elif "ndr_low" in file_name \
-                        and "ndr" in item[1]["data"] \
+                        and "ndr" in item[0]["data"] \
                         and rel_change < 10.0:
                     _write_line_to_file(file_handler, item)
                 elif "pdr_low" in file_name \
-                        and "pdr" in item[1]["data"] \
+                        and "pdr" in item[0]["data"] \
                         and rel_change < 10.0:
                     _write_line_to_file(file_handler, item)
 
@@ -345,6 +348,9 @@ def table_performance_comparison(table, input_data):
     :type input_data: InputData
     """
 
+    logging.info("  Generating the table {0} ...".
+                 format(table.get("title", "")))
+
     # Transform the data
     data = input_data.filter_data(table)
 
@@ -374,30 +380,38 @@ def table_performance_comparison(table, input_data):
                     tbl_dict[tst_name] = {"name": name,
                                           "ref-data": list(),
                                           "cmp-data": list()}
-                tbl_dict[tst_name]["ref-data"].\
-                    append(tst_data["throughput"]["value"])
+                try:
+                    tbl_dict[tst_name]["ref-data"].\
+                        append(tst_data["throughput"]["value"])
+                except TypeError:
+                    pass  # No data in output.xml for this test
 
     for job, builds in table["compare"]["data"].items():
         for build in builds:
             for tst_name, tst_data in data[job][str(build)].iteritems():
-                tbl_dict[tst_name]["cmp-data"].\
-                    append(tst_data["throughput"]["value"])
+                try:
+                    tbl_dict[tst_name]["cmp-data"].\
+                        append(tst_data["throughput"]["value"])
+                except KeyError:
+                    pass
+                except TypeError:
+                    tbl_dict.pop(tst_name, None)
 
     tbl_lst = list()
     for tst_name in tbl_dict.keys():
         item = [tbl_dict[tst_name]["name"], ]
         if tbl_dict[tst_name]["ref-data"]:
-            item.append(round(mean(tbl_dict[tst_name]["ref-data"]) / 1000000,
-                              2))
-            item.append(round(stdev(tbl_dict[tst_name]["ref-data"]) / 1000000,
-                              2))
+            item.append(round(mean(remove_outliers(
+                tbl_dict[tst_name]["ref-data"], 2)) / 1000000, 2))
+            item.append(round(stdev(remove_outliers(
+                tbl_dict[tst_name]["ref-data"], 2)) / 1000000, 2))
         else:
             item.extend([None, None])
         if tbl_dict[tst_name]["cmp-data"]:
-            item.append(round(mean(tbl_dict[tst_name]["cmp-data"]) / 1000000,
-                              2))
-            item.append(round(stdev(tbl_dict[tst_name]["cmp-data"]) / 1000000,
-                              2))
+            item.append(round(mean(remove_outliers(
+                tbl_dict[tst_name]["cmp-data"], 2)) / 1000000, 2))
+            item.append(round(stdev(remove_outliers(
+                tbl_dict[tst_name]["cmp-data"], 2)) / 1000000, 2))
         else:
             item.extend([None, None])
         if item[1] is not None and item[3] is not None:
@@ -424,6 +438,7 @@ def table_performance_comparison(table, input_data):
                                                table["output-file-ext"])
                  ]
     for file_name in tbl_names:
+        logging.info("      Writing file: '{}'".format(file_name))
         with open(file_name, "w") as file_handler:
             file_handler.write(header_str)
             for test in tbl_lst:
@@ -444,6 +459,7 @@ def table_performance_comparison(table, input_data):
 
     for i, txt_name in enumerate(tbl_names_txt):
         txt_table = None
+        logging.info("      Writing file: '{}'".format(txt_name))
         with open(tbl_names[i], 'rb') as csv_file:
             csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
             for row in csv_content:
@@ -451,6 +467,7 @@ def table_performance_comparison(table, input_data):
                     txt_table = prettytable.PrettyTable(row)
                 else:
                     txt_table.add_row(row)
+            txt_table.align["Test case"] = "l"
         with open(txt_name, "w") as txt_file:
             txt_file.write(str(txt_table))
 
@@ -464,6 +481,7 @@ def table_performance_comparison(table, input_data):
 
     output_file = "{0}-ndr-1t1c-top{1}".format(table["output-file"],
                                                table["output-file-ext"])
+    logging.info("      Writing file: '{}'".format(output_file))
     with open(output_file, "w") as out_file:
         out_file.write(header_str)
         for i, line in enumerate(lines[1:]):
@@ -473,6 +491,7 @@ def table_performance_comparison(table, input_data):
 
     output_file = "{0}-ndr-1t1c-bottom{1}".format(table["output-file"],
                                                   table["output-file-ext"])
+    logging.info("      Writing file: '{}'".format(output_file))
     with open(output_file, "w") as out_file:
         out_file.write(header_str)
         for i, line in enumerate(lines[-1:0:-1]):
@@ -489,6 +508,7 @@ def table_performance_comparison(table, input_data):
 
     output_file = "{0}-pdr-1t1c-top{1}".format(table["output-file"],
                                                table["output-file-ext"])
+    logging.info("      Writing file: '{}'".format(output_file))
     with open(output_file, "w") as out_file:
         out_file.write(header_str)
         for i, line in enumerate(lines[1:]):
@@ -498,6 +518,7 @@ def table_performance_comparison(table, input_data):
 
     output_file = "{0}-pdr-1t1c-bottom{1}".format(table["output-file"],
                                                   table["output-file-ext"])
+    logging.info("      Writing file: '{}'".format(output_file))
     with open(output_file, "w") as out_file:
         out_file.write(header_str)
         for i, line in enumerate(lines[-1:0:-1]):