CSIT-932: Remove outliers from statistical calculations

[csit.git] / resources / tools / presentation / generator_tables.py
diff --git a/resources/tools/presentation/generator_tables.py b/resources/tools/presentation/generator_tables.py

index 58d3b67..f55c41e 100644 (file)
--- a/resources/tools/presentation/generator_tables.py
+++ b/resources/tools/presentation/generator_tables.py
@@ -22,7 +22,7 @@ import prettytable
  from string import replace
  
  from errors import PresentationError
-from utils import mean, stdev, relative_change
+from utils import mean, stdev, relative_change, remove_outliers
  
  
  def generate_tables(spec, data):
@@ -194,6 +194,9 @@ def table_performance_improvements(table, input_data):
          line_lst = list()
          for item in data:
              if isinstance(item["data"], str):
+                # Remove -?drdisc from the end
+                if item["data"].endswith("drdisc"):
+                    item["data"] = item["data"][:-8]
                  line_lst.append(item["data"])
              elif isinstance(item["data"], float):
                  line_lst.append("{:.1f}".format(item["data"]))
@@ -266,7 +269,7 @@ def table_performance_improvements(table, input_data):
                      else:
                          tbl_item.append({"data": None})
                  except (IndexError, ValueError, TypeError):
-                    logging.error("No data for {0}".format(tbl_item[1]["data"]))
+                    logging.error("No data for {0}".format(tbl_item[0]["data"]))
                      tbl_item.append({"data": None})
                      continue
              else:
@@ -296,19 +299,19 @@ def table_performance_improvements(table, input_data):
                  else:
                      rel_change = item[-1]["data"]
                  if "ndr_top" in file_name \
-                        and "ndr" in item[1]["data"] \
+                        and "ndr" in item[0]["data"] \
                          and rel_change >= 10.0:
                      _write_line_to_file(file_handler, item)
                  elif "pdr_top" in file_name \
-                        and "pdr" in item[1]["data"] \
+                        and "pdr" in item[0]["data"] \
                          and rel_change >= 10.0:
                      _write_line_to_file(file_handler, item)
                  elif "ndr_low" in file_name \
-                        and "ndr" in item[1]["data"] \
+                        and "ndr" in item[0]["data"] \
                          and rel_change < 10.0:
                      _write_line_to_file(file_handler, item)
                  elif "pdr_low" in file_name \
-                        and "pdr" in item[1]["data"] \
+                        and "pdr" in item[0]["data"] \
                          and rel_change < 10.0:
                      _write_line_to_file(file_handler, item)
  
@@ -345,6 +348,9 @@ def table_performance_comparison(table, input_data):
      :type input_data: InputData
      """
  
+    logging.info("  Generating the table {0} ...".
+                 format(table.get("title", "")))
+
      # Transform the data
      data = input_data.filter_data(table)
  
@@ -374,30 +380,38 @@ def table_performance_comparison(table, input_data):
                      tbl_dict[tst_name] = {"name": name,
                                            "ref-data": list(),
                                            "cmp-data": list()}
-                tbl_dict[tst_name]["ref-data"].\
-                    append(tst_data["throughput"]["value"])
+                try:
+                    tbl_dict[tst_name]["ref-data"].\
+                        append(tst_data["throughput"]["value"])
+                except TypeError:
+                    pass  # No data in output.xml for this test
  
      for job, builds in table["compare"]["data"].items():
          for build in builds:
              for tst_name, tst_data in data[job][str(build)].iteritems():
-                tbl_dict[tst_name]["cmp-data"].\
-                    append(tst_data["throughput"]["value"])
+                try:
+                    tbl_dict[tst_name]["cmp-data"].\
+                        append(tst_data["throughput"]["value"])
+                except KeyError:
+                    pass
+                except TypeError:
+                    tbl_dict.pop(tst_name, None)
  
      tbl_lst = list()
      for tst_name in tbl_dict.keys():
          item = [tbl_dict[tst_name]["name"], ]
          if tbl_dict[tst_name]["ref-data"]:
-            item.append(round(mean(tbl_dict[tst_name]["ref-data"]) / 1000000,
-                              2))
-            item.append(round(stdev(tbl_dict[tst_name]["ref-data"]) / 1000000,
-                              2))
+            item.append(round(mean(remove_outliers(
+                tbl_dict[tst_name]["ref-data"], 2)) / 1000000, 2))
+            item.append(round(stdev(remove_outliers(
+                tbl_dict[tst_name]["ref-data"], 2)) / 1000000, 2))
          else:
              item.extend([None, None])
          if tbl_dict[tst_name]["cmp-data"]:
-            item.append(round(mean(tbl_dict[tst_name]["cmp-data"]) / 1000000,
-                              2))
-            item.append(round(stdev(tbl_dict[tst_name]["cmp-data"]) / 1000000,
-                              2))
+            item.append(round(mean(remove_outliers(
+                tbl_dict[tst_name]["cmp-data"], 2)) / 1000000, 2))
+            item.append(round(stdev(remove_outliers(
+                tbl_dict[tst_name]["cmp-data"], 2)) / 1000000, 2))
          else:
              item.extend([None, None])
          if item[1] is not None and item[3] is not None:
@@ -424,6 +438,7 @@ def table_performance_comparison(table, input_data):
                                                 table["output-file-ext"])
                   ]
      for file_name in tbl_names:
+        logging.info("      Writing file: '{}'".format(file_name))
          with open(file_name, "w") as file_handler:
              file_handler.write(header_str)
              for test in tbl_lst:
@@ -444,6 +459,7 @@ def table_performance_comparison(table, input_data):
  
      for i, txt_name in enumerate(tbl_names_txt):
          txt_table = None
+        logging.info("      Writing file: '{}'".format(txt_name))
          with open(tbl_names[i], 'rb') as csv_file:
              csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
              for row in csv_content:
@@ -451,6 +467,7 @@ def table_performance_comparison(table, input_data):
                      txt_table = prettytable.PrettyTable(row)
                  else:
                      txt_table.add_row(row)
+            txt_table.align["Test case"] = "l"
          with open(txt_name, "w") as txt_file:
              txt_file.write(str(txt_table))
  
@@ -464,6 +481,7 @@ def table_performance_comparison(table, input_data):
  
      output_file = "{0}-ndr-1t1c-top{1}".format(table["output-file"],
                                                 table["output-file-ext"])
+    logging.info("      Writing file: '{}'".format(output_file))
      with open(output_file, "w") as out_file:
          out_file.write(header_str)
          for i, line in enumerate(lines[1:]):
@@ -473,6 +491,7 @@ def table_performance_comparison(table, input_data):
  
      output_file = "{0}-ndr-1t1c-bottom{1}".format(table["output-file"],
                                                    table["output-file-ext"])
+    logging.info("      Writing file: '{}'".format(output_file))
      with open(output_file, "w") as out_file:
          out_file.write(header_str)
          for i, line in enumerate(lines[-1:0:-1]):
@@ -489,6 +508,7 @@ def table_performance_comparison(table, input_data):
  
      output_file = "{0}-pdr-1t1c-top{1}".format(table["output-file"],
                                                 table["output-file-ext"])
+    logging.info("      Writing file: '{}'".format(output_file))
      with open(output_file, "w") as out_file:
          out_file.write(header_str)
          for i, line in enumerate(lines[1:]):
@@ -498,6 +518,7 @@ def table_performance_comparison(table, input_data):
  
      output_file = "{0}-pdr-1t1c-bottom{1}".format(table["output-file"],
                                                    table["output-file-ext"])
+    logging.info("      Writing file: '{}'".format(output_file))
      with open(output_file, "w") as out_file:
          out_file.write(header_str)
          for i, line in enumerate(lines[-1:0:-1]):