Report: Add normalized comparison tables

[csit.git] / resources / tools / presentation / generator_tables.py
diff --git a/resources/tools/presentation/generator_tables.py b/resources/tools/presentation/generator_tables.py

index fd33983..b8ceffa 100644 (file)
--- a/resources/tools/presentation/generator_tables.py
+++ b/resources/tools/presentation/generator_tables.py
@@ -39,6 +39,9 @@ from pal_utils import mean, stdev, classify_anomalies, \
  
  
  REGEX_NIC = re.compile(r'(\d*ge\dp\d\D*\d*[a-z]*)')
+REGEX_TOPO_ARCH = re.compile(r'^(\dn-.{3})')
+
+NORM_FREQ = 2.0  # [GHz]
  
  
  def generate_tables(spec, data):
@@ -51,31 +54,38 @@ def generate_tables(spec, data):
      """
  
      generator = {
-        u"table_merged_details": table_merged_details,
-        u"table_soak_vs_ndr": table_soak_vs_ndr,
-        u"table_perf_trending_dash": table_perf_trending_dash,
-        u"table_perf_trending_dash_html": table_perf_trending_dash_html,
-        u"table_last_failed_tests": table_last_failed_tests,
-        u"table_failed_tests": table_failed_tests,
-        u"table_failed_tests_html": table_failed_tests_html,
-        u"table_oper_data_html": table_oper_data_html,
-        u"table_comparison": table_comparison,
-        u"table_weekly_comparison": table_weekly_comparison,
-        u"table_job_spec_duration": table_job_spec_duration
+        "table_merged_details": table_merged_details,
+        "table_soak_vs_ndr": table_soak_vs_ndr,
+        "table_perf_trending_dash": table_perf_trending_dash,
+        "table_perf_trending_dash_html": table_perf_trending_dash_html,
+        "table_last_failed_tests": table_last_failed_tests,
+        "table_failed_tests": table_failed_tests,
+        "table_failed_tests_html": table_failed_tests_html,
+        "table_oper_data_html": table_oper_data_html,
+        "table_comparison": table_comparison,
+        "table_weekly_comparison": table_weekly_comparison,
+        "table_job_spec_duration": table_job_spec_duration
      }
  
      logging.info(u"Generating the tables ...")
+
+    norm_factor = dict()
+    for key, val in spec.environment.get("frequency", dict()).items():
+        norm_factor[key] = NORM_FREQ / val
+
      for table in spec.tables:
          try:
-            if table[u"algorithm"] == u"table_weekly_comparison":
-                table[u"testbeds"] = spec.environment.get(u"testbeds", None)
-            generator[table[u"algorithm"]](table, data)
+            if table["algorithm"] == "table_weekly_comparison":
+                table["testbeds"] = spec.environment.get("testbeds", None)
+            if table["algorithm"] == "table_comparison":
+                table["norm_factor"] = norm_factor
+            generator[table["algorithm"]](table, data)
          except NameError as err:
              logging.error(
-                f"Probably algorithm {table[u'algorithm']} is not defined: "
+                f"Probably algorithm {table['algorithm']} is not defined: "
                  f"{repr(err)}"
              )
-    logging.info(u"Done.")
+    logging.info("Done.")
  
  
  def table_job_spec_duration(table, input_data):
@@ -1722,81 +1732,83 @@ def table_comparison(table, input_data):
      :type table: pandas.Series
      :type input_data: InputData
      """
-    logging.info(f"  Generating the table {table.get(u'title', u'')} ...")
+    logging.info(f"  Generating the table {table.get('title', '')} ...")
  
      # Transform the data
      logging.info(
-        f"    Creating the data set for the {table.get(u'type', u'')} "
-        f"{table.get(u'title', u'')}."
+        f"    Creating the data set for the {table.get('type', '')} "
+        f"{table.get('title', '')}."
      )
  
-    columns = table.get(u"columns", None)
+    normalize = table.get('normalize', False)
+
+    columns = table.get("columns", None)
      if not columns:
          logging.error(
-            f"No columns specified for {table.get(u'title', u'')}. Skipping."
+            f"No columns specified for {table.get('title', '')}. Skipping."
          )
          return
  
      cols = list()
      for idx, col in enumerate(columns):
-        if col.get(u"data-set", None) is None:
-            logging.warning(f"No data for column {col.get(u'title', u'')}")
+        if col.get("data-set", None) is None:
+            logging.warning(f"No data for column {col.get('title', '')}")
              continue
-        tag = col.get(u"tag", None)
+        tag = col.get("tag", None)
          data = input_data.filter_data(
              table,
              params=[
-                u"throughput",
-                u"result",
-                u"latency",
-                u"name",
-                u"parent",
-                u"tags"
+                "throughput",
+                "result",
+                "latency",
+                "name",
+                "parent",
+                "tags"
              ],
-            data=col[u"data-set"],
+            data=col["data-set"],
              continue_on_error=True
          )
          col_data = {
-            u"title": col.get(u"title", f"Column{idx}"),
-            u"data": dict()
+            "title": col.get("title", f"Column{idx}"),
+            "data": dict()
          }
          for builds in data.values:
              for build in builds:
                  for tst_name, tst_data in build.items():
-                    if tag and tag not in tst_data[u"tags"]:
+                    if tag and tag not in tst_data["tags"]:
                          continue
                      tst_name_mod = \
                          _tpc_modify_test_name(tst_name, ignore_nic=True).\
-                        replace(u"2n1l-", u"")
-                    if col_data[u"data"].get(tst_name_mod, None) is None:
-                        name = tst_data[u'name'].rsplit(u'-', 1)[0]
-                        if u"across testbeds" in table[u"title"].lower() or \
-                                u"across topologies" in table[u"title"].lower():
+                        replace("2n1l-", "")
+                    if col_data["data"].get(tst_name_mod, None) is None:
+                        name = tst_data['name'].rsplit('-', 1)[0]
+                        if "across testbeds" in table["title"].lower() or \
+                                "across topologies" in table["title"].lower():
                              name = _tpc_modify_displayed_test_name(name)
-                        col_data[u"data"][tst_name_mod] = {
-                            u"name": name,
-                            u"replace": True,
-                            u"data": list(),
-                            u"mean": None,
-                            u"stdev": None
+                        col_data["data"][tst_name_mod] = {
+                            "name": name,
+                            "replace": True,
+                            "data": list(),
+                            "mean": None,
+                            "stdev": None
                          }
                      _tpc_insert_data(
-                        target=col_data[u"data"][tst_name_mod],
+                        target=col_data["data"][tst_name_mod],
                          src=tst_data,
-                        include_tests=table[u"include-tests"]
+                        include_tests=table["include-tests"]
                      )
  
-        replacement = col.get(u"data-replacement", None)
+        replacement = col.get("data-replacement", None)
          if replacement:
              rpl_data = input_data.filter_data(
                  table,
                  params=[
-                    u"throughput",
-                    u"result",
-                    u"latency",
-                    u"name",
-                    u"parent",
-                    u"tags"
+                    "throughput",
+                    "result",
+                    "latency",
+                    "name",
+                    "parent",
+                    "tags"
                  ],
                  data=replacement,
                  continue_on_error=True
@@ -1804,74 +1816,82 @@ def table_comparison(table, input_data):
              for builds in rpl_data.values:
                  for build in builds:
                      for tst_name, tst_data in build.items():
-                        if tag and tag not in tst_data[u"tags"]:
+                        if tag and tag not in tst_data["tags"]:
                              continue
                          tst_name_mod = \
                              _tpc_modify_test_name(tst_name, ignore_nic=True).\
-                            replace(u"2n1l-", u"")
-                        if col_data[u"data"].get(tst_name_mod, None) is None:
-                            name = tst_data[u'name'].rsplit(u'-', 1)[0]
-                            if u"across testbeds" in table[u"title"].lower() \
-                                    or u"across topologies" in \
-                                    table[u"title"].lower():
+                            replace("2n1l-", "")
+                        if col_data["data"].get(tst_name_mod, None) is None:
+                            name = tst_data['name'].rsplit('-', 1)[0]
+                            if "across testbeds" in table["title"].lower() \
+                                    or "across topologies" in \
+                                    table["title"].lower():
                                  name = _tpc_modify_displayed_test_name(name)
-                            col_data[u"data"][tst_name_mod] = {
-                                u"name": name,
-                                u"replace": False,
-                                u"data": list(),
-                                u"mean": None,
-                                u"stdev": None
+                            col_data["data"][tst_name_mod] = {
+                                "name": name,
+                                "replace": False,
+                                "data": list(),
+                                "mean": None,
+                                "stdev": None
                              }
-                        if col_data[u"data"][tst_name_mod][u"replace"]:
-                            col_data[u"data"][tst_name_mod][u"replace"] = False
-                            col_data[u"data"][tst_name_mod][u"data"] = list()
+                        if col_data["data"][tst_name_mod]["replace"]:
+                            col_data["data"][tst_name_mod]["replace"] = False
+                            col_data["data"][tst_name_mod]["data"] = list()
                          _tpc_insert_data(
-                            target=col_data[u"data"][tst_name_mod],
+                            target=col_data["data"][tst_name_mod],
                              src=tst_data,
-                            include_tests=table[u"include-tests"]
+                            include_tests=table["include-tests"]
                          )
  
-        if table[u"include-tests"] in (u"NDR", u"PDR", u"hoststack", u"vsap") \
-                or u"latency" in table[u"include-tests"]:
-            for tst_name, tst_data in col_data[u"data"].items():
-                if tst_data[u"data"]:
-                    tst_data[u"mean"] = mean(tst_data[u"data"])
-                    tst_data[u"stdev"] = stdev(tst_data[u"data"])
+        if table["include-tests"] in ("NDR", "PDR", "hoststack", "vsap") \
+                or "latency" in table["include-tests"]:
+            for tst_name, tst_data in col_data["data"].items():
+                if tst_data["data"]:
+                    tst_data["mean"] = mean(tst_data["data"])
+                    tst_data["stdev"] = stdev(tst_data["data"])
  
          cols.append(col_data)
  
      tbl_dict = dict()
      for col in cols:
-        for tst_name, tst_data in col[u"data"].items():
+        for tst_name, tst_data in col["data"].items():
              if tbl_dict.get(tst_name, None) is None:
                  tbl_dict[tst_name] = {
-                    "name": tst_data[u"name"]
+                    "name": tst_data["name"]
                  }
-            tbl_dict[tst_name][col[u"title"]] = {
-                u"mean": tst_data[u"mean"],
-                u"stdev": tst_data[u"stdev"]
+            tbl_dict[tst_name][col["title"]] = {
+                "mean": tst_data["mean"],
+                "stdev": tst_data["stdev"]
              }
  
      if not tbl_dict:
-        logging.warning(f"No data for table {table.get(u'title', u'')}!")
+        logging.warning(f"No data for table {table.get('title', '')}!")
          return
  
      tbl_lst = list()
      for tst_data in tbl_dict.values():
          row = [tst_data[u"name"], ]
          for col in cols:
-            row.append(tst_data.get(col[u"title"], None))
+            row_data = tst_data.get(col["title"], None)
+            if normalize and row_data:
+                groups = re.search(REGEX_TOPO_ARCH, col["title"])
+                topo_arch = groups.group(0) if groups else ""
+                norm_factor = table["norm_factor"].get(topo_arch, 1.0)
+                row_data_norm = row_data * norm_factor
+            else:
+                row_data_norm = row_data
+            row.append(row_data_norm)
          tbl_lst.append(row)
  
-    comparisons = table.get(u"comparisons", None)
+    comparisons = table.get("comparisons", None)
      rcas = list()
      if comparisons and isinstance(comparisons, list):
          for idx, comp in enumerate(comparisons):
              try:
-                col_ref = int(comp[u"reference"])
-                col_cmp = int(comp[u"compare"])
+                col_ref = int(comp["reference"])
+                col_cmp = int(comp["compare"])
              except KeyError:
-                logging.warning(u"Comparison: No references defined! Skipping.")
+                logging.warning("Comparison: No references defined! Skipping.")
                  comparisons.pop(idx)
                  continue
              if not (0 < col_ref <= len(cols) and 0 < col_cmp <= len(cols) or
@@ -1880,14 +1900,14 @@ def table_comparison(table, input_data):
                                  f"and/or compare={col_cmp}. Skipping.")
                  comparisons.pop(idx)
                  continue
-            rca_file_name = comp.get(u"rca-file", None)
+            rca_file_name = comp.get("rca-file", None)
              if rca_file_name:
                  try:
-                    with open(rca_file_name, u"r") as file_handler:
+                    with open(rca_file_name, "r") as file_handler:
                          rcas.append(
                              {
-                                u"title": f"RCA{idx + 1}",
-                                u"data": load(file_handler, Loader=FullLoader)
+                                "title": f"RCA{idx + 1}",
+                                "data": load(file_handler, Loader=FullLoader)
                              }
                          )
                  except (YAMLError, IOError) as err:
@@ -1907,28 +1927,28 @@ def table_comparison(table, input_data):
          for row in tbl_lst:
              new_row = deepcopy(row)
              for comp in comparisons:
-                ref_itm = row[int(comp[u"reference"])]
+                ref_itm = row[int(comp["reference"])]
                  if ref_itm is None and \
-                        comp.get(u"reference-alt", None) is not None:
-                    ref_itm = row[int(comp[u"reference-alt"])]
+                        comp.get("reference-alt", None) is not None:
+                    ref_itm = row[int(comp["reference-alt"])]
                  cmp_itm = row[int(comp[u"compare"])]
                  if ref_itm is not None and cmp_itm is not None and \
-                        ref_itm[u"mean"] is not None and \
-                        cmp_itm[u"mean"] is not None and \
-                        ref_itm[u"stdev"] is not None and \
-                        cmp_itm[u"stdev"] is not None:
+                        ref_itm["mean"] is not None and \
+                        cmp_itm["mean"] is not None and \
+                        ref_itm["stdev"] is not None and \
+                        cmp_itm["stdev"] is not None:
                      try:
                          delta, d_stdev = relative_change_stdev(
-                            ref_itm[u"mean"], cmp_itm[u"mean"],
-                            ref_itm[u"stdev"], cmp_itm[u"stdev"]
+                            ref_itm["mean"], cmp_itm["mean"],
+                            ref_itm["stdev"], cmp_itm["stdev"]
                          )
                      except ZeroDivisionError:
                          break
                      if delta is None or math.isnan(delta):
                          break
                      new_row.append({
-                        u"mean": delta * 1e6,
-                        u"stdev": d_stdev * 1e6
+                        "mean": delta * 1e6,
+                        "stdev": d_stdev * 1e6
                      })
                  else:
                      break
@@ -1937,7 +1957,7 @@ def table_comparison(table, input_data):
  
      try:
          tbl_cmp_lst.sort(key=lambda rel: rel[0], reverse=False)
-        tbl_cmp_lst.sort(key=lambda rel: rel[-1][u'mean'], reverse=True)
+        tbl_cmp_lst.sort(key=lambda rel: rel[-1]['mean'], reverse=True)
      except TypeError as err:
          logging.warning(f"Empty data element in table\n{tbl_cmp_lst}\n{err}")
  
@@ -1946,62 +1966,62 @@ def table_comparison(table, input_data):
          row = [line[0], ]
          for idx, itm in enumerate(line[1:]):
              if itm is None or not isinstance(itm, dict) or\
-                    itm.get(u'mean', None) is None or \
-                    itm.get(u'stdev', None) is None:
-                row.append(u"NT")
-                row.append(u"NT")
+                    itm.get('mean', None) is None or \
+                    itm.get('stdev', None) is None:
+                row.append("NT")
+                row.append("NT")
              else:
-                row.append(round(float(itm[u'mean']) / 1e6, 3))
-                row.append(round(float(itm[u'stdev']) / 1e6, 3))
+                row.append(round(float(itm['mean']) / 1e6, 3))
+                row.append(round(float(itm['stdev']) / 1e6, 3))
          for rca in rcas:
              if rca is None:
                  continue
-            rca_nr = rca[u"data"].get(row[0], u"-")
-            row.append(f"[{rca_nr}]" if rca_nr != u"-" else u"-")
+            rca_nr = rca["data"].get(row[0], "-")
+            row.append(f"[{rca_nr}]" if rca_nr != "-" else "-")
          tbl_for_csv.append(row)
  
-    header_csv = [u"Test Case", ]
+    header_csv = ["Test Case", ]
      for col in cols:
-        header_csv.append(f"Avg({col[u'title']})")
-        header_csv.append(f"Stdev({col[u'title']})")
+        header_csv.append(f"Avg({col['title']})")
+        header_csv.append(f"Stdev({col['title']})")
      for comp in comparisons:
          header_csv.append(
-            f"Avg({comp.get(u'title', u'')})"
+            f"Avg({comp.get('title', '')})"
          )
          header_csv.append(
-            f"Stdev({comp.get(u'title', u'')})"
+            f"Stdev({comp.get('title', '')})"
          )
      for rca in rcas:
          if rca:
-            header_csv.append(rca[u"title"])
+            header_csv.append(rca["title"])
  
-    legend_lst = table.get(u"legend", None)
+    legend_lst = table.get("legend", None)
      if legend_lst is None:
-        legend = u""
+        legend = ""
      else:
-        legend = u"\n" + u"\n".join(legend_lst) + u"\n"
+        legend = "\n" + "\n".join(legend_lst) + "\n"
  
-    footnote = u""
+    footnote = ""
      if rcas and any(rcas):
-        footnote += u"\nRoot Cause Analysis:\n"
+        footnote += "\nRoot Cause Analysis:\n"
          for rca in rcas:
              if rca:
-                footnote += f"{rca[u'data'].get(u'footnote', u'')}\n"
+                footnote += f"{rca['data'].get('footnote', '')}\n"
  
-    csv_file_name = f"{table[u'output-file']}-csv.csv"
-    with open(csv_file_name, u"wt", encoding='utf-8') as file_handler:
+    csv_file_name = f"{table['output-file']}-csv.csv"
+    with open(csv_file_name, "wt", encoding='utf-8') as file_handler:
          file_handler.write(
-            u",".join([f'"{itm}"' for itm in header_csv]) + u"\n"
+            ",".join([f'"{itm}"' for itm in header_csv]) + "\n"
          )
          for test in tbl_for_csv:
              file_handler.write(
-                u",".join([f'"{item}"' for item in test]) + u"\n"
+                ",".join([f'"{item}"' for item in test]) + "\n"
              )
          if legend_lst:
              for item in legend_lst:
                  file_handler.write(f'"{item}"\n')
          if footnote:
-            for itm in footnote.split(u"\n"):
+            for itm in footnote.split("\n"):
                  file_handler.write(f'"{itm}"\n')
  
      tbl_tmp = list()
@@ -2010,77 +2030,77 @@ def table_comparison(table, input_data):
          row = [line[0], ]
          for idx, itm in enumerate(line[1:]):
              if itm is None or not isinstance(itm, dict) or \
-                    itm.get(u'mean', None) is None or \
-                    itm.get(u'stdev', None) is None:
-                new_itm = u"NT"
+                    itm.get('mean', None) is None or \
+                    itm.get('stdev', None) is None:
+                new_itm = "NT"
              else:
                  if idx < len(cols):
                      new_itm = (
-                        f"{round(float(itm[u'mean']) / 1e6, 2)} "
-                        f"\u00B1{round(float(itm[u'stdev']) / 1e6, 2)}".
-                        replace(u"nan", u"NaN")
+                        f"{round(float(itm['mean']) / 1e6, 2)} "
+                        f"\u00B1{round(float(itm['stdev']) / 1e6, 2)}".
+                        replace("nan", "NaN")
                      )
                  else:
                      new_itm = (
-                        f"{round(float(itm[u'mean']) / 1e6, 2):+} "
-                        f"\u00B1{round(float(itm[u'stdev']) / 1e6, 2)}".
-                        replace(u"nan", u"NaN")
+                        f"{round(float(itm['mean']) / 1e6, 2):+} "
+                        f"\u00B1{round(float(itm['stdev']) / 1e6, 2)}".
+                        replace("nan", "NaN")
                      )
-            if len(new_itm.rsplit(u" ", 1)[-1]) > max_lens[idx]:
-                max_lens[idx] = len(new_itm.rsplit(u" ", 1)[-1])
+            if len(new_itm.rsplit(" ", 1)[-1]) > max_lens[idx]:
+                max_lens[idx] = len(new_itm.rsplit(" ", 1)[-1])
              row.append(new_itm)
  
          tbl_tmp.append(row)
  
-    header = [u"Test Case", ]
-    header.extend([col[u"title"] for col in cols])
-    header.extend([comp.get(u"title", u"") for comp in comparisons])
+    header = ["Test Case", ]
+    header.extend([col["title"] for col in cols])
+    header.extend([comp.get("title", "") for comp in comparisons])
  
      tbl_final = list()
      for line in tbl_tmp:
          row = [line[0], ]
          for idx, itm in enumerate(line[1:]):
-            if itm in (u"NT", u"NaN"):
+            if itm in ("NT", "NaN"):
                  row.append(itm)
                  continue
-            itm_lst = itm.rsplit(u"\u00B1", 1)
+            itm_lst = itm.rsplit("\u00B1", 1)
              itm_lst[-1] = \
-                f"{u' ' * (max_lens[idx] - len(itm_lst[-1]))}{itm_lst[-1]}"
-            itm_str = u"\u00B1".join(itm_lst)
+                f"{' ' * (max_lens[idx] - len(itm_lst[-1]))}{itm_lst[-1]}"
+            itm_str = "\u00B1".join(itm_lst)
  
              if idx >= len(cols):
                  # Diffs
                  rca = rcas[idx - len(cols)]
                  if rca:
                      # Add rcas to diffs
-                    rca_nr = rca[u"data"].get(row[0], None)
+                    rca_nr = rca["data"].get(row[0], None)
                      if rca_nr:
                          hdr_len = len(header[idx + 1]) - 1
                          if hdr_len < 19:
                              hdr_len = 19
                          rca_nr = f"[{rca_nr}]"
                          itm_str = (
-                            f"{u' ' * (4 - len(rca_nr))}{rca_nr}"
-                            f"{u' ' * (hdr_len - 4 - len(itm_str))}"
+                            f"{' ' * (4 - len(rca_nr))}{rca_nr}"
+                            f"{' ' * (hdr_len - 4 - len(itm_str))}"
                              f"{itm_str}"
                          )
              row.append(itm_str)
          tbl_final.append(row)
  
      # Generate csv tables:
-    csv_file_name = f"{table[u'output-file']}.csv"
+    csv_file_name = f"{table['output-file']}.csv"
      logging.info(f"    Writing the file {csv_file_name}")
-    with open(csv_file_name, u"wt", encoding='utf-8') as file_handler:
-        file_handler.write(u";".join(header) + u"\n")
+    with open(csv_file_name, "wt", encoding='utf-8') as file_handler:
+        file_handler.write(";".join(header) + "\n")
          for test in tbl_final:
-            file_handler.write(u";".join([str(item) for item in test]) + u"\n")
+            file_handler.write(";".join([str(item) for item in test]) + "\n")
  
      # Generate txt table:
-    txt_file_name = f"{table[u'output-file']}.txt"
+    txt_file_name = f"{table['output-file']}.txt"
      logging.info(f"    Writing the file {txt_file_name}")
-    convert_csv_to_pretty_txt(csv_file_name, txt_file_name, delimiter=u";")
+    convert_csv_to_pretty_txt(csv_file_name, txt_file_name, delimiter=";")
  
-    with open(txt_file_name, u'a', encoding='utf-8') as file_handler:
+    with open(txt_file_name, 'a', encoding='utf-8') as file_handler:
          file_handler.write(legend)
          file_handler.write(footnote)
  
@@ -2088,11 +2108,11 @@ def table_comparison(table, input_data):
      _tpc_generate_html_table(
          header,
          tbl_final,
-        table[u'output-file'],
+        table['output-file'],
          legend=legend,
          footnote=footnote,
          sort_data=False,
-        title=table.get(u"title", u"")
+        title=table.get("title", "")
      )