feat(terraform): Refactor ETL
[csit.git] / csit.infra.dash / app / cdash / coverage / tables.py
index a34b80f..d38e51e 100644 (file)
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -14,6 +14,7 @@
 """The coverage data tables.
 """
 
+
 import hdrh.histogram
 import hdrh.codec
 import pandas as pd
@@ -28,7 +29,8 @@ from ..utils.constants import Constants as C
 def select_coverage_data(
         data: pd.DataFrame,
         selected: dict,
-        csv: bool=False
+        csv: bool=False,
+        show_latency: bool=True
     ) -> list:
     """Select coverage data for the tables and generate tables as pandas data
     frames.
@@ -37,9 +39,11 @@ def select_coverage_data(
     :param selected: Dictionary with user selection.
     :param csv: If True, pandas data frame with selected coverage data is
         returned for "Download Data" feature.
+    :param show_latency: If True, latency is displayed in the tables.
     :type data: pandas.DataFrame
     :type selected: dict
     :type csv: bool
+    :type show_latency: bool
 :returns: Tuple with a list of tuples (suite name (str), suite data
     (pandas dataframe)) and the test type (str, or None if no data was
     selected); or a pandas dataframe if csv is True.
 :rtype: tuple[list[tuple[str, pandas.DataFrame]], str] or pandas.DataFrame
@@ -48,12 +52,12 @@ def select_coverage_data(
     l_data = list()
 
     # Filter data selected by the user.
-    phy = selected["phy"].split("-")
-    if len(phy) == 4:
-        topoarch, nic, drv = phy
-        drv = "" if drv == "dpdk" else drv.replace("_", "-")
+    phy = selected["phy"].rsplit("-", maxsplit=2)
+    if len(phy) == 3:
+        topo_arch, nic, drv = phy
+        drv_str = "" if drv == "dpdk" else drv.replace("_", "-")
     else:
-        return l_data
+        return l_data, None
 
     df = pd.DataFrame(data.loc[(
         (data["passed"] == True) &
@@ -62,9 +66,9 @@ def select_coverage_data(
         (data["release"] == selected["rls"])
     )])
     df = df[
-        (df.job.str.endswith(f"{topo}-{arch}")) &
+        (df.job.str.endswith(topo_arch)) &
         (df.test_id.str.contains(
-            f"^.*\.{selected['area']}\..*{nic}.*{drv}.*$",
+            f"^.*\.{selected['area']}\..*{nic}.*{drv_str}.*$",
             regex=True
         ))
     ]
@@ -74,11 +78,13 @@ def select_coverage_data(
                 df[df.test_id.str.contains(f"-{driver}-")].index,
                 inplace=True
             )
-
-    ttype = df["test_type"].to_list()[0]
+    try:
+        ttype = df["test_type"].to_list()[0]
+    except IndexError:
+        return l_data, None
 
     # Prepare the coverage data
-    def _laten(hdrh_string: str, percentile: float) -> int:
+    def _latency(hdrh_string: str, percentile: float) -> int:
         """Get latency from HDRH string for given percentile.
 
         :param hdrh_string: Encoded HDRH string.
@@ -112,92 +118,38 @@ def select_coverage_data(
 
     if ttype == "device":
         cov = cov.assign(Result="PASS")
-    else:
+    elif ttype == "mrr":
+        cov["Throughput_Unit"] = df["result_receive_rate_rate_unit"]
+        cov["Throughput_AVG"] = df.apply(
+            lambda row: row["result_receive_rate_rate_avg"] / 1e9, axis=1
+        )
+        cov["Throughput_STDEV"] = df.apply(
+            lambda row: row["result_receive_rate_rate_stdev"] / 1e9, axis=1
+        )
+    else:  # NDRPDR
         cov["Throughput_Unit"] = df["result_pdr_lower_rate_unit"]
         cov["Throughput_NDR"] = df.apply(
             lambda row: row["result_ndr_lower_rate_value"] / 1e6, axis=1
         )
-        cov["Throughput_NDR_Mbps"] = df.apply(
-            lambda row: row["result_ndr_lower_bandwidth_value"] /1e9, axis=1
+        cov["Throughput_NDR_Gbps"] = df.apply(
+            lambda row: row["result_ndr_lower_bandwidth_value"] / 1e9, axis=1
         )
         cov["Throughput_PDR"] = df.apply(
             lambda row: row["result_pdr_lower_rate_value"] / 1e6, axis=1
         )
-        cov["Throughput_PDR_Mbps"] = df.apply(
-            lambda row: row["result_pdr_lower_bandwidth_value"] /1e9, axis=1
-        )
-        cov["Latency Forward [us]_10% PDR_P50"] = df.apply(
-            lambda row: _laten(row["result_latency_forward_pdr_10_hdrh"], 50.0),
-            axis=1
-        )
-        cov["Latency Forward [us]_10% PDR_P90"] = df.apply(
-            lambda row: _laten(row["result_latency_forward_pdr_10_hdrh"], 90.0),
-            axis=1
-        )
-        cov["Latency Forward [us]_10% PDR_P99"] = df.apply(
-            lambda row: _laten(row["result_latency_forward_pdr_10_hdrh"], 99.0),
-            axis=1
-        )
-        cov["Latency Forward [us]_50% PDR_P50"] = df.apply(
-            lambda row: _laten(row["result_latency_forward_pdr_50_hdrh"], 50.0),
-            axis=1
-        )
-        cov["Latency Forward [us]_50% PDR_P90"] = df.apply(
-            lambda row: _laten(row["result_latency_forward_pdr_50_hdrh"], 90.0),
-            axis=1
-        )
-        cov["Latency Forward [us]_50% PDR_P99"] = df.apply(
-            lambda row: _laten(row["result_latency_forward_pdr_50_hdrh"], 99.0),
-            axis=1
-        )
-        cov["Latency Forward [us]_90% PDR_P50"] = df.apply(
-            lambda row: _laten(row["result_latency_forward_pdr_90_hdrh"], 50.0),
-            axis=1
-        )
-        cov["Latency Forward [us]_90% PDR_P90"] = df.apply(
-            lambda row: _laten(row["result_latency_forward_pdr_90_hdrh"], 90.0),
-            axis=1
-        )
-        cov["Latency Forward [us]_90% PDR_P99"] = df.apply(
-            lambda row: _laten(row["result_latency_forward_pdr_90_hdrh"], 99.0),
-            axis=1
-        )
-        cov["Latency Reverse [us]_10% PDR_P50"] = df.apply(
-            lambda row: _laten(row["result_latency_reverse_pdr_10_hdrh"], 50.0),
-            axis=1
-        )
-        cov["Latency Reverse [us]_10% PDR_P90"] = df.apply(
-            lambda row: _laten(row["result_latency_reverse_pdr_10_hdrh"], 90.0),
-            axis=1
-        )
-        cov["Latency Reverse [us]_10% PDR_P99"] = df.apply(
-            lambda row: _laten(row["result_latency_reverse_pdr_10_hdrh"], 99.0),
-            axis=1
-        )
-        cov["Latency Reverse [us]_50% PDR_P50"] = df.apply(
-            lambda row: _laten(row["result_latency_reverse_pdr_50_hdrh"], 50.0),
-            axis=1
-        )
-        cov["Latency Reverse [us]_50% PDR_P90"] = df.apply(
-            lambda row: _laten(row["result_latency_reverse_pdr_50_hdrh"], 90.0),
-            axis=1
-        )
-        cov["Latency Reverse [us]_50% PDR_P99"] = df.apply(
-            lambda row: _laten(row["result_latency_reverse_pdr_50_hdrh"], 99.0),
-            axis=1
-        )
-        cov["Latency Reverse [us]_90% PDR_P50"] = df.apply(
-            lambda row: _laten(row["result_latency_reverse_pdr_90_hdrh"], 50.0),
-            axis=1
-        )
-        cov["Latency Reverse [us]_90% PDR_P90"] = df.apply(
-            lambda row: _laten(row["result_latency_reverse_pdr_90_hdrh"], 90.0),
-            axis=1
-        )
-        cov["Latency Reverse [us]_90% PDR_P99"] = df.apply(
-            lambda row: _laten(row["result_latency_reverse_pdr_90_hdrh"], 99.0),
-            axis=1
+        cov["Throughput_PDR_Gbps"] = df.apply(
+            lambda row: row["result_pdr_lower_bandwidth_value"] / 1e9, axis=1
         )
+        if show_latency:
+            for way in ("Forward", "Reverse"):
+                for pdr in (10, 50, 90):
+                    for perc in (50, 90, 99):
+                        latency = f"result_latency_{way.lower()}_pdr_{pdr}_hdrh"
+                        cov[f"Latency {way} [us]_{pdr}% PDR_P{perc}"] = \
+                            df.apply(
+                                lambda row: _latency(row[latency], perc),
+                                axis=1
+                            )
 
     if csv:
         return cov
@@ -211,7 +163,9 @@ def select_coverage_data(
             df_suite.rename(
                 columns={
                     "Throughput_NDR": f"Throughput_NDR_M{unit}",
-                    "Throughput_PDR": f"Throughput_PDR_M{unit}"
+                    "Throughput_PDR": f"Throughput_PDR_M{unit}",
+                    "Throughput_AVG": f"Throughput_G{unit}_AVG",
+                    "Throughput_STDEV": f"Throughput_G{unit}_STDEV"
                 },
                 inplace=True
             )
@@ -219,23 +173,35 @@ def select_coverage_data(
 
         l_data.append((suite, df_suite, ))
 
-    return l_data
+    return l_data, ttype
 
 
-def coverage_tables(data: pd.DataFrame, selected: dict) -> list:
+def coverage_tables(
+        data: pd.DataFrame,
+        selected: dict,
+        show_latency: bool=True,
+        start_collapsed: bool=True
+    ) -> dbc.Accordion:
     """Generate an accordion with coverage tables.
 
     :param data: Coverage data.
     :param selected: Dictionary with user selection.
+    :param show_latency: If True, latency is displayed in the tables.
+    :param start_collapsed: If True, the accordion with tables is collapsed when
+        displayed.
     :type data: pandas.DataFrame
     :type selected: dict
+    :type show_latency: bool
+    :type start_collapsed: bool
     :returns: Accordion with suite names (titles) and tables.
     :rtype: dash_bootstrap_components.Accordion
     """
 
     accordion_items = list()
-    for suite, cov_data in select_coverage_data(data, selected):
-        if len(cov_data.columns) == 3:  # VPP Device
+    sel_data, ttype = \
+        select_coverage_data(data, selected, show_latency=show_latency)
+    for suite, cov_data in sel_data:
+        if ttype == "device":  # VPP Device
             cols = [
                 {
                     "name": col,
@@ -252,7 +218,34 @@ def coverage_tables(data: pd.DataFrame, selected: dict) -> list:
                     "textAlign": "right"
                 }
             ]
-        else:  # Performance
+        elif ttype == "mrr":  # MRR
+            cols = list()
+            for idx, col in enumerate(cov_data.columns):
+                if idx == 0:
+                    cols.append({
+                        "name": ["", "", col],
+                        "id": col,
+                        "deletable": False,
+                        "selectable": False,
+                        "type": "text"
+                    })
+                else:
+                    cols.append({
+                        "name": col.split("_"),
+                        "id": col,
+                        "deletable": False,
+                        "selectable": False,
+                        "type": "numeric",
+                        "format": Format(precision=2, scheme=Scheme.fixed)
+                    })
+            style_cell={"textAlign": "right"}
+            style_cell_conditional=[
+                {
+                    "if": {"column_id": "Test Name"},
+                    "textAlign": "left"
+                }
+            ]
+        else:  # Performance NDRPDR
             cols = list()
             for idx, col in enumerate(cov_data.columns):
                 if idx == 0:
@@ -308,9 +301,15 @@ def coverage_tables(data: pd.DataFrame, selected: dict) -> list:
                 )
             )
         )
+    if not accordion_items:
+        accordion_items.append(dbc.AccordionItem(
+            title="No data.",
+            children="No data."
+        ))
+        start_collapsed = True
     return dbc.Accordion(
         children=accordion_items,
         class_name="gy-1 p-0",
-        start_collapsed=True,
+        start_collapsed=start_collapsed,
         always_open=True
     )