C-Dash: Add regexp filtering to comparison tables

[csit.git] / csit.infra.dash / app / cdash / comparisons / tables.py
diff --git a/csit.infra.dash/app/cdash/comparisons/tables.py b/csit.infra.dash/app/cdash/comparisons/tables.py

index b2bee7f..8c19d3c 100644 (file)
--- a/csit.infra.dash/app/cdash/comparisons/tables.py
+++ b/csit.infra.dash/app/cdash/comparisons/tables.py
@@ -18,6 +18,7 @@ import pandas as pd
  
  from numpy import mean, std
  from copy import deepcopy
+
  from ..utils.constants import Constants as C
  from ..utils.utils import relative_change_stdev
  
@@ -70,8 +71,12 @@ def select_comparison_data(
          for itm in data_in["test_id"].unique().tolist():
              itm_lst = itm.split(".")
              test = itm_lst[-1].rsplit("-", 1)[0]
+            if "hoststack" in itm:
+                test_type = f"hoststack-{ttype}"
+            else:
+                test_type = ttype
              df = data_in.loc[(data_in["test_id"] == itm)]
-            l_df = df[C.VALUE_ITER[ttype]].to_list()
+            l_df = df[C.VALUE_ITER[test_type]].to_list()
              if len(l_df) and isinstance(l_df[0], list):
                  tmp_df = list()
                  for l_itm in l_df:
@@ -85,7 +90,7 @@ def select_comparison_data(
              d_data["name"].append(f"{test.replace(f'{drv}-', '')}-{ttype}")
              d_data["mean"].append(int(mean_val * norm_factor))
              d_data["stdev"].append(int(std_val * norm_factor))
-            d_data["unit"].append(df[C.UNIT[ttype]].to_list()[0])
+            d_data["unit"].append(df[C.UNIT[test_type]].to_list()[0])
          return pd.DataFrame(d_data)
  
      lst_df = list()
@@ -108,11 +113,17 @@ def select_comparison_data(
  
          drv = "" if itm["driver"] == "dpdk" else itm["driver"].replace("_", "-")
          core = str() if itm["dut"] == "trex" else itm["core"].lower()
-        reg_id = \
-            f"^.*[.|-]{itm['nic']}.*{itm['frmsize'].lower()}-{core}-{drv}.*$"
+        ttype = "ndrpdr" if itm["ttype"] in ("NDR", "PDR", "Latency") \
+            else itm["ttype"].lower()
          tmp_df = tmp_df[
              (tmp_df.job.str.endswith(itm["tbed"])) &
-            (tmp_df.test_id.str.contains(reg_id, regex=True))
+            (tmp_df.test_id.str.contains(
+                (
+                    f"^.*[.|-]{itm['nic']}.*{itm['frmsize'].lower()}-"
+                    f"{core}-{drv}.*-{ttype}$"
+                ),
+                regex=True
+            ))
          ]
          if itm["driver"] == "dpdk":
              for drv in C.DRIVERS:
@@ -298,3 +309,65 @@ def comparison_table(
      )
  
      return (title, df_cmp)
+
+
+def filter_table_data(
+        store_table_data: list,
+        table_filter: str
+    ) -> list:
+    """Filter table data using user specified filter.
+
+    :param store_table_data: Table data represented as a list of records.
+    :param table_filter: User specified filter.
+    :type store_table_data: list
+    :type table_filter: str
+    :returns: Filtered table data represented as a list of records.
+    :rtype: list
+    """
+
+    # Nothing to filter unless both a filter and table data are present.
+    if not all((table_filter, store_table_data, )):
+        return store_table_data
+
+    def _split_filter_part(filter_part: str) -> tuple:
+        """Split a part of filter into column name, operator and value.
+        A "part of filter" is a string between "&&" operators.
+
+        :param filter_part: A part of filter.
+        :type filter_part: str
+        :returns: Column name, operator, value
+        :rtype: tuple[str, str, str|float]
+        """
+        for operator_type in C.OPERATORS:
+            for operator in operator_type:
+                if operator in filter_part:
+                    name_p, val_p = filter_part.split(operator, 1)
+                    name = name_p[name_p.find("{") + 1 : name_p.rfind("}")]
+                    val_p = val_p.strip()
+                    op_name = operator_type[0].strip()
+                    if val_p[:1] in ("'", '"', '`') and val_p[0] == val_p[-1]:
+                        value = val_p[1:-1].replace("\\" + val_p[0], val_p[0])
+                    elif op_name in ("contains", "datestartswith"):
+                        value = val_p  # String matchers need the raw text.
+                    else:
+                        try:
+                            value = float(val_p)
+                        except ValueError:
+                            value = val_p
+                    return name, op_name, value
+        return (None, None, None)
+
+    df = pd.DataFrame.from_records(store_table_data)
+    for filter_part in table_filter.split(" && "):
+        col_name, operator, filter_value = _split_filter_part(filter_part)
+        if operator == "contains":
+            df = df.loc[df[col_name].str.contains(filter_value, regex=True)]
+        elif operator in ("eq", "ne", "lt", "le", "gt", "ge"):
+            # These operators match pandas series operator method names.
+            df = df.loc[getattr(df[col_name], operator)(filter_value)]
+        elif operator == "datestartswith":
+            # This is a simplification of the front-end filtering logic,
+            # only works with complete fields in standard format.
+            # Currently not used in comparison tables.
+            df = df.loc[df[col_name].str.startswith(filter_value)]
+
+    return df.to_dict("records")