C-Dash: URLs to jobs and builds
[csit.git] / csit.infra.dash / app / cdash / report / graphs.py
index 44c57d4..e13ec54 100644 (file)
 """Implementation of graphs for iterative data.
 """
 
 """Implementation of graphs for iterative data.
 """
 
-
 import plotly.graph_objects as go
 import pandas as pd
 
 from copy import deepcopy
 import plotly.graph_objects as go
 import pandas as pd
 
 from copy import deepcopy
+from numpy import percentile
 
 from ..utils.constants import Constants as C
 from ..utils.utils import get_color, get_hdrh_latencies
 
 from ..utils.constants import Constants as C
 from ..utils.utils import get_color, get_hdrh_latencies
@@ -51,6 +51,8 @@ def select_iterative_data(data: pd.DataFrame, itm:dict) -> pd.DataFrame:
         test_type = "ndrpdr"
     elif itm["testtype"] == "mrr":
         test_type = "mrr"
         test_type = "ndrpdr"
     elif itm["testtype"] == "mrr":
         test_type = "mrr"
+    elif itm["testtype"] == "soak":
+        test_type = "soak"
     elif itm["area"] == "hoststack":
         test_type = "hoststack"
     df = data.loc[(
     elif itm["area"] == "hoststack":
         test_type = "hoststack"
     df = data.loc[(
@@ -74,7 +76,7 @@ def select_iterative_data(data: pd.DataFrame, itm:dict) -> pd.DataFrame:
 
 
 def graph_iterative(data: pd.DataFrame, sel: list, layout: dict,
 
 
 def graph_iterative(data: pd.DataFrame, sel: list, layout: dict,
-        normalize: bool=False) -> tuple:
+        normalize: bool=False, remove_outliers: bool=False) -> tuple:
     """Generate the statistical box graph with iterative data (MRR, NDR and PDR,
     for PDR also Latencies).
 
     """Generate the statistical box graph with iterative data (MRR, NDR and PDR,
     for PDR also Latencies).
 
@@ -83,23 +85,38 @@ def graph_iterative(data: pd.DataFrame, sel: list, layout: dict,
     :param layout: Layout of plot.ly graph.
     :param normalize: If True, the data is normalized to CPU frequency
         Constants.NORM_FREQUENCY.
     :param layout: Layout of plot.ly graph.
     :param normalize: If True, the data is normalized to CPU frequency
         Constants.NORM_FREQUENCY.
-    :param data: pandas.DataFrame
-    :param sel: list
-    :param layout: dict
-    :param normalize: bool
+    :param remove_outliers: If True, the outliers are removed before
+        generating the graphs.
+    :type data: pandas.DataFrame
+    :type sel: list
+    :type layout: dict
+    :type normalize: bool
+    :type remove_outliers: bool
     :returns: Tuple of graphs - throughput and latency.
     :rtype: tuple(plotly.graph_objects.Figure, plotly.graph_objects.Figure)
     """
 
     :returns: Tuple of graphs - throughput and latency.
     :rtype: tuple(plotly.graph_objects.Figure, plotly.graph_objects.Figure)
     """
 
-    def get_y_values(data, y_data_max, param, norm_factor, release=str()):
+    def get_y_values(data, y_data_max, param, norm_factor, release=str(),
+                     remove_outliers=False):
         if param == "result_receive_rate_rate_values":
         if param == "result_receive_rate_rate_values":
-            if release == "rls2402":
+            if release in ("rls2402", "rls2406", "rls2410"):
                 y_vals_raw = data["result_receive_rate_rate_avg"].to_list()
             else:
                 y_vals_raw = data[param].to_list()[0]
         else:
             y_vals_raw = data[param].to_list()
         y_data = [(y * norm_factor) for y in y_vals_raw]
                 y_vals_raw = data["result_receive_rate_rate_avg"].to_list()
             else:
                 y_vals_raw = data[param].to_list()[0]
         else:
             y_vals_raw = data[param].to_list()
         y_data = [(y * norm_factor) for y in y_vals_raw]
+
+        if remove_outliers:
+            try:
+                q1 = percentile(y_data, 25, method=C.COMP_PERCENTILE_METHOD)
+                q3 = percentile(y_data, 75, method=C.COMP_PERCENTILE_METHOD)
+                irq = q3 - q1
+                lif = q1 - C.COMP_OUTLIER_TYPE * irq
+                uif = q3 + C.COMP_OUTLIER_TYPE * irq
+                y_data = [i for i in y_data if i >= lif and i <= uif]
+            except TypeError:
+                pass
         try:
             y_data_max = max(max(y_data), y_data_max)
         except TypeError:
         try:
             y_data_max = max(max(y_data), y_data_max)
         except TypeError:
@@ -142,7 +159,12 @@ def graph_iterative(data: pd.DataFrame, sel: list, layout: dict,
         y_units.update(itm_data[C.UNIT[ttype]].unique().tolist())
 
         y_data, y_tput_max = get_y_values(
         y_units.update(itm_data[C.UNIT[ttype]].unique().tolist())
 
         y_data, y_tput_max = get_y_values(
-            itm_data, y_tput_max, C.VALUE_ITER[ttype], norm_factor, itm["rls"]
+            itm_data,
+            y_tput_max,
+            C.VALUE_ITER[ttype],
+            norm_factor,
+            itm["rls"],
+            remove_outliers
         )
 
         nr_of_samples = len(y_data)
         )
 
         nr_of_samples = len(y_data)
@@ -159,7 +181,7 @@ def graph_iterative(data: pd.DataFrame, sel: list, layout: dict,
             )
         }
 
             )
         }
 
-        if itm["testtype"] == "mrr" and itm["rls"] in ("rls2306", "rls2310"):
+        if itm["testtype"] == "mrr" and itm["rls"] == "rls2310":
             trial_run = "trial"
             metadata["csit-ref"] = (
                 f"{itm_data['job'].to_list()[0]}/",
             trial_run = "trial"
             metadata["csit-ref"] = (
                 f"{itm_data['job'].to_list()[0]}/",
@@ -170,6 +192,10 @@ def graph_iterative(data: pd.DataFrame, sel: list, layout: dict,
             trial_run = "run"
             for _, row in itm_data.iterrows():
                 metadata["csit-ref"] = f"{row['job']}/{row['build']}"
             trial_run = "run"
             for _, row in itm_data.iterrows():
                 metadata["csit-ref"] = f"{row['job']}/{row['build']}"
+                try:
+                    metadata["hosts"] = ", ".join(row["hosts"])
+                except (KeyError, TypeError):
+                    pass
                 customdata.append({"metadata": deepcopy(metadata)})
         tput_kwargs = dict(
             y=y_data,
                 customdata.append({"metadata": deepcopy(metadata)})
         tput_kwargs = dict(
             y=y_data,
@@ -187,12 +213,13 @@ def graph_iterative(data: pd.DataFrame, sel: list, layout: dict,
         )
         tput_traces.append(go.Box(**tput_kwargs))
 
         )
         tput_traces.append(go.Box(**tput_kwargs))
 
-        if ttype in ("ndr", "pdr", "mrr"):
+        if ttype in C.TESTS_WITH_BANDWIDTH:
             y_band, y_band_max = get_y_values(
                 itm_data,
                 y_band_max,
                 C.VALUE_ITER[f"{ttype}-bandwidth"],
             y_band, y_band_max = get_y_values(
                 itm_data,
                 y_band_max,
                 C.VALUE_ITER[f"{ttype}-bandwidth"],
-                norm_factor
+                norm_factor,
+                remove_outliers=remove_outliers
             )
             if not all(pd.isna(y_band)):
                 y_band_units.update(
             )
             if not all(pd.isna(y_band)):
                 y_band_units.update(
@@ -216,12 +243,13 @@ def graph_iterative(data: pd.DataFrame, sel: list, layout: dict,
                 x_band.append(idx + 1)
                 band_traces.append(go.Box(**band_kwargs))
 
                 x_band.append(idx + 1)
                 band_traces.append(go.Box(**band_kwargs))
 
-        if ttype == "pdr":
+        if ttype in C.TESTS_WITH_LATENCY:
             y_lat, y_lat_max = get_y_values(
                 itm_data,
                 y_lat_max,
                 C.VALUE_ITER["latency"],
             y_lat, y_lat_max = get_y_values(
                 itm_data,
                 y_lat_max,
                 C.VALUE_ITER["latency"],
-                1 / norm_factor
+                1 / norm_factor,
+                remove_outliers=remove_outliers
             )
             if not all(pd.isna(y_lat)):
                 customdata = list()
             )
             if not all(pd.isna(y_lat)):
                 customdata = list()
@@ -259,7 +287,7 @@ def graph_iterative(data: pd.DataFrame, sel: list, layout: dict,
         pl_tput["xaxis"]["ticktext"] = [str(i + 1) for i in range(len(sel))]
         pl_tput["yaxis"]["title"] = f"Throughput [{'|'.join(sorted(y_units))}]"
         if y_tput_max:
         pl_tput["xaxis"]["ticktext"] = [str(i + 1) for i in range(len(sel))]
         pl_tput["yaxis"]["title"] = f"Throughput [{'|'.join(sorted(y_units))}]"
         if y_tput_max:
-            pl_tput["yaxis"]["range"] = [0, int(y_tput_max) + 2e6]
+            pl_tput["yaxis"]["range"] = [0, int(y_tput_max) * 1.1]
         fig_tput = go.Figure(data=tput_traces, layout=pl_tput)
 
     if band_traces:
         fig_tput = go.Figure(data=tput_traces, layout=pl_tput)
 
     if band_traces:
@@ -269,7 +297,7 @@ def graph_iterative(data: pd.DataFrame, sel: list, layout: dict,
         pl_band["yaxis"]["title"] = \
             f"Bandwidth [{'|'.join(sorted(y_band_units))}]"
         if y_band_max:
         pl_band["yaxis"]["title"] = \
             f"Bandwidth [{'|'.join(sorted(y_band_units))}]"
         if y_band_max:
-            pl_band["yaxis"]["range"] = [0, int(y_band_max) + 2e9]
+            pl_band["yaxis"]["range"] = [0, int(y_band_max) * 1.1]
         fig_band = go.Figure(data=band_traces, layout=pl_band)
 
     if lat_traces:
         fig_band = go.Figure(data=band_traces, layout=pl_band)
 
     if lat_traces: