C-Dash: Fix anomaly detection for the news
[csit.git] / csit.infra.dash / app / cdash / trending / graphs.py
index 79e2697..ba94eef 100644 (file)
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""
+"""Implementation of graphs for trending data.
 """
 
 import plotly.graph_objects as go
 import pandas as pd
 
-import hdrh.histogram
-import hdrh.codec
-
 from ..utils.constants import Constants as C
-from ..utils.utils import classify_anomalies, get_color
-
-
-def _get_hdrh_latencies(row: pd.Series, name: str) -> dict:
-    """Get the HDRH latencies from the test data.
-
-    :param row: A row fron the data frame with test data.
-    :param name: The test name to be displayed as the graph title.
-    :type row: pandas.Series
-    :type name: str
-    :returns: Dictionary with HDRH latencies.
-    :rtype: dict
-    """
-
-    latencies = {"name": name}
-    for key in C.LAT_HDRH:
-        try:
-            latencies[key] = row[key]
-        except KeyError:
-            return None
-
-    return latencies
+from ..utils.utils import get_color, get_hdrh_latencies
+from ..utils.anomalies import classify_anomalies
 
 
 def select_trending_data(data: pd.DataFrame, itm: dict) -> pd.DataFrame:
@@ -68,14 +45,19 @@ def select_trending_data(data: pd.DataFrame, itm: dict) -> pd.DataFrame:
     else:
         return None
 
-    core = str() if itm["dut"] == "trex" else f"{itm['core']}"
-    ttype = "ndrpdr" if itm["testtype"] in ("ndr", "pdr") else itm["testtype"]
-
+    if itm["testtype"] in ("ndr", "pdr"):
+        test_type = "ndrpdr"
+    elif itm["testtype"] == "mrr":
+        test_type = "mrr"
+    elif itm["area"] == "hoststack":
+        test_type = "hoststack"
     df = data.loc[(
-        (data["test_type"] == ttype) &
+        (data["test_type"] == test_type) &
         (data["passed"] == True)
     )]
     df = df[df.job.str.endswith(f"{topo}-{arch}")]
+    core = str() if itm["dut"] == "trex" else f"{itm['core']}"
+    ttype = "ndrpdr" if itm["testtype"] in ("ndr", "pdr") else itm["testtype"]
     df = df[df.test_id.str.contains(
         f"^.*[.|-]{nic}.*{itm['framesize']}-{core}-{drv}{itm['test']}-{ttype}$",
         regex=True
@@ -136,13 +118,14 @@ def graph_trending(
 
         df = df.dropna(subset=[C.VALUE[ttype], ])
         if df.empty:
-            return list()
+            return list(), list()
 
         x_axis = df["start_time"].tolist()
-        if ttype == "pdr-lat":
+        if ttype == "latency":
             y_data = [(v / norm_factor) for v in df[C.VALUE[ttype]].tolist()]
         else:
             y_data = [(v * norm_factor) for v in df[C.VALUE[ttype]].tolist()]
+        units = df[C.UNIT[ttype]].unique().tolist()
 
         anomalies, trend_avg, trend_stdev = classify_anomalies(
             {k: v for k, v in zip(x_axis, y_data)}
@@ -157,6 +140,7 @@ def graph_trending(
                 f"date: {row['start_time'].strftime('%Y-%m-%d %H:%M:%S')}<br>"
                 f"<prop> [{row[C.UNIT[ttype]]}]: {y_data[idx]:,.0f}<br>"
                 f"<stdev>"
+                f"<additional-info>"
                 f"{d_type}-ref: {row['dut_version']}<br>"
                 f"csit-ref: {row['job']}/{row['build']}<br>"
                 f"hosts: {', '.join(row['hosts'])}"
@@ -167,13 +151,22 @@ def graph_trending(
                     f"{row['result_receive_rate_rate_stdev']:,.0f}<br>"
                 )
             else:
-                stdev = ""
+                stdev = str()
+            if ttype in ("hoststack-cps", "hoststack-rps"):
+                add_info = (
+                    f"bandwidth [{row[C.UNIT['hoststack-bps']]}]: "
+                    f"{row[C.VALUE['hoststack-bps']]:,.0f}<br>"
+                    f"latency [{row[C.UNIT['hoststack-lat']]}]: "
+                    f"{row[C.VALUE['hoststack-lat']]:,.0f}<br>"
+                )
+            else:
+                add_info = str()
             hover_itm = hover_itm.replace(
-                "<prop>", "latency" if ttype == "pdr-lat" else "average"
-            ).replace("<stdev>", stdev)
+                "<prop>", "latency" if ttype == "latency" else "average"
+            ).replace("<stdev>", stdev).replace("<additional-info>", add_info)
             hover.append(hover_itm)
-            if ttype == "pdr-lat":
-                customdata_samples.append(_get_hdrh_latencies(row, name))
+            if ttype == "latency":
+                customdata_samples.append(get_hdrh_latencies(row, name))
                 customdata.append({"name": name})
             else:
                 customdata_samples.append(
@@ -186,13 +179,13 @@ def graph_trending(
             d_type = "trex" if row["dut_type"] == "none" else row["dut_type"]
             hover_itm = (
                 f"date: {row['start_time'].strftime('%Y-%m-%d %H:%M:%S')}<br>"
-                f"trend [pps]: {avg:,.0f}<br>"
-                f"stdev [pps]: {stdev:,.0f}<br>"
+                f"trend [{row[C.UNIT[ttype]]}]: {avg:,.0f}<br>"
+                f"stdev [{row[C.UNIT[ttype]]}]: {stdev:,.0f}<br>"
                 f"{d_type}-ref: {row['dut_version']}<br>"
                 f"csit-ref: {row['job']}/{row['build']}<br>"
                 f"hosts: {', '.join(row['hosts'])}"
             )
-            if ttype == "pdr-lat":
+            if ttype == "latency":
                 hover_itm = hover_itm.replace("[pps]", "[us]")
             hover_trend.append(hover_itm)
 
@@ -246,7 +239,7 @@ def graph_trending(
                         f"trend [pps]: {trend_avg[idx]:,.0f}<br>"
                         f"classification: {anomaly}"
                     )
-                    if ttype == "pdr-lat":
+                    if ttype == "latency":
                         hover_itm = hover_itm.replace("[pps]", "[us]")
                     hover.append(hover_itm)
             anomaly_color.extend([0.0, 0.5, 1.0])
@@ -266,7 +259,7 @@ def graph_trending(
                         "symbol": "circle-open",
                         "color": anomaly_color,
                         "colorscale": C.COLORSCALE_LAT \
-                            if ttype == "pdr-lat" else C.COLORSCALE_TPUT,
+                            if ttype == "latency" else C.COLORSCALE_TPUT,
                         "showscale": True,
                         "line": {
                             "width": 2
@@ -279,7 +272,7 @@ def graph_trending(
                             "tickmode": "array",
                             "tickvals": [0.167, 0.500, 0.833],
                             "ticktext": C.TICK_TEXT_LAT \
-                                if ttype == "pdr-lat" else C.TICK_TEXT_TPUT,
+                                if ttype == "latency" else C.TICK_TEXT_TPUT,
                             "ticks": "",
                             "ticklen": 0,
                             "tickangle": -90,
@@ -289,11 +282,12 @@ def graph_trending(
                 )
             )
 
-        return traces
+        return traces, units
 
 
     fig_tput = None
     fig_lat = None
+    y_units = set()
     for idx, itm in enumerate(sel):
         df = select_trending_data(data, itm)
         if df is None or df.empty:
@@ -306,106 +300,50 @@ def graph_trending(
                 if topo_arch else 1.0
         else:
             norm_factor = 1.0
-        traces = _generate_trending_traces(itm["testtype"], itm["id"], df,
-            get_color(idx), norm_factor)
+
+        if itm["area"] == "hoststack":
+            ttype = f"hoststack-{itm['testtype']}"
+        else:
+            ttype = itm["testtype"]
+
+        traces, units = _generate_trending_traces(
+            ttype,
+            itm["id"],
+            df,
+            get_color(idx),
+            norm_factor
+        )
         if traces:
             if not fig_tput:
                 fig_tput = go.Figure()
             fig_tput.add_traces(traces)
 
         if itm["testtype"] == "pdr":
-            traces = _generate_trending_traces("pdr-lat", itm["id"], df,
-                get_color(idx), norm_factor)
+            traces, _ = _generate_trending_traces(
+                "latency",
+                itm["id"],
+                df,
+                get_color(idx),
+                norm_factor
+            )
             if traces:
                 if not fig_lat:
                     fig_lat = go.Figure()
                 fig_lat.add_traces(traces)
 
+        y_units.update(units)
+
     if fig_tput:
-        fig_tput.update_layout(layout.get("plot-trending-tput", dict()))
+        fig_layout = layout.get("plot-trending-tput", dict())
+        fig_layout["yaxis"]["title"] = \
+            f"Throughput [{'|'.join(sorted(y_units))}]"
+        fig_tput.update_layout(fig_layout)
     if fig_lat:
         fig_lat.update_layout(layout.get("plot-trending-lat", dict()))
 
     return fig_tput, fig_lat
 
 
-def graph_hdrh_latency(data: dict, layout: dict) -> go.Figure:
-    """Generate HDR Latency histogram graphs.
-
-    :param data: HDRH data.
-    :param layout: Layout of plot.ly graph.
-    :type data: dict
-    :type layout: dict
-    :returns: HDR latency Histogram.
-    :rtype: plotly.graph_objects.Figure
-    """
-
-    fig = None
-
-    traces = list()
-    for idx, (lat_name, lat_hdrh) in enumerate(data.items()):
-        try:
-            decoded = hdrh.histogram.HdrHistogram.decode(lat_hdrh)
-        except (hdrh.codec.HdrLengthException, TypeError):
-            continue
-        previous_x = 0.0
-        prev_perc = 0.0
-        xaxis = list()
-        yaxis = list()
-        hovertext = list()
-        for item in decoded.get_recorded_iterator():
-            # The real value is "percentile".
-            # For 100%, we cut that down to "x_perc" to avoid
-            # infinity.
-            percentile = item.percentile_level_iterated_to
-            x_perc = min(percentile, C.PERCENTILE_MAX)
-            xaxis.append(previous_x)
-            yaxis.append(item.value_iterated_to)
-            hovertext.append(
-                f"<b>{C.GRAPH_LAT_HDRH_DESC[lat_name]}</b><br>"
-                f"Direction: {('W-E', 'E-W')[idx % 2]}<br>"
-                f"Percentile: {prev_perc:.5f}-{percentile:.5f}%<br>"
-                f"Latency: {item.value_iterated_to}uSec"
-            )
-            next_x = 100.0 / (100.0 - x_perc)
-            xaxis.append(next_x)
-            yaxis.append(item.value_iterated_to)
-            hovertext.append(
-                f"<b>{C.GRAPH_LAT_HDRH_DESC[lat_name]}</b><br>"
-                f"Direction: {('W-E', 'E-W')[idx % 2]}<br>"
-                f"Percentile: {prev_perc:.5f}-{percentile:.5f}%<br>"
-                f"Latency: {item.value_iterated_to}uSec"
-            )
-            previous_x = next_x
-            prev_perc = percentile
-
-        traces.append(
-            go.Scatter(
-                x=xaxis,
-                y=yaxis,
-                name=C.GRAPH_LAT_HDRH_DESC[lat_name],
-                mode="lines",
-                legendgroup=C.GRAPH_LAT_HDRH_DESC[lat_name],
-                showlegend=bool(idx % 2),
-                line=dict(
-                    color=get_color(int(idx/2)),
-                    dash="solid",
-                    width=1 if idx % 2 else 2
-                ),
-                hovertext=hovertext,
-                hoverinfo="text"
-            )
-        )
-    if traces:
-        fig = go.Figure()
-        fig.add_traces(traces)
-        layout_hdrh = layout.get("plot-hdrh-latency", None)
-        if lat_hdrh:
-            fig.update_layout(layout_hdrh)
-
-    return fig
-
-
 def graph_tm_trending(data: pd.DataFrame, layout: dict) -> list:
     """Generates one trending graph per test, each graph includes all selected
     metrics.
@@ -449,10 +387,33 @@ def graph_tm_trending(data: pd.DataFrame, layout: dict) -> list:
             y_data = [float(itm) for itm in df["tm_value"].tolist()]
             hover = list()
             for i, (_, row) in enumerate(df.iterrows()):
+                if row["test_type"] == "mrr":
+                    rate = (
+                        f"mrr avg [{row[C.UNIT['mrr']]}]: "
+                        f"{row[C.VALUE['mrr']]:,.0f}<br>"
+                        f"mrr stdev [{row[C.UNIT['mrr']]}]: "
+                        f"{row['result_receive_rate_rate_stdev']:,.0f}<br>"
+                    )
+                elif row["test_type"] == "ndrpdr":
+                    if "-pdr" in test:
+                        rate = (
+                            f"pdr [{row[C.UNIT['pdr']]}]: "
+                            f"{row[C.VALUE['pdr']]:,.0f}<br>"
+                        )
+                    elif "-ndr" in test:
+                        rate = (
+                            f"ndr [{row[C.UNIT['ndr']]}]: "
+                            f"{row[C.VALUE['ndr']]:,.0f}<br>"
+                        )
+                    else:
+                        rate = str()
+                else:
+                    rate = str()
                 hover.append(
                     f"date: "
                     f"{row['start_time'].strftime('%Y-%m-%d %H:%M:%S')}<br>"
                     f"value: {y_data[i]:,.0f}<br>"
+                    f"{rate}"
                     f"{row['dut_type']}-ref: {row['dut_version']}<br>"
                     f"csit-ref: {row['job']}/{row['build']}<br>"
                 )