UTI: Move common functions to utils.
[csit.git] / resources / tools / dash / app / pal / trending / graphs.py
index 6e0bcb5..4cd8285 100644 (file)
 
 import plotly.graph_objects as go
 import pandas as pd
-import re
 
 import hdrh.histogram
 import hdrh.codec
 
 from datetime import datetime
-from numpy import isnan
-
-from ..jumpavg import classify
-
-
-_COLORS = (
-    u"#1A1110", u"#DA2647", u"#214FC6", u"#01786F", u"#BD8260", u"#FFD12A",
-    u"#A6E7FF", u"#738276", u"#C95A49", u"#FC5A8D", u"#CEC8EF", u"#391285",
-    u"#6F2DA8", u"#FF878D", u"#45A27D", u"#FFD0B9", u"#FD5240", u"#DB91EF",
-    u"#44D7A8", u"#4F86F7", u"#84DE02", u"#FFCFF1", u"#614051"
-)
-_ANOMALY_COLOR = {
-    u"regression": 0.0,
-    u"normal": 0.5,
-    u"progression": 1.0
-}
-_COLORSCALE_TPUT = [
-    [0.00, u"red"],
-    [0.33, u"red"],
-    [0.33, u"white"],
-    [0.66, u"white"],
-    [0.66, u"green"],
-    [1.00, u"green"]
-]
-_TICK_TEXT_TPUT = [u"Regression", u"Normal", u"Progression"]
-_COLORSCALE_LAT = [
-    [0.00, u"green"],
-    [0.33, u"green"],
-    [0.33, u"white"],
-    [0.66, u"white"],
-    [0.66, u"red"],
-    [1.00, u"red"]
-]
-_TICK_TEXT_LAT = [u"Progression", u"Normal", u"Regression"]
-_VALUE = {
-    "mrr": "result_receive_rate_rate_avg",
-    "ndr": "result_ndr_lower_rate_value",
-    "pdr": "result_pdr_lower_rate_value",
-    "pdr-lat": "result_latency_forward_pdr_50_avg"
-}
-_UNIT = {
-    "mrr": "result_receive_rate_rate_unit",
-    "ndr": "result_ndr_lower_rate_unit",
-    "pdr": "result_pdr_lower_rate_unit",
-    "pdr-lat": "result_latency_forward_pdr_50_unit"
-}
-_LAT_HDRH = (  # Do not change the order
-    "result_latency_forward_pdr_0_hdrh",
-    "result_latency_reverse_pdr_0_hdrh",
-    "result_latency_forward_pdr_10_hdrh",
-    "result_latency_reverse_pdr_10_hdrh",
-    "result_latency_forward_pdr_50_hdrh",
-    "result_latency_reverse_pdr_50_hdrh",
-    "result_latency_forward_pdr_90_hdrh",
-    "result_latency_reverse_pdr_90_hdrh",
-)
-# This value depends on latency stream rate (9001 pps) and duration (5s).
-# Keep it slightly higher to ensure rounding errors to not remove tick mark.
-PERCENTILE_MAX = 99.999501
-
-_GRAPH_LAT_HDRH_DESC = {
-    u"result_latency_forward_pdr_0_hdrh": u"No-load.",
-    u"result_latency_reverse_pdr_0_hdrh": u"No-load.",
-    u"result_latency_forward_pdr_10_hdrh": u"Low-load, 10% PDR.",
-    u"result_latency_reverse_pdr_10_hdrh": u"Low-load, 10% PDR.",
-    u"result_latency_forward_pdr_50_hdrh": u"Mid-load, 50% PDR.",
-    u"result_latency_reverse_pdr_50_hdrh": u"Mid-load, 50% PDR.",
-    u"result_latency_forward_pdr_90_hdrh": u"High-load, 90% PDR.",
-    u"result_latency_reverse_pdr_90_hdrh": u"High-load, 90% PDR."
-}
+
+from ..utils.constants import Constants as C
+from ..utils.utils import classify_anomalies, get_color
 
 
 def _get_hdrh_latencies(row: pd.Series, name: str) -> dict:
@@ -99,7 +31,7 @@ def _get_hdrh_latencies(row: pd.Series, name: str) -> dict:
     """
 
     latencies = {"name": name}
-    for key in _LAT_HDRH:
+    for key in C.LAT_HDRH:
         try:
             latencies[key] = row[key]
         except KeyError:
@@ -108,56 +40,6 @@ def _get_hdrh_latencies(row: pd.Series, name: str) -> dict:
     return latencies
 
 
-def _classify_anomalies(data):
-    """Process the data and return anomalies and trending values.
-
-    Gather data into groups with average as trend value.
-    Decorate values within groups to be normal,
-    the first value of changed average as a regression, or a progression.
-
-    :param data: Full data set with unavailable samples replaced by nan.
-    :type data: OrderedDict
-    :returns: Classification and trend values
-    :rtype: 3-tuple, list of strings, list of floats and list of floats
-    """
-    # NaN means something went wrong.
-    # Use 0.0 to cause that being reported as a severe regression.
-    bare_data = [0.0 if isnan(sample) else sample for sample in data.values()]
-    # TODO: Make BitCountingGroupList a subclass of list again?
-    group_list = classify(bare_data).group_list
-    group_list.reverse()  # Just to use .pop() for FIFO.
-    classification = list()
-    avgs = list()
-    stdevs = list()
-    active_group = None
-    values_left = 0
-    avg = 0.0
-    stdv = 0.0
-    for sample in data.values():
-        if isnan(sample):
-            classification.append(u"outlier")
-            avgs.append(sample)
-            stdevs.append(sample)
-            continue
-        if values_left < 1 or active_group is None:
-            values_left = 0
-            while values_left < 1:  # Ignore empty groups (should not happen).
-                active_group = group_list.pop()
-                values_left = len(active_group.run_list)
-            avg = active_group.stats.avg
-            stdv = active_group.stats.stdev
-            classification.append(active_group.comment)
-            avgs.append(avg)
-            stdevs.append(stdv)
-            values_left -= 1
-            continue
-        classification.append(u"normal")
-        avgs.append(avg)
-        stdevs.append(stdv)
-        values_left -= 1
-    return classification, avgs, stdevs
-
-
 def select_trending_data(data: pd.DataFrame, itm:dict) -> pd.DataFrame:
     """
     """
@@ -165,64 +47,80 @@ def select_trending_data(data: pd.DataFrame, itm:dict) -> pd.DataFrame:
     phy = itm["phy"].split("-")
     if len(phy) == 4:
         topo, arch, nic, drv = phy
-        if drv in ("dpdk", "ixgbe"):
+        if drv == "dpdk":
             drv = ""
         else:
             drv += "-"
             drv = drv.replace("_", "-")
     else:
         return None
-    cadence = \
-        "weekly" if (arch == "aws" or itm["testtype"] != "mrr") else "daily"
-    sel_topo_arch = (
-        f"csit-vpp-perf-"
-        f"{itm['testtype'] if itm['testtype'] == 'mrr' else 'ndrpdr'}-"
-        f"{cadence}-master-{topo}-{arch}"
-    )
-    df_sel = data.loc[(data["job"] == sel_topo_arch)]
-    regex = (
-        f"^.*{nic}.*\.{itm['framesize']}-{itm['core']}-{drv}{itm['test']}-"
-        f"{'mrr' if itm['testtype'] == 'mrr' else 'ndrpdr'}$"
-    )
-    df = df_sel.loc[
-        df_sel["test_id"].apply(
-            lambda x: True if re.search(regex, x) else False
-        )
-    ].sort_values(by="start_time", ignore_index=True)
+
+    core = str() if itm["dut"] == "trex" else f"{itm['core']}"
+    ttype = "ndrpdr" if itm["testtype"] in ("ndr", "pdr") else itm["testtype"]
+    dut_v100 = "none" if itm["dut"] == "trex" else itm["dut"]
+    dut_v101 = itm["dut"]
+
+    df = data.loc[(
+        (
+            (
+                (data["version"] == "1.0.0") &
+                (data["dut_type"].str.lower() == dut_v100)
+            ) |
+            (
+                (data["version"] == "1.0.1") &
+                (data["dut_type"].str.lower() == dut_v101)
+            )
+        ) &
+        (data["test_type"] == ttype) &
+        (data["passed"] == True)
+    )]
+    df = df[df.job.str.endswith(f"{topo}-{arch}")]
+    df = df[df.test_id.str.contains(
+        f"^.*[.-]{nic}.*{itm['framesize']}-{core}-{drv}{itm['test']}-{ttype}$",
+        regex=True  # "[.-]": the NIC name must follow a literal "." or "-"
+    )].sort_values(by="start_time", ignore_index=True)
 
     return df
 
 
 def _generate_trending_traces(ttype: str, name: str, df: pd.DataFrame,
-    start: datetime, end: datetime, color: str) -> list:
+    start: datetime, end: datetime, color: str, norm_factor: float) -> list:
     """
     """
 
-    df = df.dropna(subset=[_VALUE[ttype], ])
+    df = df.dropna(subset=[C.VALUE[ttype], ])
+    if df.empty:
+        return list()
+    df = df.loc[((df["start_time"] >= start) & (df["start_time"] <= end))]
     if df.empty:
         return list()
 
-    x_axis = [d for d in df["start_time"] if d >= start and d <= end]
+    x_axis = df["start_time"].tolist()
+    if ttype == "pdr-lat":
+        y_data = [(itm / norm_factor) for itm in df[C.VALUE[ttype]].tolist()]
+    else:
+        y_data = [(itm * norm_factor) for itm in df[C.VALUE[ttype]].tolist()]
 
-    anomalies, trend_avg, trend_stdev = _classify_anomalies(
-        {k: v for k, v in zip(x_axis, df[_VALUE[ttype]])}
+    anomalies, trend_avg, trend_stdev = classify_anomalies(
+        {k: v for k, v in zip(x_axis, y_data)}
     )
 
     hover = list()
     customdata = list()
-    for _, row in df.iterrows():
+    for idx, (_, row) in enumerate(df.iterrows()):
+        d_type = "trex" if row["dut_type"] == "none" else row["dut_type"]
         hover_itm = (
-            f"date: {row['start_time'].strftime('%d-%m-%Y %H:%M:%S')}<br>"
-            f"<prop> [{row[_UNIT[ttype]]}]: {row[_VALUE[ttype]]}<br>"
+            f"date: {row['start_time'].strftime('%Y-%m-%d %H:%M:%S')}<br>"
+            f"<prop> [{row[C.UNIT[ttype]]}]: {y_data[idx]:,.0f}<br>"
             f"<stdev>"
-            f"{row['dut_type']}-ref: {row['dut_version']}<br>"
+            f"{d_type}-ref: {row['dut_version']}<br>"
             f"csit-ref: {row['job']}/{row['build']}<br>"
             f"hosts: {', '.join(row['hosts'])}"
         )
         if ttype == "mrr":
             stdev = (
                 f"stdev [{row['result_receive_rate_rate_unit']}]: "
-                f"{row['result_receive_rate_rate_stdev']}<br>"
+                f"{row['result_receive_rate_rate_stdev']:,.0f}<br>"
             )
         else:
             stdev = ""
@@ -235,11 +133,12 @@ def _generate_trending_traces(ttype: str, name: str, df: pd.DataFrame,
 
     hover_trend = list()
     for avg, stdev, (_, row) in zip(trend_avg, trend_stdev, df.iterrows()):
+        d_type = "trex" if row["dut_type"] == "none" else row["dut_type"]
         hover_itm = (
-            f"date: {row['start_time'].strftime('%d-%m-%Y %H:%M:%S')}<br>"
-            f"trend [pps]: {avg}<br>"
-            f"stdev [pps]: {stdev}<br>"
-            f"{row['dut_type']}-ref: {row['dut_version']}<br>"
+            f"date: {row['start_time'].strftime('%Y-%m-%d %H:%M:%S')}<br>"
+            f"trend [pps]: {avg:,.0f}<br>"
+            f"stdev [pps]: {stdev:,.0f}<br>"
+            f"{d_type}-ref: {row['dut_version']}<br>"
             f"csit-ref: {row['job']}/{row['build']}<br>"
             f"hosts: {', '.join(row['hosts'])}"
         )
@@ -250,16 +149,16 @@ def _generate_trending_traces(ttype: str, name: str, df: pd.DataFrame,
     traces = [
         go.Scatter(  # Samples
             x=x_axis,
-            y=df[_VALUE[ttype]],
+            y=y_data,
             name=name,
             mode="markers",
             marker={
-                u"size": 5,
-                u"color": color,
-                u"symbol": u"circle",
+                "size": 5,
+                "color": color,
+                "symbol": "circle",
             },
             text=hover,
-            hoverinfo=u"text+name",
+            hoverinfo="text+name",
             showlegend=True,
             legendgroup=name,
             customdata=customdata
@@ -270,12 +169,12 @@ def _generate_trending_traces(ttype: str, name: str, df: pd.DataFrame,
             name=name,
             mode="lines",
             line={
-                u"shape": u"linear",
-                u"width": 1,
-                u"color": color,
+                "shape": "linear",
+                "width": 1,
+                "color": color,
             },
             text=hover_trend,
-            hoverinfo=u"text+name",
+            hoverinfo="text+name",
             showlegend=False,
             legendgroup=name,
         )
@@ -287,13 +186,13 @@ def _generate_trending_traces(ttype: str, name: str, df: pd.DataFrame,
         anomaly_color = list()
         hover = list()
         for idx, anomaly in enumerate(anomalies):
-            if anomaly in (u"regression", u"progression"):
+            if anomaly in ("regression", "progression"):
                 anomaly_x.append(x_axis[idx])
                 anomaly_y.append(trend_avg[idx])
-                anomaly_color.append(_ANOMALY_COLOR[anomaly])
+                anomaly_color.append(C.ANOMALY_COLOR[anomaly])
                 hover_itm = (
-                    f"date: {x_axis[idx].strftime('%d-%m-%Y %H:%M:%S')}<br>"
-                    f"trend [pps]: {trend_avg[idx]}<br>"
+                    f"date: {x_axis[idx].strftime('%Y-%m-%d %H:%M:%S')}<br>"
+                    f"trend [pps]: {trend_avg[idx]:,.0f}<br>"
                     f"classification: {anomaly}"
                 )
                 if ttype == "pdr-lat":
@@ -304,38 +203,35 @@ def _generate_trending_traces(ttype: str, name: str, df: pd.DataFrame,
             go.Scatter(
                 x=anomaly_x,
                 y=anomaly_y,
-                mode=u"markers",
+                mode="markers",
                 text=hover,
-                hoverinfo=u"text+name",
+                hoverinfo="text+name",
                 showlegend=False,
                 legendgroup=name,
                 name=name,
                 marker={
-                    u"size": 15,
-                    u"symbol": u"circle-open",
-                    u"color": anomaly_color,
-                    u"colorscale": _COLORSCALE_LAT \
-                        if ttype == "pdr-lat" else _COLORSCALE_TPUT,
-                    u"showscale": True,
-                    u"line": {
-                        u"width": 2
+                    "size": 15,
+                    "symbol": "circle-open",
+                    "color": anomaly_color,
+                    "colorscale": C.COLORSCALE_LAT \
+                        if ttype == "pdr-lat" else C.COLORSCALE_TPUT,
+                    "showscale": True,
+                    "line": {
+                        "width": 2
                     },
-                    u"colorbar": {
-                        u"y": 0.5,
-                        u"len": 0.8,
-                        u"title": u"Circles Marking Data Classification",
-                        u"titleside": u"right",
-                        # u"titlefont": {
-                        #     u"size": 14
-                        # },
-                        u"tickmode": u"array",
-                        u"tickvals": [0.167, 0.500, 0.833],
-                        u"ticktext": _TICK_TEXT_LAT \
-                            if ttype == "pdr-lat" else _TICK_TEXT_TPUT,
-                        u"ticks": u"",
-                        u"ticklen": 0,
-                        u"tickangle": -90,
-                        u"thickness": 10
+                    "colorbar": {
+                        "y": 0.5,
+                        "len": 0.8,
+                        "title": "Circles Marking Data Classification",
+                        "titleside": "right",
+                        "tickmode": "array",
+                        "tickvals": [0.167, 0.500, 0.833],
+                        "ticktext": C.TICK_TEXT_LAT \
+                            if ttype == "pdr-lat" else C.TICK_TEXT_TPUT,
+                        "ticks": "",
+                        "ticklen": 0,
+                        "tickangle": -90,
+                        "thickness": 10
                     }
                 }
             )
@@ -345,7 +241,7 @@ def _generate_trending_traces(ttype: str, name: str, df: pd.DataFrame,
 
 
 def graph_trending(data: pd.DataFrame, sel:dict, layout: dict,
-    start: datetime, end: datetime) -> tuple:
+    start: datetime, end: datetime, normalize: bool) -> tuple:
     """
     """
 
@@ -357,16 +253,20 @@ def graph_trending(data: pd.DataFrame, sel:dict, layout: dict,
     for idx, itm in enumerate(sel):
 
         df = select_trending_data(data, itm)
-        if df is None:
+        if df is None or df.empty:
             continue
 
-        name = (
-            f"{itm['phy']}-{itm['framesize']}-{itm['core']}-"
-            f"{itm['test']}-{itm['testtype']}"
-        )
-
+        name = "-".join((itm["dut"], itm["phy"], itm["framesize"], itm["core"],
+            itm["test"], itm["testtype"], ))
+        if normalize:
+            phy = itm["phy"].split("-")
+            topo_arch = f"{phy[0]}-{phy[1]}" if len(phy) == 4 else str()
+            norm_factor = (C.NORM_FREQUENCY / C.FREQUENCY[topo_arch]) \
+                if topo_arch else 1.0
+        else:
+            norm_factor = 1.0
         traces = _generate_trending_traces(
-            itm["testtype"], name, df, start, end, _COLORS[idx % len(_COLORS)]
+            itm["testtype"], name, df, start, end, get_color(idx), norm_factor
         )
         if traces:
             if not fig_tput:
@@ -375,7 +275,7 @@ def graph_trending(data: pd.DataFrame, sel:dict, layout: dict,
 
         if itm["testtype"] == "pdr":
             traces = _generate_trending_traces(
-                "pdr-lat", name, df, start, end, _COLORS[idx % len(_COLORS)]
+                "pdr-lat", name, df, start, end, get_color(idx), norm_factor
             )
             if traces:
                 if not fig_lat:
@@ -412,12 +312,12 @@ def graph_hdrh_latency(data: dict, layout: dict) -> go.Figure:
             # For 100%, we cut that down to "x_perc" to avoid
             # infinity.
             percentile = item.percentile_level_iterated_to
-            x_perc = min(percentile, PERCENTILE_MAX)
+            x_perc = min(percentile, C.PERCENTILE_MAX)
             xaxis.append(previous_x)
             yaxis.append(item.value_iterated_to)
             hovertext.append(
-                f"<b>{_GRAPH_LAT_HDRH_DESC[lat_name]}</b><br>"
-                f"Direction: {(u'W-E', u'E-W')[idx % 2]}<br>"
+                f"<b>{C.GRAPH_LAT_HDRH_DESC[lat_name]}</b><br>"
+                f"Direction: {('W-E', 'E-W')[idx % 2]}<br>"
                 f"Percentile: {prev_perc:.5f}-{percentile:.5f}%<br>"
                 f"Latency: {item.value_iterated_to}uSec"
             )
@@ -425,8 +325,8 @@ def graph_hdrh_latency(data: dict, layout: dict) -> go.Figure:
             xaxis.append(next_x)
             yaxis.append(item.value_iterated_to)
             hovertext.append(
-                f"<b>{_GRAPH_LAT_HDRH_DESC[lat_name]}</b><br>"
-                f"Direction: {(u'W-E', u'E-W')[idx % 2]}<br>"
+                f"<b>{C.GRAPH_LAT_HDRH_DESC[lat_name]}</b><br>"
+                f"Direction: {('W-E', 'E-W')[idx % 2]}<br>"
                 f"Percentile: {prev_perc:.5f}-{percentile:.5f}%<br>"
                 f"Latency: {item.value_iterated_to}uSec"
             )
@@ -437,17 +337,17 @@ def graph_hdrh_latency(data: dict, layout: dict) -> go.Figure:
             go.Scatter(
                 x=xaxis,
                 y=yaxis,
-                name=_GRAPH_LAT_HDRH_DESC[lat_name],
-                mode=u"lines",
-                legendgroup=_GRAPH_LAT_HDRH_DESC[lat_name],
+                name=C.GRAPH_LAT_HDRH_DESC[lat_name],
+                mode="lines",
+                legendgroup=C.GRAPH_LAT_HDRH_DESC[lat_name],
                 showlegend=bool(idx % 2),
                 line=dict(
-                    color=_COLORS[int(idx/2)],
-                    dash=u"solid",
+                    color=get_color(int(idx/2)),
+                    dash="solid",
                     width=1 if idx % 2 else 2
                 ),
                 hovertext=hovertext,
-                hoverinfo=u"text"
+                hoverinfo="text"
             )
         )
     if traces: