C-Dash: Fix anomaly detection for the news

author Tibor Frank <tifrank@cisco.com>

Mon, 24 Apr 2023 14:29:08 +0000 (16:29 +0200)

committer Tibor Frank <tifrank@cisco.com>

Mon, 24 Apr 2023 14:44:51 +0000 (16:44 +0200)
author Tibor Frank <tifrank@cisco.com>
Mon, 24 Apr 2023 14:29:08 +0000 (16:29 +0200)
committer Tibor Frank <tifrank@cisco.com>
Mon, 24 Apr 2023 14:44:51 +0000 (16:44 +0200)
diff --git a/csit.infra.dash/app/cdash/data/data.yaml b/csit.infra.dash/app/cdash/data/data.yaml

index 8beee0b..720109b 100644 (file)
--- a/csit.infra.dash/app/cdash/data/data.yaml
+++ b/csit.infra.dash/app/cdash/data/data.yaml
@@ -240,7 +240,6 @@
      # - result_latency_value
      - start_time
      - passed
-    - telemetry
      - test_id
      - version
  - data_type: coverage
diff --git a/csit.infra.dash/app/cdash/news/layout.py b/csit.infra.dash/app/cdash/news/layout.py

index da36b14..d8ad92a 100644 (file)
--- a/csit.infra.dash/app/cdash/news/layout.py
+++ b/csit.infra.dash/app/cdash/news/layout.py
@@ -24,7 +24,8 @@ from dash import callback_context
  from dash import Input, Output, State
  
  from ..utils.constants import Constants as C
-from ..utils.utils import classify_anomalies, gen_new_url
+from ..utils.utils import gen_new_url
+from ..utils.anomalies import classify_anomalies
  from ..utils.url_processing import url_decode
  from .tables import table_summary
  
@@ -132,15 +133,17 @@ class Layout:
  
              tests = df_job["test_id"].unique()
              for test in tests:
-                tst_data = df_job.loc[df_job["test_id"] == test].sort_values(
-                    by="start_time", ignore_index=True)
-                x_axis = tst_data["start_time"].tolist()
+                tst_data = df_job.loc[(
+                    (df_job["test_id"] == test) &
+                    (df_job["passed"] == True)
+                )].sort_values(by="start_time", ignore_index=True)
                  if "-ndrpdr" in test:
                      tst_data = tst_data.dropna(
                          subset=["result_pdr_lower_rate_value", ]
                      )
                      if tst_data.empty:
                          continue
+                    x_axis = tst_data["start_time"].tolist()
                      try:
                          anomalies, _, _ = classify_anomalies({
                              k: v for k, v in zip(
@@ -185,6 +188,7 @@ class Layout:
                      )
                      if tst_data.empty:
                          continue
+                    x_axis = tst_data["start_time"].tolist()
                      try:
                          anomalies, _, _ = classify_anomalies({
                              k: v for k, v in zip(
diff --git a/csit.infra.dash/app/cdash/trending/graphs.py b/csit.infra.dash/app/cdash/trending/graphs.py

index fc26f8b..ba94eef 100644 (file)
--- a/csit.infra.dash/app/cdash/trending/graphs.py
+++ b/csit.infra.dash/app/cdash/trending/graphs.py
@@ -18,7 +18,8 @@ import plotly.graph_objects as go
  import pandas as pd
  
  from ..utils.constants import Constants as C
-from ..utils.utils import classify_anomalies, get_color, get_hdrh_latencies
+from ..utils.utils import get_color, get_hdrh_latencies
+from ..utils.anomalies import classify_anomalies
  
  
  def select_trending_data(data: pd.DataFrame, itm: dict) -> pd.DataFrame:
diff --git a/csit.infra.dash/app/cdash/utils/anomalies.py b/csit.infra.dash/app/cdash/utils/anomalies.py

new file mode 100644 (file)

index 0000000..9a7b232
--- /dev/null
+++ b/csit.infra.dash/app/cdash/utils/anomalies.py
@@ -0,0 +1,69 @@
+# Copyright (c) 2023 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Functions used by Dash applications to detect anomalies.
+"""
+
+from numpy import isnan
+
+from ..jumpavg import classify
+
+
+def classify_anomalies(data):
+    """Classify each sample and return its group's average and stdev.
+
+    Samples go to jumpavg classify() with nan replaced by 0.0. The first
+    non-nan sample taken from each returned group gets the group's comment,
+    later ones are "normal"; nan samples are "outlier" with nan avg/stdev.
+
+    :param data: Samples in order; unavailable samples are nan.
+    :type data: OrderedDict
+    :returns: Per-sample classification, trend averages and stdevs.
+    :rtype: 3-tuple, list of strings, list of floats and list of floats
+    """
+    # NaN marks an unavailable sample, i.e. something went wrong.
+    # Substitute 0.0 so that it gets reported as a severe regression.
+    bare_data = [0.0 if isnan(sample) else sample for sample in data.values()]
+    # TODO: Make BitCountingGroupList a subclass of list again?
+    group_list = classify(bare_data).group_list
+    group_list.reverse()  # Reversed so .pop() yields groups in FIFO order.
+    classification = list()
+    avgs = list()
+    stdevs = list()
+    active_group = None
+    values_left = 0  # Samples still to be consumed from active_group.
+    avg = 0.0
+    stdv = 0.0
+    for sample in data.values():
+        if isnan(sample):
+            classification.append("outlier")
+            avgs.append(sample)
+            stdevs.append(sample)
+            continue  # NOTE(review): does not consume from the group - confirm.
+        if values_left < 1 or active_group is None:
+            values_left = 0
+            while values_left < 1:  # Ignore empty groups (should not happen).
+                active_group = group_list.pop()
+                values_left = len(active_group.run_list)
+            avg = active_group.stats.avg
+            stdv = active_group.stats.stdev
+            classification.append(active_group.comment)
+            avgs.append(avg)
+            stdevs.append(stdv)
+            values_left -= 1
+            continue
+        classification.append("normal")
+        avgs.append(avg)
+        stdevs.append(stdv)
+        values_left -= 1
+    return classification, avgs, stdevs
diff --git a/csit.infra.dash/app/cdash/utils/utils.py b/csit.infra.dash/app/cdash/utils/utils.py

index d9347b1..29bee3d 100644 (file)
--- a/csit.infra.dash/app/cdash/utils/utils.py
+++ b/csit.infra.dash/app/cdash/utils/utils.py
@@ -11,7 +11,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  
-"""Function used by Dash applications.
+"""Functions used by Dash applications.
  """
  
  import pandas as pd
@@ -22,65 +22,13 @@ import hdrh.histogram
  import hdrh.codec
  
  from math import sqrt
-from numpy import isnan
  from dash import dcc
  from datetime import datetime
  
-from ..jumpavg import classify
  from ..utils.constants import Constants as C
  from ..utils.url_processing import url_encode
  
  
-def classify_anomalies(data):
-    """Process the data and return anomalies and trending values.
-
-    Gather data into groups with average as trend value.
-    Decorate values within groups to be normal,
-    the first value of changed average as a regression, or a progression.
-
-    :param data: Full data set with unavailable samples replaced by nan.
-    :type data: OrderedDict
-    :returns: Classification and trend values
-    :rtype: 3-tuple, list of strings, list of floats and list of floats
-    """
-    # NaN means something went wrong.
-    # Use 0.0 to cause that being reported as a severe regression.
-    bare_data = [0.0 if isnan(sample) else sample for sample in data.values()]
-    # TODO: Make BitCountingGroupList a subclass of list again?
-    group_list = classify(bare_data).group_list
-    group_list.reverse()  # Just to use .pop() for FIFO.
-    classification = list()
-    avgs = list()
-    stdevs = list()
-    active_group = None
-    values_left = 0
-    avg = 0.0
-    stdv = 0.0
-    for sample in data.values():
-        if isnan(sample):
-            classification.append("outlier")
-            avgs.append(sample)
-            stdevs.append(sample)
-            continue
-        if values_left < 1 or active_group is None:
-            values_left = 0
-            while values_left < 1:  # Ignore empty groups (should not happen).
-                active_group = group_list.pop()
-                values_left = len(active_group.run_list)
-            avg = active_group.stats.avg
-            stdv = active_group.stats.stdev
-            classification.append(active_group.comment)
-            avgs.append(avg)
-            stdevs.append(stdv)
-            values_left -= 1
-            continue
-        classification.append("normal")
-        avgs.append(avg)
-        stdevs.append(stdv)
-        values_left -= 1
-    return classification, avgs, stdevs
-
-
  def get_color(idx: int) -> str:
      """Returns a color from the list defined in Constants.PLOT_COLORS defined by
      its index.
author	Tibor Frank <tifrank@cisco.com>
	Mon, 24 Apr 2023 14:29:08 +0000 (16:29 +0200)
committer	Tibor Frank <tifrank@cisco.com>
	Mon, 24 Apr 2023 14:44:51 +0000 (16:44 +0200)
csit.infra.dash/app/cdash/data/data.yaml		patch \| blob \| history
csit.infra.dash/app/cdash/news/layout.py		patch \| blob \| history
csit.infra.dash/app/cdash/trending/graphs.py		patch \| blob \| history
csit.infra.dash/app/cdash/utils/anomalies.py	[new file with mode: 0644]	patch \| blob
csit.infra.dash/app/cdash/utils/utils.py		patch \| blob \| history