CSIT-1041: Trending dashboard
[csit.git] / resources / tools / presentation / generator_CPTA.py
index 066bfbd..51787e4 100644 (file)
@@ -25,7 +25,7 @@ import numpy as np
 import pandas as pd
 
 from collections import OrderedDict
-from utils import find_outliers, archive_input_data, execute_command
+from utils import split_outliers, archive_input_data, execute_command
 
 
 # Command to build the html format of the report
@@ -164,24 +164,26 @@ def _evaluate_results(in_data, trimmed_data, window=10):
 
     if len(in_data) > 2:
         win_size = in_data.size if in_data.size < window else window
-        results = [0.0, ]
-        median = in_data.rolling(window=win_size).median()
+        results = [0.66, ]
+        median = trimmed_data.rolling(window=win_size, min_periods=2).median()
         stdev_t = trimmed_data.rolling(window=win_size, min_periods=2).std()
-        m_vals = median.values
-        s_vals = stdev_t.values
-        d_vals = in_data.values
-        for day in range(1, in_data.size):
-            if np.isnan(m_vals[day]) \
-                    or np.isnan(s_vals[day]) \
-                    or np.isnan(d_vals[day]):
+
+        first = True
+        for build_nr, value in in_data.iteritems():
+            if first:
+                first = False
+                continue
+            if np.isnan(trimmed_data[build_nr]) \
+                    or np.isnan(median[build_nr]) \
+                    or np.isnan(stdev_t[build_nr]) \
+                    or np.isnan(value):
                 results.append(0.0)
-            elif d_vals[day] < (m_vals[day] - 3 * s_vals[day]):
+            elif value < (median[build_nr] - 3 * stdev_t[build_nr]):
                 results.append(0.33)
-            elif (m_vals[day] - 3 * s_vals[day]) <= d_vals[day] <= \
-                    (m_vals[day] + 3 * s_vals[day]):
-                results.append(0.66)
-            else:
+            elif value > (median[build_nr] + 3 * stdev_t[build_nr]):
                 results.append(1.0)
+            else:
+                results.append(0.66)
     else:
         results = [0.0, ]
         try:
@@ -234,30 +236,24 @@ def _generate_trending_traces(in_data, build_info, period, moving_win_size=10,
         in_data = _select_data(in_data, period,
                                fill_missing=fill_missing,
                                use_first=use_first)
-    # try:
-    #     data_x = ["{0}/{1}".format(key, build_info[str(key)][1].split("~")[-1])
-    #               for key in in_data.keys()]
-    # except KeyError:
-    #     data_x = [key for key in in_data.keys()]
-    hover_text = ["vpp-build: {0}".format(x[1].split("~")[-1])
-                  for x in build_info.values()]
-    data_x = [key for key in in_data.keys()]
 
+    data_x = [key for key in in_data.keys()]
     data_y = [val for val in in_data.values()]
-    data_pd = pd.Series(data_y, index=data_x)
 
-    t_data, outliers = find_outliers(data_pd, outlier_const=1.5)
+    hover_text = list()
+    for idx in data_x:
+        hover_text.append("vpp-build: {0}".
+                          format(build_info[str(idx)][1].split("~")[-1]))
+
+    data_pd = pd.Series(data_y, index=data_x)
 
+    t_data, outliers = split_outliers(data_pd, outlier_const=1.5,
+                                      window=moving_win_size)
     results = _evaluate_results(data_pd, t_data, window=moving_win_size)
 
     anomalies = pd.Series()
     anomalies_res = list()
     for idx, item in enumerate(in_data.items()):
-        # item_pd = pd.Series([item[1], ],
-        #                     index=["{0}/{1}".
-        #                     format(item[0],
-        #                            build_info[str(item[0])][1].split("~")[-1]),
-        #                            ])
         item_pd = pd.Series([item[1], ], index=[item[0], ])
         if item[0] in outliers.keys():
             anomalies = anomalies.append(item_pd)
@@ -300,9 +296,9 @@ def _generate_trending_traces(in_data, build_info, period, moving_win_size=10,
         y=anomalies.values,
         mode='markers',
         hoverinfo="none",
-        showlegend=False,
+        showlegend=True,
         legendgroup=name,
-        name="{name}: outliers".format(name=name),
+        name="{name}-anomalies".format(name=name),
         marker={
             "size": 15,
             "symbol": "circle-open",
@@ -389,7 +385,7 @@ def _generate_all_charts(spec, input_data):
             builds_lst.append(str(build["build"]))
 
     # Get "build ID": "date" dict:
-    build_info = dict()
+    build_info = OrderedDict()
     for build in builds_lst:
         try:
             build_info[build] = (
@@ -398,6 +394,9 @@ def _generate_all_charts(spec, input_data):
             )
         except KeyError:
             build_info[build] = ("", "")
+        logging.info("{}: {}, {}".format(build,
+                                         build_info[build][0],
+                                         build_info[build][1]))
 
     # Create the header:
     csv_table = list()
@@ -438,13 +437,14 @@ def _generate_all_charts(spec, input_data):
             tst_lst = list()
             for build in builds_lst:
                 item = tst_data.get(int(build), '')
-                tst_lst.append(str(item) if item else '')
+                tst_lst.append(str(item))
+                # tst_lst.append(str(item) if item else '')
             csv_table.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
 
         for period in chart["periods"]:
             # Generate traces:
             traces = list()
-            win_size = 10 if period == 1 else 5 if period < 20 else 3
+            win_size = 14 if period == 1 else 5 if period < 20 else 3
             idx = 0
             for test_name, test_data in chart_data.items():
                 if not test_data: