CSIT-1041: Trending dashboard
[csit.git] / resources / tools / presentation / generator_CPTA.py
index 69a8df6..51787e4 100644 (file)
@@ -25,7 +25,7 @@ import numpy as np
 import pandas as pd
 
 from collections import OrderedDict
-from utils import find_outliers, archive_input_data, execute_command
+from utils import split_outliers, archive_input_data, execute_command
 
 
 # Command to build the html format of the report
@@ -164,22 +164,26 @@ def _evaluate_results(in_data, trimmed_data, window=10):
 
     if len(in_data) > 2:
         win_size = in_data.size if in_data.size < window else window
-        results = [0.0, ] * win_size
-        median = in_data.rolling(window=win_size).median()
+        results = [0.66, ]
+        median = trimmed_data.rolling(window=win_size, min_periods=2).median()
         stdev_t = trimmed_data.rolling(window=win_size, min_periods=2).std()
-        m_vals = median.values
-        s_vals = stdev_t.values
-        d_vals = in_data.values
-        for day in range(win_size, in_data.size):
-            if np.isnan(m_vals[day - 1]) or np.isnan(s_vals[day - 1]):
+
+        first = True
+        for build_nr, value in in_data.iteritems():
+            if first:
+                first = False
+                continue
+            if np.isnan(trimmed_data[build_nr]) \
+                    or np.isnan(median[build_nr]) \
+                    or np.isnan(stdev_t[build_nr]) \
+                    or np.isnan(value):
                 results.append(0.0)
-            elif d_vals[day] < (m_vals[day - 1] - 3 * s_vals[day - 1]):
+            elif value < (median[build_nr] - 3 * stdev_t[build_nr]):
                 results.append(0.33)
-            elif (m_vals[day - 1] - 3 * s_vals[day - 1]) <= d_vals[day] <= \
-                    (m_vals[day - 1] + 3 * s_vals[day - 1]):
-                results.append(0.66)
-            else:
+            elif value > (median[build_nr] + 3 * stdev_t[build_nr]):
                 results.append(1.0)
+            else:
+                results.append(0.66)
     else:
         results = [0.0, ]
         try:
@@ -197,7 +201,7 @@ def _evaluate_results(in_data, trimmed_data, window=10):
     return results
 
 
-def _generate_trending_traces(in_data, period, moving_win_size=10,
+def _generate_trending_traces(in_data, build_info, period, moving_win_size=10,
                               fill_missing=True, use_first=False,
                               show_moving_median=True, name="", color=""):
     """Generate the trending traces:
@@ -206,6 +210,7 @@ def _generate_trending_traces(in_data, period, moving_win_size=10,
      - outliers, regress, progress
 
     :param in_data: Full data set.
+    :param build_info: Per-build (date, version) tuples keyed by build number.
     :param period: Sampling period.
     :param moving_win_size: Window size.
     :param fill_missing: If the chosen sample is missing in the full set, its
@@ -215,6 +220,7 @@ def _generate_trending_traces(in_data, period, moving_win_size=10,
     :param name: Name of the plot
     :param color: Name of the color for the plot.
     :type in_data: OrderedDict
+    :type build_info: dict
     :type period: int
     :type moving_win_size: int
     :type fill_missing: bool
@@ -233,10 +239,16 @@ def _generate_trending_traces(in_data, period, moving_win_size=10,
 
     data_x = [key for key in in_data.keys()]
     data_y = [val for val in in_data.values()]
-    data_pd = pd.Series(data_y, index=data_x)
 
-    t_data, outliers = find_outliers(data_pd)
+    hover_text = list()
+    for idx in data_x:
+        hover_text.append("vpp-build: {0}".
+                          format(build_info[str(idx)][1].split("~")[-1]))
+
+    data_pd = pd.Series(data_y, index=data_x)
 
+    t_data, outliers = split_outliers(data_pd, outlier_const=1.5,
+                                      window=moving_win_size)
     results = _evaluate_results(data_pd, t_data, window=moving_win_size)
 
     anomalies = pd.Series()
@@ -274,6 +286,8 @@ def _generate_trending_traces(in_data, period, moving_win_size=10,
             "color": color,
             "symbol": "circle",
         },
+        text=hover_text,
+        hoverinfo="x+y+text+name"
     )
     traces = [trace_samples, ]
 
@@ -282,9 +296,9 @@ def _generate_trending_traces(in_data, period, moving_win_size=10,
         y=anomalies.values,
         mode='markers',
         hoverinfo="none",
-        showlegend=False,
+        showlegend=True,
         legendgroup=name,
-        name="{name}: outliers".format(name=name),
+        name="{name}-anomalies".format(name=name),
         marker={
             "size": 15,
             "symbol": "circle-open",
@@ -371,19 +385,28 @@ def _generate_all_charts(spec, input_data):
             builds_lst.append(str(build["build"]))
 
     # Get "build ID": "date" dict:
-    build_dates = dict()
+    build_info = OrderedDict()
     for build in builds_lst:
         try:
-            build_dates[build] = \
-                input_data.metadata(job_name, build)["generated"][:14]
+            build_info[build] = (
+                input_data.metadata(job_name, build)["generated"][:14],
+                input_data.metadata(job_name, build)["version"]
+            )
         except KeyError:
-            pass
+            build_info[build] = ("", "")
+        logging.info("{}: {}, {}".format(build,
+                                         build_info[build][0],
+                                         build_info[build][1]))
 
     # Create the header:
     csv_table = list()
     header = "Build Number:," + ",".join(builds_lst) + '\n'
     csv_table.append(header)
-    header = "Build Date:," + ",".join(build_dates.values()) + '\n'
+    build_dates = [x[0] for x in build_info.values()]
+    header = "Build Date:," + ",".join(build_dates) + '\n'
+    csv_table.append(header)
+    vpp_versions = [x[1] for x in build_info.values()]
+    header = "VPP Version:," + ",".join(vpp_versions) + '\n'
     csv_table.append(header)
 
     results = list()
@@ -414,13 +437,14 @@ def _generate_all_charts(spec, input_data):
             tst_lst = list()
             for build in builds_lst:
                 item = tst_data.get(int(build), '')
-                tst_lst.append(str(item) if item else '')
+                tst_lst.append(str(item))
+                # Falsy items (e.g. 0) are kept, not blanked to ''.
             csv_table.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
 
         for period in chart["periods"]:
             # Generate traces:
             traces = list()
-            win_size = 10 if period == 1 else 5 if period < 20 else 3
+            win_size = 14 if period == 1 else 5 if period < 20 else 3
             idx = 0
             for test_name, test_data in chart_data.items():
                 if not test_data:
@@ -430,6 +454,7 @@ def _generate_all_charts(spec, input_data):
                 test_name = test_name.split('.')[-1]
                 trace, result = _generate_trending_traces(
                     test_data,
+                    build_info=build_info,
                     period=period,
                     moving_win_size=win_size,
                     fill_missing=True,
@@ -472,7 +497,11 @@ def _generate_all_charts(spec, input_data):
                             row[idx] = str(round(float(item) / 1000000, 2))
                         except ValueError:
                             pass
-                txt_table.add_row(row)
+                try:
+                    txt_table.add_row(row)
+                except Exception as err:
+                    logging.warning("Error occurred while generating TXT table:"
+                                    "\n{0}".format(err))
             line_nr += 1
         txt_table.align["Build Number:"] = "l"
     with open("{0}.txt".format(file_name), "w") as txt_file: