X-Git-Url: https://gerrit.fd.io/r/gitweb?p=csit.git;a=blobdiff_plain;f=resources%2Ftools%2Fpresentation%2Fgenerator_CPTA.py;h=cfd4c5817a6ccddc0d7aa700208d4592595fc074;hp=0c317c05ce4987a138f08c2c76f6f5b2b4d4e61b;hb=a76fb6bd39a0a6ec6183bfb6da9c3ebefb248f33;hpb=5ae1aea05849a69a97f559756e80b9794482f302

diff --git a/resources/tools/presentation/generator_CPTA.py b/resources/tools/presentation/generator_CPTA.py
index 0c317c05ce..cfd4c5817a 100644
--- a/resources/tools/presentation/generator_CPTA.py
+++ b/resources/tools/presentation/generator_CPTA.py
@@ -14,7 +14,6 @@
 """Generation of Continuous Performance Trending and Analysis.
 """
 
-import datetime
 import logging
 import csv
 import prettytable
@@ -25,14 +24,16 @@ import numpy as np
 import pandas as pd
 
 from collections import OrderedDict
-from utils import find_outliers, archive_input_data, execute_command
+from datetime import datetime, timedelta
+
+from utils import split_outliers, archive_input_data, execute_command
 
 
 # Command to build the html format of the report
 HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
                '-b html -E ' \
                '-t html ' \
-               '-D version="Generated on {date}" ' \
+               '-D version="{date}" ' \
                '{working_dir} ' \
                '{build_dir}/'
 
@@ -64,7 +65,7 @@ def generate_cpta(spec, data):
     ret_code = _generate_all_charts(spec, data)
 
     cmd = HTML_BUILDER.format(
-        date=datetime.date.today().strftime('%d-%b-%Y'),
+        date=datetime.utcnow().strftime('%m/%d/%Y %H:%M UTC'),
         working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
         build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
     execute_command(cmd)
@@ -144,51 +145,53 @@ def _select_data(in_data, period, fill_missing=False, use_first=False):
     return OrderedDict(sorted(data_dict.items(), key=lambda t: t[0]))
 
 
-def _evaluate_results(in_data, trimmed_data, window=10):
+def _evaluate_results(trimmed_data, window=10):
     """Evaluates if the sample value is regress, normal or progress compared to
     previous data within the window.
     We use the intervals defined as:
-    - regress: less than median - 3 * stdev
-    - normal: between median - 3 * stdev and median + 3 * stdev
-    - progress: more than median + 3 * stdev
+    - regress: less than trimmed moving median - 3 * stdev
+    - normal: within 3 * stdev of trimmed moving median (bounds included)
+    - progress: more than trimmed moving median + 3 * stdev
+    where stdev is trimmed moving standard deviation.
 
-    :param in_data: Full data set.
-    :param trimmed_data: Full data set without the outliers.
-    :param window: Window size used to calculate moving median and moving stdev.
-    :type in_data: pandas.Series
+    :param trimmed_data: Full data set with the outliers replaced by nan.
+    :param window: Window size used to calculate moving median and moving stdev.
     :type trimmed_data: pandas.Series
     :type window: int
     :returns: Evaluated results.
     :rtype: list
     """
 
-    if len(in_data) > 2:
-        win_size = in_data.size if in_data.size < window else window
-        results = [0.0, ] * win_size
-        median = in_data.rolling(window=win_size).median()
-        stdev_t = trimmed_data.rolling(window=win_size, min_periods=2).std()
-        m_vals = median.values
-        s_vals = stdev_t.values
-        d_vals = in_data.values
-        for day in range(win_size, in_data.size):
-            if np.isnan(m_vals[day - 1]) or np.isnan(s_vals[day - 1]):
+    if len(trimmed_data) > 2:
+        win_size = trimmed_data.size if trimmed_data.size < window else window
+        results = [0.66, ]
+        tmm = trimmed_data.rolling(window=win_size, min_periods=2).median()
+        tmstd = trimmed_data.rolling(window=win_size, min_periods=2).std()
+
+        first = True
+        for build_nr, value in trimmed_data.iteritems():
+            if first:
+                first = False
+                continue
+            if (np.isnan(value)
+                    or np.isnan(tmm[build_nr])
+                    or np.isnan(tmstd[build_nr])):
                 results.append(0.0)
-            elif d_vals[day] < (m_vals[day - 1] - 3 * s_vals[day - 1]):
+            elif value < (tmm[build_nr] - 3 * tmstd[build_nr]):
                 results.append(0.33)
-            elif (m_vals[day - 1] - 3 * s_vals[day - 1]) <= d_vals[day] <= \
-                    (m_vals[day - 1] + 3 * s_vals[day - 1]):
-                results.append(0.66)
-            else:
+            elif value > (tmm[build_nr] + 3 * tmstd[build_nr]):
                 results.append(1.0)
+            else:
+                results.append(0.66)
     else:
         results = [0.0, ]
         try:
-            median = np.median(in_data)
-            stdev = np.std(in_data)
-            if in_data.values[-1] < (median - 3 * stdev):
+            tmm = np.median(trimmed_data)
+            tmstd = np.std(trimmed_data)
+            if trimmed_data.values[-1] < (tmm - 3 * tmstd):
                 results.append(0.33)
-            elif (median - 3 * stdev) <= in_data.values[-1] <= (
-                    median + 3 * stdev):
+            elif (tmm - 3 * tmstd) <= trimmed_data.values[-1] <= (
+                    tmm + 3 * tmstd):
                 results.append(0.66)
             else:
                 results.append(1.0)
@@ -197,32 +200,35 @@ def _evaluate_results(in_data, trimmed_data, window=10):
     return results
 
 
-def _generate_trending_traces(in_data, period, moving_win_size=10,
+def _generate_trending_traces(in_data, build_info, period, moving_win_size=10,
                               fill_missing=True, use_first=False,
-                              show_moving_median=True, name="", color=""):
+                              show_trend_line=True, name="", color=""):
     """Generate the trending traces:
      - samples,
-     - moving median (trending plot)
+     - trimmed moving median (trending line)
      - outliers, regress, progress
 
     :param in_data: Full data set.
+    :param build_info: Information about the builds.
     :param period: Sampling period.
     :param moving_win_size: Window size.
     :param fill_missing: If the chosen sample is missing in the full set, its
-    nearest neighbour is used.
+        nearest neighbour is used.
     :param use_first: Use the first sample even though it is not chosen.
-    :param show_moving_median: Show moving median (trending plot).
+    :param show_trend_line: Show trimmed moving median (trending line).
     :param name: Name of the plot
     :param color: Name of the color for the plot.
     :type in_data: OrderedDict
+    :type build_info: dict
     :type period: int
     :type moving_win_size: int
     :type fill_missing: bool
     :type use_first: bool
-    :type show_moving_median: bool
+    :type show_trend_line: bool
     :type name: str
     :type color: str
-    :returns: Generated traces (list) and the evaluated result (float).
+    :returns: Generated traces (list), the evaluated result (float) and the
+        first and last date.
     :rtype: tuple(traces, result)
     """
 
@@ -231,17 +237,28 @@ def _generate_trending_traces(in_data, period, moving_win_size=10,
                                fill_missing=fill_missing,
                                use_first=use_first)
 
-    data_x = [key for key in in_data.keys()]
-    data_y = [val for val in in_data.values()]
-    data_pd = pd.Series(data_y, index=data_x)
+    data_x = list(in_data.keys())
+    data_y = list(in_data.values())
 
-    t_data, outliers = find_outliers(data_pd)
+    hover_text = list()
+    xaxis = list()
+    for idx in data_x:
+        hover_text.append("vpp-ref: {0}<br>csit-ref: mrr-daily-build-{1}".
+                          format(build_info[str(idx)][1].rsplit('~', 1)[0],
+                                 idx))
+        date = build_info[str(idx)][0]
+        xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
+                              int(date[9:11]), int(date[12:])))
 
-    results = _evaluate_results(data_pd, t_data, window=moving_win_size)
+    data_pd = pd.Series(data_y, index=xaxis)
+
+    t_data, outliers = split_outliers(data_pd, outlier_const=1.5,
+                                      window=moving_win_size)
+    results = _evaluate_results(t_data, window=moving_win_size)
 
     anomalies = pd.Series()
     anomalies_res = list()
-    for idx, item in enumerate(in_data.items()):
+    for idx, item in enumerate(data_pd.items()):
         item_pd = pd.Series([item[1], ], index=[item[0], ])
         if item[0] in outliers.keys():
             anomalies = anomalies.append(item_pd)
@@ -262,7 +279,7 @@ def _generate_trending_traces(in_data, period, moving_win_size=10,
                    [1.00, "green"]]
 
     trace_samples = plgo.Scatter(
-        x=data_x,
+        x=xaxis,
         y=data_y,
         mode='markers',
         line={
@@ -274,6 +291,8 @@ def _generate_trending_traces(in_data, period, moving_win_size=10,
             "color": color,
             "symbol": "circle",
         },
+        text=hover_text,
+        hoverinfo="x+y+text+name"
     )
     traces = [trace_samples, ]
 
@@ -282,28 +301,30 @@ def _generate_trending_traces(in_data, period, moving_win_size=10,
         y=anomalies.values,
         mode='markers',
         hoverinfo="none",
-        showlegend=False,
+        showlegend=True,
         legendgroup=name,
-        name="{name}: outliers".format(name=name),
+        name="{name}-anomalies".format(name=name),
         marker={
             "size": 15,
             "symbol": "circle-open",
             "color": anomalies_res,
             "colorscale": color_scale,
             "showscale": True,
-
+            "line": {
+                "width": 2
+            },
             "colorbar": {
                 "y": 0.5,
                 "len": 0.8,
-                "title": "Results Clasification",
+                "title": "Circles Marking Data Classification",
                 "titleside": 'right',
                 "titlefont": {
                     "size": 14
                 },
                 "tickmode": 'array',
                 "tickvals": [0.125, 0.375, 0.625, 0.875],
-                "ticktext": ["Outlier", "Regress", "Normal", "Progress"],
-                "ticks": 'outside',
+                "ticktext": ["Outlier", "Regression", "Normal", "Progression"],
+                "ticks": "",
                 "ticklen": 0,
                 "tickangle": -90,
                 "thickness": 10
@@ -312,13 +333,12 @@ def _generate_trending_traces(in_data, period, moving_win_size=10,
     )
     traces.append(trace_anomalies)
 
-    if show_moving_median:
-        min_periods = moving_win_size / 2 + 1
-        data_mean_y = pd.Series(data_y).rolling(
-            window=moving_win_size, min_periods=min_periods).median()
-        trace_median = plgo.Scatter(
-            x=data_x,
-            y=data_mean_y,
+    if show_trend_line:
+        data_trend = t_data.rolling(window=moving_win_size,
+                                    min_periods=2).median()
+        trace_trend = plgo.Scatter(
+            x=data_trend.keys(),
+            y=data_trend.tolist(),
             mode='lines',
             line={
                 "shape": "spline",
@@ -327,9 +347,9 @@ def _generate_trending_traces(in_data, period, moving_win_size=10,
             },
             name='{name}-trend'.format(name=name)
         )
-        traces.append(trace_median)
+        traces.append(trace_trend)
 
-    return traces, results[-1]
+    return traces, results[-1], xaxis[0], xaxis[-1]
 
 
 def _generate_chart(traces, layout, file_name):
@@ -361,12 +381,38 @@ def _generate_all_charts(spec, input_data):
     :type input_data: InputData
     """
 
-    csv_table = list()
+    job_name = spec.cpta["data"].keys()[0]
+
+    builds_lst = list()
+    for build in spec.input["builds"][job_name]:
+        status = build["status"]
+        if status != "failed" and status != "not found":
+            builds_lst.append(str(build["build"]))
+
+    # Get "build ID": ("date", "version") dict:
+    build_info = OrderedDict()
+    for build in builds_lst:
+        try:
+            build_info[build] = (
+                input_data.metadata(job_name, build)["generated"][:14],
+                input_data.metadata(job_name, build)["version"]
+            )
+        except KeyError:
+            build_info[build] = ("", "")
+        logging.info("{}: {}, {}".format(build,
+                                         build_info[build][0],
+                                         build_info[build][1]))
+
     # Create the header:
-    builds = spec.cpta["data"].values()[0]
-    builds_lst = [str(build) for build in range(builds[0], builds[-1] + 1)]
+    csv_table = list()
     header = "Build Number:," + ",".join(builds_lst) + '\n'
     csv_table.append(header)
+    build_dates = [x[0] for x in build_info.values()]
+    header = "Build Date:," + ",".join(build_dates) + '\n'
+    csv_table.append(header)
+    vpp_versions = [x[1] for x in build_info.values()]
+    header = "VPP Version:," + ",".join(vpp_versions) + '\n'
+    csv_table.append(header)
 
     results = list()
     for chart in spec.cpta["plots"]:
@@ -396,13 +442,13 @@ def _generate_all_charts(spec, input_data):
             tst_lst = list()
             for build in builds_lst:
                 item = tst_data.get(int(build), '')
-                tst_lst.append(str(item) if item else '')
+                tst_lst.append(str(item))
             csv_table.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
 
         for period in chart["periods"]:
             # Generate traces:
             traces = list()
-            win_size = 10 if period == 1 else 5 if period < 20 else 3
+            win_size = 14
             idx = 0
             for test_name, test_data in chart_data.items():
                 if not test_data:
@@ -410,29 +456,36 @@ def _generate_all_charts(spec, input_data):
                                     format(test_name))
                     continue
                 test_name = test_name.split('.')[-1]
-                trace, result = _generate_trending_traces(
-                    test_data,
-                    period=period,
-                    moving_win_size=win_size,
-                    fill_missing=True,
-                    use_first=False,
-                    name='-'.join(test_name.split('-')[3:-1]),
-                    color=COLORS[idx])
+                trace, result, first_date, last_date = \
+                    _generate_trending_traces(
+                        test_data,
+                        build_info=build_info,
+                        period=period,
+                        moving_win_size=win_size,
+                        fill_missing=True,
+                        use_first=False,
+                        name='-'.join(test_name.split('-')[3:-1]),
+                        color=COLORS[idx])
                 traces.extend(trace)
                 results.append(result)
                 idx += 1
 
-            # Generate the chart:
-            period_name = "Daily" if period == 1 else \
-                "Weekly" if period < 20 else "Monthly"
-            chart["layout"]["title"] = chart["title"].format(period=period_name)
-            _generate_chart(traces,
-                            chart["layout"],
-                            file_name="{0}-{1}-{2}{3}".format(
-                                spec.cpta["output-file"],
-                                chart["output-file-name"],
-                                period,
-                                spec.cpta["output-file-type"]))
+            if traces:
+                # Generate the chart:
+                chart["layout"]["xaxis"]["title"] = \
+                    chart["layout"]["xaxis"]["title"].format(job=job_name)
+                delta = timedelta(days=30)
+                start = last_date - delta
+                start = first_date if start < first_date else start
+                chart["layout"]["xaxis"]["range"] = [str(start.date()),
+                                                     str(last_date.date())]
+                _generate_chart(traces,
+                                chart["layout"],
+                                file_name="{0}-{1}-{2}{3}".format(
+                                    spec.cpta["output-file"],
+                                    chart["output-file-name"],
+                                    period,
+                                    spec.cpta["output-file-type"]))
 
         logging.info("  Done.")
 
@@ -444,11 +497,23 @@ def _generate_all_charts(spec, input_data):
     txt_table = None
     with open("{0}.csv".format(file_name), 'rb') as csv_file:
         csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
+        line_nr = 0
         for row in csv_content:
             if txt_table is None:
                 txt_table = prettytable.PrettyTable(row)
             else:
-                txt_table.add_row(row)
+                if line_nr > 1:
+                    for idx, item in enumerate(row):
+                        try:
+                            row[idx] = str(round(float(item) / 1000000, 2))
+                        except ValueError:
+                            pass
+                try:
+                    txt_table.add_row(row)
+                except Exception as err:
+                    logging.warning("Error occurred while generating TXT table:"
+                                    "\n{0}".format(err))
+            line_nr += 1
         txt_table.align["Build Number:"] = "l"
     with open("{0}.txt".format(file_name), "w") as txt_file:
         txt_file.write(str(txt_table))