X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;ds=sidebyside;f=resources%2Ftools%2Fpresentation%2Fgenerator_CPTA.py;h=c996aca0bdb20ec141a81b42d914f91a61933f02;hb=8b7416d72f67a8ccd408d81a56d8ae1094305d18;hp=73d55affa2638145fa41b01b731058c9dc73cad3;hpb=6f5de201aadfbb31419c05dfae6495107a745899;p=csit.git

diff --git a/resources/tools/presentation/generator_CPTA.py b/resources/tools/presentation/generator_CPTA.py
index 73d55affa2..c996aca0bd 100644
--- a/resources/tools/presentation/generator_CPTA.py
+++ b/resources/tools/presentation/generator_CPTA.py
@@ -22,13 +22,13 @@ import prettytable
 import plotly.offline as ploff
 import plotly.graph_objs as plgo
 import plotly.exceptions as plerr
-import numpy as np
 import pandas as pd
 
 from collections import OrderedDict
 from datetime import datetime
 
-from utils import split_outliers, archive_input_data, execute_command, Worker
+from utils import split_outliers, archive_input_data, execute_command,\
+    classify_anomalies, Worker
 
 
 # Command to build the html format of the report
@@ -87,62 +87,7 @@ def generate_cpta(spec, data):
     return ret_code
 
 
-def _evaluate_results(trimmed_data, window=10):
-    """Evaluates if the sample value is regress, normal or progress compared to
-    previous data within the window.
-    We use the intervals defined as:
-    - regress: less than trimmed moving median - 3 * stdev
-    - normal: between trimmed moving median - 3 * stdev and median + 3 * stdev
-    - progress: more than trimmed moving median + 3 * stdev
-    where stdev is trimmed moving standard deviation.
-
-    :param trimmed_data: Full data set with the outliers replaced by nan.
-    :param window: Window size used to calculate moving average and moving stdev.
-    :type trimmed_data: pandas.Series
-    :type window: int
-    :returns: Evaluated results.
-    :rtype: list
-    """
-
-    if len(trimmed_data) > 2:
-        win_size = trimmed_data.size if trimmed_data.size < window else window
-        results = [0.66, ]
-        tmm = trimmed_data.rolling(window=win_size, min_periods=2).median()
-        tmstd = trimmed_data.rolling(window=win_size, min_periods=2).std()
-
-        first = True
-        for build_nr, value in trimmed_data.iteritems():
-            if first:
-                first = False
-                continue
-            if (np.isnan(value)
-                    or np.isnan(tmm[build_nr])
-                    or np.isnan(tmstd[build_nr])):
-                results.append(0.0)
-            elif value < (tmm[build_nr] - 3 * tmstd[build_nr]):
-                results.append(0.33)
-            elif value > (tmm[build_nr] + 3 * tmstd[build_nr]):
-                results.append(1.0)
-            else:
-                results.append(0.66)
-    else:
-        results = [0.0, ]
-        try:
-            tmm = np.median(trimmed_data)
-            tmstd = np.std(trimmed_data)
-            if trimmed_data.values[-1] < (tmm - 3 * tmstd):
-                results.append(0.33)
-            elif (tmm - 3 * tmstd) <= trimmed_data.values[-1] <= (
-                    tmm + 3 * tmstd):
-                results.append(0.66)
-            else:
-                results.append(1.0)
-        except TypeError:
-            results.append(None)
-    return results
-
-
-def _generate_trending_traces(in_data, build_info, moving_win_size=10,
+def _generate_trending_traces(in_data, job_name, build_info, moving_win_size=10,
                               show_trend_line=True, name="", color=""):
     """Generate the trending traces:
      - samples,
@@ -150,12 +95,14 @@ def _generate_trending_traces(in_data, build_info, moving_win_size=10,
      - outliers, regress, progress
 
     :param in_data: Full data set.
+    :param job_name: The name of the job which generated the data.
     :param build_info: Information about the builds.
     :param moving_win_size: Window size.
     :param show_trend_line: Show moving median (trending plot).
     :param name: Name of the plot
     :param color: Name of the color for the plot.
     :type in_data: OrderedDict
+    :type job_name: str
     :type build_info: dict
     :type moving_win_size: int
     :type show_trend_line: bool
@@ -171,10 +118,15 @@ def _generate_trending_traces(in_data, build_info, moving_win_size=10,
     hover_text = list()
     xaxis = list()
     for idx in data_x:
-        hover_text.append("vpp-ref: {0}<br>csit-ref: mrr-daily-build-{1}".
-                          format(build_info[str(idx)][1].rsplit('~', 1)[0],
-                                 idx))
-        date = build_info[str(idx)][0]
+        if "dpdk" in job_name:
+            hover_text.append("dpdk-ref: {0}<br>csit-ref: mrr-weekly-build-{1}".
+                              format(build_info[job_name][str(idx)][1].
+                                     rsplit('~', 1)[0], idx))
+        elif "vpp" in job_name:
+            hover_text.append("vpp-ref: {0}<br>csit-ref: mrr-daily-build-{1}".
+                              format(build_info[job_name][str(idx)][1].
+                                     rsplit('~', 1)[0], idx))
+        date = build_info[job_name][str(idx)][0]
         xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
                               int(date[9:11]), int(date[12:])))
 
@@ -182,29 +134,27 @@ def _generate_trending_traces(in_data, build_info, moving_win_size=10,
 
     t_data, outliers = split_outliers(data_pd, outlier_const=1.5,
                                       window=moving_win_size)
-    results = _evaluate_results(t_data, window=moving_win_size)
+    anomaly_classification = classify_anomalies(t_data, window=moving_win_size)
 
     anomalies = pd.Series()
-    anomalies_res = list()
-    for idx, item in enumerate(data_pd.items()):
-        item_pd = pd.Series([item[1], ], index=[item[0], ])
-        if item[0] in outliers.keys():
-            anomalies = anomalies.append(item_pd)
-            anomalies_res.append(0.0)
-        elif results[idx] in (0.33, 1.0):
-            anomalies = anomalies.append(item_pd)
-            anomalies_res.append(results[idx])
-    anomalies_res.extend([0.0, 0.33, 0.66, 1.0])
+    anomalies_colors = list()
+    anomaly_color = {
+        "outlier": 0.0,
+        "regression": 0.33,
+        "normal": 0.66,
+        "progression": 1.0
+    }
+    if anomaly_classification:
+        for idx, item in enumerate(data_pd.items()):
+            if anomaly_classification[idx] in \
+                    ("outlier", "regression", "progression"):
+                anomalies = anomalies.append(pd.Series([item[1], ],
+                                                       index=[item[0], ]))
+                anomalies_colors.append(
+                    anomaly_color[anomaly_classification[idx]])
+        anomalies_colors.extend([0.0, 0.33, 0.66, 1.0])
 
     # Create traces
-    color_scale = [[0.00, "grey"],
-                   [0.25, "grey"],
-                   [0.25, "red"],
-                   [0.50, "red"],
-                   [0.50, "white"],
-                   [0.75, "white"],
-                   [0.75, "green"],
-                   [1.00, "green"]]
 
     trace_samples = plgo.Scatter(
         x=xaxis,
@@ -236,8 +186,15 @@ def _generate_trending_traces(in_data, build_info, moving_win_size=10,
         marker={
             "size": 15,
             "symbol": "circle-open",
-            "color": anomalies_res,
-            "colorscale": color_scale,
+            "color": anomalies_colors,
+            "colorscale": [[0.00, "grey"],
+                           [0.25, "grey"],
+                           [0.25, "red"],
+                           [0.50, "red"],
+                           [0.50, "white"],
+                           [0.75, "white"],
+                           [0.75, "green"],
+                           [1.00, "green"]],
             "showscale": True,
             "line": {
                 "width": 2
@@ -279,7 +236,10 @@ def _generate_trending_traces(in_data, build_info, moving_win_size=10,
         )
         traces.append(trace_trend)
 
-    return traces, results[-1]
+    if anomaly_classification:
+        return traces, anomaly_classification[-1]
+    else:
+        return traces, None
 
 
 def _generate_all_charts(spec, input_data):
@@ -302,7 +262,7 @@ def _generate_all_charts(spec, input_data):
         logs.append(("INFO", "  Generating the chart '{0}' ...".
                      format(graph.get("title", ""))))
 
-        job_name = spec.cpta["data"].keys()[0]
+        job_name = graph["data"].keys()[0]
 
         csv_tbl = list()
         res = list()
@@ -316,8 +276,10 @@ def _generate_all_charts(spec, input_data):
             return
 
         chart_data = dict()
-        for job in data:
-            for index, bld in job.items():
+        for job, job_data in data.iteritems():
+            if job != job_name:
+                continue
+            for index, bld in job_data.items():
                 for test_name, test in bld.items():
                     if chart_data.get(test_name, None) is None:
                         chart_data[test_name] = OrderedDict()
@@ -330,7 +292,7 @@ def _generate_all_charts(spec, input_data):
         # Add items to the csv table:
         for tst_name, tst_data in chart_data.items():
             tst_lst = list()
-            for bld in builds_lst:
+            for bld in builds_dict[job_name]:
                 itm = tst_data.get(int(bld), '')
                 tst_lst.append(str(itm))
             csv_tbl.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
@@ -346,6 +308,7 @@ def _generate_all_charts(spec, input_data):
             test_name = test_name.split('.')[-1]
             trace, rslt = _generate_trending_traces(
                 test_data,
+                job_name=job_name,
                 build_info=build_info,
                 moving_win_size=win_size,
                 name='-'.join(test_name.split('-')[3:-1]),
@@ -371,33 +334,33 @@ def _generate_all_charts(spec, input_data):
             except plerr.PlotlyEmptyDataError:
                 logs.append(("WARNING", "No data for the plot. Skipped."))
 
-        logging.info("  Done.")
-
         data_out = {
+            "job_name": job_name,
             "csv_table": csv_tbl,
             "results": res,
             "logs": logs
         }
         data_q.put(data_out)
 
-    job_name = spec.cpta["data"].keys()[0]
-
-    builds_lst = list()
-    for build in spec.input["builds"][job_name]:
-        status = build["status"]
-        if status != "failed" and status != "not found":
-            builds_lst.append(str(build["build"]))
-
-    # Get "build ID": "date" dict:
-    build_info = OrderedDict()
-    for build in builds_lst:
-        try:
-            build_info[build] = (
-                input_data.metadata(job_name, build)["generated"][:14],
-                input_data.metadata(job_name, build)["version"]
+    builds_dict = dict()
+    for job in spec.input["builds"].keys():
+        if builds_dict.get(job, None) is None:
+            builds_dict[job] = list()
+        for build in spec.input["builds"][job]:
+            status = build["status"]
+            if status != "failed" and status != "not found":
+                builds_dict[job].append(str(build["build"]))
+
+    # Create "job name": {"build ID": (generated, version)} dict:
+    build_info = dict()
+    for job_name, job_data in builds_dict.items():
+        if build_info.get(job_name, None) is None:
+            build_info[job_name] = OrderedDict()
+        for build in job_data:
+            build_info[job_name][build] = (
+                input_data.metadata(job_name, build).get("generated", ""),
+                input_data.metadata(job_name, build).get("version", "")
             )
-        except KeyError:
-            build_info[build] = ("", "")
 
     work_queue = multiprocessing.JoinableQueue()
     manager = multiprocessing.Manager()
@@ -419,24 +382,27 @@ def _generate_all_charts(spec, input_data):
         work_queue.put((chart, ))
     work_queue.join()
 
-    results = list()
+    anomaly_classifications = list()
 
     # Create the header:
-    csv_table = list()
-    header = "Build Number:," + ",".join(builds_lst) + '\n'
-    csv_table.append(header)
-    build_dates = [x[0] for x in build_info.values()]
-    header = "Build Date:," + ",".join(build_dates) + '\n'
-    csv_table.append(header)
-    vpp_versions = [x[1] for x in build_info.values()]
-    header = "VPP Version:," + ",".join(vpp_versions) + '\n'
-    csv_table.append(header)
+    csv_tables = dict()
+    for job_name in builds_dict.keys():
+        if csv_tables.get(job_name, None) is None:
+            csv_tables[job_name] = list()
+        header = "Build Number:," + ",".join(builds_dict[job_name]) + '\n'
+        csv_tables[job_name].append(header)
+        build_dates = [x[0] for x in build_info[job_name].values()]
+        header = "Build Date:," + ",".join(build_dates) + '\n'
+        csv_tables[job_name].append(header)
+        versions = [x[1] for x in build_info[job_name].values()]
+        header = "Version:," + ",".join(versions) + '\n'
+        csv_tables[job_name].append(header)
 
     while not data_queue.empty():
         result = data_queue.get()
 
-        results.extend(result["results"])
-        csv_table.extend(result["csv_table"])
+        anomaly_classifications.extend(result["results"])
+        csv_tables[result["job_name"]].extend(result["csv_table"])
 
         for item in result["logs"]:
             if item[0] == "INFO":
@@ -458,46 +424,46 @@ def _generate_all_charts(spec, input_data):
         worker.join()
 
     # Write the tables:
-    file_name = spec.cpta["output-file"] + "-trending"
-    with open("{0}.csv".format(file_name), 'w') as file_handler:
-        file_handler.writelines(csv_table)
-
-    txt_table = None
-    with open("{0}.csv".format(file_name), 'rb') as csv_file:
-        csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
-        line_nr = 0
-        for row in csv_content:
-            if txt_table is None:
-                txt_table = prettytable.PrettyTable(row)
-            else:
-                if line_nr > 1:
-                    for idx, item in enumerate(row):
-                        try:
-                            row[idx] = str(round(float(item) / 1000000, 2))
-                        except ValueError:
-                            pass
-                try:
-                    txt_table.add_row(row)
-                except Exception as err:
-                    logging.warning("Error occurred while generating TXT table:"
-                                    "\n{0}".format(err))
-            line_nr += 1
-        txt_table.align["Build Number:"] = "l"
-    with open("{0}.txt".format(file_name), "w") as txt_file:
-        txt_file.write(str(txt_table))
+    for job_name, csv_table in csv_tables.items():
+        file_name = spec.cpta["output-file"] + "-" + job_name + "-trending"
+        with open("{0}.csv".format(file_name), 'w') as file_handler:
+            file_handler.writelines(csv_table)
+
+        txt_table = None
+        with open("{0}.csv".format(file_name), 'rb') as csv_file:
+            csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
+            line_nr = 0
+            for row in csv_content:
+                if txt_table is None:
+                    txt_table = prettytable.PrettyTable(row)
+                else:
+                    if line_nr > 1:
+                        for idx, item in enumerate(row):
+                            try:
+                                row[idx] = str(round(float(item) / 1000000, 2))
+                            except ValueError:
+                                pass
+                    try:
+                        txt_table.add_row(row)
+                    except Exception as err:
+                        logging.warning("Error occurred while generating TXT "
+                                        "table:\n{0}".format(err))
+                line_nr += 1
+            txt_table.align["Build Number:"] = "l"
+        with open("{0}.txt".format(file_name), "w") as txt_file:
+            txt_file.write(str(txt_table))
 
     # Evaluate result:
-    result = "PASS"
-    for item in results:
-        if item is None:
-            result = "FAIL"
-            break
-        if item == 0.66 and result == "PASS":
-            result = "PASS"
-        elif item == 0.33 or item == 0.0:
-            result = "FAIL"
-
-    logging.info("Partial results: {0}".format(results))
+    if anomaly_classifications:
+        result = "PASS"
+        for classification in anomaly_classifications:
+            if classification == "regression" or classification == "outlier":
+                result = "FAIL"
+                break
+    else:
+        result = "FAIL"
+
+    logging.info("Partial results: {0}".format(anomaly_classifications))
     logging.info("Result: {0}".format(result))
 
     return result