X-Git-Url: https://gerrit.fd.io/r/gitweb?p=csit.git;a=blobdiff_plain;f=resources%2Ftools%2Fpresentation%2Fgenerator_CPTA.py;h=cfd4c5817a6ccddc0d7aa700208d4592595fc074;hp=0c317c05ce4987a138f08c2c76f6f5b2b4d4e61b;hb=a76fb6bd39a0a6ec6183bfb6da9c3ebefb248f33;hpb=5ae1aea05849a69a97f559756e80b9794482f302 diff --git a/resources/tools/presentation/generator_CPTA.py b/resources/tools/presentation/generator_CPTA.py index 0c317c05ce..cfd4c5817a 100644 --- a/resources/tools/presentation/generator_CPTA.py +++ b/resources/tools/presentation/generator_CPTA.py @@ -14,7 +14,6 @@ """Generation of Continuous Performance Trending and Analysis. """ -import datetime import logging import csv import prettytable @@ -25,14 +24,16 @@ import numpy as np import pandas as pd from collections import OrderedDict -from utils import find_outliers, archive_input_data, execute_command +from datetime import datetime, timedelta + +from utils import split_outliers, archive_input_data, execute_command # Command to build the html format of the report HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \ '-b html -E ' \ '-t html ' \ - '-D version="Generated on {date}" ' \ + '-D version="{date}" ' \ '{working_dir} ' \ '{build_dir}/' @@ -64,7 +65,7 @@ def generate_cpta(spec, data): ret_code = _generate_all_charts(spec, data) cmd = HTML_BUILDER.format( - date=datetime.date.today().strftime('%d-%b-%Y'), + date=datetime.utcnow().strftime('%m/%d/%Y %H:%M UTC'), working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"], build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"]) execute_command(cmd) @@ -144,51 +145,53 @@ def _select_data(in_data, period, fill_missing=False, use_first=False): return OrderedDict(sorted(data_dict.items(), key=lambda t: t[0])) -def _evaluate_results(in_data, trimmed_data, window=10): +def _evaluate_results(trimmed_data, window=10): """Evaluates if the sample value is regress, normal or progress compared to previous data within the window. We use the intervals defined as: - - regress: less than median - 3 * stdev - - normal: between median - 3 * stdev and median + 3 * stdev - - progress: more than median + 3 * stdev + - regress: less than trimmed moving median - 3 * stdev + - normal: between trimmed moving median - 3 * stdev and median + 3 * stdev + - progress: more than trimmed moving median + 3 * stdev + where stdev is trimmed moving standard deviation. - :param in_data: Full data set. - :param trimmed_data: Full data set without the outliers. - :param window: Window size used to calculate moving median and moving stdev. - :type in_data: pandas.Series + :param trimmed_data: Full data set with the outliers replaced by nan. + :param window: Window size used to calculate moving average and moving stdev. :type trimmed_data: pandas.Series :type window: int :returns: Evaluated results. :rtype: list """ - if len(in_data) > 2: - win_size = in_data.size if in_data.size < window else window - results = [0.0, ] * win_size - median = in_data.rolling(window=win_size).median() - stdev_t = trimmed_data.rolling(window=win_size, min_periods=2).std() - m_vals = median.values - s_vals = stdev_t.values - d_vals = in_data.values - for day in range(win_size, in_data.size): - if np.isnan(m_vals[day - 1]) or np.isnan(s_vals[day - 1]): + if len(trimmed_data) > 2: + win_size = trimmed_data.size if trimmed_data.size < window else window + results = [0.66, ] + tmm = trimmed_data.rolling(window=win_size, min_periods=2).median() + tmstd = trimmed_data.rolling(window=win_size, min_periods=2).std() + + first = True + for build_nr, value in trimmed_data.iteritems(): + if first: + first = False + continue + if (np.isnan(value) + or np.isnan(tmm[build_nr]) + or np.isnan(tmstd[build_nr])): results.append(0.0) - elif d_vals[day] < (m_vals[day - 1] - 3 * s_vals[day - 1]): + elif value < (tmm[build_nr] - 3 * tmstd[build_nr]): results.append(0.33) - elif (m_vals[day - 1] - 3 * s_vals[day - 1]) <= d_vals[day] <= \ - (m_vals[day - 1] + 3 * s_vals[day - 1]): - results.append(0.66) - else: + elif value > (tmm[build_nr] + 3 * tmstd[build_nr]): results.append(1.0) + else: + results.append(0.66) else: results = [0.0, ] try: - median = np.median(in_data) - stdev = np.std(in_data) - if in_data.values[-1] < (median - 3 * stdev): + tmm = np.median(trimmed_data) + tmstd = np.std(trimmed_data) + if trimmed_data.values[-1] < (tmm - 3 * tmstd): results.append(0.33) - elif (median - 3 * stdev) <= in_data.values[-1] <= ( - median + 3 * stdev): + elif (tmm - 3 * tmstd) <= trimmed_data.values[-1] <= ( + tmm + 3 * tmstd): results.append(0.66) else: results.append(1.0) @@ -197,32 +200,35 @@ def _evaluate_results(in_data, trimmed_data, window=10): return results -def _generate_trending_traces(in_data, period, moving_win_size=10, +def _generate_trending_traces(in_data, build_info, period, moving_win_size=10, fill_missing=True, use_first=False, - show_moving_median=True, name="", color=""): + show_trend_line=True, name="", color=""): """Generate the trending traces: - samples, - - moving median (trending plot) + - trimmed moving median (trending line) - outliers, regress, progress :param in_data: Full data set. + :param build_info: Information about the builds. :param period: Sampling period. :param moving_win_size: Window size. :param fill_missing: If the chosen sample is missing in the full set, its - nearest neighbour is used. + nearest neighbour is used. :param use_first: Use the first sample even though it is not chosen. - :param show_moving_median: Show moving median (trending plot). + :param show_trend_line: Show moving median (trending plot). :param name: Name of the plot :param color: Name of the color for the plot. :type in_data: OrderedDict + :type build_info: dict :type period: int :type moving_win_size: int :type fill_missing: bool :type use_first: bool - :type show_moving_median: bool + :type show_trend_line: bool :type name: str :type color: str - :returns: Generated traces (list) and the evaluated result (float). + :returns: Generated traces (list), the evaluated result (float) and the + first and last date. :rtype: tuple(traces, result) """ @@ -231,17 +237,28 @@ def _generate_trending_traces(in_data, period, moving_win_size=10, fill_missing=fill_missing, use_first=use_first) - data_x = [key for key in in_data.keys()] - data_y = [val for val in in_data.values()] - data_pd = pd.Series(data_y, index=data_x) + data_x = list(in_data.keys()) + data_y = list(in_data.values()) - t_data, outliers = find_outliers(data_pd) + hover_text = list() + xaxis = list() + for idx in data_x: + hover_text.append("vpp-ref: {0}
csit-ref: mrr-daily-build-{1}". + format(build_info[str(idx)][1].rsplit('~', 1)[0], + idx)) + date = build_info[str(idx)][0] + xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]), + int(date[9:11]), int(date[12:]))) - results = _evaluate_results(data_pd, t_data, window=moving_win_size) + data_pd = pd.Series(data_y, index=xaxis) + + t_data, outliers = split_outliers(data_pd, outlier_const=1.5, + window=moving_win_size) + results = _evaluate_results(t_data, window=moving_win_size) anomalies = pd.Series() anomalies_res = list() - for idx, item in enumerate(in_data.items()): + for idx, item in enumerate(data_pd.items()): item_pd = pd.Series([item[1], ], index=[item[0], ]) if item[0] in outliers.keys(): anomalies = anomalies.append(item_pd) @@ -262,7 +279,7 @@ def _generate_trending_traces(in_data, period, moving_win_size=10, [1.00, "green"]] trace_samples = plgo.Scatter( - x=data_x, + x=xaxis, y=data_y, mode='markers', line={ @@ -274,6 +291,8 @@ def _generate_trending_traces(in_data, period, moving_win_size=10, "color": color, "symbol": "circle", }, + text=hover_text, + hoverinfo="x+y+text+name" ) traces = [trace_samples, ] @@ -282,28 +301,30 @@ def _generate_trending_traces(in_data, period, moving_win_size=10, y=anomalies.values, mode='markers', hoverinfo="none", - showlegend=False, + showlegend=True, legendgroup=name, - name="{name}: outliers".format(name=name), + name="{name}-anomalies".format(name=name), marker={ "size": 15, "symbol": "circle-open", "color": anomalies_res, "colorscale": color_scale, "showscale": True, - + "line": { + "width": 2 + }, "colorbar": { "y": 0.5, "len": 0.8, - "title": "Results Clasification", + "title": "Circles Marking Data Classification", "titleside": 'right', "titlefont": { "size": 14 }, "tickmode": 'array', "tickvals": [0.125, 0.375, 0.625, 0.875], - "ticktext": ["Outlier", "Regress", "Normal", "Progress"], - "ticks": 'outside', + "ticktext": ["Outlier", "Regression", "Normal", "Progression"], + "ticks": "", "ticklen": 0, "tickangle": -90, "thickness": 10 @@ -312,13 +333,12 @@ def _generate_trending_traces(in_data, period, moving_win_size=10, ) traces.append(trace_anomalies) - if show_moving_median: - min_periods = moving_win_size / 2 + 1 - data_mean_y = pd.Series(data_y).rolling( - window=moving_win_size, min_periods=min_periods).median() - trace_median = plgo.Scatter( - x=data_x, - y=data_mean_y, + if show_trend_line: + data_trend = t_data.rolling(window=moving_win_size, + min_periods=2).median() + trace_trend = plgo.Scatter( + x=data_trend.keys(), + y=data_trend.tolist(), mode='lines', line={ "shape": "spline", @@ -327,9 +347,9 @@ def _generate_trending_traces(in_data, period, moving_win_size=10, }, name='{name}-trend'.format(name=name) ) - traces.append(trace_median) + traces.append(trace_trend) - return traces, results[-1] + return traces, results[-1], xaxis[0], xaxis[-1] def _generate_chart(traces, layout, file_name): @@ -361,12 +381,38 @@ def _generate_all_charts(spec, input_data): :type input_data: InputData """ - csv_table = list() + job_name = spec.cpta["data"].keys()[0] + + builds_lst = list() + for build in spec.input["builds"][job_name]: + status = build["status"] + if status != "failed" and status != "not found": + builds_lst.append(str(build["build"])) + + # Get "build ID": "date" dict: + build_info = OrderedDict() + for build in builds_lst: + try: + build_info[build] = ( + input_data.metadata(job_name, build)["generated"][:14], + input_data.metadata(job_name, build)["version"] + ) + except KeyError: + build_info[build] = ("", "") + logging.info("{}: {}, {}".format(build, + build_info[build][0], + build_info[build][1])) + # Create the header: - builds = spec.cpta["data"].values()[0] - builds_lst = [str(build) for build in range(builds[0], builds[-1] + 1)] + csv_table = list() header = "Build Number:," + ",".join(builds_lst) + '\n' csv_table.append(header) + build_dates = [x[0] for x in build_info.values()] + header = "Build Date:," + ",".join(build_dates) + '\n' + csv_table.append(header) + vpp_versions = [x[1] for x in build_info.values()] + header = "VPP Version:," + ",".join(vpp_versions) + '\n' + csv_table.append(header) results = list() for chart in spec.cpta["plots"]: @@ -396,13 +442,13 @@ def _generate_all_charts(spec, input_data): tst_lst = list() for build in builds_lst: item = tst_data.get(int(build), '') - tst_lst.append(str(item) if item else '') + tst_lst.append(str(item)) csv_table.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n') for period in chart["periods"]: # Generate traces: traces = list() - win_size = 10 if period == 1 else 5 if period < 20 else 3 + win_size = 14 idx = 0 for test_name, test_data in chart_data.items(): if not test_data: @@ -410,29 +456,36 @@ def _generate_all_charts(spec, input_data): format(test_name)) continue test_name = test_name.split('.')[-1] - trace, result = _generate_trending_traces( - test_data, - period=period, - moving_win_size=win_size, - fill_missing=True, - use_first=False, - name='-'.join(test_name.split('-')[3:-1]), - color=COLORS[idx]) + trace, result, first_date, last_date = \ + _generate_trending_traces( + test_data, + build_info=build_info, + period=period, + moving_win_size=win_size, + fill_missing=True, + use_first=False, + name='-'.join(test_name.split('-')[3:-1]), + color=COLORS[idx]) traces.extend(trace) results.append(result) idx += 1 - # Generate the chart: - period_name = "Daily" if period == 1 else \ - "Weekly" if period < 20 else "Monthly" - chart["layout"]["title"] = chart["title"].format(period=period_name) - _generate_chart(traces, - chart["layout"], - file_name="{0}-{1}-{2}{3}".format( - spec.cpta["output-file"], - chart["output-file-name"], - period, - spec.cpta["output-file-type"])) + if traces: + # Generate the chart: + chart["layout"]["xaxis"]["title"] = \ + chart["layout"]["xaxis"]["title"].format(job=job_name) + delta = timedelta(days=30) + start = last_date - delta + start = first_date if start < first_date else start + chart["layout"]["xaxis"]["range"] = [str(start.date()), + str(last_date.date())] + _generate_chart(traces, + chart["layout"], + file_name="{0}-{1}-{2}{3}".format( + spec.cpta["output-file"], + chart["output-file-name"], + period, + spec.cpta["output-file-type"])) logging.info(" Done.") @@ -444,11 +497,23 @@ def _generate_all_charts(spec, input_data): txt_table = None with open("{0}.csv".format(file_name), 'rb') as csv_file: csv_content = csv.reader(csv_file, delimiter=',', quotechar='"') + line_nr = 0 for row in csv_content: if txt_table is None: txt_table = prettytable.PrettyTable(row) else: - txt_table.add_row(row) + if line_nr > 1: + for idx, item in enumerate(row): + try: + row[idx] = str(round(float(item) / 1000000, 2)) + except ValueError: + pass + try: + txt_table.add_row(row) + except Exception as err: + logging.warning("Error occurred while generating TXT table:" + "\n{0}".format(err)) + line_nr += 1 txt_table.align["Build Number:"] = "l" with open("{0}.txt".format(file_name), "w") as txt_file: txt_file.write(str(txt_table))