X-Git-Url: https://gerrit.fd.io/r/gitweb?p=csit.git;a=blobdiff_plain;f=resources%2Ftools%2Fpresentation%2Fgenerator_tables.py;h=43117cc4ed40d19f31a21ab06659fa1d5d8f4a0a;hp=6aa57db796dfa57743871ca036c359df2eb32aec;hb=2e63ef13b419da1198439617e66cb0f1cfe6be65;hpb=39b4a07718ecab94ea331362edb62dfcf678bd09 diff --git a/resources/tools/presentation/generator_tables.py b/resources/tools/presentation/generator_tables.py index 6aa57db796..43117cc4ed 100644 --- a/resources/tools/presentation/generator_tables.py +++ b/resources/tools/presentation/generator_tables.py @@ -25,8 +25,8 @@ from numpy import nan, isnan from xml.etree import ElementTree as ET from errors import PresentationError -from utils import mean, stdev, relative_change, remove_outliers,\ - split_outliers, classify_anomalies, convert_csv_to_pretty_txt +from utils import mean, stdev, relative_change, classify_anomalies, \ + convert_csv_to_pretty_txt def generate_tables(spec, data): @@ -444,37 +444,22 @@ def table_performance_comparison(table, input_data): if tbl_dict[tst_name].get("history", None) is not None: for hist_data in tbl_dict[tst_name]["history"].values(): if hist_data: - data_t = remove_outliers( - hist_data, outlier_const=table["outlier-const"]) - if data_t: - item.append(round(mean(data_t) / 1000000, 2)) - item.append(round(stdev(data_t) / 1000000, 2)) - else: - item.extend([None, None]) + item.append(round(mean(hist_data) / 1000000, 2)) + item.append(round(stdev(hist_data) / 1000000, 2)) else: item.extend([None, None]) else: item.extend([None, None]) - if tbl_dict[tst_name]["ref-data"]: - data_t = remove_outliers(tbl_dict[tst_name]["ref-data"], - outlier_const=table["outlier-const"]) - # TODO: Specify window size. - if data_t: - item.append(round(mean(data_t) / 1000000, 2)) - item.append(round(stdev(data_t) / 1000000, 2)) - else: - item.extend([None, None]) + data_t = tbl_dict[tst_name]["ref-data"] + if data_t: + item.append(round(mean(data_t) / 1000000, 2)) + item.append(round(stdev(data_t) / 1000000, 2)) else: item.extend([None, None]) - if tbl_dict[tst_name]["cmp-data"]: - data_t = remove_outliers(tbl_dict[tst_name]["cmp-data"], - outlier_const=table["outlier-const"]) - # TODO: Specify window size. - if data_t: - item.append(round(mean(data_t) / 1000000, 2)) - item.append(round(stdev(data_t) / 1000000, 2)) - else: - item.extend([None, None]) + data_t = tbl_dict[tst_name]["cmp-data"] + if data_t: + item.append(round(mean(data_t) / 1000000, 2)) + item.append(round(stdev(data_t) / 1000000, 2)) else: item.extend([None, None]) if item[-4] is not None and item[-2] is not None and item[-4] != 0: @@ -644,26 +629,16 @@ def table_performance_comparison_mrr(table, input_data): tbl_lst = list() for tst_name in tbl_dict.keys(): item = [tbl_dict[tst_name]["name"], ] - if tbl_dict[tst_name]["ref-data"]: - data_t = remove_outliers(tbl_dict[tst_name]["ref-data"], - outlier_const=table["outlier-const"]) - # TODO: Specify window size. - if data_t: - item.append(round(mean(data_t) / 1000000, 2)) - item.append(round(stdev(data_t) / 1000000, 2)) - else: - item.extend([None, None]) + data_t = tbl_dict[tst_name]["ref-data"] + if data_t: + item.append(round(mean(data_t) / 1000000, 2)) + item.append(round(stdev(data_t) / 1000000, 2)) else: item.extend([None, None]) - if tbl_dict[tst_name]["cmp-data"]: - data_t = remove_outliers(tbl_dict[tst_name]["cmp-data"], - outlier_const=table["outlier-const"]) - # TODO: Specify window size. - if data_t: - item.append(round(mean(data_t) / 1000000, 2)) - item.append(round(stdev(data_t) / 1000000, 2)) - else: - item.extend([None, None]) + data_t = tbl_dict[tst_name]["cmp-data"] + if data_t: + item.append(round(mean(data_t) / 1000000, 2)) + item.append(round(stdev(data_t) / 1000000, 2)) else: item.extend([None, None]) if item[1] is not None and item[3] is not None and item[1] != 0: @@ -705,7 +680,8 @@ def table_performance_comparison_mrr(table, input_data): def table_performance_trending_dashboard(table, input_data): - """Generate the table(s) with algorithm: table_performance_comparison + """Generate the table(s) with algorithm: + table_performance_trending_dashboard specified in the specification file. :param table: Table to generate. @@ -728,8 +704,7 @@ def table_performance_trending_dashboard(table, input_data): "Short-Term Change [%]", "Long-Term Change [%]", "Regressions [#]", - "Progressions [#]", - "Outliers [#]" + "Progressions [#]" ] header_str = ",".join(header) + "\n" @@ -754,61 +729,47 @@ def table_performance_trending_dashboard(table, input_data): tbl_lst = list() for tst_name in tbl_dict.keys(): - if len(tbl_dict[tst_name]["data"]) < 3: + if len(tbl_dict[tst_name]["data"]) < 2: continue - pd_data = pd.Series(tbl_dict[tst_name]["data"]) - data_t, _ = split_outliers(pd_data, outlier_const=1.5, - window=table["window"]) - last_key = data_t.keys()[-1] + data_t = pd.Series(tbl_dict[tst_name]["data"]) + + classification_lst, avgs = classify_anomalies(data_t) + win_size = min(data_t.size, table["window"]) - win_first_idx = data_t.size - win_size - key_14 = data_t.keys()[win_first_idx] long_win_size = min(data_t.size, table["long-trend-window"]) - median_t = data_t.rolling(window=win_size, min_periods=2).median() - median_first_idx = median_t.size - long_win_size try: - max_median = max( - [x for x in median_t.values[median_first_idx:-win_size] + max_long_avg = max( + [x for x in avgs[-long_win_size:-win_size] if not isnan(x)]) except ValueError: - max_median = nan - try: - last_median_t = median_t[last_key] - except KeyError: - last_median_t = nan - try: - median_t_14 = median_t[key_14] - except KeyError: - median_t_14 = nan + max_long_avg = nan + last_avg = avgs[-1] + avg_week_ago = avgs[max(-win_size, -len(avgs))] - if isnan(last_median_t) or isnan(median_t_14) or median_t_14 == 0.0: + if isnan(last_avg) or isnan(avg_week_ago) or avg_week_ago == 0.0: rel_change_last = nan else: rel_change_last = round( - ((last_median_t - median_t_14) / median_t_14) * 100, 2) + ((last_avg - avg_week_ago) / avg_week_ago) * 100, 2) - if isnan(max_median) or isnan(last_median_t) or max_median == 0.0: + if isnan(max_long_avg) or isnan(last_avg) or max_long_avg == 0.0: rel_change_long = nan else: rel_change_long = round( - ((last_median_t - max_median) / max_median) * 100, 2) - - # Classification list: - classification_lst = classify_anomalies(data_t, window=14) + ((last_avg - max_long_avg) / max_long_avg) * 100, 2) if classification_lst: if isnan(rel_change_last) and isnan(rel_change_long): continue tbl_lst.append( [tbl_dict[tst_name]["name"], - '-' if isnan(last_median_t) else - round(last_median_t / 1000000, 2), + '-' if isnan(last_avg) else + round(last_avg / 1000000, 2), '-' if isnan(rel_change_last) else rel_change_last, '-' if isnan(rel_change_long) else rel_change_long, - classification_lst[win_first_idx:].count("regression"), - classification_lst[win_first_idx:].count("progression"), - classification_lst[win_first_idx:].count("outlier")]) + classification_lst[-win_size:].count("regression"), + classification_lst[-win_size:].count("progression")]) tbl_lst.sort(key=lambda rel: rel[0]) @@ -816,11 +777,9 @@ def table_performance_trending_dashboard(table, input_data): for nrr in range(table["window"], -1, -1): tbl_reg = [item for item in tbl_lst if item[4] == nrr] for nrp in range(table["window"], -1, -1): - tbl_pro = [item for item in tbl_reg if item[5] == nrp] - for nro in range(table["window"], -1, -1): - tbl_out = [item for item in tbl_pro if item[6] == nro] - tbl_out.sort(key=lambda rel: rel[2]) - tbl_sorted.extend(tbl_out) + tbl_out = [item for item in tbl_reg if item[5] == nrp] + tbl_out.sort(key=lambda rel: rel[2]) + tbl_sorted.extend(tbl_out) file_name = "{0}{1}".format(table["output-file"], table["output-file-ext"]) @@ -958,15 +917,12 @@ def table_performance_trending_dashboard_html(table, input_data): # Rows: colors = {"regression": ("#ffcccc", "#ff9999"), "progression": ("#c6ecc6", "#9fdf9f"), - "outlier": ("#e6e6e6", "#cccccc"), "normal": ("#e9f1fb", "#d4e4f7")} for r_idx, row in enumerate(csv_lst[1:]): if int(row[4]): color = "regression" elif int(row[5]): color = "progression" - elif int(row[6]): - color = "outlier" else: color = "normal" background = colors[color][r_idx % 2]