# Recovered from the git diff of
# resources/tools/presentation/generator_CPTA.py (csit.git).
#
# Copyright (c) 2018 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generation of Continuous Performance Trending and Analysis.
"""

import datetime
import logging
import csv
import prettytable
import plotly.offline as ploff
import plotly.graph_objs as plgo
import plotly.exceptions as plerr
import numpy as np
import pandas as pd

from collections import OrderedDict
from utils import find_outliers, archive_input_data, execute_command


# Command to build the html format of the report
HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
               '-b html -E ' \
               '-t html ' \
               '-D version="Generated on {date}" ' \
               '{working_dir} ' \
               '{build_dir}/'

# .css file for the html format of the report
THEME_OVERRIDES = """/* override table width restrictions */
.wy-nav-content {
    max-width: 1200px !important;
}
"""

COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
          "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
          "Violet", "Blue", "Yellow"]


def generate_cpta(spec, data):
    """Generate all formats and versions of the Continuous Performance Trending
    and Analysis.

    The charts and tables are generated first, then the html report is built
    by the command HTML_BUILDER, the css overrides are written and the input
    data is archived.

    :param spec: Specification read from the specification file.
    :param data: Full data set.
    :type spec: Specification
    :type data: InputData
    :returns: Whatever _generate_all_charts returns: "PASS", "FAIL", or None
        if a chart had no data.
    :rtype: str or None
    """

    logging.info("Generating the Continuous Performance Trending and Analysis "
                 "...")

    ret_code = _generate_all_charts(spec, data)

    paths = spec.environment["paths"]
    cmd = HTML_BUILDER.format(
        date=datetime.date.today().strftime('%d-%b-%Y'),
        working_dir=paths["DIR[WORKING,SRC]"],
        build_dir=paths["DIR[BUILD,HTML]"])
    execute_command(cmd)

    # The same overrides are written to both css patch files.
    for css_key in ("DIR[CSS_PATCH_FILE]", "DIR[CSS_PATCH_FILE2]"):
        with open(paths[css_key], "w") as css_file:
            css_file.write(THEME_OVERRIDES)

    archive_input_data(spec)

    logging.info("Done.")

    return ret_code


def _neighbour_candidates(idx, idx_low, idx_high):
    """Yield the indices around idx, the nearest first.

    For each distance the higher index is yielded before the lower one. Only
    indices within <idx_low, idx_high> are yielded.

    :param idx: Index of the missing sample.
    :param idx_low: The lowest index which can be used.
    :param idx_high: The highest index which can be used.
    :type idx: int
    :type idx_low: int
    :type idx_high: int
    :returns: Generator of candidate indices.
    :rtype: generator
    """

    inc = 1
    while idx + inc <= idx_high or idx - inc >= idx_low:
        if idx + inc <= idx_high:
            yield idx + inc
        if idx - inc >= idx_low:
            yield idx - inc
        inc += 1


def _select_data(in_data, period, fill_missing=False, use_first=False):
    """Select the data from the full data set. The selection is done by picking
    the samples depending on the period: period = 1: All, period = 2: every
    second sample, period = 3: every third sample ...

    The samples are picked from the last one towards the first one.

    :param in_data: Full set of data.
    :param period: Sampling period.
    :param fill_missing: If the chosen sample is missing in the full set, its
        nearest neighbour is used.
    :param use_first: Use the first sample even though it is not chosen.
    :type in_data: OrderedDict
    :type period: int
    :type fill_missing: bool
    :type use_first: bool
    :returns: Reduced data sorted by the key.
    :rtype: OrderedDict
    :raises ValueError: If the period is lower than 1.
    """

    # A non-positive period would never reach the end of the loop below.
    if period < 1:
        raise ValueError("The period must be a positive integer, got {0}.".
                         format(period))

    if not in_data:
        return OrderedDict()

    first_idx = min(in_data)
    last_idx = max(in_data)

    data_dict = dict()
    if use_first:
        data_dict[first_idx] = in_data[first_idx]

    # Integer division is used explicitly so that the window searched for
    # a neighbour does not depend on the version of the interpreter.
    half = period // 2

    idx = last_idx
    while idx >= first_idx:
        data = in_data.get(idx, None)
        if data is not None:
            data_dict[idx] = data
        elif fill_missing:
            idx_low = max(first_idx, idx - half + 1 - period % 2)
            idx_high = min(last_idx, idx + half)
            for i in _neighbour_candidates(idx, idx_low, idx_high):
                if i in in_data:
                    data_dict[i] = in_data[i]
                    break
        idx -= period

    return OrderedDict(sorted(data_dict.items(), key=lambda t: t[0]))


def _classify_sample(value, median, stdev):
    """Classify the value against the interval median +- 3 * stdev.

    :param value: Evaluated sample.
    :param median: Median used as the centre of the interval.
    :param stdev: Standard deviation; the interval is 3 * stdev wide to each
        side.
    :type value: float
    :type median: float
    :type stdev: float
    :returns: 0.33 for regression (below the interval), 0.66 for normal
        (inside the interval, limits included), 1.0 otherwise.
    :rtype: float
    """

    if value < median - 3 * stdev:
        return 0.33
    if value <= median + 3 * stdev:
        return 0.66
    return 1.0


def _evaluate_results(in_data, trimmed_data, window=10):
    """Evaluates if the sample value is regress, normal or progress compared to
    previous data within the window.
    We use the intervals defined as:
    - regress: less than median - 3 * stdev
    - normal: between median - 3 * stdev and median + 3 * stdev
    - progress: more than median + 3 * stdev

    The values used in the returned list:
    - 0.0: not evaluated,
    - 0.33: regress,
    - 0.66: normal,
    - 1.0: progress,
    - None: the data cannot be evaluated (only if there are no more than two
      samples).

    :param in_data: Full data set.
    :param trimmed_data: Full data set without the outliers.
    :param window: Window size used to calculate moving median and moving stdev.
    :type in_data: pandas.Series
    :type trimmed_data: pandas.Series
    :type window: int
    :returns: Evaluated results.
    :rtype: list
    """

    if len(in_data) > 2:
        win_size = min(in_data.size, window)
        # The first win_size samples have no complete window before them, so
        # they are not evaluated.
        results = [0.0, ] * win_size
        m_vals = in_data.rolling(window=win_size).median().values
        s_vals = trimmed_data.rolling(
            window=win_size, min_periods=2).std().values
        d_vals = in_data.values
        for day in range(win_size, in_data.size):
            # Each sample is compared to the statistics of the window which
            # ends at the previous sample.
            median = m_vals[day - 1]
            stdev = s_vals[day - 1]
            if np.isnan(median) or np.isnan(stdev):
                results.append(0.0)
            else:
                results.append(_classify_sample(d_vals[day], median, stdev))
    else:
        # Not enough data for moving statistics; the last sample is compared
        # to the median and stdev of all samples.
        results = [0.0, ]
        try:
            median = np.median(in_data)
            stdev = np.std(in_data)
            results.append(
                _classify_sample(in_data.values[-1], median, stdev))
        except TypeError:
            results.append(None)
    return results


def _generate_trending_traces(in_data, period, moving_win_size=10,
                              fill_missing=True, use_first=False,
                              show_moving_median=True, name="", color=""):
    """Generate the trending traces:
     - samples,
     - moving median (trending plot)
     - outliers, regress, progress

    :param in_data: Full data set.
    :param period: Sampling period.
    :param moving_win_size: Window size.
    :param fill_missing: If the chosen sample is missing in the full set, its
        nearest neighbour is used.
    :param use_first: Use the first sample even though it is not chosen.
    :param show_moving_median: Show moving median (trending plot).
    :param name: Name of the plot
    :param color: Name of the color for the plot.
    :type in_data: OrderedDict
    :type period: int
    :type moving_win_size: int
    :type fill_missing: bool
    :type use_first: bool
    :type show_moving_median: bool
    :type name: str
    :type color: str
    :returns: Generated traces (list) and the evaluated result of the last
        sample (float, or None if it cannot be evaluated).
    :rtype: tuple(traces, result)
    """

    if period > 1:
        in_data = _select_data(in_data, period,
                               fill_missing=fill_missing,
                               use_first=use_first)

    data_x = list(in_data.keys())
    data_y = list(in_data.values())
    data_pd = pd.Series(data_y, index=data_x)

    t_data, outliers = find_outliers(data_pd)

    results = _evaluate_results(data_pd, t_data, window=moving_win_size)

    # The anomalies are collected in plain lists; growing a pandas.Series
    # item by item copies the data on each step and Series.append is not
    # available in recent versions of pandas.
    anomalies_x = list()
    anomalies_y = list()
    anomalies_res = list()
    outlier_keys = outliers.keys()
    for idx, (key, value) in enumerate(in_data.items()):
        if key in outlier_keys:
            classification = 0.0
        elif results[idx] in (0.33, 1.0):
            classification = results[idx]
        else:
            continue
        anomalies_x.append(key)
        anomalies_y.append(value)
        anomalies_res.append(classification)
    # NOTE(review): the four extra values have no samples; presumably they
    # make the color scale always span the whole range 0.0 - 1.0 - confirm.
    anomalies_res.extend([0.0, 0.33, 0.66, 1.0])

    # Create traces
    color_scale = [[0.00, "grey"],
                   [0.25, "grey"],
                   [0.25, "red"],
                   [0.50, "red"],
                   [0.50, "white"],
                   [0.75, "white"],
                   [0.75, "green"],
                   [1.00, "green"]]

    trace_samples = plgo.Scatter(
        x=data_x,
        y=data_y,
        mode='markers',
        line={
            "width": 1
        },
        name="{name}-thput".format(name=name),
        marker={
            "size": 5,
            "color": color,
            "symbol": "circle",
        },
    )
    traces = [trace_samples, ]

    trace_anomalies = plgo.Scatter(
        x=anomalies_x,
        y=anomalies_y,
        mode='markers',
        hoverinfo="none",
        showlegend=False,
        legendgroup=name,
        name="{name}: outliers".format(name=name),
        marker={
            "size": 15,
            "symbol": "circle-open",
            "color": anomalies_res,
            "colorscale": color_scale,
            "showscale": True,
            "line": {
                "width": 2
            },
            "colorbar": {
                "y": 0.5,
                "len": 0.8,
                "title": "Circles Marking Data Classification",
                "titleside": 'right',
                "titlefont": {
                    "size": 14
                },
                "tickmode": 'array',
                "tickvals": [0.125, 0.375, 0.625, 0.875],
                "ticktext": ["Outlier", "Regression", "Normal", "Progression"],
                "ticks": "",
                "ticklen": 0,
                "tickangle": -90,
                "thickness": 10
            }
        }
    )
    traces.append(trace_anomalies)

    if show_moving_median:
        data_mean_y = pd.Series(data_y).rolling(
            window=moving_win_size, min_periods=2).median()
        trace_median = plgo.Scatter(
            x=data_x,
            y=data_mean_y,
            mode='lines',
            line={
                "shape": "spline",
                "width": 1,
                "color": color,
            },
            name='{name}-trend'.format(name=name)
        )
        traces.append(trace_median)

    return traces, results[-1]


def _generate_chart(traces, layout, file_name):
    """Generates the whole chart using pre-generated traces.

    If plotly reports that there is no data to plot, a warning is logged and
    the chart is skipped.

    :param traces: Traces for the chart.
    :param layout: Layout of the chart.
    :param file_name: File name for the generated chart.
    :type traces: list
    :type layout: dict
    :type file_name: str
    """

    # Create plot
    logging.info(" Writing the file '{0}' ...".format(file_name))
    plpl = plgo.Figure(data=traces, layout=layout)
    try:
        ploff.plot(plpl, show_link=False, auto_open=False, filename=file_name)
    except plerr.PlotlyEmptyDataError:
        logging.warning(" No data for the plot. Skipped.")


def _generate_all_charts(spec, input_data):
    """Generate all charts specified in the specification file.

    Besides the charts, the tables with the trending data are written to the
    files "<output-file>-trending.csv" and "<output-file>-trending.txt".

    :param spec: Specification.
    :param input_data: Full data set.
    :type spec: Specification
    :type input_data: InputData
    :returns: "PASS" if no evaluated test is classified as an outlier,
        a regression or not evaluable, otherwise "FAIL". None if there is no
        data for a chart; in that case the generation is aborted and no table
        is written.
    :rtype: str or None
    """

    csv_table = list()
    # Create the header:
    # list() makes the first item accessible also when values() is a view.
    builds = list(spec.cpta["data"].values())[0]
    builds_lst = [str(build) for build in range(builds[0], builds[-1] + 1)]
    header = "Build Number:," + ",".join(builds_lst) + '\n'
    csv_table.append(header)

    results = list()
    for chart in spec.cpta["plots"]:
        logging.info(" Generating the chart '{0}' ...".
                     format(chart.get("title", "")))

        # Transform the data
        data = input_data.filter_data(chart, continue_on_error=True)
        if data is None:
            logging.error("No data.")
            return None

        chart_data = dict()
        for job in data:
            for idx, build in job.items():
                for test_name, test in build.items():
                    if chart_data.get(test_name, None) is None:
                        chart_data[test_name] = OrderedDict()
                    try:
                        chart_data[test_name][int(idx)] = \
                            test["result"]["throughput"]
                    except (KeyError, TypeError):
                        # Tests without the throughput are left out on
                        # purpose.
                        pass

        # Add items to the csv table:
        for tst_name, tst_data in chart_data.items():
            tst_lst = list()
            for build in builds_lst:
                item = tst_data.get(int(build), '')
                tst_lst.append(str(item) if item else '')
            csv_table.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')

        for period in chart["periods"]:
            # Generate traces:
            traces = list()
            win_size = 10 if period == 1 else 5 if period < 20 else 3
            idx = 0
            for test_name, test_data in chart_data.items():
                if not test_data:
                    logging.warning("No data for the test '{0}'".
                                    format(test_name))
                    continue
                test_name = test_name.split('.')[-1]
                trace, result = _generate_trending_traces(
                    test_data,
                    period=period,
                    moving_win_size=win_size,
                    fill_missing=True,
                    use_first=False,
                    name='-'.join(test_name.split('-')[3:-1]),
                    # The colors are reused if there are more tests than
                    # colors.
                    color=COLORS[idx % len(COLORS)])
                traces.extend(trace)
                results.append(result)
                idx += 1

            # Generate the chart:
            _generate_chart(traces,
                            chart["layout"],
                            file_name="{0}-{1}-{2}{3}".format(
                                spec.cpta["output-file"],
                                chart["output-file-name"],
                                period,
                                spec.cpta["output-file-type"]))

        logging.info(" Done.")

    # Write the tables:
    file_name = spec.cpta["output-file"] + "-trending"
    with open("{0}.csv".format(file_name), 'w') as file_handler:
        file_handler.writelines(csv_table)

    # The txt table is built from the rows which are already in memory, the
    # csv file is not read back.
    txt_table = None
    for row in csv.reader(csv_table, delimiter=',', quotechar='"'):
        if txt_table is None:
            # The first row is the header.
            txt_table = prettytable.PrettyTable(row)
            continue
        for idx, item in enumerate(row):
            try:
                row[idx] = str(round(float(item) / 1000000, 2))
            except ValueError:
                # Test names and empty cells are kept as they are.
                pass
        txt_table.add_row(row)
    txt_table.align["Build Number:"] = "l"
    with open("{0}.txt".format(file_name), "w") as txt_file:
        txt_file.write(str(txt_table))

    # Evaluate result:
    # None (not evaluable), 0.33 (regression) and 0.0 (outlier or not
    # evaluated) fail the run; 0.66 (normal) and 1.0 (progression) do not.
    failed = any(item is None or item in (0.33, 0.0) for item in results)
    result = "FAIL" if failed else "PASS"

    logging.info("Partial results: {0}".format(results))
    logging.info("Result: {0}".format(result))

    return result