1 # Copyright (c) 2018 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
14 """Generation of Continuous Performance Trending and Analysis.
20 import plotly.offline as ploff
21 import plotly.graph_objs as plgo
22 import plotly.exceptions as plerr
26 from collections import OrderedDict
27 from datetime import datetime, timedelta
29 from utils import split_outliers, archive_input_data, execute_command
32 # Command to build the html format of the report
33 HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
36 '-D version="{date}" ' \
40 # .css file for the html format of the report
41 THEME_OVERRIDES = """/* override table width restrictions */
43 max-width: 1200px !important;
47 COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
48 "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
49 "Violet", "Blue", "Yellow"]
52 def generate_cpta(spec, data):
53 """Generate all formats and versions of the Continuous Performance Trending
56 :param spec: Specification read from the specification file.
57 :param data: Full data set.
58 :type spec: Specification
62 logging.info("Generating the Continuous Performance Trending and Analysis "
65 ret_code = _generate_all_charts(spec, data)
67 cmd = HTML_BUILDER.format(
68 date=datetime.utcnow().strftime('%m/%d/%Y %H:%M UTC'),
69 working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
70 build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
73 with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
75 css_file.write(THEME_OVERRIDES)
77 with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
79 css_file.write(THEME_OVERRIDES)
81 archive_input_data(spec)
88 def _select_data(in_data, period, fill_missing=False, use_first=False):
89 """Select the data from the full data set. The selection is done by picking
90 the samples depending on the period: period = 1: All, period = 2: every
91 second sample, period = 3: every third sample ...
93 :param in_data: Full set of data.
94 :param period: Sampling period.
95 :param fill_missing: If the chosen sample is missing in the full set, its
96 nearest neighbour is used.
97 :param use_first: Use the first sample even though it is not chosen.
98 :type in_data: OrderedDict
100 :type fill_missing: bool
101 :type use_first: bool
102 :returns: Reduced data.
106 first_idx = min(in_data.keys())
107 last_idx = max(in_data.keys())
112 data_dict[first_idx] = in_data[first_idx]
113 while idx >= first_idx:
114 data = in_data.get(idx, None)
117 threshold = int(round(idx - period / 2)) + 1 - period % 2
118 idx_low = first_idx if threshold < first_idx else threshold
119 threshold = int(round(idx + period / 2))
120 idx_high = last_idx if threshold > last_idx else threshold
126 while flag_l or flag_h:
127 if idx + inc > idx_high:
130 idx_lst.append(idx + inc)
131 if idx - inc < idx_low:
134 idx_lst.append(idx - inc)
138 if i in in_data.keys():
139 data_dict[i] = in_data[i]
142 data_dict[idx] = data
145 return OrderedDict(sorted(data_dict.items(), key=lambda t: t[0]))
148 def _evaluate_results(trimmed_data, window=10):
149 """Evaluates whether the sample value is a regression, normal or a
150 progression compared to previous data within the window.
151 We use the intervals defined as:
152 - regress: less than trimmed moving median - 3 * stdev
153 - normal: between trimmed moving median - 3 * stdev and median + 3 * stdev
154 - progress: more than trimmed moving median + 3 * stdev
155 where stdev is trimmed moving standard deviation.
157 :param trimmed_data: Full data set with the outliers replaced by nan.
158 :param window: Window size used to calculate moving median and moving stdev.
159 :type trimmed_data: pandas.Series
161 :returns: Evaluated results.
165 if len(trimmed_data) > 2:
166 win_size = trimmed_data.size if trimmed_data.size < window else window
168 tmm = trimmed_data.rolling(window=win_size, min_periods=2).median()
169 tmstd = trimmed_data.rolling(window=win_size, min_periods=2).std()
172 for build_nr, value in trimmed_data.iteritems():
177 or np.isnan(tmm[build_nr])
178 or np.isnan(tmstd[build_nr])):
180 elif value < (tmm[build_nr] - 3 * tmstd[build_nr]):
182 elif value > (tmm[build_nr] + 3 * tmstd[build_nr]):
189 tmm = np.median(trimmed_data)
190 tmstd = np.std(trimmed_data)
191 if trimmed_data.values[-1] < (tmm - 3 * tmstd):
193 elif (tmm - 3 * tmstd) <= trimmed_data.values[-1] <= (
203 def _generate_trending_traces(in_data, build_info, period, moving_win_size=10,
204 fill_missing=True, use_first=False,
205 show_trend_line=True, name="", color=""):
206 """Generate the trending traces:
208 - trimmed moving median (trending line)
209 - outliers, regress, progress
211 :param in_data: Full data set.
212 :param build_info: Information about the builds.
213 :param period: Sampling period.
214 :param moving_win_size: Window size.
215 :param fill_missing: If the chosen sample is missing in the full set, its
216 nearest neighbour is used.
217 :param use_first: Use the first sample even though it is not chosen.
218 :param show_trend_line: Show moving median (trending plot).
219 :param name: Name of the plot.
220 :param color: Name of the color for the plot.
221 :type in_data: OrderedDict
222 :type build_info: dict
224 :type moving_win_size: int
225 :type fill_missing: bool
226 :type use_first: bool
227 :type show_trend_line: bool
230 :returns: Generated traces (list), the evaluated result (float) and the
232 :rtype: tuple(traces, result, first_date, last_date)
236 in_data = _select_data(in_data, period,
237 fill_missing=fill_missing,
240 data_x = list(in_data.keys())
241 data_y = list(in_data.values())
246 hover_text.append("vpp-ref: {0}<br>csit-ref: mrr-daily-build-{1}".
247 format(build_info[str(idx)][1].rsplit('~', 1)[0],
249 date = build_info[str(idx)][0]
250 xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
251 int(date[9:11]), int(date[12:])))
253 data_pd = pd.Series(data_y, index=xaxis)
255 t_data, outliers = split_outliers(data_pd, outlier_const=1.5,
256 window=moving_win_size)
257 results = _evaluate_results(t_data, window=moving_win_size)
259 anomalies = pd.Series()
260 anomalies_res = list()
261 for idx, item in enumerate(data_pd.items()):
262 item_pd = pd.Series([item[1], ], index=[item[0], ])
263 if item[0] in outliers.keys():
264 anomalies = anomalies.append(item_pd)
265 anomalies_res.append(0.0)
266 elif results[idx] in (0.33, 1.0):
267 anomalies = anomalies.append(item_pd)
268 anomalies_res.append(results[idx])
269 anomalies_res.extend([0.0, 0.33, 0.66, 1.0])
272 color_scale = [[0.00, "grey"],
281 trace_samples = plgo.Scatter(
288 name="{name}-thput".format(name=name),
295 hoverinfo="x+y+text+name"
297 traces = [trace_samples, ]
299 trace_anomalies = plgo.Scatter(
306 name="{name}-anomalies".format(name=name),
309 "symbol": "circle-open",
310 "color": anomalies_res,
311 "colorscale": color_scale,
319 "title": "Circles Marking Data Classification",
320 "titleside": 'right',
325 "tickvals": [0.125, 0.375, 0.625, 0.875],
326 "ticktext": ["Outlier", "Regression", "Normal", "Progression"],
334 traces.append(trace_anomalies)
337 data_trend = t_data.rolling(window=moving_win_size,
338 min_periods=2).median()
339 trace_trend = plgo.Scatter(
341 y=data_trend.tolist(),
348 name='{name}-trend'.format(name=name)
350 traces.append(trace_trend)
352 return traces, results[-1], xaxis[0], xaxis[-1]
355 def _generate_chart(traces, layout, file_name):
356 """Generates the whole chart using pre-generated traces.
358 :param traces: Traces for the chart.
359 :param layout: Layout of the chart.
360 :param file_name: File name for the generated chart.
367 logging.info(" Writing the file '{0}' ...".format(file_name))
368 plpl = plgo.Figure(data=traces, layout=layout)
370 ploff.plot(plpl, show_link=False, auto_open=False, filename=file_name)
371 except plerr.PlotlyEmptyDataError:
372 logging.warning(" No data for the plot. Skipped.")
375 def _generate_all_charts(spec, input_data):
376 """Generate all charts specified in the specification file.
378 :param spec: Specification.
379 :param input_data: Full data set.
380 :type spec: Specification
381 :type input_data: InputData
384 job_name = spec.cpta["data"].keys()[0]
387 for build in spec.input["builds"][job_name]:
388 status = build["status"]
389 if status != "failed" and status != "not found":
390 builds_lst.append(str(build["build"]))
392 # Get "build ID": ("date", "version") dict:
393 build_info = OrderedDict()
394 for build in builds_lst:
396 build_info[build] = (
397 input_data.metadata(job_name, build)["generated"][:14],
398 input_data.metadata(job_name, build)["version"]
401 build_info[build] = ("", "")
402 logging.info("{}: {}, {}".format(build,
403 build_info[build][0],
404 build_info[build][1]))
408 header = "Build Number:," + ",".join(builds_lst) + '\n'
409 csv_table.append(header)
410 build_dates = [x[0] for x in build_info.values()]
411 header = "Build Date:," + ",".join(build_dates) + '\n'
412 csv_table.append(header)
413 vpp_versions = [x[1] for x in build_info.values()]
414 header = "VPP Version:," + ",".join(vpp_versions) + '\n'
415 csv_table.append(header)
418 for chart in spec.cpta["plots"]:
419 logging.info(" Generating the chart '{0}' ...".
420 format(chart.get("title", "")))
423 data = input_data.filter_data(chart, continue_on_error=True)
425 logging.error("No data.")
430 for idx, build in job.items():
431 for test_name, test in build.items():
432 if chart_data.get(test_name, None) is None:
433 chart_data[test_name] = OrderedDict()
435 chart_data[test_name][int(idx)] = \
436 test["result"]["throughput"]
437 except (KeyError, TypeError):
440 # Add items to the csv table:
441 for tst_name, tst_data in chart_data.items():
443 for build in builds_lst:
444 item = tst_data.get(int(build), '')
445 tst_lst.append(str(item))
446 csv_table.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
448 for period in chart["periods"]:
453 for test_name, test_data in chart_data.items():
455 logging.warning("No data for the test '{0}'".
458 test_name = test_name.split('.')[-1]
459 trace, result, first_date, last_date = \
460 _generate_trending_traces(
462 build_info=build_info,
464 moving_win_size=win_size,
467 name='-'.join(test_name.split('-')[3:-1]),
470 results.append(result)
474 # Generate the chart:
475 chart["layout"]["xaxis"]["title"] = \
476 chart["layout"]["xaxis"]["title"].format(job=job_name)
477 delta = timedelta(days=30)
478 start = last_date - delta
479 start = first_date if start < first_date else start
480 chart["layout"]["xaxis"]["range"] = [str(start.date()),
481 str(last_date.date())]
482 _generate_chart(traces,
484 file_name="{0}-{1}-{2}{3}".format(
485 spec.cpta["output-file"],
486 chart["output-file-name"],
488 spec.cpta["output-file-type"]))
490 logging.info(" Done.")
493 file_name = spec.cpta["output-file"] + "-trending"
494 with open("{0}.csv".format(file_name), 'w') as file_handler:
495 file_handler.writelines(csv_table)
498 with open("{0}.csv".format(file_name), 'rb') as csv_file:
499 csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
501 for row in csv_content:
502 if txt_table is None:
503 txt_table = prettytable.PrettyTable(row)
506 for idx, item in enumerate(row):
508 row[idx] = str(round(float(item) / 1000000, 2))
512 txt_table.add_row(row)
513 except Exception as err:
514 logging.warning("Error occurred while generating TXT table:"
517 txt_table.align["Build Number:"] = "l"
518 with open("{0}.txt".format(file_name), "w") as txt_file:
519 txt_file.write(str(txt_table))
527 if item == 0.66 and result == "PASS":
529 elif item == 0.33 or item == 0.0:
532 logging.info("Partial results: {0}".format(results))
533 logging.info("Result: {0}".format(result))