CSIT-1133: Cosmetic improvements in trending plots
csit.git: resources/tools/presentation/generator_CPTA.py
# Copyright (c) 2018 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generation of Continuous Performance Trending and Analysis.
"""

import multiprocessing
import os
import logging
import csv
import prettytable
import plotly.offline as ploff
import plotly.graph_objs as plgo
import plotly.exceptions as plerr
import pandas as pd

from collections import OrderedDict
from datetime import datetime

from utils import split_outliers, archive_input_data, execute_command,\
    classify_anomalies, Worker


# Command to build the html format of the report
HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
               '-b html -E ' \
               '-t html ' \
               '-D version="{date}" ' \
               '{working_dir} ' \
               '{build_dir}/'
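# Example of a rendered command (illustrative values only; the real
# directories come from the specification file):
#   sphinx-build -v -c conf_cpta -a -b html -E -t html \
#       -D version="05/25/2018 13:07 UTC" <working_dir> <build_dir>/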

# .css file for the html format of the report
THEME_OVERRIDES = """/* override table width restrictions */
.wy-nav-content {
    max-width: 1200px !important;
}
"""

COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
          "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
          "Violet", "Blue", "Yellow"]


def generate_cpta(spec, data):
    """Generate all formats and versions of the Continuous Performance Trending
    and Analysis.

    :param spec: Specification read from the specification file.
    :param data: Full data set.
    :type spec: Specification
    :type data: InputData
    :returns: Result of the evaluation: "PASS" or "FAIL".
    :rtype: str
    """

    logging.info("Generating the Continuous Performance Trending and Analysis "
                 "...")

    ret_code = _generate_all_charts(spec, data)

    cmd = HTML_BUILDER.format(
        date=datetime.utcnow().strftime('%m/%d/%Y %H:%M UTC'),
        working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
        build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
    execute_command(cmd)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    archive_input_data(spec)

    logging.info("Done.")

    return ret_code


def _generate_trending_traces(in_data, job_name, build_info, moving_win_size=10,
                              show_trend_line=True, name="", color=""):
    """Generate the trending traces:
     - samples,
     - trimmed moving median (trend line),
     - anomalies (outliers, regressions, progressions).

    :param in_data: Full data set.
    :param job_name: The name of the job which generated the data.
    :param build_info: Information about the builds.
    :param moving_win_size: Window size.
    :param show_trend_line: Show moving median (trend line).
    :param name: Name of the plot.
    :param color: Name of the color for the plot.
    :type in_data: OrderedDict
    :type job_name: str
    :type build_info: dict
    :type moving_win_size: int
    :type show_trend_line: bool
    :type name: str
    :type color: str
    :returns: Generated traces (list) and the evaluated result.
    :rtype: tuple(traces, result)
    """

    data_x = list(in_data.keys())
    data_y = list(in_data.values())
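    # "in_data" maps build numbers to throughput samples, e.g. (values
    # illustrative only):
    #   OrderedDict([(152, 15163916.0), (153, 15203787.0), (154, 15188540.0)])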

    hover_text = list()
    xaxis = list()
    for idx in data_x:
        if "dpdk" in job_name:
            hover_text.append("dpdk-ref: {0}<br>csit-ref: mrr-weekly-build-{1}".
                              format(build_info[job_name][str(idx)][1].
                                     rsplit('~', 1)[0], idx))
        elif "vpp" in job_name:
            hover_text.append("vpp-ref: {0}<br>csit-ref: mrr-daily-build-{1}".
                              format(build_info[job_name][str(idx)][1].
                                     rsplit('~', 1)[0], idx))
        date = build_info[job_name][str(idx)][0]
        xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
                              int(date[9:11]), int(date[12:])))
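        # The "generated" metadata is assumed to be a "YYYYMMDD HH:MM" string,
        # so e.g. "20180525 13:07" parses to datetime(2018, 5, 25, 13, 7).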

    data_pd = pd.Series(data_y, index=xaxis)

    t_data, outliers = split_outliers(data_pd, outlier_const=1.5,
                                      window=moving_win_size)
    anomaly_classification = classify_anomalies(t_data, window=moving_win_size)

    anomalies = pd.Series()
    anomalies_colors = list()
    anomaly_color = {
        "outlier": 0.0,
        "regression": 0.33,
        "normal": 0.66,
        "progression": 1.0
    }
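    # These fractional values are not colors themselves; they are positions
    # on the normalized [0, 1] colorscale of the anomaly trace below, where
    # each class falls into its own color band.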
    if anomaly_classification:
        for idx, item in enumerate(data_pd.items()):
            if anomaly_classification[idx] in \
                    ("outlier", "regression", "progression"):
                anomalies = anomalies.append(pd.Series([item[1], ],
                                                       index=[item[0], ]))
                anomalies_colors.append(
                    anomaly_color[anomaly_classification[idx]])
        anomalies_colors.extend([0.0, 0.33, 0.66, 1.0])
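        # Appending all four scale positions ensures the color array always
        # spans the full [0, 1] range, so plotly's normalization maps each
        # class to the same band even when only one class occurs in the data
        # (assumed intent; the extra values have no matching data points).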

    # Create traces

    trace_samples = plgo.Scatter(
        x=xaxis,
        y=data_y,
        mode='markers',
        line={
            "width": 1
        },
        showlegend=True,
        legendgroup=name,
        name="{name}".format(name=name),
        marker={
            "size": 5,
            "color": color,
            "symbol": "circle",
        },
        text=hover_text,
        hoverinfo="x+y+text+name"
    )
    traces = [trace_samples, ]

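    # The duplicated break points in "colorscale" below create four discrete
    # bands instead of a gradient: [0.00, 0.25) grey (outlier), [0.25, 0.50)
    # red (regression), [0.50, 0.75) white (normal) and [0.75, 1.00] green
    # (progression). "tickvals" places each label in the middle of its band.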
    trace_anomalies = plgo.Scatter(
        x=anomalies.keys(),
        y=anomalies.values,
        mode='markers',
        hoverinfo="none",
        showlegend=False,
        legendgroup=name,
        name="{name}-anomalies".format(name=name),
        marker={
            "size": 15,
            "symbol": "circle-open",
            "color": anomalies_colors,
            "colorscale": [[0.00, "grey"],
                           [0.25, "grey"],
                           [0.25, "red"],
                           [0.50, "red"],
                           [0.50, "white"],
                           [0.75, "white"],
                           [0.75, "green"],
                           [1.00, "green"]],
            "showscale": True,
            "line": {
                "width": 2
            },
            "colorbar": {
                "y": 0.5,
                "len": 0.8,
                "title": "Circles Marking Data Classification",
                "titleside": 'right',
                "titlefont": {
                    "size": 14
                },
                "tickmode": 'array',
                "tickvals": [0.125, 0.375, 0.625, 0.875],
                "ticktext": ["Outlier", "Regression", "Normal", "Progression"],
                "ticks": "",
                "ticklen": 0,
                "tickangle": -90,
                "thickness": 10
            }
        }
    )
    traces.append(trace_anomalies)

    if show_trend_line:
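        # The trend line is a moving median over the outlier-trimmed series
        # (t_data), so isolated spikes do not bend the line; min_periods=2
        # lets the line start after the second sample.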
        data_trend = t_data.rolling(window=moving_win_size,
                                    min_periods=2).median()
        trace_trend = plgo.Scatter(
            x=data_trend.keys(),
            y=data_trend.tolist(),
            mode='lines',
            line={
                "shape": "spline",
                "width": 1,
                "color": color,
            },
            showlegend=False,
            legendgroup=name,
            name='{name}-trend'.format(name=name)
        )
        traces.append(trace_trend)

    if anomaly_classification:
        return traces, anomaly_classification[-1]
    else:
        return traces, None


def _generate_all_charts(spec, input_data):
    """Generate all charts specified in the specification file.

    :param spec: Specification.
    :param input_data: Full data set.
    :type spec: Specification
    :type input_data: InputData
    :returns: Overall result of the evaluation: "PASS" or "FAIL".
    :rtype: str
    """

    def _generate_chart(_, data_q, graph):
        """Generate one chart and put the results into the data queue.

        :param data_q: Queue where the generated data (logs, csv rows and
            partial results) are put.
        :param graph: Specification of the chart to generate.
        """

        logs = list()

        logging.info("  Generating the chart '{0}' ...".
                     format(graph.get("title", "")))
        logs.append(("INFO", "  Generating the chart '{0}' ...".
                     format(graph.get("title", ""))))

        job_name = graph["data"].keys()[0]
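        # Each chart is specified for a single job, so the first key is taken
        # (dict.keys() returns a list under Python 2, which this module
        # targets; see the iteritems() calls below).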

        csv_tbl = list()
        res = list()

        # Transform the data
        logs.append(("INFO", "    Creating the data set for the {0} '{1}'.".
                     format(graph.get("type", ""), graph.get("title", ""))))
        data = input_data.filter_data(graph, continue_on_error=True)
        if data is None:
            logging.error("No data.")
            return

        chart_data = dict()
        for job, job_data in data.iteritems():
            if job != job_name:
                continue
            for index, bld in job_data.items():
                for test_name, test in bld.items():
                    if chart_data.get(test_name, None) is None:
                        chart_data[test_name] = OrderedDict()
                    try:
                        chart_data[test_name][int(index)] = \
                            test["result"]["throughput"]
                    except (KeyError, TypeError):
                        pass

        # Add items to the csv table:
        for tst_name, tst_data in chart_data.items():
            tst_lst = list()
            for bld in builds_dict[job_name]:
                itm = tst_data.get(int(bld), '')
                tst_lst.append(str(itm))
            csv_tbl.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
        # Generate traces:
        traces = list()
        win_size = 14
        index = 0
        for test_name, test_data in chart_data.items():
            if not test_data:
                logs.append(("WARNING", "No data for the test '{0}'".
                             format(test_name)))
                continue
            test_name = test_name.split('.')[-1]
            trace, rslt = _generate_trending_traces(
                test_data,
                job_name=job_name,
                build_info=build_info,
                moving_win_size=win_size,
                name='-'.join(test_name.split('-')[3:-1]),
                color=COLORS[index])
            traces.extend(trace)
            res.append(rslt)
            index += 1

        if traces:
            # Generate the chart:
            graph["layout"]["xaxis"]["title"] = \
                graph["layout"]["xaxis"]["title"].format(job=job_name)
            name_file = "{0}-{1}{2}".format(spec.cpta["output-file"],
                                            graph["output-file-name"],
                                            spec.cpta["output-file-type"])

            logs.append(("INFO", "    Writing the file '{0}' ...".
                         format(name_file)))
            plpl = plgo.Figure(data=traces, layout=graph["layout"])
            try:
                ploff.plot(plpl, show_link=False, auto_open=False,
                           filename=name_file)
            except plerr.PlotlyEmptyDataError:
                logs.append(("WARNING", "No data for the plot. Skipped."))

        data_out = {
            "job_name": job_name,
            "csv_table": csv_tbl,
            "results": res,
            "logs": logs
        }
        data_q.put(data_out)

    builds_dict = dict()
    for job in spec.input["builds"].keys():
        if builds_dict.get(job, None) is None:
            builds_dict[job] = list()
        for build in spec.input["builds"][job]:
            status = build["status"]
            if status not in ("failed", "not found"):
                builds_dict[job].append(str(build["build"]))

    # Create a "build ID": (date, version) dict:
    build_info = dict()
    for job_name, job_data in builds_dict.items():
        if build_info.get(job_name, None) is None:
            build_info[job_name] = OrderedDict()
        for build in job_data:
            build_info[job_name][build] = (
                input_data.metadata(job_name, build).get("generated", ""),
                input_data.metadata(job_name, build).get("version", "")
            )
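    # Resulting structure (job name and values illustrative only):
    #   build_info = {
    #       "csit-vpp-perf-mrr-daily-master": OrderedDict([
    #           ("152", ("20180525 13:07", "18.07-rc0~123-g1234567~b22")),
    #           ...
    #       ])
    #   }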

    work_queue = multiprocessing.JoinableQueue()
    manager = multiprocessing.Manager()
    data_queue = manager.Queue()
    cpus = multiprocessing.cpu_count()

    workers = list()
    for cpu in range(cpus):
        worker = Worker(work_queue,
                        data_queue,
                        _generate_chart)
        worker.daemon = True
        worker.start()
        workers.append(worker)
        os.system("taskset -p -c {0} {1} > /dev/null 2>&1".
                  format(cpu, worker.pid))
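    # taskset pins each worker process to its own CPU core so the chart
    # generation does not pile up on a single core; the output is discarded
    # because the call is best-effort.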

    for chart in spec.cpta["plots"]:
        work_queue.put((chart, ))
    work_queue.join()

    anomaly_classifications = list()

    # Create the header:
    csv_tables = dict()
    for job_name in builds_dict.keys():
        if csv_tables.get(job_name, None) is None:
            csv_tables[job_name] = list()
        header = "Build Number:," + ",".join(builds_dict[job_name]) + '\n'
        csv_tables[job_name].append(header)
        build_dates = [x[0] for x in build_info[job_name].values()]
        header = "Build Date:," + ",".join(build_dates) + '\n'
        csv_tables[job_name].append(header)
        versions = [x[1] for x in build_info[job_name].values()]
        header = "Version:," + ",".join(versions) + '\n'
        csv_tables[job_name].append(header)
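    # The three header rows look like (values illustrative only):
    #   Build Number:,150,151,152
    #   Build Date:,20180523 13:05,20180524 13:06,20180525 13:07
    #   Version:,18.07-rc0~120,18.07-rc0~121,18.07-rc0~123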

    while not data_queue.empty():
        result = data_queue.get()

        anomaly_classifications.extend(result["results"])
        csv_tables[result["job_name"]].extend(result["csv_table"])

        for item in result["logs"]:
            if item[0] == "INFO":
                logging.info(item[1])
            elif item[0] == "ERROR":
                logging.error(item[1])
            elif item[0] == "DEBUG":
                logging.debug(item[1])
            elif item[0] == "CRITICAL":
                logging.critical(item[1])
            elif item[0] == "WARNING":
                logging.warning(item[1])

    del data_queue

    # Terminate all workers
    for worker in workers:
        worker.terminate()
        worker.join()

    # Write the tables:
    for job_name, csv_table in csv_tables.items():
        file_name = spec.cpta["output-file"] + "-" + job_name + "-trending"
        with open("{0}.csv".format(file_name), 'w') as file_handler:
            file_handler.writelines(csv_table)

        txt_table = None
        with open("{0}.csv".format(file_name), 'rb') as csv_file:
            csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
            line_nr = 0
            for row in csv_content:
                if txt_table is None:
                    txt_table = prettytable.PrettyTable(row)
                else:
                    if line_nr > 1:
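                        # Data rows (line_nr > 1) hold throughput samples,
                        # presumably in pps; scale them to Mpps, rounded to
                        # two decimal places, for the text table.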
                        for idx, item in enumerate(row):
                            try:
                                row[idx] = str(round(float(item) / 1000000, 2))
                            except ValueError:
                                pass
                    try:
                        txt_table.add_row(row)
                    except Exception as err:
                        logging.warning("Error occurred while generating TXT "
                                        "table:\n{0}".format(err))
                line_nr += 1
            txt_table.align["Build Number:"] = "l"
        with open("{0}.txt".format(file_name), "w") as txt_file:
            txt_file.write(str(txt_table))

    # Evaluate result:
    if anomaly_classifications:
        result = "PASS"
        for classification in anomaly_classifications:
            if classification in ("regression", "outlier"):
                result = "FAIL"
                break
    else:
        result = "FAIL"

    logging.info("Partial results: {0}".format(anomaly_classifications))
    logging.info("Result: {0}".format(result))

    return result