# Copyright (c) 2018 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

14 """Generation of Continuous Performance Trending and Analysis.
15 """
16
import multiprocessing
import os
import logging
import csv
import prettytable
import plotly.offline as ploff
import plotly.graph_objs as plgo
import plotly.exceptions as plerr
import pandas as pd

from collections import OrderedDict
from datetime import datetime

from utils import archive_input_data, execute_command, \
    classify_anomalies, Worker


# Command to build the html format of the report
HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
               '-b html -E ' \
               '-t html ' \
               '-D version="{date}" ' \
               '{working_dir} ' \
               '{build_dir}/'
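# Example of the expanded command (the date and paths are illustrative only):
#   sphinx-build -v -c conf_cpta -a -b html -E -t html \
#       -D version="01/31/2018 12:00 UTC" /path/to/working/dir /path/to/build/dir/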

# .css file for the html format of the report
THEME_OVERRIDES = """/* override table width restrictions */
.wy-nav-content {
    max-width: 1200px !important;
}
"""

COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
          "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
          "Violet", "Blue", "Yellow"]


def generate_cpta(spec, data):
    """Generate all formats and versions of the Continuous Performance Trending
    and Analysis.

    :param spec: Specification read from the specification file.
    :param data: Full data set.
    :type spec: Specification
    :type data: InputData
    :returns: Result of the report generation: "PASS" or "FAIL".
    :rtype: str
    """

    logging.info("Generating the Continuous Performance Trending and Analysis "
                 "...")

    ret_code = _generate_all_charts(spec, data)

    cmd = HTML_BUILDER.format(
        date=datetime.utcnow().strftime('%m/%d/%Y %H:%M UTC'),
        working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
        build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
    execute_command(cmd)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    archive_input_data(spec)

    logging.info("Done.")

    return ret_code


def _generate_trending_traces(in_data, job_name, build_info,
                              show_trend_line=True, name="", color=""):
    """Generate the trending traces:
     - samples,
     - anomalies (regressions and progressions),
     - trend line (average of normal samples).

    :param in_data: Full data set.
    :param job_name: The name of the job which generated the data.
    :param build_info: Information about the builds.
    :param show_trend_line: Show the trend line (average of normal samples).
    :param name: Name of the plot.
    :param color: Name of the color for the plot.
    :type in_data: OrderedDict
    :type job_name: str
    :type build_info: dict
    :type show_trend_line: bool
    :type name: str
    :type color: str
    :returns: Generated traces (list) and the classification of the last
        sample, or None if no classification is available.
    :rtype: tuple(traces, result)
    """
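    # A minimal sketch of the expected shape of in_data (the values are
    # illustrative only):
    #     in_data = OrderedDict([(180, 12345678.0), (181, 12456789.0)])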

    data_x = list(in_data.keys())
    data_y = list(in_data.values())

    hover_text = list()
    xaxis = list()
    for idx in data_x:
        if "dpdk" in job_name:
            hover_text.append("dpdk-ref: {0}<br>csit-ref: mrr-weekly-build-{1}".
                              format(build_info[job_name][str(idx)][1].
                                     rsplit('~', 1)[0], idx))
        elif "vpp" in job_name:
            hover_text.append("vpp-ref: {0}<br>csit-ref: mrr-daily-build-{1}".
                              format(build_info[job_name][str(idx)][1].
                                     rsplit('~', 1)[0], idx))
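        # The "generated" metadata is assumed to be formatted as
        # "YYYYMMDD HH:MM"; the slicing below extracts its fields.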
        date = build_info[job_name][str(idx)][0]
        xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
                              int(date[9:11]), int(date[12:])))

    data_pd = pd.Series(data_y, index=xaxis)

    anomaly_classification, avgs = classify_anomalies(data_pd)

    anomalies = pd.Series()
    anomalies_colors = list()
    anomalies_avgs = list()
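    # Map each anomaly class to a fraction matching the colorscale of the
    # anomaly trace below: 0.0 is rendered red (regression), 0.5 white
    # (normal) and 1.0 green (progression).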
    anomaly_color = {
        "regression": 0.0,
        "normal": 0.5,
        "progression": 1.0
    }
    if anomaly_classification:
        for idx, item in enumerate(data_pd.items()):
            if anomaly_classification[idx] in \
                    ("outlier", "regression", "progression"):
                anomalies = anomalies.append(pd.Series([item[1], ],
                                                       index=[item[0], ]))
                anomalies_colors.append(
                    anomaly_color[anomaly_classification[idx]])
                anomalies_avgs.append(avgs[idx])
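        # Append one value per class so the colorscale always spans the full
        # 0.0 - 1.0 range, even when not every class occurs in the data.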
        anomalies_colors.extend([0.0, 0.5, 1.0])

    # Create traces

    trace_samples = plgo.Scatter(
        x=xaxis,
        y=data_y,
        mode='markers',
        line={
            "width": 1
        },
        showlegend=True,
        legendgroup=name,
        name="{name}".format(name=name),
        marker={
            "size": 5,
            "color": color,
            "symbol": "circle",
        },
        text=hover_text,
        hoverinfo="x+y+text+name"
    )
    traces = [trace_samples, ]

    if show_trend_line:
        trace_trend = plgo.Scatter(
            x=xaxis,
            y=avgs,
            mode='lines',
            line={
                "shape": "linear",
                "width": 1,
                "color": color,
            },
            showlegend=False,
            legendgroup=name,
            name='{name}-trend'.format(name=name)
        )
        traces.append(trace_trend)

    trace_anomalies = plgo.Scatter(
        x=anomalies.keys(),
        y=anomalies_avgs,
        mode='markers',
        hoverinfo="none",
        showlegend=False,
        legendgroup=name,
        name="{name}-anomalies".format(name=name),
        marker={
            "size": 15,
            "symbol": "circle-open",
            "color": anomalies_colors,
            "colorscale": [[0.00, "red"],
                           [0.33, "red"],
                           [0.33, "white"],
                           [0.66, "white"],
                           [0.66, "green"],
                           [1.00, "green"]],
            "showscale": True,
            "line": {
                "width": 2
            },
            "colorbar": {
                "y": 0.5,
                "len": 0.8,
                "title": "Circles Marking Data Classification",
                "titleside": 'right',
                "titlefont": {
                    "size": 14
                },
                "tickmode": 'array',
                "tickvals": [0.167, 0.500, 0.833],
                "ticktext": ["Regression", "Normal", "Progression"],
                "ticks": "",
                "ticklen": 0,
                "tickangle": -90,
                "thickness": 10
            }
        }
    )
    traces.append(trace_anomalies)

    if anomaly_classification:
        return traces, anomaly_classification[-1]
    else:
        return traces, None


def _generate_all_charts(spec, input_data):
    """Generate all charts specified in the specification file.

    :param spec: Specification.
    :param input_data: Full data set.
    :type spec: Specification
    :type input_data: InputData
    :returns: "PASS" if no regression or outlier was detected, "FAIL"
        otherwise (also when no results are available).
    :rtype: str
    """

    def _generate_chart(_, data_q, graph):
        """Generate the chart defined by the given specification and put
        the results (csv rows, classifications and logs) into the data
        queue.

        :param data_q: Queue used to pass the results back to the parent
            process.
        :param graph: Chart specification.
        """

        logs = list()

        logging.info("  Generating the chart '{0}' ...".
                     format(graph.get("title", "")))
        logs.append(("INFO", "  Generating the chart '{0}' ...".
                     format(graph.get("title", ""))))

        job_name = graph["data"].keys()[0]

        csv_tbl = list()
        res = list()

        # Transform the data
        logs.append(("INFO", "    Creating the data set for the {0} '{1}'.".
                     format(graph.get("type", ""), graph.get("title", ""))))
        data = input_data.filter_data(graph, continue_on_error=True)
        if data is None:
            logging.error("No data.")
            return

        chart_data = dict()
        for job, job_data in data.iteritems():
            if job != job_name:
                continue
            for index, bld in job_data.items():
                for test_name, test in bld.items():
                    if chart_data.get(test_name, None) is None:
                        chart_data[test_name] = OrderedDict()
                    try:
                        chart_data[test_name][int(index)] = \
                            test["result"]["throughput"]
                    except (KeyError, TypeError):
                        pass

        # Add items to the csv table:
        for tst_name, tst_data in chart_data.items():
            tst_lst = list()
            for bld in builds_dict[job_name]:
                itm = tst_data.get(int(bld), '')
                tst_lst.append(str(itm))
            csv_tbl.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')

        # Generate traces:
        traces = list()
        index = 0
        for test_name, test_data in chart_data.items():
            if not test_data:
                logs.append(("WARNING", "No data for the test '{0}'".
                             format(test_name)))
                continue
            test_name = test_name.split('.')[-1]
            trace, rslt = _generate_trending_traces(
                test_data,
                job_name=job_name,
                build_info=build_info,
                name='-'.join(test_name.split('-')[3:-1]),
                # Wrap around if there are more tests than predefined colors:
                color=COLORS[index % len(COLORS)])
            traces.extend(trace)
            res.append(rslt)
            index += 1

        if traces:
            # Generate the chart:
            graph["layout"]["xaxis"]["title"] = \
                graph["layout"]["xaxis"]["title"].format(job=job_name)
            name_file = "{0}-{1}{2}".format(spec.cpta["output-file"],
                                            graph["output-file-name"],
                                            spec.cpta["output-file-type"])

            logs.append(("INFO", "    Writing the file '{0}' ...".
                         format(name_file)))
            plpl = plgo.Figure(data=traces, layout=graph["layout"])
            try:
                ploff.plot(plpl, show_link=False, auto_open=False,
                           filename=name_file)
            except plerr.PlotlyEmptyDataError:
                logs.append(("WARNING", "No data for the plot. Skipped."))

        data_out = {
            "job_name": job_name,
            "csv_table": csv_tbl,
            "results": res,
            "logs": logs
        }
        data_q.put(data_out)

    builds_dict = dict()
    for job in spec.input["builds"].keys():
        if builds_dict.get(job, None) is None:
            builds_dict[job] = list()
        for build in spec.input["builds"][job]:
            status = build["status"]
            if status not in ("failed", "not found"):
                builds_dict[job].append(str(build["build"]))

    # Create "build ID": "date" dict:
    build_info = dict()
    for job_name, job_data in builds_dict.items():
        if build_info.get(job_name, None) is None:
            build_info[job_name] = OrderedDict()
        for build in job_data:
            metadata = input_data.metadata(job_name, build)
            build_info[job_name][build] = (
                metadata.get("generated", ""),
                metadata.get("version", "")
            )

    work_queue = multiprocessing.JoinableQueue()
    manager = multiprocessing.Manager()
    data_queue = manager.Queue()
    cpus = multiprocessing.cpu_count()

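    # Spawn one worker process per CPU; each worker pulls chart
    # specifications from work_queue and puts the generated results into
    # data_queue.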
    workers = list()
    for cpu in range(cpus):
        worker = Worker(work_queue,
                        data_queue,
                        _generate_chart)
        worker.daemon = True
        worker.start()
        workers.append(worker)
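        # Pin the worker process to its own CPU core (best effort, any
        # failure is silently ignored):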
        os.system("taskset -p -c {0} {1} > /dev/null 2>&1".
                  format(cpu, worker.pid))

    for chart in spec.cpta["plots"]:
        work_queue.put((chart, ))
    work_queue.join()

    anomaly_classifications = list()

    # Create the headers of the csv tables:
    csv_tables = dict()
    for job_name in builds_dict.keys():
        if csv_tables.get(job_name, None) is None:
            csv_tables[job_name] = list()
        header = "Build Number:," + ",".join(builds_dict[job_name]) + '\n'
        csv_tables[job_name].append(header)
        build_dates = [x[0] for x in build_info[job_name].values()]
        header = "Build Date:," + ",".join(build_dates) + '\n'
        csv_tables[job_name].append(header)
        versions = [x[1] for x in build_info[job_name].values()]
        header = "Version:," + ",".join(versions) + '\n'
        csv_tables[job_name].append(header)

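    # Collect the results from the workers: anomaly classifications and
    # per-job csv rows, and replay the workers' buffered log messages.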
    while not data_queue.empty():
        result = data_queue.get()

        anomaly_classifications.extend(result["results"])
        csv_tables[result["job_name"]].extend(result["csv_table"])

        for item in result["logs"]:
            if item[0] == "INFO":
                logging.info(item[1])
            elif item[0] == "ERROR":
                logging.error(item[1])
            elif item[0] == "DEBUG":
                logging.debug(item[1])
            elif item[0] == "CRITICAL":
                logging.critical(item[1])
            elif item[0] == "WARNING":
                logging.warning(item[1])

    del data_queue

    # Terminate all workers
    for worker in workers:
        worker.terminate()
        worker.join()

    # Write the tables:
    for job_name, csv_table in csv_tables.items():
        file_name = spec.cpta["output-file"] + "-" + job_name + "-trending"
        with open("{0}.csv".format(file_name), 'w') as file_handler:
            file_handler.writelines(csv_table)

        txt_table = None
        with open("{0}.csv".format(file_name), 'rb') as csv_file:
            csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
            line_nr = 0
            for row in csv_content:
                if txt_table is None:
                    txt_table = prettytable.PrettyTable(row)
                else:
                    if line_nr > 1:
                        # Convert the throughput values to millions:
                        for idx, item in enumerate(row):
                            try:
                                row[idx] = str(round(float(item) / 1000000, 2))
                            except ValueError:
                                pass
                    try:
                        txt_table.add_row(row)
                    except Exception as err:
                        logging.warning("Error occurred while generating TXT "
                                        "table:\n{0}".format(err))
                line_nr += 1
            txt_table.align["Build Number:"] = "l"
        with open("{0}.txt".format(file_name), "w") as txt_file:
            txt_file.write(str(txt_table))

    # Evaluate the results:
    if anomaly_classifications:
        result = "PASS"
        for classification in anomaly_classifications:
            if classification in ("regression", "outlier"):
                result = "FAIL"
                break
    else:
        result = "FAIL"

    logging.info("Partial results: {0}".format(anomaly_classifications))
    logging.info("Result: {0}".format(result))

    return result