[csit.git] / resources / tools / presentation / generator_CPTA.py
# Copyright (c) 2018 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generation of Continuous Performance Trending and Analysis.
"""

import multiprocessing
import os
import logging
import csv
import prettytable
import plotly.offline as ploff
import plotly.graph_objs as plgo
import plotly.exceptions as plerr

from collections import OrderedDict
from datetime import datetime

from utils import archive_input_data, execute_command, \
    classify_anomalies, Worker


# Command to build the html format of the report
HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
               '-b html -E ' \
               '-t html ' \
               '-D version="{date}" ' \
               '{working_dir} ' \
               '{build_dir}/'
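# For illustration, a fully resolved command looks roughly like:
#   sphinx-build -v -c conf_cpta -a -b html -E -t html \
#       -D version="01/15/2018 12:00 UTC" <working_dir> <build_dir>/
# (date, working_dir and build_dir are filled in from the specification
# file's environment paths in generate_cpta() below).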

# Content of the .css patch files for the html format of the report
THEME_OVERRIDES = """/* override table width restrictions */
.wy-nav-content {
    max-width: 1200px !important;
}
"""


COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
          "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
          "Violet", "Blue", "Yellow"]
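# One colour from the list above is assigned per test trace; a chart with
# more tests than colours triggers the "Out of colors" error handling in
# _generate_all_charts() below.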


def generate_cpta(spec, data):
    """Generate all formats and versions of the Continuous Performance Trending
    and Analysis.

    :param spec: Specification read from the specification file.
    :param data: Full data set.
    :type spec: Specification
    :type data: InputData
    :returns: Result of the report generation: "PASS" or "FAIL".
    :rtype: str
    """

    logging.info("Generating the Continuous Performance Trending and Analysis "
                 "...")

    ret_code = _generate_all_charts(spec, data)

    cmd = HTML_BUILDER.format(
        date=datetime.utcnow().strftime('%m/%d/%Y %H:%M UTC'),
        working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
        build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
    execute_command(cmd)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    archive_input_data(spec)

    logging.info("Done.")

    return ret_code


def _generate_trending_traces(in_data, job_name, build_info,
                              show_trend_line=True, name="", color=""):
    """Generate the trending traces:
     - samples,
     - anomalies (outliers, regressions and progressions),
     - trend line (average of normal samples).

    :param in_data: Full data set.
    :param job_name: The name of the job which generated the data.
    :param build_info: Information about the builds.
    :param show_trend_line: Show the trend line.
    :param name: Name of the plot.
    :param color: Name of the color for the plot.
    :type in_data: OrderedDict
    :type job_name: str
    :type build_info: dict
    :type show_trend_line: bool
    :type name: str
    :type color: str
    :returns: Generated traces (list) and the evaluated result.
    :rtype: tuple(traces, result)
    """

    data_x = list(in_data.keys())
    data_y = list(in_data.values())

    hover_text = list()
    xaxis = list()
    for idx in data_x:
        date = build_info[job_name][str(idx)][0]
        hover_str = ("date: {0}<br>"
                     "value: {1:,}<br>"
                     "{2}-ref: {3}<br>"
                     "csit-ref: mrr-{4}-build-{5}")
        if "dpdk" in job_name:
            hover_text.append(hover_str.format(
                date,
                int(in_data[idx].avg),
                "dpdk",
                build_info[job_name][str(idx)][1].
                rsplit('~', 1)[0],
                "weekly",
                idx))
        elif "vpp" in job_name:
            hover_text.append(hover_str.format(
                date,
                int(in_data[idx].avg),
                "vpp",
                build_info[job_name][str(idx)][1].
                rsplit('~', 1)[0],
                "daily",
                idx))

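        # The "generated" timestamp appears to be fixed-width
        # ("YYYYMMDD HH:MM"), so its components are sliced by position: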
        xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
                              int(date[9:11]), int(date[12:])))

    data_pd = OrderedDict()
    for key, value in zip(xaxis, data_y):
        data_pd[key] = value

    anomaly_classification, avgs = classify_anomalies(data_pd)

    anomalies = OrderedDict()
    anomalies_colors = list()
    anomalies_avgs = list()
    anomaly_color = {
        "regression": 0.0,
        "normal": 0.5,
        "progression": 1.0
    }
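    # The values above are positions on the discrete red/white/green
    # colorscale used by the anomaly trace below.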
    if anomaly_classification:
        for idx, (key, value) in enumerate(data_pd.iteritems()):
            if anomaly_classification[idx] in \
                    ("outlier", "regression", "progression"):
                anomalies[key] = value
                anomalies_colors.append(
                    anomaly_color[anomaly_classification[idx]])
                anomalies_avgs.append(avgs[idx])
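        # Append one value per class so that the whole colorscale is
        # always rendered, even if only one class of anomaly is present.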
        anomalies_colors.extend([0.0, 0.5, 1.0])

    # Create traces

    trace_samples = plgo.Scatter(
        x=xaxis,
        y=[y.avg for y in data_y],
        mode='markers',
        line={
            "width": 1
        },
        showlegend=True,
        legendgroup=name,
        name="{name}".format(name=name),
        marker={
            "size": 5,
            "color": color,
            "symbol": "circle",
        },
        text=hover_text,
        hoverinfo="text"
    )
    traces = [trace_samples, ]

    if show_trend_line:
        trace_trend = plgo.Scatter(
            x=xaxis,
            y=avgs,
            mode='lines',
            line={
                "shape": "linear",
                "width": 1,
                "color": color,
            },
            showlegend=False,
            legendgroup=name,
            name='{name}'.format(name=name),
            text=["trend: {0:,}".format(int(avg)) for avg in avgs],
            hoverinfo="text+name"
        )
        traces.append(trace_trend)

    trace_anomalies = plgo.Scatter(
        x=anomalies.keys(),
        y=anomalies_avgs,
        mode='markers',
        hoverinfo="none",
        showlegend=False,
        legendgroup=name,
        name="{name}-anomalies".format(name=name),
        marker={
            "size": 15,
            "symbol": "circle-open",
            "color": anomalies_colors,
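            # Duplicated stops below create three hard colour bands
            # (red/white/green) instead of a continuous gradient.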
            "colorscale": [[0.00, "red"],
                           [0.33, "red"],
                           [0.33, "white"],
                           [0.66, "white"],
                           [0.66, "green"],
                           [1.00, "green"]],
            "showscale": True,
            "line": {
                "width": 2
            },
            "colorbar": {
                "y": 0.5,
                "len": 0.8,
                "title": "Circles Marking Data Classification",
                "titleside": 'right',
                "titlefont": {
                    "size": 14
                },
                "tickmode": 'array',
                "tickvals": [0.167, 0.500, 0.833],
                "ticktext": ["Regression", "Normal", "Progression"],
                "ticks": "",
                "ticklen": 0,
                "tickangle": -90,
                "thickness": 10
            }
        }
    )
    traces.append(trace_anomalies)

    if anomaly_classification:
        return traces, anomaly_classification[-1]
    else:
        return traces, None


def _generate_all_charts(spec, input_data):
    """Generate all charts specified in the specification file.

    :param spec: Specification.
    :param input_data: Full data set.
    :type spec: Specification
    :type input_data: InputData
    :returns: Result of the chart generation: "PASS" or "FAIL".
    :rtype: str
    """

    def _generate_chart(_, data_q, graph):
        """Generate one chart and put the results (logs, csv rows and
        anomaly classifications) into the data queue.
        """

        logs = list()

        logging.info("  Generating the chart '{0}' ...".
                     format(graph.get("title", "")))
        logs.append(("INFO", "  Generating the chart '{0}' ...".
                     format(graph.get("title", ""))))

        job_name = graph["data"].keys()[0]
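        # Only the first job listed in the graph's "data" section is
        # charted; other jobs are skipped in the loop below.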

        csv_tbl = list()
        res = list()

        # Transform the data
        logs.append(("INFO", "    Creating the data set for the {0} '{1}'.".
                     format(graph.get("type", ""), graph.get("title", ""))))
        data = input_data.filter_data(graph, continue_on_error=True)
        if data is None:
            logging.error("No data for the chart '{0}'.".
                          format(graph.get("title", "")))
            return

        chart_data = dict()
        for job, job_data in data.iteritems():
            if job != job_name:
                continue
            for index, bld in job_data.items():
                for test_name, test in bld.items():
                    if chart_data.get(test_name, None) is None:
                        chart_data[test_name] = OrderedDict()
                    try:
                        chart_data[test_name][int(index)] = \
                            test["result"]["receive-rate"]
                    except (KeyError, TypeError):
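                        # Builds without an MRR receive-rate result are
                        # silently skipped.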
                        pass

        # Add items to the csv table:
        for tst_name, tst_data in chart_data.items():
            tst_lst = list()
            for bld in builds_dict[job_name]:
                itm = tst_data.get(int(bld), '')
                if not isinstance(itm, str):
                    itm = itm.avg
                tst_lst.append(str(itm))
            csv_tbl.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
        # Generate traces:
        traces = list()
        index = 0
        for test_name, test_data in chart_data.items():
            if not test_data:
                logs.append(("WARNING", "No data for the test '{0}'".
                             format(test_name)))
                continue
            message = "index: {index}, test: {test}".format(
                index=index, test=test_name)
            test_name = test_name.split('.')[-1]
            try:
                trace, rslt = _generate_trending_traces(
                    test_data,
                    job_name=job_name,
                    build_info=build_info,
                    name='-'.join(test_name.split('-')[2:-1]),
                    color=COLORS[index])
            except IndexError:
                message = "Out of colors: {}".format(message)
                logs.append(("ERROR", message))
                logging.error(message)
                continue
            traces.extend(trace)
            res.append(rslt)
            index += 1

        if traces:
            # Generate the chart:
            graph["layout"]["xaxis"]["title"] = \
                graph["layout"]["xaxis"]["title"].format(job=job_name)
            name_file = "{0}-{1}{2}".format(spec.cpta["output-file"],
                                            graph["output-file-name"],
                                            spec.cpta["output-file-type"])

            logs.append(("INFO", "    Writing the file '{0}' ...".
                         format(name_file)))
            plpl = plgo.Figure(data=traces, layout=graph["layout"])
            try:
                ploff.plot(plpl, show_link=False, auto_open=False,
                           filename=name_file)
            except plerr.PlotlyEmptyDataError:
                logs.append(("WARNING", "No data for the plot. Skipped."))

        data_out = {
            "job_name": job_name,
            "csv_table": csv_tbl,
            "results": res,
            "logs": logs
        }
        data_q.put(data_out)

    builds_dict = dict()
    for job in spec.input["builds"].keys():
        if builds_dict.get(job, None) is None:
            builds_dict[job] = list()
        for build in spec.input["builds"][job]:
            status = build["status"]
            if status not in ("failed", "not found"):
                builds_dict[job].append(str(build["build"]))

    # Create the "build ID": ("date", "version") dict:
    build_info = dict()
    for job_name, job_data in builds_dict.items():
        if build_info.get(job_name, None) is None:
            build_info[job_name] = OrderedDict()
        for build in job_data:
            build_info[job_name][build] = (
                input_data.metadata(job_name, build).get("generated", ""),
                input_data.metadata(job_name, build).get("version", "")
            )

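    # The joinable queue feeds chart specifications to the workers; the
    # managed queue carries their results (logs, csv rows and anomaly
    # classifications) back to this process.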
    work_queue = multiprocessing.JoinableQueue()
    manager = multiprocessing.Manager()
    data_queue = manager.Queue()
    cpus = multiprocessing.cpu_count()

    workers = list()
    for cpu in range(cpus):
        worker = Worker(work_queue,
                        data_queue,
                        _generate_chart)
        worker.daemon = True
        worker.start()
        workers.append(worker)
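        # Pin each worker to its own CPU core (best effort; the output of
        # taskset is discarded and failures are ignored).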
        os.system("taskset -p -c {0} {1} > /dev/null 2>&1".
                  format(cpu, worker.pid))

    for chart in spec.cpta["plots"]:
        work_queue.put((chart, ))
    work_queue.join()

    anomaly_classifications = list()

    # Create the header:
    csv_tables = dict()
    for job_name in builds_dict.keys():
        if csv_tables.get(job_name, None) is None:
            csv_tables[job_name] = list()
        header = "Build Number:," + ",".join(builds_dict[job_name]) + '\n'
        csv_tables[job_name].append(header)
        build_dates = [x[0] for x in build_info[job_name].values()]
        header = "Build Date:," + ",".join(build_dates) + '\n'
        csv_tables[job_name].append(header)
        versions = [x[1] for x in build_info[job_name].values()]
        header = "Version:," + ",".join(versions) + '\n'
        csv_tables[job_name].append(header)

    while not data_queue.empty():
        result = data_queue.get()

        anomaly_classifications.extend(result["results"])
        csv_tables[result["job_name"]].extend(result["csv_table"])

        for item in result["logs"]:
            if item[0] == "INFO":
                logging.info(item[1])
            elif item[0] == "ERROR":
                logging.error(item[1])
            elif item[0] == "DEBUG":
                logging.debug(item[1])
            elif item[0] == "CRITICAL":
                logging.critical(item[1])
            elif item[0] == "WARNING":
                logging.warning(item[1])

    del data_queue

    # Terminate all workers
    for worker in workers:
        worker.terminate()
        worker.join()

    # Write the tables:
    for job_name, csv_table in csv_tables.items():
        file_name = spec.cpta["output-file"] + "-" + job_name + "-trending"
        with open("{0}.csv".format(file_name), 'w') as file_handler:
            file_handler.writelines(csv_table)

        txt_table = None
        with open("{0}.csv".format(file_name), 'rb') as csv_file:
            csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
            line_nr = 0
            for row in csv_content:
                if txt_table is None:
                    txt_table = prettytable.PrettyTable(row)
                else:
                    if line_nr > 1:
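                        # Scale the data rows down by 10^6 (presumably
                        # pps -> Mpps); header rows stay untouched.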
                        for idx, item in enumerate(row):
                            try:
                                row[idx] = str(round(float(item) / 1000000, 2))
                            except ValueError:
                                pass
                    try:
                        txt_table.add_row(row)
                    except Exception as err:
                        logging.warning("Error occurred while generating TXT "
                                        "table:\n{0}".format(err))
                line_nr += 1
            txt_table.align["Build Number:"] = "l"
        with open("{0}.txt".format(file_name), "w") as txt_file:
            txt_file.write(str(txt_table))

    # Evaluate result:
    if anomaly_classifications:
        result = "PASS"
        for classification in anomaly_classifications:
            if classification in ("regression", "outlier"):
                result = "FAIL"
                break
    else:
        result = "FAIL"

    logging.info("Partial results: {0}".format(anomaly_classifications))
    logging.info("Result: {0}".format(result))

    return result