CSIT-1110: Cherry-pick edits into new detection
csit.git: resources/tools/presentation/new/generator_CPTA.py
# Copyright (c) 2018 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generation of Continuous Performance Trending and Analysis.
"""

import multiprocessing
import os
import logging
import csv
import prettytable
import plotly.offline as ploff
import plotly.graph_objs as plgo
import plotly.exceptions as plerr
import pandas as pd

from collections import OrderedDict
from datetime import datetime

from utils import archive_input_data, execute_command, \
    classify_anomalies, Worker


# Command to build the html format of the report
HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
               '-b html -E ' \
               '-t html ' \
               '-D version="{date}" ' \
               '{working_dir} ' \
               '{build_dir}/'
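# For illustration only, with hypothetical paths, the rendered command is:
#   sphinx-build -v -c conf_cpta -a -b html -E -t html \
#       -D version="06/01/2018 12:00 UTC" /tmp/src /tmp/build/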

# .css file for the html format of the report
THEME_OVERRIDES = """/* override table width restrictions */
.wy-nav-content {
    max-width: 1200px !important;
}
"""

COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
          "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
          "Violet", "Blue", "Yellow"]


def generate_cpta(spec, data):
    """Generate all formats and versions of the Continuous Performance Trending
    and Analysis.

    :param spec: Specification read from the specification file.
    :param data: Full data set.
    :type spec: Specification
    :type data: InputData
    :returns: Result of the generation, "PASS" or "FAIL".
    :rtype: str
    """

    logging.info("Generating the Continuous Performance Trending and Analysis "
                 "...")

    ret_code = _generate_all_charts(spec, data)

    cmd = HTML_BUILDER.format(
        date=datetime.utcnow().strftime('%m/%d/%Y %H:%M UTC'),
        working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
        build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
    execute_command(cmd)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    archive_input_data(spec)

    logging.info("Done.")

    return ret_code


def _generate_trending_traces(in_data, job_name, build_info,
                              show_trend_line=True, name="", color=""):
    """Generate the trending traces:
     - samples,
     - outliers, regressions, progressions,
     - average of normal samples (trending line).

    :param in_data: Full data set.
    :param job_name: The name of the job which generated the data.
    :param build_info: Information about the builds.
    :param show_trend_line: Show the trend line (average of normal samples).
    :param name: The name of the plot.
    :param color: The name of the color for the plot.
    :type in_data: OrderedDict
    :type job_name: str
    :type build_info: dict
    :type show_trend_line: bool
    :type name: str
    :type color: str
    :returns: Generated traces (list) and the evaluated result.
    :rtype: tuple(traces, result)
    """

    data_x = list(in_data.keys())
    data_y = list(in_data.values())

    hover_text = list()
    xaxis = list()
    for idx in data_x:
        if "dpdk" in job_name:
            hover_text.append("dpdk-ref: {0}<br>csit-ref: mrr-weekly-build-{1}".
                              format(build_info[job_name][str(idx)][1].
                                     rsplit('~', 1)[0], idx))
        elif "vpp" in job_name:
            hover_text.append("vpp-ref: {0}<br>csit-ref: mrr-daily-build-{1}".
                              format(build_info[job_name][str(idx)][1].
                                     rsplit('~', 1)[0], idx))
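        # The slicing below assumes the "generated" metadata timestamp is in
        # the form "YYYYMMDD HH:MM", e.g. "20180601 12:00".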
        date = build_info[job_name][str(idx)][0]
        xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
                              int(date[9:11]), int(date[12:])))

    data_pd = pd.Series(data_y, index=xaxis)

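    # classify_anomalies() returns one classification per sample ("normal",
    # "outlier", "regression" or "progression") together with the list of
    # trend averages used below for the trend line and the anomaly markers.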
    anomaly_classification, avgs = classify_anomalies(data_pd)

    anomalies = pd.Series()
    anomalies_colors = list()
    anomalies_avgs = list()
    anomaly_color = {
        "outlier": 0.0,
        "regression": 0.33,
        "normal": 0.66,
        "progression": 1.0
    }
    if anomaly_classification:
        for idx, item in enumerate(data_pd.items()):
            if anomaly_classification[idx] in \
                    ("outlier", "regression", "progression"):
                anomalies = anomalies.append(pd.Series([item[1], ],
                                                       index=[item[0], ]))
                anomalies_colors.append(
                    anomaly_color[anomaly_classification[idx]])
                anomalies_avgs.append(avgs[idx])
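        # A workaround, assuming Plotly scales marker colors to the range of
        # the supplied values: append one value per class so the colorscale
        # and the colorbar below always cover all four classes.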
        anomalies_colors.extend([0.0, 0.33, 0.66, 1.0])

    # Create traces

    trace_samples = plgo.Scatter(
        x=xaxis,
        y=data_y,
        mode='markers',
        line={
            "width": 1
        },
        legendgroup=name,
        name="{name}-thput".format(name=name),
        marker={
            "size": 5,
            "color": color,
            "symbol": "circle",
        },
        text=hover_text,
        hoverinfo="x+y+text+name"
    )
    traces = [trace_samples, ]

    if show_trend_line:
        trace_trend = plgo.Scatter(
            x=xaxis,
            y=avgs,
            mode='lines',
            line={
                "shape": "linear",
                "width": 1,
                "color": color,
            },
            legendgroup=name,
            name='{name}-trend'.format(name=name)
        )
        traces.append(trace_trend)

    trace_anomalies = plgo.Scatter(
        x=anomalies.keys(),
        y=anomalies_avgs,
        mode='markers',
        hoverinfo="none",
        showlegend=True,
        legendgroup=name,
        name="{name}-anomalies".format(name=name),
        marker={
            "size": 15,
            "symbol": "circle-open",
            "color": anomalies_colors,
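            # A stepped colorscale: each quarter of the range maps to a single
            # color, so the class values 0.0 / 0.33 / 0.66 / 1.0 render as
            # grey (outlier), red (regression), white (normal), green
            # (progression).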
            "colorscale": [[0.00, "grey"],
                           [0.25, "grey"],
                           [0.25, "red"],
                           [0.50, "red"],
                           [0.50, "white"],
                           [0.75, "white"],
                           [0.75, "green"],
                           [1.00, "green"]],
            "showscale": True,
            "line": {
                "width": 2
            },
            "colorbar": {
                "y": 0.5,
                "len": 0.8,
                "title": "Circles Marking Data Classification",
                "titleside": 'right',
                "titlefont": {
                    "size": 14
                },
                "tickmode": 'array',
                "tickvals": [0.125, 0.375, 0.625, 0.875],
                "ticktext": ["Outlier", "Regression", "Normal", "Progression"],
                "ticks": "",
                "ticklen": 0,
                "tickangle": -90,
                "thickness": 10
            }
        }
    )
    traces.append(trace_anomalies)

    if anomaly_classification:
        return traces, anomaly_classification[-1]
    else:
        return traces, None


def _generate_all_charts(spec, input_data):
    """Generate all charts specified in the specification file.

    :param spec: Specification.
    :param input_data: Full data set.
    :type spec: Specification
    :type input_data: InputData
    :returns: The overall result, "PASS" or "FAIL".
    :rtype: str
    """

    def _generate_chart(_, data_q, graph):
        """Generate one chart. Executed by a worker process; the produced
        data (csv table, results and logs) is put to the data queue.
        """

        logs = list()

        logging.info("  Generating the chart '{0}' ...".
                     format(graph.get("title", "")))
        logs.append(("INFO", "  Generating the chart '{0}' ...".
                     format(graph.get("title", ""))))

        job_name = graph["data"].keys()[0]

        csv_tbl = list()
        res = list()

        # Transform the data
        logs.append(("INFO", "    Creating the data set for the {0} '{1}'.".
                     format(graph.get("type", ""), graph.get("title", ""))))
        data = input_data.filter_data(graph, continue_on_error=True)
        if data is None:
            logging.error("No data.")
            return

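        # Build chart_data as {test name: {build number: throughput}}, the
        # inner dict ordered by build, e.g. (hypothetical test name and
        # values):
        #     chart_data["tests.x.64b-1t1c-eth-l2bdbase-mrr"][101] = 12.5e6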
        chart_data = dict()
        for job, job_data in data.iteritems():
            if job != job_name:
                continue
            for index, bld in job_data.items():
                for test_name, test in bld.items():
                    if chart_data.get(test_name, None) is None:
                        chart_data[test_name] = OrderedDict()
                    try:
                        chart_data[test_name][int(index)] = \
                            test["result"]["throughput"]
                    except (KeyError, TypeError):
                        pass

        # Add items to the csv table:
        for tst_name, tst_data in chart_data.items():
            tst_lst = list()
            for bld in builds_dict[job_name]:
                itm = tst_data.get(int(bld), '')
                tst_lst.append(str(itm))
            csv_tbl.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
        # Generate traces:
        traces = list()
        index = 0
        for test_name, test_data in chart_data.items():
            if not test_data:
                logs.append(("WARNING", "No data for the test '{0}'".
                             format(test_name)))
                continue
            test_name = test_name.split('.')[-1]
            trace, rslt = _generate_trending_traces(
                test_data,
                job_name=job_name,
                build_info=build_info,
                name='-'.join(test_name.split('-')[3:-1]),
                color=COLORS[index])
            traces.extend(trace)
            res.append(rslt)
            index += 1

        if traces:
            # Generate the chart:
            graph["layout"]["xaxis"]["title"] = \
                graph["layout"]["xaxis"]["title"].format(job=job_name)
            name_file = "{0}-{1}{2}".format(spec.cpta["output-file"],
                                            graph["output-file-name"],
                                            spec.cpta["output-file-type"])

            logs.append(("INFO", "    Writing the file '{0}' ...".
                         format(name_file)))
            plpl = plgo.Figure(data=traces, layout=graph["layout"])
            try:
                ploff.plot(plpl, show_link=False, auto_open=False,
                           filename=name_file)
            except plerr.PlotlyEmptyDataError:
                logs.append(("WARNING", "No data for the plot. Skipped."))

        data_out = {
            "job_name": job_name,
            "csv_table": csv_tbl,
            "results": res,
            "logs": logs
        }
        data_q.put(data_out)

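    # Collect the builds per job which can be used for trending, i.e. those
    # whose status is neither "failed" nor "not found":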
    builds_dict = dict()
    for job in spec.input["builds"].keys():
        if builds_dict.get(job, None) is None:
            builds_dict[job] = list()
        for build in spec.input["builds"][job]:
            status = build["status"]
            if status != "failed" and status != "not found":
                builds_dict[job].append(str(build["build"]))

    # Create {"job name": {"build ID": ("date", "version")}} dict:
    build_info = dict()
    for job_name, job_data in builds_dict.items():
        if build_info.get(job_name, None) is None:
            build_info[job_name] = OrderedDict()
        for build in job_data:
            build_info[job_name][build] = (
                input_data.metadata(job_name, build).get("generated", ""),
                input_data.metadata(job_name, build).get("version", "")
            )

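    # At this point build_info looks like, e.g. (hypothetical job name,
    # build number, date and version):
    #     build_info["csit-vpp-perf-mrr-daily-master"]["123"] == \
    #         ("20180601 12:00", "18.07-rc0~123-g1a2b3c4~b123")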
    work_queue = multiprocessing.JoinableQueue()
    manager = multiprocessing.Manager()
    data_queue = manager.Queue()
    cpus = multiprocessing.cpu_count()

    workers = list()
    for cpu in range(cpus):
        worker = Worker(work_queue,
                        data_queue,
                        _generate_chart)
        worker.daemon = True
        worker.start()
        workers.append(worker)
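        # Pin each worker process to its own CPU core; taskset is
        # Linux-specific and its output (and any error) is discarded.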
        os.system("taskset -p -c {0} {1} > /dev/null 2>&1".
                  format(cpu, worker.pid))

    for chart in spec.cpta["plots"]:
        work_queue.put((chart, ))
    work_queue.join()

    anomaly_classifications = list()

    # Create the header:
    csv_tables = dict()
    for job_name in builds_dict.keys():
        if csv_tables.get(job_name, None) is None:
            csv_tables[job_name] = list()
        header = "Build Number:," + ",".join(builds_dict[job_name]) + '\n'
        csv_tables[job_name].append(header)
        build_dates = [x[0] for x in build_info[job_name].values()]
        header = "Build Date:," + ",".join(build_dates) + '\n'
        csv_tables[job_name].append(header)
        versions = [x[1] for x in build_info[job_name].values()]
        header = "Version:," + ",".join(versions) + '\n'
        csv_tables[job_name].append(header)

    while not data_queue.empty():
        result = data_queue.get()

        anomaly_classifications.extend(result["results"])
        csv_tables[result["job_name"]].extend(result["csv_table"])

        for item in result["logs"]:
            if item[0] == "INFO":
                logging.info(item[1])
            elif item[0] == "ERROR":
                logging.error(item[1])
            elif item[0] == "DEBUG":
                logging.debug(item[1])
            elif item[0] == "CRITICAL":
                logging.critical(item[1])
            elif item[0] == "WARNING":
                logging.warning(item[1])

    del data_queue

    # Terminate all workers
    for worker in workers:
        worker.terminate()
        worker.join()

    # Write the tables:
    for job_name, csv_table in csv_tables.items():
        file_name = spec.cpta["output-file"] + "-" + job_name + "-trending"
        with open("{0}.csv".format(file_name), 'w') as file_handler:
            file_handler.writelines(csv_table)

        txt_table = None
        with open("{0}.csv".format(file_name), 'rb') as csv_file:
            csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
            line_nr = 0
            for row in csv_content:
                if txt_table is None:
                    txt_table = prettytable.PrettyTable(row)
                else:
                    if line_nr > 1:
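                        # Scale the raw throughput values down by 1e6
                        # (pps -> Mpps) for the text table; non-numeric
                        # cells are left as they are.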
                        for idx, item in enumerate(row):
                            try:
                                row[idx] = str(round(float(item) / 1000000, 2))
                            except ValueError:
                                pass
                    try:
                        txt_table.add_row(row)
                    except Exception as err:
                        logging.warning("Error occurred while generating TXT "
                                        "table:\n{0}".format(err))
                line_nr += 1
            txt_table.align["Build Number:"] = "l"
        with open("{0}.txt".format(file_name), "w") as txt_file:
            txt_file.write(str(txt_table))

    # Evaluate the overall result: FAIL if any regression or outlier was
    # detected, or if there are no classified results at all.
    if anomaly_classifications:
        result = "PASS"
        for classification in anomaly_classifications:
            if classification == "regression" or classification == "outlier":
                result = "FAIL"
                break
    else:
        result = "FAIL"

    logging.info("Partial results: {0}".format(anomaly_classifications))
    logging.info("Result: {0}".format(result))

    return result