CSIT-942: RCA - Option 1: Analysing Archived VPP Results
[csit.git] / resources / tools / presentation / generator_CPTA.py
# Copyright (c) 2018 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generation of Continuous Performance Trending and Analysis.
"""

import multiprocessing
import os
import logging
import csv
import prettytable
import plotly.offline as ploff
import plotly.graph_objs as plgo
import plotly.exceptions as plerr
import pandas as pd

from collections import OrderedDict
from datetime import datetime

from utils import split_outliers, archive_input_data, execute_command,\
    classify_anomalies, Worker


# Command to build the html format of the report
HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
               '-b html -E ' \
               '-t html ' \
               '-D version="{date}" ' \
               '{working_dir} ' \
               '{build_dir}/'

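# An illustrative rendering of HTML_BUILDER (the paths and the date are
# hypothetical; the real values come from the specification file):
#   sphinx-build -v -c conf_cpta -a -b html -E -t html \
#       -D version="01/01/2018 01:01 UTC" _tmp/src _build/
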
# .css file for the html format of the report
THEME_OVERRIDES = """/* override table width restrictions */
.wy-nav-content {
    max-width: 1200px !important;
}
"""

COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
          "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
          "Violet", "Blue", "Yellow"]


def generate_cpta(spec, data):
    """Generate all formats and versions of the Continuous Performance Trending
    and Analysis.

    :param spec: Specification read from the specification file.
    :param data: Full data set.
    :type spec: Specification
    :type data: InputData
    :returns: The result of the evaluation ("PASS" or "FAIL").
    :rtype: str
    """

    logging.info("Generating the Continuous Performance Trending and Analysis "
                 "...")

    ret_code = _generate_all_charts(spec, data)

    cmd = HTML_BUILDER.format(
        date=datetime.utcnow().strftime('%m/%d/%Y %H:%M UTC'),
        working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
        build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
    execute_command(cmd)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    archive_input_data(spec)

    logging.info("Done.")

    return ret_code


def _generate_trending_traces(in_data, job_name, build_info, moving_win_size=10,
                              show_trend_line=True, name="", color=""):
    """Generate the trending traces:
     - samples,
     - trimmed moving median (trending line),
     - outliers, regressions, progressions.

    :param in_data: Full data set.
    :param job_name: The name of the job which generated the data.
    :param build_info: Information about the builds.
    :param moving_win_size: Window size.
    :param show_trend_line: Show the moving median (trending) line.
    :param name: Name of the plot.
    :param color: Name of the color for the plot.
    :type in_data: OrderedDict
    :type job_name: str
    :type build_info: dict
    :type moving_win_size: int
    :type show_trend_line: bool
    :type name: str
    :type color: str
    :returns: Generated traces (list) and the evaluated result.
    :rtype: tuple(traces, result)
    """

    data_x = list(in_data.keys())
    data_y = list(in_data.values())

    hover_text = list()
    xaxis = list()
    for idx in data_x:
        if "dpdk" in job_name:
            hover_text.append("dpdk-ref: {0}<br>csit-ref: mrr-weekly-build-{1}".
                              format(build_info[job_name][str(idx)][1].
                                     rsplit('~', 1)[0], idx))
        elif "vpp" in job_name:
            hover_text.append("vpp-ref: {0}<br>csit-ref: mrr-daily-build-{1}".
                              format(build_info[job_name][str(idx)][1].
                                     rsplit('~', 1)[0], idx))
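        # The "generated" timestamp is assumed to be in the "YYYYMMDD HH:MM"
        # format implied by the slicing below.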
        date = build_info[job_name][str(idx)][0]
        xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
                              int(date[9:11]), int(date[12:])))

    data_pd = pd.Series(data_y, index=xaxis)

    # Split the samples into the trimmed data set and the outliers:
    t_data, outliers = split_outliers(data_pd, outlier_const=1.5,
                                      window=moving_win_size)
    anomaly_classification = classify_anomalies(t_data, window=moving_win_size)

    anomalies = pd.Series()
    anomalies_colors = list()
    anomaly_color = {
        "outlier": 0.0,
        "regression": 0.33,
        "normal": 0.66,
        "progression": 1.0
    }
    if anomaly_classification:
        for idx, item in enumerate(data_pd.items()):
            if anomaly_classification[idx] in \
                    ("outlier", "regression", "progression"):
                anomalies = anomalies.append(pd.Series([item[1], ],
                                                       index=[item[0], ]))
                anomalies_colors.append(
                    anomaly_color[anomaly_classification[idx]])
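        # Add one dummy value per class so the colorscale always covers the
        # full 0.0-1.0 range and the colorbar shows all four categories.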
        anomalies_colors.extend([0.0, 0.33, 0.66, 1.0])

    # Create traces

    trace_samples = plgo.Scatter(
        x=xaxis,
        y=data_y,
        mode='markers',
        line={
            "width": 1
        },
        legendgroup=name,
        name="{name}-thput".format(name=name),
        marker={
            "size": 5,
            "color": color,
            "symbol": "circle",
        },
        text=hover_text,
        hoverinfo="x+y+text+name"
    )
    traces = [trace_samples, ]

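    # The open-circle markers encode the classification of each anomaly:
    # grey = outlier, red = regression, white = normal, green = progression
    # (see the anomaly_color mapping above and the colorscale below).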
    trace_anomalies = plgo.Scatter(
        x=anomalies.keys(),
        y=anomalies.values,
        mode='markers',
        hoverinfo="none",
        showlegend=True,
        legendgroup=name,
        name="{name}-anomalies".format(name=name),
        marker={
            "size": 15,
            "symbol": "circle-open",
            "color": anomalies_colors,
            "colorscale": [[0.00, "grey"],
                           [0.25, "grey"],
                           [0.25, "red"],
                           [0.50, "red"],
                           [0.50, "white"],
                           [0.75, "white"],
                           [0.75, "green"],
                           [1.00, "green"]],
            "showscale": True,
            "line": {
                "width": 2
            },
            "colorbar": {
                "y": 0.5,
                "len": 0.8,
                "title": "Circles Marking Data Classification",
                "titleside": 'right',
                "titlefont": {
                    "size": 14
                },
                "tickmode": 'array',
                "tickvals": [0.125, 0.375, 0.625, 0.875],
                "ticktext": ["Outlier", "Regression", "Normal", "Progression"],
                "ticks": "",
                "ticklen": 0,
                "tickangle": -90,
                "thickness": 10
            }
        }
    )
    traces.append(trace_anomalies)

    if show_trend_line:
        data_trend = t_data.rolling(window=moving_win_size,
                                    min_periods=2).median()
        trace_trend = plgo.Scatter(
            x=data_trend.keys(),
            y=data_trend.tolist(),
            mode='lines',
            line={
                "shape": "spline",
                "width": 1,
                "color": color,
            },
            legendgroup=name,
            name='{name}-trend'.format(name=name)
        )
        traces.append(trace_trend)

    if anomaly_classification:
        return traces, anomaly_classification[-1]
    else:
        return traces, None


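# A minimal sketch of a _generate_trending_traces() call (all values are
# hypothetical):
#
#   in_data = OrderedDict([(1, 11250000.0), (2, 11310000.0)])
#   build_info = {"csit-vpp-perf-mrr-daily-master": {
#       "1": ("20180101 01:01", "18.01-release~g0000"),
#       "2": ("20180102 01:01", "18.01-release~g0001")}}
#   traces, result = _generate_trending_traces(
#       in_data, "csit-vpp-perf-mrr-daily-master", build_info,
#       name="64b-1t1c-l2xc", color=COLORS[0])
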
def _generate_all_charts(spec, input_data):
    """Generate all charts specified in the specification file.

    :param spec: Specification.
    :param input_data: Full data set.
    :type spec: Specification
    :type input_data: InputData
    :returns: The overall result: "PASS" or "FAIL".
    :rtype: str
    """

    def _generate_chart(_, data_q, graph):
        """Generate the chart defined by the given specification and put the
        resulting data to the queue.

        :param data_q: Queue used to pass the results back to the parent
            process.
        :param graph: Specification of the chart to generate.
        :type graph: dict
        """

        logs = list()

        logging.info("  Generating the chart '{0}' ...".
                     format(graph.get("title", "")))
        logs.append(("INFO", "  Generating the chart '{0}' ...".
                     format(graph.get("title", ""))))

        job_name = graph["data"].keys()[0]

        csv_tbl = list()
        res = list()

        # Transform the data
        logs.append(("INFO", "    Creating the data set for the {0} '{1}'.".
                     format(graph.get("type", ""), graph.get("title", ""))))
        data = input_data.filter_data(graph, continue_on_error=True)
        if data is None:
            logging.error("No data.")
            return

        chart_data = dict()
        for job, job_data in data.iteritems():
            if job != job_name:
                continue
            for index, bld in job_data.items():
                for test_name, test in bld.items():
                    if chart_data.get(test_name, None) is None:
                        chart_data[test_name] = OrderedDict()
                    try:
                        chart_data[test_name][int(index)] = \
                            test["result"]["throughput"]
                    except (KeyError, TypeError):
                        # Tests without a throughput result are skipped.
                        pass

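        # Each row of the csv table is one test name followed by its
        # throughput (empty if missing) for every build, e.g.
        # (illustrative values only):
        #   <test name>,11250000.0,,11310000.0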
        # Add items to the csv table:
        for tst_name, tst_data in chart_data.items():
            tst_lst = list()
            for bld in builds_dict[job_name]:
                itm = tst_data.get(int(bld), '')
                tst_lst.append(str(itm))
            csv_tbl.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
        # Generate traces:
        traces = list()
        win_size = 14
        index = 0
        for test_name, test_data in chart_data.items():
            if not test_data:
                logs.append(("WARNING", "No data for the test '{0}'".
                             format(test_name)))
                continue
            test_name = test_name.split('.')[-1]
            trace, rslt = _generate_trending_traces(
                test_data,
                job_name=job_name,
                build_info=build_info,
                moving_win_size=win_size,
                name='-'.join(test_name.split('-')[3:-1]),
                color=COLORS[index])
            traces.extend(trace)
            res.append(rslt)
            index += 1

        if traces:
            # Generate the chart:
            graph["layout"]["xaxis"]["title"] = \
                graph["layout"]["xaxis"]["title"].format(job=job_name)
            name_file = "{0}-{1}{2}".format(spec.cpta["output-file"],
                                            graph["output-file-name"],
                                            spec.cpta["output-file-type"])

            logs.append(("INFO", "    Writing the file '{0}' ...".
                         format(name_file)))
            plpl = plgo.Figure(data=traces, layout=graph["layout"])
            try:
                ploff.plot(plpl, show_link=False, auto_open=False,
                           filename=name_file)
            except plerr.PlotlyEmptyDataError:
                logs.append(("WARNING", "No data for the plot. Skipped."))

        data_out = {
            "job_name": job_name,
            "csv_table": csv_tbl,
            "results": res,
            "logs": logs
        }
        data_q.put(data_out)

    builds_dict = dict()
    for job in spec.input["builds"].keys():
        if builds_dict.get(job, None) is None:
            builds_dict[job] = list()
        for build in spec.input["builds"][job]:
            status = build["status"]
            if status not in ("failed", "not found"):
                builds_dict[job].append(str(build["build"]))

    # Create the "build ID": ("generated date", "version") dict:
    build_info = dict()
    for job_name, job_data in builds_dict.items():
        if build_info.get(job_name, None) is None:
            build_info[job_name] = OrderedDict()
        for build in job_data:
            build_info[job_name][build] = (
                input_data.metadata(job_name, build).get("generated", ""),
                input_data.metadata(job_name, build).get("version", "")
            )

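    # Illustrative shape of build_info (all values are hypothetical):
    #   {"csit-vpp-perf-mrr-daily-master":
    #       OrderedDict([("1", ("20180101 01:01", "18.01-release~g1234"))])}
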
    work_queue = multiprocessing.JoinableQueue()
    manager = multiprocessing.Manager()
    data_queue = manager.Queue()
    cpus = multiprocessing.cpu_count()

    workers = list()
    for cpu in range(cpus):
        worker = Worker(work_queue,
                        data_queue,
                        _generate_chart)
        worker.daemon = True
        worker.start()
        workers.append(worker)
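        # Pin each worker process to its own CPU core; any output or error
        # from taskset is deliberately discarded.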
        os.system("taskset -p -c {0} {1} > /dev/null 2>&1".
                  format(cpu, worker.pid))

    for chart in spec.cpta["plots"]:
        work_queue.put((chart, ))
    work_queue.join()

    anomaly_classifications = list()

    # Create the headers of the csv tables:
    csv_tables = dict()
    for job_name in builds_dict.keys():
        if csv_tables.get(job_name, None) is None:
            csv_tables[job_name] = list()
        header = "Build Number:," + ",".join(builds_dict[job_name]) + '\n'
        csv_tables[job_name].append(header)
        build_dates = [x[0] for x in build_info[job_name].values()]
        header = "Build Date:," + ",".join(build_dates) + '\n'
        csv_tables[job_name].append(header)
        versions = [x[1] for x in build_info[job_name].values()]
        header = "Version:," + ",".join(versions) + '\n'
        csv_tables[job_name].append(header)

    while not data_queue.empty():
        result = data_queue.get()

        anomaly_classifications.extend(result["results"])
        csv_tables[result["job_name"]].extend(result["csv_table"])

        for item in result["logs"]:
            if item[0] == "INFO":
                logging.info(item[1])
            elif item[0] == "ERROR":
                logging.error(item[1])
            elif item[0] == "DEBUG":
                logging.debug(item[1])
            elif item[0] == "CRITICAL":
                logging.critical(item[1])
            elif item[0] == "WARNING":
                logging.warning(item[1])

    del data_queue

    # Terminate all workers
    for worker in workers:
        worker.terminate()
        worker.join()

    # Write the tables:
    for job_name, csv_table in csv_tables.items():
        file_name = spec.cpta["output-file"] + "-" + job_name + "-trending"
        with open("{0}.csv".format(file_name), 'w') as file_handler:
            file_handler.writelines(csv_table)

        txt_table = None
        with open("{0}.csv".format(file_name), 'rb') as csv_file:
            csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
            line_nr = 0
            for row in csv_content:
                if txt_table is None:
                    txt_table = prettytable.PrettyTable(row)
                else:
                    if line_nr > 1:
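                        # Convert numeric cells (throughput in pps) to Mpps;
                        # non-numeric cells are left as they are.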
                        for idx, item in enumerate(row):
                            try:
                                row[idx] = str(round(float(item) / 1000000, 2))
                            except ValueError:
                                pass
                    try:
                        txt_table.add_row(row)
                    except Exception as err:
                        logging.warning("Error occurred while generating TXT "
                                        "table:\n{0}".format(err))
                line_nr += 1
        # Align and write the txt table only if it was actually created:
        if txt_table is not None:
            txt_table.align["Build Number:"] = "l"
            with open("{0}.txt".format(file_name), "w") as txt_file:
                txt_file.write(str(txt_table))

    # Evaluate the overall result: "FAIL" if any test's latest classification
    # is a regression or an outlier, or if no classification is available:
    if anomaly_classifications:
        result = "PASS"
        for classification in anomaly_classifications:
            if classification in ("regression", "outlier"):
                result = "FAIL"
                break
    else:
        result = "FAIL"

    logging.info("Partial results: {0}".format(anomaly_classifications))
    logging.info("Result: {0}".format(result))

    return result
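
# A minimal sketch of how this module is driven (hypothetical; in CSIT the
# presentation entry point builds the Specification and InputData objects
# from the specification file and the archived test results):
#
#   spec = Specification(<specification file>)
#   data = InputData(spec)                # archived VPP/DPDK MRR results
#   result = generate_cpta(spec, data)    # returns "PASS" or "FAIL"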