Trending: Add exception handling for anomalies classification
[csit.git] / resources / tools / presentation / generator_cpta.py
1 # Copyright (c) 2021 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
5 #
6 #     http://www.apache.org/licenses/LICENSE-2.0
7 #
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13
14 """Generation of Continuous Performance Trending and Analysis.
15 """
16
17 import re
18 import logging
19 import csv
20
21 from collections import OrderedDict
22 from datetime import datetime
23 from copy import deepcopy
24
25 import prettytable
26 import plotly.offline as ploff
27 import plotly.graph_objs as plgo
28 import plotly.exceptions as plerr
29
30 from pal_utils import archive_input_data, execute_command, classify_anomalies
31
32
# Command to build the html format of the report.
# Placeholders {date}, {working_dir} and {build_dir} are filled in by
# generate_cpta() before the command is executed.
HTML_BUILDER = u'sphinx-build -v -c sphinx_conf/trending -a ' \
               u'-b html -E ' \
               u'-t html ' \
               u'-D version="{date}" ' \
               u'{working_dir} ' \
               u'{build_dir}/'

# .css file for the html format of the report.
# Written verbatim over the theme's css patch files to relax the
# read-the-docs theme table width limits and restyle the navigation menu.
THEME_OVERRIDES = u"""/* override table width restrictions */
.wy-nav-content {
    max-width: 1200px !important;
}
.rst-content blockquote {
    margin-left: 0px;
    line-height: 18px;
    margin-bottom: 0px;
}
.wy-menu-vertical a {
    display: inline-block;
    line-height: 18px;
    padding: 0 2em;
    display: block;
    position: relative;
    font-size: 90%;
    color: #d9d9d9
}
.wy-menu-vertical li.current a {
    color: gray;
    border-right: solid 1px #c9c9c9;
    padding: 0 3em;
}
.wy-menu-vertical li.toctree-l2.current > a {
    background: #c9c9c9;
    padding: 0 3em;
}
.wy-menu-vertical li.toctree-l2.current li.toctree-l3 > a {
    display: block;
    background: #c9c9c9;
    padding: 0 4em;
}
.wy-menu-vertical li.toctree-l3.current li.toctree-l4 > a {
    display: block;
    background: #bdbdbd;
    padding: 0 5em;
}
.wy-menu-vertical li.on a, .wy-menu-vertical li.current > a {
    color: #404040;
    padding: 0 2em;
    font-weight: bold;
    position: relative;
    background: #fcfcfc;
    border: none;
        border-top-width: medium;
        border-bottom-width: medium;
        border-top-style: none;
        border-bottom-style: none;
        border-top-color: currentcolor;
        border-bottom-color: currentcolor;
    padding-left: 2em -4px;
}
"""

# Fixed palette of plot colors; _generate_chart indexes into this tuple
# per test, so at most len(COLORS) tests fit into one chart (an IndexError
# past the end is caught by the caller and logged as "Out of colors").
COLORS = (
    u"#1A1110",
    u"#DA2647",
    u"#214FC6",
    u"#01786F",
    u"#BD8260",
    u"#FFD12A",
    u"#A6E7FF",
    u"#738276",
    u"#C95A49",
    u"#FC5A8D",
    u"#CEC8EF",
    u"#391285",
    u"#6F2DA8",
    u"#FF878D",
    u"#45A27D",
    u"#FFD0B9",
    u"#FD5240",
    u"#DB91EF",
    u"#44D7A8",
    u"#4F86F7",
    u"#84DE02",
    u"#FFCFF1",
    u"#614051"
)
121
122
def generate_cpta(spec, data):
    """Generate all formats and versions of the Continuous Performance Trending
    and Analysis.

    :param spec: Specification read from the specification file.
    :param data: Full data set.
    :type spec: Specification
    :type data: InputData
    """

    logging.info(u"Generating the Continuous Performance Trending and Analysis "
                 u"...")

    ret_code = _generate_all_charts(spec, data)

    paths = spec.environment[u'paths']

    # Build the static html pages from the generated content.
    execute_command(HTML_BUILDER.format(
        date=datetime.utcnow().strftime(u'%Y-%m-%d %H:%M UTC'),
        working_dir=paths[u'DIR[WORKING,SRC]'],
        build_dir=paths[u'DIR[BUILD,HTML]']
    ))

    # Patch both theme css files with the same overrides.
    for css_key in (u'DIR[CSS_PATCH_FILE]', u'DIR[CSS_PATCH_FILE2]'):
        with open(paths[css_key], u'w') as css_file:
            css_file.write(THEME_OVERRIDES)

    if spec.environment.get(u"archive-inputs", False):
        archive_input_data(spec)

    logging.info(u"Done.")

    return ret_code
158
159
def _generate_trending_traces(in_data, job_name, build_info,
                              name=u"", color=u"", incl_tests=u"mrr"):
    """Generate the trending traces:
     - samples,
     - outliers, regress, progress
     - average of normal samples (trending line)

    :param in_data: Full data set.
    :param job_name: The name of job which generated the data.
    :param build_info: Information about the builds.
    :param name: Name of the plot
    :param color: Name of the color for the plot.
    :param incl_tests: Included tests, accepted values: mrr, ndr, pdr
    :type in_data: OrderedDict
    :type job_name: str
    :type build_info: dict
    :type name: str
    :type color: str
    :type incl_tests: str
    :returns: Generated traces (list) and the evaluated result. An empty
        list and None are returned if the data can not be processed.
    :rtype: tuple(traces, result)
    """

    if incl_tests not in (u"mrr", u"ndr", u"pdr"):
        return list(), None

    data_x = list(in_data.keys())
    data_y_pps = list()
    data_y_mpps = list()
    data_y_stdev = list()
    for item in in_data.values():
        data_y_pps.append(float(item[u"receive-rate"]))
        data_y_stdev.append(float(item[u"receive-stdev"]) / 1e6)
        data_y_mpps.append(float(item[u"receive-rate"]) / 1e6)

    hover_text = list()
    xaxis = list()
    for index, key in enumerate(data_x):
        str_key = str(key)
        date = build_info[job_name][str_key][0]
        hover_str = (u"date: {date}<br>"
                     u"{property} [Mpps]: {value:.3f}<br>"
                     u"<stdev>"
                     u"{sut}-ref: {build}<br>"
                     u"csit-ref: {test}-{period}-build-{build_nr}<br>"
                     u"testbed: {testbed}")
        # Only mrr results carry a meaningful stdev; for ndr/pdr the
        # <stdev> placeholder is dropped from the hover text.
        if incl_tests == u"mrr":
            hover_str = hover_str.replace(
                u"<stdev>", f"stdev [Mpps]: {data_y_stdev[index]:.3f}<br>"
            )
        else:
            hover_str = hover_str.replace(u"<stdev>", u"")
        # Connection-rate tests are measured in cps, not pps.
        if u"-cps" in name:
            hover_str = hover_str.replace(u"[Mpps]", u"[Mcps]")
        if u"dpdk" in job_name:
            hover_text.append(hover_str.format(
                date=date,
                property=u"average" if incl_tests == u"mrr" else u"throughput",
                value=data_y_mpps[index],
                sut=u"dpdk",
                build=build_info[job_name][str_key][1].rsplit(u'~', 1)[0],
                test=incl_tests,
                period=u"weekly",
                build_nr=str_key,
                testbed=build_info[job_name][str_key][2]))
        elif u"vpp" in job_name:
            hover_str = hover_str.format(
                date=date,
                property=u"average" if incl_tests == u"mrr" else u"throughput",
                value=data_y_mpps[index],
                sut=u"vpp",
                build=build_info[job_name][str_key][1].rsplit(u'~', 1)[0],
                test=incl_tests,
                period=u"daily" if incl_tests == u"mrr" else u"weekly",
                build_nr=str_key,
                testbed=build_info[job_name][str_key][2])
            if u"-cps" in name:
                hover_str = hover_str.replace(u"throughput", u"connection rate")
            hover_text.append(hover_str)

        # The date string is expected as "YYYYMMDD HH:MM" — parsed by
        # fixed slicing, so a malformed date raises ValueError here.
        xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
                              int(date[9:11]), int(date[12:])))

    data_pd = OrderedDict()
    for key, value in zip(xaxis, data_y_pps):
        data_pd[key] = value

    try:
        anomaly_classification, avgs_pps, stdevs_pps = \
            classify_anomalies(data_pd)
    except ValueError as err:
        logging.info(f"{err} Skipping")
        # Bug fix: return the documented (traces, result) tuple instead of
        # a bare None, so callers unpacking the return value do not crash
        # with TypeError and can skip this test gracefully.
        return list(), None
    avgs_mpps = [avg_pps / 1e6 for avg_pps in avgs_pps]
    stdevs_mpps = [stdev_pps / 1e6 for stdev_pps in stdevs_pps]

    anomalies = OrderedDict()
    anomalies_colors = list()
    anomalies_avgs = list()
    # Classification -> position on the red/white/green colorscale below.
    anomaly_color = {
        u"regression": 0.0,
        u"normal": 0.5,
        u"progression": 1.0
    }
    if anomaly_classification:
        for index, (key, value) in enumerate(data_pd.items()):
            if anomaly_classification[index] in (u"regression", u"progression"):
                anomalies[key] = value / 1e6
                anomalies_colors.append(
                    anomaly_color[anomaly_classification[index]])
                anomalies_avgs.append(avgs_mpps[index])
        # Ensure all three colorscale anchor values are present so the
        # colorbar is always rendered with the full legend.
        anomalies_colors.extend([0.0, 0.5, 1.0])

    # Create traces

    trace_samples = plgo.Scatter(
        x=xaxis,
        y=data_y_mpps,
        mode=u"markers",
        line={
            u"width": 1
        },
        showlegend=True,
        legendgroup=name,
        name=f"{name}",
        marker={
            u"size": 5,
            u"color": color,
            u"symbol": u"circle",
        },
        text=hover_text,
        hoverinfo=u"text+name"
    )
    traces = [trace_samples, ]

    trend_hover_text = list()
    for idx in range(len(data_x)):
        trend_hover_str = (
            f"trend [Mpps]: {avgs_mpps[idx]:.3f}<br>"
            f"stdev [Mpps]: {stdevs_mpps[idx]:.3f}"
        )
        trend_hover_text.append(trend_hover_str)

    trace_trend = plgo.Scatter(
        x=xaxis,
        y=avgs_mpps,
        mode=u"lines",
        line={
            u"shape": u"linear",
            u"width": 1,
            u"color": color,
        },
        showlegend=False,
        legendgroup=name,
        name=f"{name}",
        text=trend_hover_text,
        hoverinfo=u"text+name"
    )
    traces.append(trace_trend)

    # Circle markers over regressions (red) and progressions (green).
    trace_anomalies = plgo.Scatter(
        x=list(anomalies.keys()),
        y=anomalies_avgs,
        mode=u"markers",
        hoverinfo=u"none",
        showlegend=False,
        legendgroup=name,
        name=f"{name}-anomalies",
        marker={
            u"size": 15,
            u"symbol": u"circle-open",
            u"color": anomalies_colors,
            u"colorscale": [
                [0.00, u"red"],
                [0.33, u"red"],
                [0.33, u"white"],
                [0.66, u"white"],
                [0.66, u"green"],
                [1.00, u"green"]
            ],
            u"showscale": True,
            u"line": {
                u"width": 2
            },
            u"colorbar": {
                u"y": 0.5,
                u"len": 0.8,
                u"title": u"Circles Marking Data Classification",
                u"titleside": u"right",
                u"titlefont": {
                    u"size": 14
                },
                u"tickmode": u"array",
                u"tickvals": [0.167, 0.500, 0.833],
                u"ticktext": [u"Regression", u"Normal", u"Progression"],
                u"ticks": u"",
                u"ticklen": 0,
                u"tickangle": -90,
                u"thickness": 10
            }
        }
    )
    traces.append(trace_anomalies)

    if anomaly_classification:
        return traces, anomaly_classification[-1]

    return traces, None
368
369
def _generate_all_charts(spec, input_data):
    """Generate all charts specified in the specification file.

    :param spec: Specification.
    :param input_data: Full data set.
    :type spec: Specification
    :type input_data: InputData
    :returns: Overall evaluation of the classification: u"PASS" or u"FAIL".
    :rtype: str
    """

    def _generate_chart(graph):
        """Generates the chart.

        :param graph: The graph to be generated
        :type graph: dict
        :returns: List of dictionaries with the job name, csv table with
            results and list of tests classification results; an empty dict
            if there are no data or no layout.
        :rtype: list or dict
        """

        logging.info(f"  Generating the chart {graph.get(u'title', u'')} ...")

        job_name = list(graph[u"data"].keys())[0]

        # Transform the data
        logging.info(
            f"    Creating the data set for the {graph.get(u'type', u'')} "
            f"{graph.get(u'title', u'')}."
        )

        data = input_data.filter_tests_by_name(
            graph,
            params=[u"type", u"result", u"throughput", u"tags"],
            continue_on_error=True
        )

        if data is None or data.empty:
            logging.error(u"No data.")
            return dict()

        return_lst = list()

        # One chart is generated per (test-type, core) combination.
        for ttype in graph.get(u"test-type", (u"mrr", )):
            for core in graph.get(u"core", tuple()):
                csv_tbl = list()
                res = dict()
                chart_data = dict()
                chart_tags = dict()
                for item in graph.get(u"include", tuple()):
                    reg_ex = re.compile(str(item.format(core=core)).lower())
                    for job, job_data in data.items():
                        if job != job_name:
                            continue
                        for index, bld in job_data.items():
                            for test_id, test in bld.items():
                                if not re.match(reg_ex, str(test_id).lower()):
                                    continue
                                if chart_data.get(test_id, None) is None:
                                    chart_data[test_id] = OrderedDict()
                                try:
                                    # Pick the rate (and stdev for mrr) of
                                    # the wanted test type; builds missing
                                    # the data are silently skipped.
                                    if ttype == u"mrr":
                                        rate = test[u"result"][u"receive-rate"]
                                        stdev = \
                                            test[u"result"][u"receive-stdev"]
                                    elif ttype == u"ndr":
                                        rate = \
                                            test["throughput"][u"NDR"][u"LOWER"]
                                        stdev = float(u"nan")
                                    elif ttype == u"pdr":
                                        rate = \
                                            test["throughput"][u"PDR"][u"LOWER"]
                                        stdev = float(u"nan")
                                    else:
                                        continue
                                    chart_data[test_id][int(index)] = {
                                        u"receive-rate": rate,
                                        u"receive-stdev": stdev
                                    }
                                    chart_tags[test_id] = \
                                        test.get(u"tags", None)
                                except (KeyError, TypeError):
                                    pass

                # Add items to the csv table:
                for tst_name, tst_data in chart_data.items():
                    tst_lst = list()
                    for bld in builds_dict[job_name]:
                        itm = tst_data.get(int(bld), dict())
                        # CSIT-1180: Itm will be list, compute stats.
                        try:
                            tst_lst.append(str(itm.get(u"receive-rate", u"")))
                        except AttributeError:
                            tst_lst.append(u"")
                    csv_tbl.append(f"{tst_name}," + u",".join(tst_lst) + u'\n')

                # Generate traces:
                traces = list()
                index = 0
                groups = graph.get(u"groups", None)
                visibility = list()

                if groups:
                    # Grouped charts: one list of visibility flags per
                    # group, used later to build the dropdown menu.
                    for group in groups:
                        visible = list()
                        for tag in group:
                            for tst_name, test_data in chart_data.items():
                                if not test_data:
                                    logging.warning(
                                        f"No data for the test {tst_name}"
                                    )
                                    continue
                                if tag not in chart_tags[tst_name]:
                                    continue
                                try:
                                    trace, rslt = _generate_trending_traces(
                                        test_data,
                                        job_name=job_name,
                                        build_info=build_info,
                                        name=u'-'.join(tst_name.split(u'.')[-1].
                                                       split(u'-')[2:-1]),
                                        color=COLORS[index],
                                        incl_tests=ttype
                                    )
                                except IndexError:
                                    logging.error(f"Out of colors: index: "
                                                  f"{index}, test: {tst_name}")
                                    index += 1
                                    continue
                                traces.extend(trace)
                                visible.extend(
                                    [True for _ in range(len(trace))]
                                )
                                res[tst_name] = rslt
                                index += 1
                                # A test belongs to the first matching tag
                                # only.
                                break
                        visibility.append(visible)
                else:
                    for tst_name, test_data in chart_data.items():
                        if not test_data:
                            logging.warning(f"No data for the test {tst_name}")
                            continue
                        try:
                            trace, rslt = _generate_trending_traces(
                                test_data,
                                job_name=job_name,
                                build_info=build_info,
                                name=u'-'.join(
                                    tst_name.split(u'.')[-1].split(u'-')[2:-1]),
                                color=COLORS[index],
                                incl_tests=ttype
                            )
                        except IndexError:
                            logging.error(
                                f"Out of colors: index: "
                                f"{index}, test: {tst_name}"
                            )
                            index += 1
                            continue
                        traces.extend(trace)
                        res[tst_name] = rslt
                        index += 1

                if traces:
                    # Generate the chart:
                    try:
                        layout = deepcopy(graph[u"layout"])
                    except KeyError as err:
                        logging.error(u"Finished with error: No layout defined")
                        logging.error(repr(err))
                        return dict()
                    if groups:
                        # Build per-group visibility masks: selecting group
                        # i shows only the traces generated for group i.
                        show = list()
                        for i in range(len(visibility)):
                            visible = list()
                            for vis_idx, _ in enumerate(visibility):
                                for _ in range(len(visibility[vis_idx])):
                                    visible.append(i == vis_idx)
                            show.append(visible)

                        buttons = list()
                        buttons.append(dict(
                            label=u"All",
                            method=u"update",
                            args=[{u"visible":
                                       [True for _ in range(len(show[0]))]}, ]
                        ))
                        for i in range(len(groups)):
                            try:
                                label = graph[u"group-names"][i]
                            except (IndexError, KeyError):
                                label = f"Group {i + 1}"
                            buttons.append(dict(
                                label=label,
                                method=u"update",
                                args=[{u"visible": show[i]}, ]
                            ))

                        layout[u"updatemenus"] = list([
                            dict(
                                active=0,
                                type=u"dropdown",
                                direction=u"down",
                                xanchor=u"left",
                                yanchor=u"bottom",
                                x=-0.12,
                                y=1.0,
                                buttons=buttons
                            )
                        ])

                    name_file = (
                        f"{spec.cpta[u'output-file']}/"
                        f"{graph[u'output-file-name']}.html"
                    )
                    name_file = name_file.format(core=core, test_type=ttype)

                    logging.info(f"    Writing the file {name_file}")
                    plpl = plgo.Figure(data=traces, layout=layout)
                    try:
                        ploff.plot(
                            plpl,
                            show_link=False,
                            auto_open=False,
                            filename=name_file
                        )
                    except plerr.PlotlyEmptyDataError:
                        logging.warning(u"No data for the plot. Skipped.")

                return_lst.append(
                    {
                        u"job_name": job_name,
                        u"csv_table": csv_tbl,
                        u"results": res
                    }
                )

        return return_lst

    # Collect builds which produced usable data.
    builds_dict = dict()
    for job, builds in spec.input.items():
        if builds_dict.get(job, None) is None:
            builds_dict[job] = list()
        for build in builds:
            if build[u"status"] not in (u"failed", u"not found", u"removed",
                                        None):
                builds_dict[job].append(str(build[u"build"]))

    # Create "build ID": "date" dict:
    build_info = dict()
    tb_tbl = spec.environment.get(u"testbeds", None)
    for job_name, job_data in builds_dict.items():
        if build_info.get(job_name, None) is None:
            build_info[job_name] = OrderedDict()
        for build in job_data:
            testbed = u""
            tb_ip = input_data.metadata(job_name, build).get(u"testbed", u"")
            if tb_ip and tb_tbl:
                testbed = tb_tbl.get(tb_ip, u"")
            build_info[job_name][build] = (
                input_data.metadata(job_name, build).get(u"generated", u""),
                input_data.metadata(job_name, build).get(u"version", u""),
                testbed
            )

    anomaly_classifications = dict()

    # Create the table header:
    csv_tables = dict()
    for job_name in builds_dict:
        if csv_tables.get(job_name, None) is None:
            csv_tables[job_name] = list()
        header = f"Build Number:,{u','.join(builds_dict[job_name])}\n"
        csv_tables[job_name].append(header)
        build_dates = [x[0] for x in build_info[job_name].values()]
        header = f"Build Date:,{u','.join(build_dates)}\n"
        csv_tables[job_name].append(header)
        versions = [x[1] for x in build_info[job_name].values()]
        header = f"Version:,{u','.join(versions)}\n"
        csv_tables[job_name].append(header)

    for chart in spec.cpta[u"plots"]:
        results = _generate_chart(chart)
        if not results:
            continue

        for result in results:
            csv_tables[result[u"job_name"]].extend(result[u"csv_table"])

            if anomaly_classifications.get(result[u"job_name"], None) is None:
                anomaly_classifications[result[u"job_name"]] = dict()
            anomaly_classifications[result[u"job_name"]].\
                update(result[u"results"])

    # Write the tables:
    for job_name, csv_table in csv_tables.items():
        file_name = f"{spec.cpta[u'output-file']}/{job_name}-trending"
        with open(f"{file_name}.csv", u"wt") as file_handler:
            file_handler.writelines(csv_table)

        # Re-read the csv and render it as a pretty txt table, with pps
        # values converted to Mpps (data rows start after the two header
        # rows, hence line_nr > 1).
        txt_table = None
        with open(f"{file_name}.csv", u"rt") as csv_file:
            csv_content = csv.reader(csv_file, delimiter=u',', quotechar=u'"')
            for line_nr, row in enumerate(csv_content):
                if txt_table is None:
                    txt_table = prettytable.PrettyTable(row)
                else:
                    if line_nr > 1:
                        for idx, item in enumerate(row):
                            try:
                                row[idx] = str(round(float(item) / 1000000, 2))
                            except ValueError:
                                pass
                    try:
                        txt_table.add_row(row)
                    # PrettyTable raises Exception
                    except Exception as err:
                        logging.warning(
                            f"Error occurred while generating TXT table:\n{err}"
                        )
        # Bug fix: an empty csv file left txt_table as None and the
        # subsequent align/write crashed with AttributeError; only produce
        # the txt file when the table was actually created.
        if txt_table is not None:
            txt_table.align[u"Build Number:"] = u"l"
            with open(f"{file_name}.txt", u"wt") as txt_file:
                txt_file.write(str(txt_table))

    # Evaluate result: any regression (or outlier) anywhere fails the run.
    if anomaly_classifications:
        result = u"PASS"
        for job_name, job_data in anomaly_classifications.items():
            file_name = \
                f"{spec.cpta[u'output-file']}/regressions-{job_name}.txt"
            with open(file_name, u'w') as txt_file:
                for test_name, classification in job_data.items():
                    if classification == u"regression":
                        txt_file.write(test_name + u'\n')
                    if classification in (u"regression", u"outlier"):
                        result = u"FAIL"
            file_name = \
                f"{spec.cpta[u'output-file']}/progressions-{job_name}.txt"
            with open(file_name, u'w') as txt_file:
                for test_name, classification in job_data.items():
                    if classification == u"progression":
                        txt_file.write(test_name + u'\n')
    else:
        result = u"FAIL"

    logging.info(f"Partial results: {anomaly_classifications}")
    logging.info(f"Result: {result}")

    return result