Trending: Add exception handling for anomalies classification

[csit.git] / resources / tools / presentation / generator_cpta.py
diff --git a/resources/tools/presentation/generator_cpta.py b/resources/tools/presentation/generator_cpta.py

index ac0a5c6..4b10440 100644 (file)
--- a/resources/tools/presentation/generator_cpta.py
+++ b/resources/tools/presentation/generator_cpta.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020 Cisco and/or its affiliates.
+# Copyright (c) 2021 Cisco and/or its affiliates.
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at:
@@ -14,6 +14,7 @@
  """Generation of Continuous Performance Trending and Analysis.
  """
  
+import re
  import logging
  import csv
  
@@ -30,7 +31,7 @@ from pal_utils import archive_input_data, execute_command, classify_anomalies
  
  
  # Command to build the html format of the report
-HTML_BUILDER = u'sphinx-build -v -c conf_cpta -a ' \
+HTML_BUILDER = u'sphinx-build -v -c sphinx_conf/trending -a ' \
                 u'-b html -E ' \
                 u'-t html ' \
                 u'-D version="{date}" ' \
@@ -92,20 +93,31 @@ THEME_OVERRIDES = u"""/* override table width restrictions */
  }
  """
  
-COLORS = [
-    u"SkyBlue", u"Olive", u"Purple", u"Coral", u"Indigo", u"Pink",
-    u"Chocolate", u"Brown", u"Magenta", u"Cyan", u"Orange", u"Black",
-    u"Violet", u"Blue", u"Yellow", u"BurlyWood", u"CadetBlue", u"Crimson",
-    u"DarkBlue", u"DarkCyan", u"DarkGreen", u"Green", u"GoldenRod",
-    u"LightGreen", u"LightSeaGreen", u"LightSkyBlue", u"Maroon",
-    u"MediumSeaGreen", u"SeaGreen", u"LightSlateGrey",
-    u"SkyBlue", u"Olive", u"Purple", u"Coral", u"Indigo", u"Pink",
-    u"Chocolate", u"Brown", u"Magenta", u"Cyan", u"Orange", u"Black",
-    u"Violet", u"Blue", u"Yellow", u"BurlyWood", u"CadetBlue", u"Crimson",
-    u"DarkBlue", u"DarkCyan", u"DarkGreen", u"Green", u"GoldenRod",
-    u"LightGreen", u"LightSeaGreen", u"LightSkyBlue", u"Maroon",
-    u"MediumSeaGreen", u"SeaGreen", u"LightSlateGrey"
-]
+COLORS = (
+    u"#1A1110",
+    u"#DA2647",
+    u"#214FC6",
+    u"#01786F",
+    u"#BD8260",
+    u"#FFD12A",
+    u"#A6E7FF",
+    u"#738276",
+    u"#C95A49",
+    u"#FC5A8D",
+    u"#CEC8EF",
+    u"#391285",
+    u"#6F2DA8",
+    u"#FF878D",
+    u"#45A27D",
+    u"#FFD0B9",
+    u"#FD5240",
+    u"#DB91EF",
+    u"#44D7A8",
+    u"#4F86F7",
+    u"#84DE02",
+    u"#FFCFF1",
+    u"#614051"
+)
  
  
  def generate_cpta(spec, data):
@@ -137,7 +149,7 @@ def generate_cpta(spec, data):
              css_file:
          css_file.write(THEME_OVERRIDES)
  
-    if spec.configuration.get(u"archive-inputs", True):
+    if spec.environment.get(u"archive-inputs", False):
          archive_input_data(spec)
  
      logging.info(u"Done.")
@@ -146,7 +158,7 @@ def generate_cpta(spec, data):
  
  
  def _generate_trending_traces(in_data, job_name, build_info,
-                              show_trend_line=True, name=u"", color=u""):
+                              name=u"", color=u"", incl_tests=u"mrr"):
      """Generate the trending traces:
       - samples,
       - outliers, regress, progress
@@ -155,22 +167,30 @@ def _generate_trending_traces(in_data, job_name, build_info,
      :param in_data: Full data set.
      :param job_name: The name of job which generated the data.
      :param build_info: Information about the builds.
-    :param show_trend_line: Show moving median (trending plot).
      :param name: Name of the plot
      :param color: Name of the color for the plot.
+    :param incl_tests: Included tests, accepted values: mrr, ndr, pdr
      :type in_data: OrderedDict
      :type job_name: str
      :type build_info: dict
-    :type show_trend_line: bool
      :type name: str
      :type color: str
+    :type incl_tests: str
      :returns: Generated traces (list) and the evaluated result.
      :rtype: tuple(traces, result)
      """
  
+    if incl_tests not in (u"mrr", u"ndr", u"pdr"):
+        return list(), None
+
      data_x = list(in_data.keys())
-    data_y_pps = list(in_data.values())
-    data_y_mpps = [float(item) / 1e6 for item in data_y_pps]
+    data_y_pps = list()
+    data_y_mpps = list()
+    data_y_stdev = list()
+    for item in in_data.values():
+        data_y_pps.append(float(item[u"receive-rate"]))
+        data_y_stdev.append(float(item[u"receive-stdev"]) / 1e6)
+        data_y_mpps.append(float(item[u"receive-rate"]) / 1e6)
  
      hover_text = list()
      xaxis = list()
@@ -178,28 +198,44 @@ def _generate_trending_traces(in_data, job_name, build_info,
          str_key = str(key)
          date = build_info[job_name][str_key][0]
          hover_str = (u"date: {date}<br>"
-                     u"value [Mpps]: {value:.3f}<br>"
+                     u"{property} [Mpps]: {value:.3f}<br>"
+                     u"<stdev>"
                       u"{sut}-ref: {build}<br>"
-                     u"csit-ref: mrr-{period}-build-{build_nr}<br>"
+                     u"csit-ref: {test}-{period}-build-{build_nr}<br>"
                       u"testbed: {testbed}")
+        if incl_tests == u"mrr":
+            hover_str = hover_str.replace(
+                u"<stdev>", f"stdev [Mpps]: {data_y_stdev[index]:.3f}<br>"
+            )
+        else:
+            hover_str = hover_str.replace(u"<stdev>", u"")
+        if u"-cps" in name:
+            hover_str = hover_str.replace(u"[Mpps]", u"[Mcps]")
          if u"dpdk" in job_name:
              hover_text.append(hover_str.format(
                  date=date,
+                property=u"average" if incl_tests == u"mrr" else u"throughput",
                  value=data_y_mpps[index],
                  sut=u"dpdk",
                  build=build_info[job_name][str_key][1].rsplit(u'~', 1)[0],
+                test=incl_tests,
                  period=u"weekly",
                  build_nr=str_key,
                  testbed=build_info[job_name][str_key][2]))
          elif u"vpp" in job_name:
-            hover_text.append(hover_str.format(
+            hover_str = hover_str.format(
                  date=date,
+                property=u"average" if incl_tests == u"mrr" else u"throughput",
                  value=data_y_mpps[index],
                  sut=u"vpp",
                  build=build_info[job_name][str_key][1].rsplit(u'~', 1)[0],
-                period=u"daily",
+                test=incl_tests,
+                period=u"daily" if incl_tests == u"mrr" else u"weekly",
                  build_nr=str_key,
-                testbed=build_info[job_name][str_key][2]))
+                testbed=build_info[job_name][str_key][2])
+            if u"-cps" in name:
+                hover_str = hover_str.replace(u"throughput", u"connection rate")
+            hover_text.append(hover_str)
  
          xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
                                int(date[9:11]), int(date[12:])))
@@ -208,8 +244,14 @@ def _generate_trending_traces(in_data, job_name, build_info,
      for key, value in zip(xaxis, data_y_pps):
          data_pd[key] = value
  
-    anomaly_classification, avgs_pps = classify_anomalies(data_pd)
+    try:
+        anomaly_classification, avgs_pps, stdevs_pps = \
+            classify_anomalies(data_pd)
+    except ValueError as err:
+        logging.info(f"{err} Skipping")
+        return list(), None
      avgs_mpps = [avg_pps / 1e6 for avg_pps in avgs_pps]
+    stdevs_mpps = [stdev_pps / 1e6 for stdev_pps in stdevs_pps]
  
      anomalies = OrderedDict()
      anomalies_colors = list()
@@ -221,8 +263,7 @@ def _generate_trending_traces(in_data, job_name, build_info,
      }
      if anomaly_classification:
          for index, (key, value) in enumerate(data_pd.items()):
-            if anomaly_classification[index] in \
-                    (u"outlier", u"regression", u"progression"):
+            if anomaly_classification[index] in (u"regression", u"progression"):
                  anomalies[key] = value / 1e6
                  anomalies_colors.append(
                      anomaly_color[anomaly_classification[index]])
@@ -251,23 +292,30 @@ def _generate_trending_traces(in_data, job_name, build_info,
      )
      traces = [trace_samples, ]
  
-    if show_trend_line:
-        trace_trend = plgo.Scatter(
-            x=xaxis,
-            y=avgs_mpps,
-            mode=u"lines",
-            line={
-                u"shape": u"linear",
-                u"width": 1,
-                u"color": color,
-            },
-            showlegend=False,
-            legendgroup=name,
-            name=f"{name}",
-            text=[f"trend [Mpps]: {avg:.3f}" for avg in avgs_mpps],
-            hoverinfo=u"text+name"
+    trend_hover_text = list()
+    for idx in range(len(data_x)):
+        trend_hover_str = (
+            f"trend [Mpps]: {avgs_mpps[idx]:.3f}<br>"
+            f"stdev [Mpps]: {stdevs_mpps[idx]:.3f}"
          )
-        traces.append(trace_trend)
+        trend_hover_text.append(trend_hover_str)
+
+    trace_trend = plgo.Scatter(
+        x=xaxis,
+        y=avgs_mpps,
+        mode=u"lines",
+        line={
+            u"shape": u"linear",
+            u"width": 1,
+            u"color": color,
+        },
+        showlegend=False,
+        legendgroup=name,
+        name=f"{name}",
+        text=trend_hover_text,
+        hoverinfo=u"text+name"
+    )
+    traces.append(trace_trend)
  
      trace_anomalies = plgo.Scatter(
          x=list(anomalies.keys()),
@@ -342,182 +390,227 @@ def _generate_all_charts(spec, input_data):
  
          job_name = list(graph[u"data"].keys())[0]
  
-        csv_tbl = list()
-        res = dict()
-
          # Transform the data
          logging.info(
-             f"    Creating the data set for the {graph.get(u'type', u'')} "
-             f"{graph.get(u'title', u'')}."
+            f"    Creating the data set for the {graph.get(u'type', u'')} "
+            f"{graph.get(u'title', u'')}."
          )
  
-        if graph.get(u"include", None):
-            data = input_data.filter_tests_by_name(
-                graph,
-                params=[u"type", u"result", u"tags"],
-                continue_on_error=True
-            )
-        else:
-            data = input_data.filter_data(
-                graph,
-                params=[u"type", u"result", u"tags"],
-                continue_on_error=True)
+        data = input_data.filter_tests_by_name(
+            graph,
+            params=[u"type", u"result", u"throughput", u"tags"],
+            continue_on_error=True
+        )
  
          if data is None or data.empty:
              logging.error(u"No data.")
              return dict()
  
-        chart_data = dict()
-        chart_tags = dict()
-        for job, job_data in data.items():
-            if job != job_name:
-                continue
-            for index, bld in job_data.items():
-                for test_name, test in bld.items():
-                    if chart_data.get(test_name, None) is None:
-                        chart_data[test_name] = OrderedDict()
-                    try:
-                        chart_data[test_name][int(index)] = \
-                            test[u"result"][u"receive-rate"]
-                        chart_tags[test_name] = test.get(u"tags", None)
-                    except (KeyError, TypeError):
-                        pass
-
-        # Add items to the csv table:
-        for tst_name, tst_data in chart_data.items():
-            tst_lst = list()
-            for bld in builds_dict[job_name]:
-                itm = tst_data.get(int(bld), u'')
-                # CSIT-1180: Itm will be list, compute stats.
-                tst_lst.append(str(itm))
-            csv_tbl.append(f"{tst_name}," + u",".join(tst_lst) + u'\n')
-
-        # Generate traces:
-        traces = list()
-        index = 0
-        groups = graph.get(u"groups", None)
-        visibility = list()
-
-        if groups:
-            for group in groups:
-                visible = list()
-                for tag in group:
+        return_lst = list()
+
+        for ttype in graph.get(u"test-type", (u"mrr", )):
+            for core in graph.get(u"core", tuple()):
+                csv_tbl = list()
+                res = dict()
+                chart_data = dict()
+                chart_tags = dict()
+                for item in graph.get(u"include", tuple()):
+                    reg_ex = re.compile(str(item.format(core=core)).lower())
+                    for job, job_data in data.items():
+                        if job != job_name:
+                            continue
+                        for index, bld in job_data.items():
+                            for test_id, test in bld.items():
+                                if not re.match(reg_ex, str(test_id).lower()):
+                                    continue
+                                if chart_data.get(test_id, None) is None:
+                                    chart_data[test_id] = OrderedDict()
+                                try:
+                                    if ttype == u"mrr":
+                                        rate = test[u"result"][u"receive-rate"]
+                                        stdev = \
+                                            test[u"result"][u"receive-stdev"]
+                                    elif ttype == u"ndr":
+                                        rate = \
+                                            test["throughput"][u"NDR"][u"LOWER"]
+                                        stdev = float(u"nan")
+                                    elif ttype == u"pdr":
+                                        rate = \
+                                            test["throughput"][u"PDR"][u"LOWER"]
+                                        stdev = float(u"nan")
+                                    else:
+                                        continue
+                                    chart_data[test_id][int(index)] = {
+                                        u"receive-rate": rate,
+                                        u"receive-stdev": stdev
+                                    }
+                                    chart_tags[test_id] = \
+                                        test.get(u"tags", None)
+                                except (KeyError, TypeError):
+                                    pass
+
+                # Add items to the csv table:
+                for tst_name, tst_data in chart_data.items():
+                    tst_lst = list()
+                    for bld in builds_dict[job_name]:
+                        itm = tst_data.get(int(bld), dict())
+                        # CSIT-1180: Itm will be list, compute stats.
+                        try:
+                            tst_lst.append(str(itm.get(u"receive-rate", u"")))
+                        except AttributeError:
+                            tst_lst.append(u"")
+                    csv_tbl.append(f"{tst_name}," + u",".join(tst_lst) + u'\n')
+
+                # Generate traces:
+                traces = list()
+                index = 0
+                groups = graph.get(u"groups", None)
+                visibility = list()
+
+                if groups:
+                    for group in groups:
+                        visible = list()
+                        for tag in group:
+                            for tst_name, test_data in chart_data.items():
+                                if not test_data:
+                                    logging.warning(
+                                        f"No data for the test {tst_name}"
+                                    )
+                                    continue
+                                if tag not in chart_tags[tst_name]:
+                                    continue
+                                try:
+                                    trace, rslt = _generate_trending_traces(
+                                        test_data,
+                                        job_name=job_name,
+                                        build_info=build_info,
+                                        name=u'-'.join(tst_name.split(u'.')[-1].
+                                                       split(u'-')[2:-1]),
+                                        color=COLORS[index],
+                                        incl_tests=ttype
+                                    )
+                                except IndexError:
+                                    logging.error(f"Out of colors: index: "
+                                                  f"{index}, test: {tst_name}")
+                                    index += 1
+                                    continue
+                                traces.extend(trace)
+                                visible.extend(
+                                    [True for _ in range(len(trace))]
+                                )
+                                res[tst_name] = rslt
+                                index += 1
+                                break
+                        visibility.append(visible)
+                else:
                      for tst_name, test_data in chart_data.items():
                          if not test_data:
                              logging.warning(f"No data for the test {tst_name}")
                              continue
-                        if tag not in chart_tags[tst_name]:
-                            continue
                          try:
                              trace, rslt = _generate_trending_traces(
                                  test_data,
                                  job_name=job_name,
                                  build_info=build_info,
-                                name=u'-'.join(tst_name.split(u'.')[-1].
-                                               split(u'-')[2:-1]),
-                                color=COLORS[index])
+                                name=u'-'.join(
+                                    tst_name.split(u'.')[-1].split(u'-')[2:-1]),
+                                color=COLORS[index],
+                                incl_tests=ttype
+                            )
                          except IndexError:
-                            logging.error(f"Out of colors: index: "
-                                          f"{index}, test: {tst_name}")
+                            logging.error(
+                                f"Out of colors: index: "
+                                f"{index}, test: {tst_name}"
+                            )
                              index += 1
                              continue
                          traces.extend(trace)
-                        visible.extend([True for _ in range(len(trace))])
                          res[tst_name] = rslt
                          index += 1
-                        break
-                visibility.append(visible)
-        else:
-            for tst_name, test_data in chart_data.items():
-                if not test_data:
-                    logging.warning(f"No data for the test {tst_name}")
-                    continue
-                try:
-                    trace, rslt = _generate_trending_traces(
-                        test_data,
-                        job_name=job_name,
-                        build_info=build_info,
-                        name=u'-'.join(
-                            tst_name.split(u'.')[-1].split(u'-')[2:-1]),
-                        color=COLORS[index])
-                except IndexError:
-                    logging.error(
-                        f"Out of colors: index: {index}, test: {tst_name}"
-                    )
-                    index += 1
-                    continue
-                traces.extend(trace)
-                res[tst_name] = rslt
-                index += 1
-
-        if traces:
-            # Generate the chart:
-            try:
-                layout = deepcopy(graph[u"layout"])
-            except KeyError as err:
-                logging.error(u"Finished with error: No layout defined")
-                logging.error(repr(err))
-                return dict()
-            if groups:
-                show = list()
-                for i in range(len(visibility)):
-                    visible = list()
-                    for vis_idx, _ in enumerate(visibility):
-                        for _ in range(len(visibility[vis_idx])):
-                            visible.append(i == vis_idx)
-                    show.append(visible)
-
-                buttons = list()
-                buttons.append(dict(
-                    label=u"All",
-                    method=u"update",
-                    args=[{u"visible": [True for _ in range(len(show[0]))]}, ]
-                ))
-                for i in range(len(groups)):
+
+                if traces:
+                    # Generate the chart:
                      try:
-                        label = graph[u"group-names"][i]
-                    except (IndexError, KeyError):
-                        label = f"Group {i + 1}"
-                    buttons.append(dict(
-                        label=label,
-                        method=u"update",
-                        args=[{u"visible": show[i]}, ]
-                    ))
-
-                layout[u"updatemenus"] = list([
-                    dict(
-                        active=0,
-                        type=u"dropdown",
-                        direction=u"down",
-                        xanchor=u"left",
-                        yanchor=u"bottom",
-                        x=-0.12,
-                        y=1.0,
-                        buttons=buttons
+                        layout = deepcopy(graph[u"layout"])
+                    except KeyError as err:
+                        logging.error(u"Finished with error: No layout defined")
+                        logging.error(repr(err))
+                        return dict()
+                    if groups:
+                        show = list()
+                        for i in range(len(visibility)):
+                            visible = list()
+                            for vis_idx, _ in enumerate(visibility):
+                                for _ in range(len(visibility[vis_idx])):
+                                    visible.append(i == vis_idx)
+                            show.append(visible)
+
+                        buttons = list()
+                        buttons.append(dict(
+                            label=u"All",
+                            method=u"update",
+                            args=[{u"visible":
+                                       [True for _ in range(len(show[0]))]}, ]
+                        ))
+                        for i in range(len(groups)):
+                            try:
+                                label = graph[u"group-names"][i]
+                            except (IndexError, KeyError):
+                                label = f"Group {i + 1}"
+                            buttons.append(dict(
+                                label=label,
+                                method=u"update",
+                                args=[{u"visible": show[i]}, ]
+                            ))
+
+                        layout[u"updatemenus"] = list([
+                            dict(
+                                active=0,
+                                type=u"dropdown",
+                                direction=u"down",
+                                xanchor=u"left",
+                                yanchor=u"bottom",
+                                x=-0.12,
+                                y=1.0,
+                                buttons=buttons
+                            )
+                        ])
+
+                    name_file = (
+                        f"{spec.cpta[u'output-file']}/"
+                        f"{graph[u'output-file-name']}.html"
                      )
-                ])
+                    name_file = name_file.format(core=core, test_type=ttype)
  
-            name_file = (
-                f"{spec.cpta[u'output-file']}/{graph[u'output-file-name']}"
-                f"{spec.cpta[u'output-file-type']}")
+                    logging.info(f"    Writing the file {name_file}")
+                    plpl = plgo.Figure(data=traces, layout=layout)
+                    try:
+                        ploff.plot(
+                            plpl,
+                            show_link=False,
+                            auto_open=False,
+                            filename=name_file
+                        )
+                    except plerr.PlotlyEmptyDataError:
+                        logging.warning(u"No data for the plot. Skipped.")
  
-            logging.info(f"    Writing the file {name_file} ...")
-            plpl = plgo.Figure(data=traces, layout=layout)
-            try:
-                ploff.plot(plpl, show_link=False, auto_open=False,
-                           filename=name_file)
-            except plerr.PlotlyEmptyDataError:
-                logging.warning(u"No data for the plot. Skipped.")
+                return_lst.append(
+                    {
+                        u"job_name": job_name,
+                        u"csv_table": csv_tbl,
+                        u"results": res
+                    }
+                )
  
-        return {u"job_name": job_name, u"csv_table": csv_tbl, u"results": res}
+        return return_lst
  
      builds_dict = dict()
-    for job in spec.input[u"builds"].keys():
+    for job, builds in spec.input.items():
          if builds_dict.get(job, None) is None:
              builds_dict[job] = list()
-        for build in spec.input[u"builds"][job]:
-            status = build[u"status"]
-            if status not in (u"failed", u"not found", u"removed", None):
+        for build in builds:
+            if build[u"status"] not in (u"failed", u"not found", u"removed",
+                                        None):
                  builds_dict[job].append(str(build[u"build"]))
  
      # Create "build ID": "date" dict:
@@ -554,15 +647,17 @@ def _generate_all_charts(spec, input_data):
          csv_tables[job_name].append(header)
  
      for chart in spec.cpta[u"plots"]:
-        result = _generate_chart(chart)
-        if not result:
+        results = _generate_chart(chart)
+        if not results:
              continue
  
-        csv_tables[result[u"job_name"]].extend(result[u"csv_table"])
+        for result in results:
+            csv_tables[result[u"job_name"]].extend(result[u"csv_table"])
  
-        if anomaly_classifications.get(result[u"job_name"], None) is None:
-            anomaly_classifications[result[u"job_name"]] = dict()
-        anomaly_classifications[result[u"job_name"]].update(result[u"results"])
+            if anomaly_classifications.get(result[u"job_name"], None) is None:
+                anomaly_classifications[result[u"job_name"]] = dict()
+            anomaly_classifications[result[u"job_name"]].\
+                update(result[u"results"])
  
      # Write the tables:
      for job_name, csv_table in csv_tables.items():