resources/tools/presentation/generator_CPTA.py

   1 # Copyright (c) 2018 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Generation of Continuous Performance Trending and Analysis.
  15 """
  16
  17 import datetime
  18 import logging
  19 import plotly.offline as ploff
  20 import plotly.graph_objs as plgo
  21 import plotly.exceptions as plerr
  22 import numpy as np
  23 import pandas as pd
  24
  25 from collections import OrderedDict
  26 from utils import find_outliers, archive_input_data, execute_command
  27
  28
  29 # Command to build the html format of the report
  30 HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
  31                '-b html -E ' \
  32                '-t html ' \
  33                '-D version="Generated on {date}" ' \
  34                '{working_dir} ' \
  35                '{build_dir}/'
  36
  37 # .css file for the html format of the report
  38 THEME_OVERRIDES = """/* override table width restrictions */
  39 .wy-nav-content {
  40     max-width: 1200px !important;
  41 }
  42 """
  43
  44 COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
  45           "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
  46           "Violet", "Blue", "Yellow"]
  47
  48
  49 def generate_cpta(spec, data):
  50     """Generate all formats and versions of the Continuous Performance Trending
  51     and Analysis.
  52
  53     :param spec: Specification read from the specification file.
  54     :param data: Full data set.
  55     :type spec: Specification
  56     :type data: InputData
  57     """
  58
  59     logging.info("Generating the Continuous Performance Trending and Analysis "
  60                  "...")
  61
  62     ret_code = _generate_all_charts(spec, data)
  63
  64     cmd = HTML_BUILDER.format(
  65         date=datetime.date.today().strftime('%d-%b-%Y'),
  66         working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
  67         build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
  68     execute_command(cmd)
  69
  70     with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
  71             css_file:
  72         css_file.write(THEME_OVERRIDES)
  73
  74     with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
  75             css_file:
  76         css_file.write(THEME_OVERRIDES)
  77
  78     archive_input_data(spec)
  79
  80     logging.info("Done.")
  81
  82     return ret_code
  83
  84
  85 def _select_data(in_data, period, fill_missing=False, use_first=False):
  86     """Select the data from the full data set. The selection is done by picking
  87     the samples depending on the period: period = 1: All, period = 2: every
  88     second sample, period = 3: every third sample ...
  89
  90     :param in_data: Full set of data.
  91     :param period: Sampling period.
  92     :param fill_missing: If the chosen sample is missing in the full set, its
  93     nearest neighbour is used.
  94     :param use_first: Use the first sample even though it is not chosen.
  95     :type in_data: OrderedDict
  96     :type period: int
  97     :type fill_missing: bool
  98     :type use_first: bool
  99     :returns: Reduced data.
 100     :rtype: OrderedDict
 101     """
 102
 103     first_idx = min(in_data.keys())
 104     last_idx = max(in_data.keys())
 105
 106     idx = last_idx
 107     data_dict = dict()
 108     if use_first:
 109         data_dict[first_idx] = in_data[first_idx]
 110     while idx >= first_idx:
 111         data = in_data.get(idx, None)
 112         if data is None:
 113             if fill_missing:
 114                 threshold = int(round(idx - period / 2)) + 1 - period % 2
 115                 idx_low = first_idx if threshold < first_idx else threshold
 116                 threshold = int(round(idx + period / 2))
 117                 idx_high = last_idx if threshold > last_idx else threshold
 118
 119                 flag_l = True
 120                 flag_h = True
 121                 idx_lst = list()
 122                 inc = 1
 123                 while flag_l or flag_h:
 124                     if idx + inc > idx_high:
 125                         flag_h = False
 126                     else:
 127                         idx_lst.append(idx + inc)
 128                     if idx - inc < idx_low:
 129                         flag_l = False
 130                     else:
 131                         idx_lst.append(idx - inc)
 132                     inc += 1
 133
 134                 for i in idx_lst:
 135                     if i in in_data.keys():
 136                         data_dict[i] = in_data[i]
 137                         break
 138         else:
 139             data_dict[idx] = data
 140         idx -= period
 141
 142     return OrderedDict(sorted(data_dict.items(), key=lambda t: t[0]))
 143
 144
 145 def _evaluate_results(in_data, trimmed_data, window=10):
 146     """Evaluates if the sample value is regress, normal or progress compared to
 147     previous data within the window.
 148     We use the intervals defined as:
 149     - regress: less than median - 3 * stdev
 150     - normal: between median - 3 * stdev and median + 3 * stdev
 151     - progress: more than median + 3 * stdev
 152
 153     :param in_data: Full data set.
 154     :param trimmed_data: Full data set without the outliers.
 155     :param window: Window size used to calculate moving median and moving stdev.
 156     :type in_data: pandas.Series
 157     :type trimmed_data: pandas.Series
 158     :type window: int
 159     :returns: Evaluated results.
 160     :rtype: list
 161     """
 162
 163     if len(in_data) > 2:
 164         win_size = in_data.size if in_data.size < window else window
 165         results = [0.0, ] * win_size
 166         median = in_data.rolling(window=win_size).median()
 167         stdev_t = trimmed_data.rolling(window=win_size, min_periods=2).std()
 168         m_vals = median.values
 169         s_vals = stdev_t.values
 170         d_vals = in_data.values
 171         for day in range(win_size, in_data.size):
 172             if np.isnan(m_vals[day - 1]) or np.isnan(s_vals[day - 1]):
 173                 results.append(0.0)
 174             elif d_vals[day] < (m_vals[day - 1] - 3 * s_vals[day - 1]):
 175                 results.append(0.33)
 176             elif (m_vals[day - 1] - 3 * s_vals[day - 1]) <= d_vals[day] <= \
 177                     (m_vals[day - 1] + 3 * s_vals[day - 1]):
 178                 results.append(0.66)
 179             else:
 180                 results.append(1.0)
 181     else:
 182         results = [0.0, ]
 183         try:
 184             median = np.median(in_data)
 185             stdev = np.std(in_data)
 186             if in_data.values[-1] < (median - 3 * stdev):
 187                 results.append(0.33)
 188             elif (median - 3 * stdev) <= in_data.values[-1] <= (
 189                     median + 3 * stdev):
 190                 results.append(0.66)
 191             else:
 192                 results.append(1.0)
 193         except TypeError:
 194             results.append(None)
 195     return results
 196
 197
 198 def _generate_trending_traces(in_data, period, moving_win_size=10,
 199                               fill_missing=True, use_first=False,
 200                               show_moving_median=True, name="", color=""):
 201     """Generate the trending traces:
 202      - samples,
 203      - moving median (trending plot)
 204      - outliers, regress, progress
 205
 206     :param in_data: Full data set.
 207     :param period: Sampling period.
 208     :param moving_win_size: Window size.
 209     :param fill_missing: If the chosen sample is missing in the full set, its
 210     nearest neighbour is used.
 211     :param use_first: Use the first sample even though it is not chosen.
 212     :param show_moving_median: Show moving median (trending plot).
 213     :param name: Name of the plot
 214     :param color: Name of the color for the plot.
 215     :type in_data: OrderedDict
 216     :type period: int
 217     :type moving_win_size: int
 218     :type fill_missing: bool
 219     :type use_first: bool
 220     :type show_moving_median: bool
 221     :type name: str
 222     :type color: str
 223     :returns: Generated traces (list) and the evaluated result (float).
 224     :rtype: tuple(traces, result)
 225     """
 226
 227     if period > 1:
 228         in_data = _select_data(in_data, period,
 229                                fill_missing=fill_missing,
 230                                use_first=use_first)
 231
 232     data_x = [key for key in in_data.keys()]
 233     data_y = [val for val in in_data.values()]
 234     data_pd = pd.Series(data_y, index=data_x)
 235
 236     t_data, outliers = find_outliers(data_pd)
 237
 238     results = _evaluate_results(data_pd, t_data, window=moving_win_size)
 239
 240     anomalies = pd.Series()
 241     anomalies_res = list()
 242     for idx, item in enumerate(in_data.items()):
 243         item_pd = pd.Series([item[1], ], index=[item[0], ])
 244         if item[0] in outliers.keys():
 245             anomalies = anomalies.append(item_pd)
 246             anomalies_res.append(0.0)
 247         elif results[idx] in (0.33, 1.0):
 248             anomalies = anomalies.append(item_pd)
 249             anomalies_res.append(results[idx])
 250     anomalies_res.extend([0.0, 0.33, 0.66, 1.0])
 251
 252     # Create traces
 253     color_scale = [[0.00, "grey"],
 254                    [0.25, "grey"],
 255                    [0.25, "red"],
 256                    [0.50, "red"],
 257                    [0.50, "white"],
 258                    [0.75, "white"],
 259                    [0.75, "green"],
 260                    [1.00, "green"]]
 261
 262     trace_samples = plgo.Scatter(
 263         x=data_x,
 264         y=data_y,
 265         mode='markers',
 266         line={
 267             "width": 1
 268         },
 269         name="{name}-thput".format(name=name),
 270         marker={
 271             "size": 5,
 272             "color": color,
 273             "symbol": "circle",
 274         },
 275     )
 276     traces = [trace_samples, ]
 277
 278     trace_anomalies = plgo.Scatter(
 279         x=anomalies.keys(),
 280         y=anomalies.values,
 281         mode='markers',
 282         hoverinfo="none",
 283         showlegend=False,
 284         legendgroup=name,
 285         name="{name}: outliers".format(name=name),
 286         marker={
 287             "size": 15,
 288             "symbol": "circle-open",
 289             "color": anomalies_res,
 290             "colorscale": color_scale,
 291             "showscale": True,
 292
 293             "colorbar": {
 294                 "y": 0.5,
 295                 "len": 0.8,
 296                 "title": "Results Clasification",
 297                 "titleside": 'right',
 298                 "titlefont": {
 299                     "size": 14
 300                 },
 301                 "tickmode": 'array',
 302                 "tickvals": [0.125, 0.375, 0.625, 0.875],
 303                 "ticktext": ["Outlier", "Regress", "Normal", "Progress"],
 304                 "ticks": 'outside',
 305                 "ticklen": 0,
 306                 "tickangle": -90,
 307                 "thickness": 10
 308             }
 309         }
 310     )
 311     traces.append(trace_anomalies)
 312
 313     if show_moving_median:
 314         data_mean_y = pd.Series(data_y).rolling(
 315             window=moving_win_size).median()
 316         trace_median = plgo.Scatter(
 317             x=data_x,
 318             y=data_mean_y,
 319             mode='lines',
 320             line={
 321                 "shape": "spline",
 322                 "width": 1,
 323                 "color": color,
 324             },
 325             name='{name}-trend'.format(name=name, size=moving_win_size)
 326         )
 327         traces.append(trace_median)
 328
 329     return traces, results[-1]
 330
 331
 332 def _generate_chart(traces, layout, file_name):
 333     """Generates the whole chart using pre-generated traces.
 334
 335     :param traces: Traces for the chart.
 336     :param layout: Layout of the chart.
 337     :param file_name: File name for the generated chart.
 338     :type traces: list
 339     :type layout: dict
 340     :type file_name: str
 341     """
 342
 343     # Create plot
 344     logging.info("    Writing the file '{0}' ...".format(file_name))
 345     plpl = plgo.Figure(data=traces, layout=layout)
 346     try:
 347         ploff.plot(plpl, show_link=False, auto_open=False, filename=file_name)
 348     except plerr.PlotlyEmptyDataError:
 349         logging.warning(" No data for the plot. Skipped.")
 350
 351
 352 def _generate_all_charts(spec, input_data):
 353     """Generate all charts specified in the specification file.
 354
 355     :param spec: Specification.
 356     :param input_data: Full data set.
 357     :type spec: Specification
 358     :type input_data: InputData
 359     """
 360
 361     results = list()
 362     for chart in spec.cpta["plots"]:
 363         logging.info("  Generating the chart '{0}' ...".
 364                      format(chart.get("title", "")))
 365
 366         # Transform the data
 367         data = input_data.filter_data(chart, continue_on_error=True)
 368         if data is None:
 369             logging.error("No data.")
 370             return
 371
 372         chart_data = dict()
 373         for job in data:
 374             for idx, build in job.items():
 375                 for test in build:
 376                     if chart_data.get(test["name"], None) is None:
 377                         chart_data[test["name"]] = OrderedDict()
 378                     try:
 379                         chart_data[test["name"]][int(idx)] = \
 380                             test["result"]["throughput"]
 381                     except (KeyError, TypeError):
 382                         chart_data[test["name"]][int(idx)] = None
 383
 384         for period in chart["periods"]:
 385             # Generate traces:
 386             traces = list()
 387             win_size = 10 if period == 1 else 5 if period < 20 else 3
 388             idx = 0
 389             for test_name, test_data in chart_data.items():
 390                 if not test_data:
 391                     logging.warning("No data for the test '{0}'".
 392                                     format(test_name))
 393                     continue
 394                 trace, result = _generate_trending_traces(
 395                     test_data,
 396                     period=period,
 397                     moving_win_size=win_size,
 398                     fill_missing=True,
 399                     use_first=False,
 400                     name='-'.join(test_name.split('-')[3:-1]),
 401                     color=COLORS[idx])
 402                 traces.extend(trace)
 403                 results.append(result)
 404                 idx += 1
 405
 406             # Generate the chart:
 407             period_name = "Daily" if period == 1 else \
 408                 "Weekly" if period < 20 else "Monthly"
 409             chart["layout"]["title"] = chart["title"].format(period=period_name)
 410             _generate_chart(traces,
 411                             chart["layout"],
 412                             file_name="{0}-{1}-{2}{3}".format(
 413                                 spec.cpta["output-file"],
 414                                 chart["output-file-name"],
 415                                 period,
 416                                 spec.cpta["output-file-type"]))
 417
 418         logging.info("  Done.")
 419
 420     result = "PASS"
 421     for item in results:
 422         if item is None:
 423             result = "FAIL"
 424             break
 425         if item == 0.66 and result == "PASS":
 426             result = "PASS"
 427         elif item == 0.33 or item == 0.0:
 428             result = "FAIL"
 429     print(results)
 430     print(result)
 431     if result == "FAIL":
 432         return 1
 433     else:
 434         return 0