resources/tools/presentation/generator_CPTA.py

   1 # Copyright (c) 2018 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Generation of Continuous Performance Trending and Analysis.
  15 """
  16
  17 import datetime
  18 import logging
  19 import plotly.offline as ploff
  20 import plotly.graph_objs as plgo
  21 import numpy as np
  22 import pandas as pd
  23
  24 from collections import OrderedDict
  25 from utils import find_outliers, archive_input_data, execute_command
  26
  27
  28 # Command to build the html format of the report
  29 HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
  30                '-b html -E ' \
  31                '-t html ' \
  32                '{working_dir} ' \
  33                '{build_dir}/'
  34
  35 # .css file for the html format of the report
  36 THEME_OVERRIDES = """/* override table width restrictions */
  37 .wy-nav-content {
  38     max-width: 1200px !important;
  39 }
  40 """
  41
  42 COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
  43           "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
  44           "Violet", "Blue", "Yellow"]
  45
  46
  47 def generate_cpta(spec, data):
  48     """Generate all formats and versions of the Continuous Performance Trending
  49     and Analysis.
  50
  51     :param spec: Specification read from the specification file.
  52     :param data: Full data set.
  53     :type spec: Specification
  54     :type data: InputData
  55     """
  56
  57     logging.info("Generating the Continuous Performance Trending and Analysis "
  58                  "...")
  59
  60     ret_code = _generate_all_charts(spec, data)
  61
  62     cmd = HTML_BUILDER.format(
  63         date=datetime.date.today().strftime('%d-%b-%Y'),
  64         working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
  65         build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
  66     execute_command(cmd)
  67
  68     with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
  69             css_file:
  70         css_file.write(THEME_OVERRIDES)
  71
  72     with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
  73             css_file:
  74         css_file.write(THEME_OVERRIDES)
  75
  76     archive_input_data(spec)
  77
  78     logging.info("Done.")
  79
  80     return ret_code
  81
  82
  83 def _select_data(in_data, period, fill_missing=False, use_first=False):
  84     """Select the data from the full data set. The selection is done by picking
  85     the samples depending on the period: period = 1: All, period = 2: every
  86     second sample, period = 3: every third sample ...
  87
  88     :param in_data: Full set of data.
  89     :param period: Sampling period.
  90     :param fill_missing: If the chosen sample is missing in the full set, its
  91     nearest neighbour is used.
  92     :param use_first: Use the first sample even though it is not chosen.
  93     :type in_data: OrderedDict
  94     :type period: int
  95     :type fill_missing: bool
  96     :type use_first: bool
  97     :returns: Reduced data.
  98     :rtype: OrderedDict
  99     """
 100
 101     first_idx = min(in_data.keys())
 102     last_idx = max(in_data.keys())
 103
 104     idx = last_idx
 105     data_dict = dict()
 106     if use_first:
 107         data_dict[first_idx] = in_data[first_idx]
 108     while idx >= first_idx:
 109         data = in_data.get(idx, None)
 110         if data is None:
 111             if fill_missing:
 112                 threshold = int(round(idx - period / 2)) + 1 - period % 2
 113                 idx_low = first_idx if threshold < first_idx else threshold
 114                 threshold = int(round(idx + period / 2))
 115                 idx_high = last_idx if threshold > last_idx else threshold
 116
 117                 flag_l = True
 118                 flag_h = True
 119                 idx_lst = list()
 120                 inc = 1
 121                 while flag_l or flag_h:
 122                     if idx + inc > idx_high:
 123                         flag_h = False
 124                     else:
 125                         idx_lst.append(idx + inc)
 126                     if idx - inc < idx_low:
 127                         flag_l = False
 128                     else:
 129                         idx_lst.append(idx - inc)
 130                     inc += 1
 131
 132                 for i in idx_lst:
 133                     if i in in_data.keys():
 134                         data_dict[i] = in_data[i]
 135                         break
 136         else:
 137             data_dict[idx] = data
 138         idx -= period
 139
 140     return OrderedDict(sorted(data_dict.items(), key=lambda t: t[0]))
 141
 142
 143 def _evaluate_results(in_data, trimmed_data, window=10):
 144     """Evaluates if the sample value is regress, normal or progress compared to
 145     previous data within the window.
 146     We use the intervals defined as:
 147     - regress: less than median - 3 * stdev
 148     - normal: between median - 3 * stdev and median + 3 * stdev
 149     - progress: more than median + 3 * stdev
 150
 151     :param in_data: Full data set.
 152     :param trimmed_data: Full data set without the outliers.
 153     :param window: Window size used to calculate moving median and moving stdev.
 154     :type in_data: pandas.Series
 155     :type trimmed_data: pandas.Series
 156     :type window: int
 157     :returns: Evaluated results.
 158     :rtype: list
 159     """
 160
 161     if len(in_data) > 2:
 162         win_size = in_data.size if in_data.size < window else window
 163         results = [0.0, ] * win_size
 164         median = in_data.rolling(window=win_size).median()
 165         stdev_t = trimmed_data.rolling(window=win_size, min_periods=2).std()
 166         m_vals = median.values
 167         s_vals = stdev_t.values
 168         d_vals = in_data.values
 169         for day in range(win_size, in_data.size):
 170             if np.isnan(m_vals[day - 1]) or np.isnan(s_vals[day - 1]):
 171                 results.append(0.0)
 172             elif d_vals[day] < (m_vals[day - 1] - 3 * s_vals[day - 1]):
 173                 results.append(0.33)
 174             elif (m_vals[day - 1] - 3 * s_vals[day - 1]) <= d_vals[day] <= \
 175                     (m_vals[day - 1] + 3 * s_vals[day - 1]):
 176                 results.append(0.66)
 177             else:
 178                 results.append(1.0)
 179     else:
 180         results = [0.0, ]
 181         try:
 182             median = np.median(in_data)
 183             stdev = np.std(in_data)
 184             if in_data.values[-1] < (median - 3 * stdev):
 185                 results.append(0.33)
 186             elif (median - 3 * stdev) <= in_data.values[-1] <= (
 187                     median + 3 * stdev):
 188                 results.append(0.66)
 189             else:
 190                 results.append(1.0)
 191         except TypeError:
 192             results.append(None)
 193     return results
 194
 195
 196 def _generate_trending_traces(in_data, period, moving_win_size=10,
 197                               fill_missing=True, use_first=False,
 198                               show_moving_median=True, name="", color=""):
 199     """Generate the trending traces:
 200      - samples,
 201      - moving median (trending plot)
 202      - outliers, regress, progress
 203
 204     :param in_data: Full data set.
 205     :param period: Sampling period.
 206     :param moving_win_size: Window size.
 207     :param fill_missing: If the chosen sample is missing in the full set, its
 208     nearest neighbour is used.
 209     :param use_first: Use the first sample even though it is not chosen.
 210     :param show_moving_median: Show moving median (trending plot).
 211     :param name: Name of the plot
 212     :param color: Name of the color for the plot.
 213     :type in_data: OrderedDict
 214     :type period: int
 215     :type moving_win_size: int
 216     :type fill_missing: bool
 217     :type use_first: bool
 218     :type show_moving_median: bool
 219     :type name: str
 220     :type color: str
 221     :returns: Generated traces (list) and the evaluated result (float).
 222     :rtype: tuple(traces, result)
 223     """
 224
 225     if period > 1:
 226         in_data = _select_data(in_data, period,
 227                                fill_missing=fill_missing,
 228                                use_first=use_first)
 229
 230     data_x = [key for key in in_data.keys()]
 231     data_y = [val for val in in_data.values()]
 232     data_pd = pd.Series(data_y, index=data_x)
 233
 234     t_data, outliers = find_outliers(data_pd)
 235
 236     results = _evaluate_results(data_pd, t_data, window=moving_win_size)
 237
 238     anomalies = pd.Series()
 239     anomalies_res = list()
 240     for idx, item in enumerate(in_data.items()):
 241         item_pd = pd.Series([item[1], ], index=[item[0], ])
 242         if item[0] in outliers.keys():
 243             anomalies = anomalies.append(item_pd)
 244             anomalies_res.append(0.0)
 245         elif results[idx] in (0.33, 1.0):
 246             anomalies = anomalies.append(item_pd)
 247             anomalies_res.append(results[idx])
 248     anomalies_res.extend([0.0, 0.33, 0.66, 1.0])
 249
 250     # Create traces
 251     color_scale = [[0.00, "grey"],
 252                    [0.25, "grey"],
 253                    [0.25, "red"],
 254                    [0.50, "red"],
 255                    [0.50, "white"],
 256                    [0.75, "white"],
 257                    [0.75, "green"],
 258                    [1.00, "green"]]
 259
 260     trace_samples = plgo.Scatter(
 261         x=data_x,
 262         y=data_y,
 263         mode='markers',
 264         line={
 265             "width": 1
 266         },
 267         name="{name}-thput".format(name=name),
 268         marker={
 269             "size": 5,
 270             "color": color,
 271             "symbol": "circle",
 272         },
 273     )
 274     traces = [trace_samples, ]
 275
 276     trace_anomalies = plgo.Scatter(
 277         x=anomalies.keys(),
 278         y=anomalies.values,
 279         mode='markers',
 280         hoverinfo="none",
 281         showlegend=False,
 282         legendgroup=name,
 283         name="{name}: outliers".format(name=name),
 284         marker={
 285             "size": 15,
 286             "symbol": "circle-open",
 287             "color": anomalies_res,
 288             "colorscale": color_scale,
 289             "showscale": True,
 290
 291             "colorbar": {
 292                 "y": 0.5,
 293                 "len": 0.8,
 294                 "title": "Results Clasification",
 295                 "titleside": 'right',
 296                 "titlefont": {
 297                     "size": 14
 298                 },
 299                 "tickmode": 'array',
 300                 "tickvals": [0.125, 0.375, 0.625, 0.875],
 301                 "ticktext": ["Outlier", "Regress", "Normal", "Progress"],
 302                 "ticks": 'outside',
 303                 "ticklen": 0,
 304                 "tickangle": -90,
 305                 "thickness": 10
 306             }
 307         }
 308     )
 309     traces.append(trace_anomalies)
 310
 311     if show_moving_median:
 312         data_mean_y = pd.Series(data_y).rolling(
 313             window=moving_win_size).median()
 314         trace_median = plgo.Scatter(
 315             x=data_x,
 316             y=data_mean_y,
 317             mode='lines',
 318             line={
 319                 "shape": "spline",
 320                 "width": 1,
 321                 "color": color,
 322             },
 323             name='{name}-trend'.format(name=name, size=moving_win_size)
 324         )
 325         traces.append(trace_median)
 326
 327     return traces, results[-1]
 328
 329
 330 def _generate_chart(traces, layout, file_name):
 331     """Generates the whole chart using pre-generated traces.
 332
 333     :param traces: Traces for the chart.
 334     :param layout: Layout of the chart.
 335     :param file_name: File name for the generated chart.
 336     :type traces: list
 337     :type layout: dict
 338     :type file_name: str
 339     """
 340
 341     # Create plot
 342     logging.info("    Writing the file '{0}' ...".format(file_name))
 343     plpl = plgo.Figure(data=traces, layout=layout)
 344     ploff.plot(plpl, show_link=False, auto_open=False, filename=file_name)
 345
 346
 347 def _generate_all_charts(spec, input_data):
 348     """Generate all charts specified in the specification file.
 349
 350     :param spec: Specification.
 351     :param input_data: Full data set.
 352     :type spec: Specification
 353     :type input_data: InputData
 354     """
 355
 356     results = list()
 357     for chart in spec.cpta["plots"]:
 358         logging.info("  Generating the chart '{0}' ...".
 359                      format(chart.get("title", "")))
 360
 361         # Transform the data
 362         data = input_data.filter_data(chart, continue_on_error=True)
 363         if data is None:
 364             logging.error("No data.")
 365             return
 366
 367         chart_data = dict()
 368         for job in data:
 369             for idx, build in job.items():
 370                 for test in build:
 371                     if chart_data.get(test["name"], None) is None:
 372                         chart_data[test["name"]] = OrderedDict()
 373                     try:
 374                         chart_data[test["name"]][int(idx)] = \
 375                             test["result"]["throughput"]
 376                     except (KeyError, TypeError):
 377                         chart_data[test["name"]][int(idx)] = None
 378
 379         for period in chart["periods"]:
 380             # Generate traces:
 381             traces = list()
 382             win_size = 10 if period == 1 else 5 if period < 20 else 3
 383             idx = 0
 384             for test_name, test_data in chart_data.items():
 385                 if not test_data:
 386                     logging.warning("No data for the test '{0}'".
 387                                     format(test_name))
 388                     continue
 389                 trace, result = _generate_trending_traces(
 390                     test_data,
 391                     period=period,
 392                     moving_win_size=win_size,
 393                     fill_missing=True,
 394                     use_first=False,
 395                     name='-'.join(test_name.split('-')[3:-1]),
 396                     color=COLORS[idx])
 397                 traces.extend(trace)
 398                 results.append(result)
 399                 idx += 1
 400
 401             # Generate the chart:
 402             period_name = "Daily" if period == 1 else \
 403                 "Weekly" if period < 20 else "Monthly"
 404             chart["layout"]["title"] = chart["title"].format(period=period_name)
 405             _generate_chart(traces,
 406                             chart["layout"],
 407                             file_name="{0}-{1}-{2}{3}".format(
 408                                 spec.cpta["output-file"],
 409                                 chart["output-file-name"],
 410                                 period,
 411                                 spec.cpta["output-file-type"]))
 412
 413         logging.info("  Done.")
 414
 415     result = "PASS"
 416     for item in results:
 417         if item is None:
 418             result = "FAIL"
 419             break
 420         if item == 0.66 and result == "PASS":
 421             result = "PASS"
 422         elif item == 0.33 or item == 0.0:
 423             result = "FAIL"
 424     print(results)
 425     print(result)
 426     if result == "FAIL":
 427         return 1
 428     else:
 429         return 0