resources/tools/presentation/generator_CPTA.py

   1 # Copyright (c) 2018 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Generation of Continuous Performance Trending and Analysis.
  15 """
  16
  17 import datetime
  18 import logging
  19 import csv
  20 import prettytable
  21 import plotly.offline as ploff
  22 import plotly.graph_objs as plgo
  23 import plotly.exceptions as plerr
  24 import numpy as np
  25 import pandas as pd
  26
  27 from collections import OrderedDict
  28 from utils import split_outliers, archive_input_data, execute_command
  29
  30
  31 # Command to build the html format of the report
  32 HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
  33                '-b html -E ' \
  34                '-t html ' \
  35                '-D version="Generated on {date}" ' \
  36                '{working_dir} ' \
  37                '{build_dir}/'
  38
  39 # .css file for the html format of the report
  40 THEME_OVERRIDES = """/* override table width restrictions */
  41 .wy-nav-content {
  42     max-width: 1200px !important;
  43 }
  44 """
  45
  46 COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
  47           "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
  48           "Violet", "Blue", "Yellow"]
  49
  50
  51 def generate_cpta(spec, data):
  52     """Generate all formats and versions of the Continuous Performance Trending
  53     and Analysis.
  54
  55     :param spec: Specification read from the specification file.
  56     :param data: Full data set.
  57     :type spec: Specification
  58     :type data: InputData
  59     """
  60
  61     logging.info("Generating the Continuous Performance Trending and Analysis "
  62                  "...")
  63
  64     ret_code = _generate_all_charts(spec, data)
  65
  66     cmd = HTML_BUILDER.format(
  67         date=datetime.date.today().strftime('%d-%b-%Y'),
  68         working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
  69         build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
  70     execute_command(cmd)
  71
  72     with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
  73             css_file:
  74         css_file.write(THEME_OVERRIDES)
  75
  76     with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
  77             css_file:
  78         css_file.write(THEME_OVERRIDES)
  79
  80     archive_input_data(spec)
  81
  82     logging.info("Done.")
  83
  84     return ret_code
  85
  86
  87 def _select_data(in_data, period, fill_missing=False, use_first=False):
  88     """Select the data from the full data set. The selection is done by picking
  89     the samples depending on the period: period = 1: All, period = 2: every
  90     second sample, period = 3: every third sample ...
  91
  92     :param in_data: Full set of data.
  93     :param period: Sampling period.
  94     :param fill_missing: If the chosen sample is missing in the full set, its
  95     nearest neighbour is used.
  96     :param use_first: Use the first sample even though it is not chosen.
  97     :type in_data: OrderedDict
  98     :type period: int
  99     :type fill_missing: bool
 100     :type use_first: bool
 101     :returns: Reduced data.
 102     :rtype: OrderedDict
 103     """
 104
 105     first_idx = min(in_data.keys())
 106     last_idx = max(in_data.keys())
 107
 108     idx = last_idx
 109     data_dict = dict()
 110     if use_first:
 111         data_dict[first_idx] = in_data[first_idx]
 112     while idx >= first_idx:
 113         data = in_data.get(idx, None)
 114         if data is None:
 115             if fill_missing:
 116                 threshold = int(round(idx - period / 2)) + 1 - period % 2
 117                 idx_low = first_idx if threshold < first_idx else threshold
 118                 threshold = int(round(idx + period / 2))
 119                 idx_high = last_idx if threshold > last_idx else threshold
 120
 121                 flag_l = True
 122                 flag_h = True
 123                 idx_lst = list()
 124                 inc = 1
 125                 while flag_l or flag_h:
 126                     if idx + inc > idx_high:
 127                         flag_h = False
 128                     else:
 129                         idx_lst.append(idx + inc)
 130                     if idx - inc < idx_low:
 131                         flag_l = False
 132                     else:
 133                         idx_lst.append(idx - inc)
 134                     inc += 1
 135
 136                 for i in idx_lst:
 137                     if i in in_data.keys():
 138                         data_dict[i] = in_data[i]
 139                         break
 140         else:
 141             data_dict[idx] = data
 142         idx -= period
 143
 144     return OrderedDict(sorted(data_dict.items(), key=lambda t: t[0]))
 145
 146
 147 def _evaluate_results(trimmed_data, window=10):
 148     """Evaluates if the sample value is regress, normal or progress compared to
 149     previous data within the window.
 150     We use the intervals defined as:
 151     - regress: less than trimmed moving median - 3 * stdev
 152     - normal: between trimmed moving median - 3 * stdev and median + 3 * stdev
 153     - progress: more than trimmed moving median + 3 * stdev
 154     where stdev is trimmed moving standard deviation.
 155
 156     :param trimmed_data: Full data set with the outliers replaced by nan.
 157     :param window: Window size used to calculate moving average and moving stdev.
 158     :type trimmed_data: pandas.Series
 159     :type window: int
 160     :returns: Evaluated results.
 161     :rtype: list
 162     """
 163
 164     if len(trimmed_data) > 2:
 165         win_size = trimmed_data.size if trimmed_data.size < window else window
 166         results = [0.66, ]
 167         tmm = trimmed_data.rolling(window=win_size, min_periods=2).median()
 168         tmstd = trimmed_data.rolling(window=win_size, min_periods=2).std()
 169
 170         first = True
 171         for build_nr, value in trimmed_data.iteritems():
 172             if first:
 173                 first = False
 174                 continue
 175             if (np.isnan(value)
 176                     or np.isnan(tmm[build_nr])
 177                     or np.isnan(tmstd[build_nr])):
 178                 results.append(0.0)
 179             elif value < (tmm[build_nr] - 3 * tmstd[build_nr]):
 180                 results.append(0.33)
 181             elif value > (tmm[build_nr] + 3 * tmstd[build_nr]):
 182                 results.append(1.0)
 183             else:
 184                 results.append(0.66)
 185     else:
 186         results = [0.0, ]
 187         try:
 188             tmm = np.median(trimmed_data)
 189             tmstd = np.std(trimmed_data)
 190             if trimmed_data.values[-1] < (tmm - 3 * tmstd):
 191                 results.append(0.33)
 192             elif (tmm - 3 * tmstd) <= trimmed_data.values[-1] <= (
 193                     tmm + 3 * tmstd):
 194                 results.append(0.66)
 195             else:
 196                 results.append(1.0)
 197         except TypeError:
 198             results.append(None)
 199     return results
 200
 201
 202 def _generate_trending_traces(in_data, build_info, period, moving_win_size=10,
 203                               fill_missing=True, use_first=False,
 204                               show_trend_line=True, name="", color=""):
 205     """Generate the trending traces:
 206      - samples,
 207      - trimmed moving median (trending line)
 208      - outliers, regress, progress
 209
 210     :param in_data: Full data set.
 211     :param build_info: Information about the builds.
 212     :param period: Sampling period.
 213     :param moving_win_size: Window size.
 214     :param fill_missing: If the chosen sample is missing in the full set, its
 215         nearest neighbour is used.
 216     :param use_first: Use the first sample even though it is not chosen.
 217     :param show_trend_line: Show moving median (trending plot).
 218     :param name: Name of the plot
 219     :param color: Name of the color for the plot.
 220     :type in_data: OrderedDict
 221     :type build_info: dict
 222     :type period: int
 223     :type moving_win_size: int
 224     :type fill_missing: bool
 225     :type use_first: bool
 226     :type show_trend_line: bool
 227     :type name: str
 228     :type color: str
 229     :returns: Generated traces (list) and the evaluated result (float).
 230     :rtype: tuple(traces, result)
 231     """
 232
 233     if period > 1:
 234         in_data = _select_data(in_data, period,
 235                                fill_missing=fill_missing,
 236                                use_first=use_first)
 237
 238     data_x = list(in_data.keys())
 239     data_y = list(in_data.values())
 240
 241     hover_text = list()
 242     for idx in data_x:
 243         hover_text.append("vpp-build: {0}".
 244                           format(build_info[str(idx)][1].split("~")[-1]))
 245
 246     data_pd = pd.Series(data_y, index=data_x)
 247
 248     t_data, outliers = split_outliers(data_pd, outlier_const=1.5,
 249                                       window=moving_win_size)
 250     results = _evaluate_results(t_data, window=moving_win_size)
 251
 252     anomalies = pd.Series()
 253     anomalies_res = list()
 254     for idx, item in enumerate(in_data.items()):
 255         item_pd = pd.Series([item[1], ], index=[item[0], ])
 256         if item[0] in outliers.keys():
 257             anomalies = anomalies.append(item_pd)
 258             anomalies_res.append(0.0)
 259         elif results[idx] in (0.33, 1.0):
 260             anomalies = anomalies.append(item_pd)
 261             anomalies_res.append(results[idx])
 262     anomalies_res.extend([0.0, 0.33, 0.66, 1.0])
 263
 264     # Create traces
 265     color_scale = [[0.00, "grey"],
 266                    [0.25, "grey"],
 267                    [0.25, "red"],
 268                    [0.50, "red"],
 269                    [0.50, "white"],
 270                    [0.75, "white"],
 271                    [0.75, "green"],
 272                    [1.00, "green"]]
 273
 274     trace_samples = plgo.Scatter(
 275         x=data_x,
 276         y=data_y,
 277         mode='markers',
 278         line={
 279             "width": 1
 280         },
 281         name="{name}-thput".format(name=name),
 282         marker={
 283             "size": 5,
 284             "color": color,
 285             "symbol": "circle",
 286         },
 287         text=hover_text,
 288         hoverinfo="x+y+text+name"
 289     )
 290     traces = [trace_samples, ]
 291
 292     trace_anomalies = plgo.Scatter(
 293         x=anomalies.keys(),
 294         y=anomalies.values,
 295         mode='markers',
 296         hoverinfo="none",
 297         showlegend=True,
 298         legendgroup=name,
 299         name="{name}-anomalies".format(name=name),
 300         marker={
 301             "size": 15,
 302             "symbol": "circle-open",
 303             "color": anomalies_res,
 304             "colorscale": color_scale,
 305             "showscale": True,
 306             "line": {
 307                 "width": 2
 308             },
 309             "colorbar": {
 310                 "y": 0.5,
 311                 "len": 0.8,
 312                 "title": "Circles Marking Data Classification",
 313                 "titleside": 'right',
 314                 "titlefont": {
 315                     "size": 14
 316                 },
 317                 "tickmode": 'array',
 318                 "tickvals": [0.125, 0.375, 0.625, 0.875],
 319                 "ticktext": ["Outlier", "Regression", "Normal", "Progression"],
 320                 "ticks": "",
 321                 "ticklen": 0,
 322                 "tickangle": -90,
 323                 "thickness": 10
 324             }
 325         }
 326     )
 327     traces.append(trace_anomalies)
 328
 329     if show_trend_line:
 330         data_trend = t_data.rolling(window=moving_win_size,
 331                                     min_periods=2).median()
 332         trace_trend = plgo.Scatter(
 333             x=data_trend.keys(),
 334             y=data_trend.tolist(),
 335             mode='lines',
 336             line={
 337                 "shape": "spline",
 338                 "width": 1,
 339                 "color": color,
 340             },
 341             name='{name}-trend'.format(name=name)
 342         )
 343         traces.append(trace_trend)
 344
 345     return traces, results[-1]
 346
 347
 348 def _generate_chart(traces, layout, file_name):
 349     """Generates the whole chart using pre-generated traces.
 350
 351     :param traces: Traces for the chart.
 352     :param layout: Layout of the chart.
 353     :param file_name: File name for the generated chart.
 354     :type traces: list
 355     :type layout: dict
 356     :type file_name: str
 357     """
 358
 359     # Create plot
 360     logging.info("    Writing the file '{0}' ...".format(file_name))
 361     plpl = plgo.Figure(data=traces, layout=layout)
 362     try:
 363         ploff.plot(plpl, show_link=False, auto_open=False, filename=file_name)
 364     except plerr.PlotlyEmptyDataError:
 365         logging.warning(" No data for the plot. Skipped.")
 366
 367
 368 def _generate_all_charts(spec, input_data):
 369     """Generate all charts specified in the specification file.
 370
 371     :param spec: Specification.
 372     :param input_data: Full data set.
 373     :type spec: Specification
 374     :type input_data: InputData
 375     """
 376
 377     job_name = spec.cpta["data"].keys()[0]
 378
 379     builds_lst = list()
 380     for build in spec.input["builds"][job_name]:
 381         status = build["status"]
 382         if status != "failed" and status != "not found":
 383             builds_lst.append(str(build["build"]))
 384
 385     # Get "build ID": "date" dict:
 386     build_info = OrderedDict()
 387     for build in builds_lst:
 388         try:
 389             build_info[build] = (
 390                 input_data.metadata(job_name, build)["generated"][:14],
 391                 input_data.metadata(job_name, build)["version"]
 392             )
 393         except KeyError:
 394             build_info[build] = ("", "")
 395         logging.info("{}: {}, {}".format(build,
 396                                          build_info[build][0],
 397                                          build_info[build][1]))
 398
 399     # Create the header:
 400     csv_table = list()
 401     header = "Build Number:," + ",".join(builds_lst) + '\n'
 402     csv_table.append(header)
 403     build_dates = [x[0] for x in build_info.values()]
 404     header = "Build Date:," + ",".join(build_dates) + '\n'
 405     csv_table.append(header)
 406     vpp_versions = [x[1] for x in build_info.values()]
 407     header = "VPP Version:," + ",".join(vpp_versions) + '\n'
 408     csv_table.append(header)
 409
 410     results = list()
 411     for chart in spec.cpta["plots"]:
 412         logging.info("  Generating the chart '{0}' ...".
 413                      format(chart.get("title", "")))
 414
 415         # Transform the data
 416         data = input_data.filter_data(chart, continue_on_error=True)
 417         if data is None:
 418             logging.error("No data.")
 419             return
 420
 421         chart_data = dict()
 422         for job in data:
 423             for idx, build in job.items():
 424                 for test_name, test in build.items():
 425                     if chart_data.get(test_name, None) is None:
 426                         chart_data[test_name] = OrderedDict()
 427                     try:
 428                         chart_data[test_name][int(idx)] = \
 429                             test["result"]["throughput"]
 430                     except (KeyError, TypeError):
 431                         pass
 432
 433         # Add items to the csv table:
 434         for tst_name, tst_data in chart_data.items():
 435             tst_lst = list()
 436             for build in builds_lst:
 437                 item = tst_data.get(int(build), '')
 438                 tst_lst.append(str(item))
 439                 # tst_lst.append(str(item) if item else '')
 440             csv_table.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
 441
 442         for period in chart["periods"]:
 443             # Generate traces:
 444             traces = list()
 445             win_size = 14 if period == 1 else 5 if period < 20 else 3
 446             idx = 0
 447             for test_name, test_data in chart_data.items():
 448                 if not test_data:
 449                     logging.warning("No data for the test '{0}'".
 450                                     format(test_name))
 451                     continue
 452                 test_name = test_name.split('.')[-1]
 453                 trace, result = _generate_trending_traces(
 454                     test_data,
 455                     build_info=build_info,
 456                     period=period,
 457                     moving_win_size=win_size,
 458                     fill_missing=True,
 459                     use_first=False,
 460                     name='-'.join(test_name.split('-')[3:-1]),
 461                     color=COLORS[idx])
 462                 traces.extend(trace)
 463                 results.append(result)
 464                 idx += 1
 465
 466             # Generate the chart:
 467             chart["layout"]["xaxis"]["title"] = \
 468                 chart["layout"]["xaxis"]["title"].format(job=job_name)
 469             _generate_chart(traces,
 470                             chart["layout"],
 471                             file_name="{0}-{1}-{2}{3}".format(
 472                                 spec.cpta["output-file"],
 473                                 chart["output-file-name"],
 474                                 period,
 475                                 spec.cpta["output-file-type"]))
 476
 477         logging.info("  Done.")
 478
 479     # Write the tables:
 480     file_name = spec.cpta["output-file"] + "-trending"
 481     with open("{0}.csv".format(file_name), 'w') as file_handler:
 482         file_handler.writelines(csv_table)
 483
 484     txt_table = None
 485     with open("{0}.csv".format(file_name), 'rb') as csv_file:
 486         csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
 487         line_nr = 0
 488         for row in csv_content:
 489             if txt_table is None:
 490                 txt_table = prettytable.PrettyTable(row)
 491             else:
 492                 if line_nr > 1:
 493                     for idx, item in enumerate(row):
 494                         try:
 495                             row[idx] = str(round(float(item) / 1000000, 2))
 496                         except ValueError:
 497                             pass
 498                 try:
 499                     txt_table.add_row(row)
 500                 except Exception as err:
 501                     logging.warning("Error occurred while generating TXT table:"
 502                                     "\n{0}".format(err))
 503             line_nr += 1
 504         txt_table.align["Build Number:"] = "l"
 505     with open("{0}.txt".format(file_name), "w") as txt_file:
 506         txt_file.write(str(txt_table))
 507
 508     # Evaluate result:
 509     result = "PASS"
 510     for item in results:
 511         if item is None:
 512             result = "FAIL"
 513             break
 514         if item == 0.66 and result == "PASS":
 515             result = "PASS"
 516         elif item == 0.33 or item == 0.0:
 517             result = "FAIL"
 518
 519     logging.info("Partial results: {0}".format(results))
 520     logging.info("Result: {0}".format(result))
 521
 522     return result