resources/tools/presentation/generator_CPTA.py

   1 # Copyright (c) 2018 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Generation of Continuous Performance Trending and Analysis.
  15 """
  16
  17 import logging
  18 import csv
  19 import prettytable
  20 import plotly.offline as ploff
  21 import plotly.graph_objs as plgo
  22 import plotly.exceptions as plerr
  23 import numpy as np
  24 import pandas as pd
  25
  26 from collections import OrderedDict
  27 from datetime import datetime, timedelta
  28
  29 from utils import split_outliers, archive_input_data, execute_command
  30
  31
  32 # Command to build the html format of the report
  33 HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
  34                '-b html -E ' \
  35                '-t html ' \
  36                '-D version="{date}" ' \
  37                '{working_dir} ' \
  38                '{build_dir}/'
  39
  40 # .css file for the html format of the report
  41 THEME_OVERRIDES = """/* override table width restrictions */
  42 .wy-nav-content {
  43     max-width: 1200px !important;
  44 }
  45 """
  46
  47 COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
  48           "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
  49           "Violet", "Blue", "Yellow"]
  50
  51
  52 def generate_cpta(spec, data):
  53     """Generate all formats and versions of the Continuous Performance Trending
  54     and Analysis.
  55
  56     :param spec: Specification read from the specification file.
  57     :param data: Full data set.
  58     :type spec: Specification
  59     :type data: InputData
  60     """
  61
  62     logging.info("Generating the Continuous Performance Trending and Analysis "
  63                  "...")
  64
  65     ret_code = _generate_all_charts(spec, data)
  66
  67     cmd = HTML_BUILDER.format(
  68         date=datetime.utcnow().strftime('%m/%d/%Y %H:%M UTC'),
  69         working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
  70         build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
  71     execute_command(cmd)
  72
  73     with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
  74             css_file:
  75         css_file.write(THEME_OVERRIDES)
  76
  77     with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
  78             css_file:
  79         css_file.write(THEME_OVERRIDES)
  80
  81     archive_input_data(spec)
  82
  83     logging.info("Done.")
  84
  85     return ret_code
  86
  87
  88 def _select_data(in_data, period, fill_missing=False, use_first=False):
  89     """Select the data from the full data set. The selection is done by picking
  90     the samples depending on the period: period = 1: All, period = 2: every
  91     second sample, period = 3: every third sample ...
  92
  93     :param in_data: Full set of data.
  94     :param period: Sampling period.
  95     :param fill_missing: If the chosen sample is missing in the full set, its
  96     nearest neighbour is used.
  97     :param use_first: Use the first sample even though it is not chosen.
  98     :type in_data: OrderedDict
  99     :type period: int
 100     :type fill_missing: bool
 101     :type use_first: bool
 102     :returns: Reduced data.
 103     :rtype: OrderedDict
 104     """
 105
 106     first_idx = min(in_data.keys())
 107     last_idx = max(in_data.keys())
 108
 109     idx = last_idx
 110     data_dict = dict()
 111     if use_first:
 112         data_dict[first_idx] = in_data[first_idx]
 113     while idx >= first_idx:
 114         data = in_data.get(idx, None)
 115         if data is None:
 116             if fill_missing:
 117                 threshold = int(round(idx - period / 2)) + 1 - period % 2
 118                 idx_low = first_idx if threshold < first_idx else threshold
 119                 threshold = int(round(idx + period / 2))
 120                 idx_high = last_idx if threshold > last_idx else threshold
 121
 122                 flag_l = True
 123                 flag_h = True
 124                 idx_lst = list()
 125                 inc = 1
 126                 while flag_l or flag_h:
 127                     if idx + inc > idx_high:
 128                         flag_h = False
 129                     else:
 130                         idx_lst.append(idx + inc)
 131                     if idx - inc < idx_low:
 132                         flag_l = False
 133                     else:
 134                         idx_lst.append(idx - inc)
 135                     inc += 1
 136
 137                 for i in idx_lst:
 138                     if i in in_data.keys():
 139                         data_dict[i] = in_data[i]
 140                         break
 141         else:
 142             data_dict[idx] = data
 143         idx -= period
 144
 145     return OrderedDict(sorted(data_dict.items(), key=lambda t: t[0]))
 146
 147
 148 def _evaluate_results(trimmed_data, window=10):
 149     """Evaluates if the sample value is regress, normal or progress compared to
 150     previous data within the window.
 151     We use the intervals defined as:
 152     - regress: less than trimmed moving median - 3 * stdev
 153     - normal: between trimmed moving median - 3 * stdev and median + 3 * stdev
 154     - progress: more than trimmed moving median + 3 * stdev
 155     where stdev is trimmed moving standard deviation.
 156
 157     :param trimmed_data: Full data set with the outliers replaced by nan.
 158     :param window: Window size used to calculate moving average and moving stdev.
 159     :type trimmed_data: pandas.Series
 160     :type window: int
 161     :returns: Evaluated results.
 162     :rtype: list
 163     """
 164
 165     if len(trimmed_data) > 2:
 166         win_size = trimmed_data.size if trimmed_data.size < window else window
 167         results = [0.66, ]
 168         tmm = trimmed_data.rolling(window=win_size, min_periods=2).median()
 169         tmstd = trimmed_data.rolling(window=win_size, min_periods=2).std()
 170
 171         first = True
 172         for build_nr, value in trimmed_data.iteritems():
 173             if first:
 174                 first = False
 175                 continue
 176             if (np.isnan(value)
 177                     or np.isnan(tmm[build_nr])
 178                     or np.isnan(tmstd[build_nr])):
 179                 results.append(0.0)
 180             elif value < (tmm[build_nr] - 3 * tmstd[build_nr]):
 181                 results.append(0.33)
 182             elif value > (tmm[build_nr] + 3 * tmstd[build_nr]):
 183                 results.append(1.0)
 184             else:
 185                 results.append(0.66)
 186     else:
 187         results = [0.0, ]
 188         try:
 189             tmm = np.median(trimmed_data)
 190             tmstd = np.std(trimmed_data)
 191             if trimmed_data.values[-1] < (tmm - 3 * tmstd):
 192                 results.append(0.33)
 193             elif (tmm - 3 * tmstd) <= trimmed_data.values[-1] <= (
 194                     tmm + 3 * tmstd):
 195                 results.append(0.66)
 196             else:
 197                 results.append(1.0)
 198         except TypeError:
 199             results.append(None)
 200     return results
 201
 202
 203 def _generate_trending_traces(in_data, build_info, period, moving_win_size=10,
 204                               fill_missing=True, use_first=False,
 205                               show_trend_line=True, name="", color=""):
 206     """Generate the trending traces:
 207      - samples,
 208      - trimmed moving median (trending line)
 209      - outliers, regress, progress
 210
 211     :param in_data: Full data set.
 212     :param build_info: Information about the builds.
 213     :param period: Sampling period.
 214     :param moving_win_size: Window size.
 215     :param fill_missing: If the chosen sample is missing in the full set, its
 216         nearest neighbour is used.
 217     :param use_first: Use the first sample even though it is not chosen.
 218     :param show_trend_line: Show moving median (trending plot).
 219     :param name: Name of the plot
 220     :param color: Name of the color for the plot.
 221     :type in_data: OrderedDict
 222     :type build_info: dict
 223     :type period: int
 224     :type moving_win_size: int
 225     :type fill_missing: bool
 226     :type use_first: bool
 227     :type show_trend_line: bool
 228     :type name: str
 229     :type color: str
 230     :returns: Generated traces (list), the evaluated result (float) and the
 231         first and last date.
 232     :rtype: tuple(traces, result)
 233     """
 234
 235     if period > 1:
 236         in_data = _select_data(in_data, period,
 237                                fill_missing=fill_missing,
 238                                use_first=use_first)
 239
 240     data_x = list(in_data.keys())
 241     data_y = list(in_data.values())
 242
 243     hover_text = list()
 244     xaxis = list()
 245     for idx in data_x:
 246         hover_text.append("vpp-ref: {0}<br>csit-ref: mrr-daily-build-{1}".
 247                           format(build_info[str(idx)][1].rsplit('~', 1)[0],
 248                                  idx))
 249         date = build_info[str(idx)][0]
 250         xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
 251                               int(date[9:11]), int(date[12:])))
 252
 253     data_pd = pd.Series(data_y, index=xaxis)
 254
 255     t_data, outliers = split_outliers(data_pd, outlier_const=1.5,
 256                                       window=moving_win_size)
 257     results = _evaluate_results(t_data, window=moving_win_size)
 258
 259     anomalies = pd.Series()
 260     anomalies_res = list()
 261     for idx, item in enumerate(data_pd.items()):
 262         item_pd = pd.Series([item[1], ], index=[item[0], ])
 263         if item[0] in outliers.keys():
 264             anomalies = anomalies.append(item_pd)
 265             anomalies_res.append(0.0)
 266         elif results[idx] in (0.33, 1.0):
 267             anomalies = anomalies.append(item_pd)
 268             anomalies_res.append(results[idx])
 269     anomalies_res.extend([0.0, 0.33, 0.66, 1.0])
 270
 271     # Create traces
 272     color_scale = [[0.00, "grey"],
 273                    [0.25, "grey"],
 274                    [0.25, "red"],
 275                    [0.50, "red"],
 276                    [0.50, "white"],
 277                    [0.75, "white"],
 278                    [0.75, "green"],
 279                    [1.00, "green"]]
 280
 281     trace_samples = plgo.Scatter(
 282         x=xaxis,
 283         y=data_y,
 284         mode='markers',
 285         line={
 286             "width": 1
 287         },
 288         name="{name}-thput".format(name=name),
 289         marker={
 290             "size": 5,
 291             "color": color,
 292             "symbol": "circle",
 293         },
 294         text=hover_text,
 295         hoverinfo="x+y+text+name"
 296     )
 297     traces = [trace_samples, ]
 298
 299     trace_anomalies = plgo.Scatter(
 300         x=anomalies.keys(),
 301         y=anomalies.values,
 302         mode='markers',
 303         hoverinfo="none",
 304         showlegend=True,
 305         legendgroup=name,
 306         name="{name}-anomalies".format(name=name),
 307         marker={
 308             "size": 15,
 309             "symbol": "circle-open",
 310             "color": anomalies_res,
 311             "colorscale": color_scale,
 312             "showscale": True,
 313             "line": {
 314                 "width": 2
 315             },
 316             "colorbar": {
 317                 "y": 0.5,
 318                 "len": 0.8,
 319                 "title": "Circles Marking Data Classification",
 320                 "titleside": 'right',
 321                 "titlefont": {
 322                     "size": 14
 323                 },
 324                 "tickmode": 'array',
 325                 "tickvals": [0.125, 0.375, 0.625, 0.875],
 326                 "ticktext": ["Outlier", "Regression", "Normal", "Progression"],
 327                 "ticks": "",
 328                 "ticklen": 0,
 329                 "tickangle": -90,
 330                 "thickness": 10
 331             }
 332         }
 333     )
 334     traces.append(trace_anomalies)
 335
 336     if show_trend_line:
 337         data_trend = t_data.rolling(window=moving_win_size,
 338                                     min_periods=2).median()
 339         trace_trend = plgo.Scatter(
 340             x=data_trend.keys(),
 341             y=data_trend.tolist(),
 342             mode='lines',
 343             line={
 344                 "shape": "spline",
 345                 "width": 1,
 346                 "color": color,
 347             },
 348             name='{name}-trend'.format(name=name)
 349         )
 350         traces.append(trace_trend)
 351
 352     return traces, results[-1], xaxis[0], xaxis[-1]
 353
 354
 355 def _generate_chart(traces, layout, file_name):
 356     """Generates the whole chart using pre-generated traces.
 357
 358     :param traces: Traces for the chart.
 359     :param layout: Layout of the chart.
 360     :param file_name: File name for the generated chart.
 361     :type traces: list
 362     :type layout: dict
 363     :type file_name: str
 364     """
 365
 366     # Create plot
 367     logging.info("    Writing the file '{0}' ...".format(file_name))
 368     plpl = plgo.Figure(data=traces, layout=layout)
 369     try:
 370         ploff.plot(plpl, show_link=False, auto_open=False, filename=file_name)
 371     except plerr.PlotlyEmptyDataError:
 372         logging.warning(" No data for the plot. Skipped.")
 373
 374
 375 def _generate_all_charts(spec, input_data):
 376     """Generate all charts specified in the specification file.
 377
 378     :param spec: Specification.
 379     :param input_data: Full data set.
 380     :type spec: Specification
 381     :type input_data: InputData
 382     """
 383
 384     job_name = spec.cpta["data"].keys()[0]
 385
 386     builds_lst = list()
 387     for build in spec.input["builds"][job_name]:
 388         status = build["status"]
 389         if status != "failed" and status != "not found":
 390             builds_lst.append(str(build["build"]))
 391
 392     # Get "build ID": "date" dict:
 393     build_info = OrderedDict()
 394     for build in builds_lst:
 395         try:
 396             build_info[build] = (
 397                 input_data.metadata(job_name, build)["generated"][:14],
 398                 input_data.metadata(job_name, build)["version"]
 399             )
 400         except KeyError:
 401             build_info[build] = ("", "")
 402         logging.info("{}: {}, {}".format(build,
 403                                          build_info[build][0],
 404                                          build_info[build][1]))
 405
 406     # Create the header:
 407     csv_table = list()
 408     header = "Build Number:," + ",".join(builds_lst) + '\n'
 409     csv_table.append(header)
 410     build_dates = [x[0] for x in build_info.values()]
 411     header = "Build Date:," + ",".join(build_dates) + '\n'
 412     csv_table.append(header)
 413     vpp_versions = [x[1] for x in build_info.values()]
 414     header = "VPP Version:," + ",".join(vpp_versions) + '\n'
 415     csv_table.append(header)
 416
 417     results = list()
 418     for chart in spec.cpta["plots"]:
 419         logging.info("  Generating the chart '{0}' ...".
 420                      format(chart.get("title", "")))
 421
 422         # Transform the data
 423         data = input_data.filter_data(chart, continue_on_error=True)
 424         if data is None:
 425             logging.error("No data.")
 426             return
 427
 428         chart_data = dict()
 429         for job in data:
 430             for idx, build in job.items():
 431                 for test_name, test in build.items():
 432                     if chart_data.get(test_name, None) is None:
 433                         chart_data[test_name] = OrderedDict()
 434                     try:
 435                         chart_data[test_name][int(idx)] = \
 436                             test["result"]["throughput"]
 437                     except (KeyError, TypeError):
 438                         pass
 439
 440         # Add items to the csv table:
 441         for tst_name, tst_data in chart_data.items():
 442             tst_lst = list()
 443             for build in builds_lst:
 444                 item = tst_data.get(int(build), '')
 445                 tst_lst.append(str(item))
 446             csv_table.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
 447
 448         for period in chart["periods"]:
 449             # Generate traces:
 450             traces = list()
 451             win_size = 14
 452             idx = 0
 453             for test_name, test_data in chart_data.items():
 454                 if not test_data:
 455                     logging.warning("No data for the test '{0}'".
 456                                     format(test_name))
 457                     continue
 458                 test_name = test_name.split('.')[-1]
 459                 trace, result, first_date, last_date = \
 460                     _generate_trending_traces(
 461                         test_data,
 462                         build_info=build_info,
 463                         period=period,
 464                         moving_win_size=win_size,
 465                         fill_missing=True,
 466                         use_first=False,
 467                         name='-'.join(test_name.split('-')[3:-1]),
 468                         color=COLORS[idx])
 469                 traces.extend(trace)
 470                 results.append(result)
 471                 idx += 1
 472
 473             if traces:
 474                 # Generate the chart:
 475                 chart["layout"]["xaxis"]["title"] = \
 476                     chart["layout"]["xaxis"]["title"].format(job=job_name)
 477                 delta = timedelta(days=30)
 478                 start = last_date - delta
 479                 start = first_date if start < first_date else start
 480                 chart["layout"]["xaxis"]["range"] = [str(start.date()),
 481                                                      str(last_date.date())]
 482                 _generate_chart(traces,
 483                                 chart["layout"],
 484                                 file_name="{0}-{1}-{2}{3}".format(
 485                                     spec.cpta["output-file"],
 486                                     chart["output-file-name"],
 487                                     period,
 488                                     spec.cpta["output-file-type"]))
 489
 490         logging.info("  Done.")
 491
 492     # Write the tables:
 493     file_name = spec.cpta["output-file"] + "-trending"
 494     with open("{0}.csv".format(file_name), 'w') as file_handler:
 495         file_handler.writelines(csv_table)
 496
 497     txt_table = None
 498     with open("{0}.csv".format(file_name), 'rb') as csv_file:
 499         csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
 500         line_nr = 0
 501         for row in csv_content:
 502             if txt_table is None:
 503                 txt_table = prettytable.PrettyTable(row)
 504             else:
 505                 if line_nr > 1:
 506                     for idx, item in enumerate(row):
 507                         try:
 508                             row[idx] = str(round(float(item) / 1000000, 2))
 509                         except ValueError:
 510                             pass
 511                 try:
 512                     txt_table.add_row(row)
 513                 except Exception as err:
 514                     logging.warning("Error occurred while generating TXT table:"
 515                                     "\n{0}".format(err))
 516             line_nr += 1
 517         txt_table.align["Build Number:"] = "l"
 518     with open("{0}.txt".format(file_name), "w") as txt_file:
 519         txt_file.write(str(txt_table))
 520
 521     # Evaluate result:
 522     result = "PASS"
 523     for item in results:
 524         if item is None:
 525             result = "FAIL"
 526             break
 527         if item == 0.66 and result == "PASS":
 528             result = "PASS"
 529         elif item == 0.33 or item == 0.0:
 530             result = "FAIL"
 531
 532     logging.info("Partial results: {0}".format(results))
 533     logging.info("Result: {0}".format(result))
 534
 535     return result