resources/tools/presentation/generator_CPTA.py

   1 # Copyright (c) 2018 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Generation of Continuous Performance Trending and Analysis.
  15 """
  16
  17 import datetime
  18 import logging
  19 import csv
  20 import prettytable
  21 import plotly.offline as ploff
  22 import plotly.graph_objs as plgo
  23 import plotly.exceptions as plerr
  24 import numpy as np
  25 import pandas as pd
  26
  27 from collections import OrderedDict
  28 from utils import find_outliers, archive_input_data, execute_command
  29
  30
# Command to build the html format of the report. The placeholders {date},
# {working_dir} and {build_dir} are filled in by generate_cpta() before the
# command is executed.
HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
               '-b html -E ' \
               '-t html ' \
               '-D version="Generated on {date}" ' \
               '{working_dir} ' \
               '{build_dir}/'

# Content of the .css patch file(s) for the html format of the report. It is
# written verbatim to the CSS patch files by generate_cpta().
THEME_OVERRIDES = """/* override table width restrictions */
.wy-nav-content {
    max-width: 1200px !important;
}
"""

# Names of the colors used for the traces; one color is picked per test
# within a chart.
COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
          "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
          "Violet", "Blue", "Yellow"]
  49
  50
  51 def generate_cpta(spec, data):
  52     """Generate all formats and versions of the Continuous Performance Trending
  53     and Analysis.
  54
  55     :param spec: Specification read from the specification file.
  56     :param data: Full data set.
  57     :type spec: Specification
  58     :type data: InputData
  59     """
  60
  61     logging.info("Generating the Continuous Performance Trending and Analysis "
  62                  "...")
  63
  64     ret_code = _generate_all_charts(spec, data)
  65
  66     cmd = HTML_BUILDER.format(
  67         date=datetime.date.today().strftime('%d-%b-%Y'),
  68         working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
  69         build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
  70     execute_command(cmd)
  71
  72     with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
  73             css_file:
  74         css_file.write(THEME_OVERRIDES)
  75
  76     with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
  77             css_file:
  78         css_file.write(THEME_OVERRIDES)
  79
  80     archive_input_data(spec)
  81
  82     logging.info("Done.")
  83
  84     return ret_code
  85
  86
  87 def _select_data(in_data, period, fill_missing=False, use_first=False):
  88     """Select the data from the full data set. The selection is done by picking
  89     the samples depending on the period: period = 1: All, period = 2: every
  90     second sample, period = 3: every third sample ...
  91
  92     :param in_data: Full set of data.
  93     :param period: Sampling period.
  94     :param fill_missing: If the chosen sample is missing in the full set, its
  95     nearest neighbour is used.
  96     :param use_first: Use the first sample even though it is not chosen.
  97     :type in_data: OrderedDict
  98     :type period: int
  99     :type fill_missing: bool
 100     :type use_first: bool
 101     :returns: Reduced data.
 102     :rtype: OrderedDict
 103     """
 104
 105     first_idx = min(in_data.keys())
 106     last_idx = max(in_data.keys())
 107
 108     idx = last_idx
 109     data_dict = dict()
 110     if use_first:
 111         data_dict[first_idx] = in_data[first_idx]
 112     while idx >= first_idx:
 113         data = in_data.get(idx, None)
 114         if data is None:
 115             if fill_missing:
 116                 threshold = int(round(idx - period / 2)) + 1 - period % 2
 117                 idx_low = first_idx if threshold < first_idx else threshold
 118                 threshold = int(round(idx + period / 2))
 119                 idx_high = last_idx if threshold > last_idx else threshold
 120
 121                 flag_l = True
 122                 flag_h = True
 123                 idx_lst = list()
 124                 inc = 1
 125                 while flag_l or flag_h:
 126                     if idx + inc > idx_high:
 127                         flag_h = False
 128                     else:
 129                         idx_lst.append(idx + inc)
 130                     if idx - inc < idx_low:
 131                         flag_l = False
 132                     else:
 133                         idx_lst.append(idx - inc)
 134                     inc += 1
 135
 136                 for i in idx_lst:
 137                     if i in in_data.keys():
 138                         data_dict[i] = in_data[i]
 139                         break
 140         else:
 141             data_dict[idx] = data
 142         idx -= period
 143
 144     return OrderedDict(sorted(data_dict.items(), key=lambda t: t[0]))
 145
 146
 147 def _evaluate_results(in_data, trimmed_data, window=10):
 148     """Evaluates if the sample value is regress, normal or progress compared to
 149     previous data within the window.
 150     We use the intervals defined as:
 151     - regress: less than median - 3 * stdev
 152     - normal: between median - 3 * stdev and median + 3 * stdev
 153     - progress: more than median + 3 * stdev
 154
 155     :param in_data: Full data set.
 156     :param trimmed_data: Full data set without the outliers.
 157     :param window: Window size used to calculate moving median and moving stdev.
 158     :type in_data: pandas.Series
 159     :type trimmed_data: pandas.Series
 160     :type window: int
 161     :returns: Evaluated results.
 162     :rtype: list
 163     """
 164
 165     if len(in_data) > 2:
 166         win_size = in_data.size if in_data.size < window else window
 167         results = [0.0, ]
 168         median = in_data.rolling(window=win_size).median()
 169         stdev_t = trimmed_data.rolling(window=win_size, min_periods=2).std()
 170         m_vals = median.values
 171         s_vals = stdev_t.values
 172         d_vals = in_data.values
 173         for day in range(1, in_data.size):
 174             if np.isnan(m_vals[day]) \
 175                     or np.isnan(s_vals[day]) \
 176                     or np.isnan(d_vals[day]):
 177                 results.append(0.0)
 178             elif d_vals[day] < (m_vals[day] - 3 * s_vals[day]):
 179                 results.append(0.33)
 180             elif (m_vals[day] - 3 * s_vals[day]) <= d_vals[day] <= \
 181                     (m_vals[day] + 3 * s_vals[day]):
 182                 results.append(0.66)
 183             else:
 184                 results.append(1.0)
 185     else:
 186         results = [0.0, ]
 187         try:
 188             median = np.median(in_data)
 189             stdev = np.std(in_data)
 190             if in_data.values[-1] < (median - 3 * stdev):
 191                 results.append(0.33)
 192             elif (median - 3 * stdev) <= in_data.values[-1] <= (
 193                     median + 3 * stdev):
 194                 results.append(0.66)
 195             else:
 196                 results.append(1.0)
 197         except TypeError:
 198             results.append(None)
 199     return results
 200
 201
 202 def _generate_trending_traces(in_data, build_info, period, moving_win_size=10,
 203                               fill_missing=True, use_first=False,
 204                               show_moving_median=True, name="", color=""):
 205     """Generate the trending traces:
 206      - samples,
 207      - moving median (trending plot)
 208      - outliers, regress, progress
 209
 210     :param in_data: Full data set.
 211     :param build_info: Information about the builds.
 212     :param period: Sampling period.
 213     :param moving_win_size: Window size.
 214     :param fill_missing: If the chosen sample is missing in the full set, its
 215     nearest neighbour is used.
 216     :param use_first: Use the first sample even though it is not chosen.
 217     :param show_moving_median: Show moving median (trending plot).
 218     :param name: Name of the plot
 219     :param color: Name of the color for the plot.
 220     :type in_data: OrderedDict
 221     :type build_info: dict
 222     :type period: int
 223     :type moving_win_size: int
 224     :type fill_missing: bool
 225     :type use_first: bool
 226     :type show_moving_median: bool
 227     :type name: str
 228     :type color: str
 229     :returns: Generated traces (list) and the evaluated result (float).
 230     :rtype: tuple(traces, result)
 231     """
 232
 233     if period > 1:
 234         in_data = _select_data(in_data, period,
 235                                fill_missing=fill_missing,
 236                                use_first=use_first)
 237     # try:
 238     #     data_x = ["{0}/{1}".format(key, build_info[str(key)][1].split("~")[-1])
 239     #               for key in in_data.keys()]
 240     # except KeyError:
 241     #     data_x = [key for key in in_data.keys()]
 242     hover_text = ["vpp-build: {0}".format(x[1].split("~")[-1])
 243                   for x in build_info.values()]
 244     data_x = [key for key in in_data.keys()]
 245
 246     data_y = [val for val in in_data.values()]
 247     data_pd = pd.Series(data_y, index=data_x)
 248
 249     t_data, outliers = find_outliers(data_pd, outlier_const=1.5)
 250
 251     results = _evaluate_results(data_pd, t_data, window=moving_win_size)
 252
 253     anomalies = pd.Series()
 254     anomalies_res = list()
 255     for idx, item in enumerate(in_data.items()):
 256         # item_pd = pd.Series([item[1], ],
 257         #                     index=["{0}/{1}".
 258         #                     format(item[0],
 259         #                            build_info[str(item[0])][1].split("~")[-1]),
 260         #                            ])
 261         item_pd = pd.Series([item[1], ], index=[item[0], ])
 262         if item[0] in outliers.keys():
 263             anomalies = anomalies.append(item_pd)
 264             anomalies_res.append(0.0)
 265         elif results[idx] in (0.33, 1.0):
 266             anomalies = anomalies.append(item_pd)
 267             anomalies_res.append(results[idx])
 268     anomalies_res.extend([0.0, 0.33, 0.66, 1.0])
 269
 270     # Create traces
 271     color_scale = [[0.00, "grey"],
 272                    [0.25, "grey"],
 273                    [0.25, "red"],
 274                    [0.50, "red"],
 275                    [0.50, "white"],
 276                    [0.75, "white"],
 277                    [0.75, "green"],
 278                    [1.00, "green"]]
 279
 280     trace_samples = plgo.Scatter(
 281         x=data_x,
 282         y=data_y,
 283         mode='markers',
 284         line={
 285             "width": 1
 286         },
 287         name="{name}-thput".format(name=name),
 288         marker={
 289             "size": 5,
 290             "color": color,
 291             "symbol": "circle",
 292         },
 293         text=hover_text,
 294         hoverinfo="x+y+text+name"
 295     )
 296     traces = [trace_samples, ]
 297
 298     trace_anomalies = plgo.Scatter(
 299         x=anomalies.keys(),
 300         y=anomalies.values,
 301         mode='markers',
 302         hoverinfo="none",
 303         showlegend=False,
 304         legendgroup=name,
 305         name="{name}: outliers".format(name=name),
 306         marker={
 307             "size": 15,
 308             "symbol": "circle-open",
 309             "color": anomalies_res,
 310             "colorscale": color_scale,
 311             "showscale": True,
 312             "line": {
 313                 "width": 2
 314             },
 315             "colorbar": {
 316                 "y": 0.5,
 317                 "len": 0.8,
 318                 "title": "Circles Marking Data Classification",
 319                 "titleside": 'right',
 320                 "titlefont": {
 321                     "size": 14
 322                 },
 323                 "tickmode": 'array',
 324                 "tickvals": [0.125, 0.375, 0.625, 0.875],
 325                 "ticktext": ["Outlier", "Regression", "Normal", "Progression"],
 326                 "ticks": "",
 327                 "ticklen": 0,
 328                 "tickangle": -90,
 329                 "thickness": 10
 330             }
 331         }
 332     )
 333     traces.append(trace_anomalies)
 334
 335     if show_moving_median:
 336         data_mean_y = pd.Series(data_y).rolling(
 337             window=moving_win_size, min_periods=2).median()
 338         trace_median = plgo.Scatter(
 339             x=data_x,
 340             y=data_mean_y,
 341             mode='lines',
 342             line={
 343                 "shape": "spline",
 344                 "width": 1,
 345                 "color": color,
 346             },
 347             name='{name}-trend'.format(name=name)
 348         )
 349         traces.append(trace_median)
 350
 351     return traces, results[-1]
 352
 353
 354 def _generate_chart(traces, layout, file_name):
 355     """Generates the whole chart using pre-generated traces.
 356
 357     :param traces: Traces for the chart.
 358     :param layout: Layout of the chart.
 359     :param file_name: File name for the generated chart.
 360     :type traces: list
 361     :type layout: dict
 362     :type file_name: str
 363     """
 364
 365     # Create plot
 366     logging.info("    Writing the file '{0}' ...".format(file_name))
 367     plpl = plgo.Figure(data=traces, layout=layout)
 368     try:
 369         ploff.plot(plpl, show_link=False, auto_open=False, filename=file_name)
 370     except plerr.PlotlyEmptyDataError:
 371         logging.warning(" No data for the plot. Skipped.")
 372
 373
 374 def _generate_all_charts(spec, input_data):
 375     """Generate all charts specified in the specification file.
 376
 377     :param spec: Specification.
 378     :param input_data: Full data set.
 379     :type spec: Specification
 380     :type input_data: InputData
 381     """
 382
 383     job_name = spec.cpta["data"].keys()[0]
 384
 385     builds_lst = list()
 386     for build in spec.input["builds"][job_name]:
 387         status = build["status"]
 388         if status != "failed" and status != "not found":
 389             builds_lst.append(str(build["build"]))
 390
 391     # Get "build ID": "date" dict:
 392     build_info = OrderedDict()
 393     for build in builds_lst:
 394         try:
 395             build_info[build] = (
 396                 input_data.metadata(job_name, build)["generated"][:14],
 397                 input_data.metadata(job_name, build)["version"]
 398             )
 399         except KeyError:
 400             build_info[build] = ("", "")
 401         logging.info("{}: {}, {}".format(build,
 402                                          build_info[build][0],
 403                                          build_info[build][1]))
 404
 405     # Create the header:
 406     csv_table = list()
 407     header = "Build Number:," + ",".join(builds_lst) + '\n'
 408     csv_table.append(header)
 409     build_dates = [x[0] for x in build_info.values()]
 410     header = "Build Date:," + ",".join(build_dates) + '\n'
 411     csv_table.append(header)
 412     vpp_versions = [x[1] for x in build_info.values()]
 413     header = "VPP Version:," + ",".join(vpp_versions) + '\n'
 414     csv_table.append(header)
 415
 416     results = list()
 417     for chart in spec.cpta["plots"]:
 418         logging.info("  Generating the chart '{0}' ...".
 419                      format(chart.get("title", "")))
 420
 421         # Transform the data
 422         data = input_data.filter_data(chart, continue_on_error=True)
 423         if data is None:
 424             logging.error("No data.")
 425             return
 426
 427         chart_data = dict()
 428         for job in data:
 429             for idx, build in job.items():
 430                 for test_name, test in build.items():
 431                     if chart_data.get(test_name, None) is None:
 432                         chart_data[test_name] = OrderedDict()
 433                     try:
 434                         chart_data[test_name][int(idx)] = \
 435                             test["result"]["throughput"]
 436                     except (KeyError, TypeError):
 437                         pass
 438
 439         # Add items to the csv table:
 440         for tst_name, tst_data in chart_data.items():
 441             tst_lst = list()
 442             for build in builds_lst:
 443                 item = tst_data.get(int(build), '')
 444                 tst_lst.append(str(item) if item else '')
 445             csv_table.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
 446
 447         for period in chart["periods"]:
 448             # Generate traces:
 449             traces = list()
 450             win_size = 14 if period == 1 else 5 if period < 20 else 3
 451             idx = 0
 452             for test_name, test_data in chart_data.items():
 453                 if not test_data:
 454                     logging.warning("No data for the test '{0}'".
 455                                     format(test_name))
 456                     continue
 457                 test_name = test_name.split('.')[-1]
 458                 trace, result = _generate_trending_traces(
 459                     test_data,
 460                     build_info=build_info,
 461                     period=period,
 462                     moving_win_size=win_size,
 463                     fill_missing=True,
 464                     use_first=False,
 465                     name='-'.join(test_name.split('-')[3:-1]),
 466                     color=COLORS[idx])
 467                 traces.extend(trace)
 468                 results.append(result)
 469                 idx += 1
 470
 471             # Generate the chart:
 472             chart["layout"]["xaxis"]["title"] = \
 473                 chart["layout"]["xaxis"]["title"].format(job=job_name)
 474             _generate_chart(traces,
 475                             chart["layout"],
 476                             file_name="{0}-{1}-{2}{3}".format(
 477                                 spec.cpta["output-file"],
 478                                 chart["output-file-name"],
 479                                 period,
 480                                 spec.cpta["output-file-type"]))
 481
 482         logging.info("  Done.")
 483
 484     # Write the tables:
 485     file_name = spec.cpta["output-file"] + "-trending"
 486     with open("{0}.csv".format(file_name), 'w') as file_handler:
 487         file_handler.writelines(csv_table)
 488
 489     txt_table = None
 490     with open("{0}.csv".format(file_name), 'rb') as csv_file:
 491         csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
 492         line_nr = 0
 493         for row in csv_content:
 494             if txt_table is None:
 495                 txt_table = prettytable.PrettyTable(row)
 496             else:
 497                 if line_nr > 1:
 498                     for idx, item in enumerate(row):
 499                         try:
 500                             row[idx] = str(round(float(item) / 1000000, 2))
 501                         except ValueError:
 502                             pass
 503                 try:
 504                     txt_table.add_row(row)
 505                 except Exception as err:
 506                     logging.warning("Error occurred while generating TXT table:"
 507                                     "\n{0}".format(err))
 508             line_nr += 1
 509         txt_table.align["Build Number:"] = "l"
 510     with open("{0}.txt".format(file_name), "w") as txt_file:
 511         txt_file.write(str(txt_table))
 512
 513     # Evaluate result:
 514     result = "PASS"
 515     for item in results:
 516         if item is None:
 517             result = "FAIL"
 518             break
 519         if item == 0.66 and result == "PASS":
 520             result = "PASS"
 521         elif item == 0.33 or item == 0.0:
 522             result = "FAIL"
 523
 524     logging.info("Partial results: {0}".format(results))
 525     logging.info("Result: {0}".format(result))
 526
 527     return result