resources/tools/presentation/generator_CPTA.py

   1 # Copyright (c) 2018 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Generation of Continuous Performance Trending and Analysis.
  15 """
  16
  17 import datetime
  18 import logging
  19 import csv
  20 import prettytable
  21 import plotly.offline as ploff
  22 import plotly.graph_objs as plgo
  23 import plotly.exceptions as plerr
  24 import numpy as np
  25 import pandas as pd
  26
  27 from collections import OrderedDict
  28 from utils import find_outliers, archive_input_data, execute_command
  29
  30
  31 # Command to build the html format of the report
  32 HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
  33                '-b html -E ' \
  34                '-t html ' \
  35                '-D version="Generated on {date}" ' \
  36                '{working_dir} ' \
  37                '{build_dir}/'
  38
  39 # .css file for the html format of the report
  40 THEME_OVERRIDES = """/* override table width restrictions */
  41 .wy-nav-content {
  42     max-width: 1200px !important;
  43 }
  44 """
  45
  46 COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
  47           "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
  48           "Violet", "Blue", "Yellow"]
  49
  50
  51 def generate_cpta(spec, data):
  52     """Generate all formats and versions of the Continuous Performance Trending
  53     and Analysis.
  54
  55     :param spec: Specification read from the specification file.
  56     :param data: Full data set.
  57     :type spec: Specification
  58     :type data: InputData
  59     """
  60
  61     logging.info("Generating the Continuous Performance Trending and Analysis "
  62                  "...")
  63
  64     ret_code = _generate_all_charts(spec, data)
  65
  66     cmd = HTML_BUILDER.format(
  67         date=datetime.date.today().strftime('%d-%b-%Y'),
  68         working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
  69         build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
  70     execute_command(cmd)
  71
  72     with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
  73             css_file:
  74         css_file.write(THEME_OVERRIDES)
  75
  76     with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
  77             css_file:
  78         css_file.write(THEME_OVERRIDES)
  79
  80     archive_input_data(spec)
  81
  82     logging.info("Done.")
  83
  84     return ret_code
  85
  86
  87 def _select_data(in_data, period, fill_missing=False, use_first=False):
  88     """Select the data from the full data set. The selection is done by picking
  89     the samples depending on the period: period = 1: All, period = 2: every
  90     second sample, period = 3: every third sample ...
  91
  92     :param in_data: Full set of data.
  93     :param period: Sampling period.
  94     :param fill_missing: If the chosen sample is missing in the full set, its
  95     nearest neighbour is used.
  96     :param use_first: Use the first sample even though it is not chosen.
  97     :type in_data: OrderedDict
  98     :type period: int
  99     :type fill_missing: bool
 100     :type use_first: bool
 101     :returns: Reduced data.
 102     :rtype: OrderedDict
 103     """
 104
 105     first_idx = min(in_data.keys())
 106     last_idx = max(in_data.keys())
 107
 108     idx = last_idx
 109     data_dict = dict()
 110     if use_first:
 111         data_dict[first_idx] = in_data[first_idx]
 112     while idx >= first_idx:
 113         data = in_data.get(idx, None)
 114         if data is None:
 115             if fill_missing:
 116                 threshold = int(round(idx - period / 2)) + 1 - period % 2
 117                 idx_low = first_idx if threshold < first_idx else threshold
 118                 threshold = int(round(idx + period / 2))
 119                 idx_high = last_idx if threshold > last_idx else threshold
 120
 121                 flag_l = True
 122                 flag_h = True
 123                 idx_lst = list()
 124                 inc = 1
 125                 while flag_l or flag_h:
 126                     if idx + inc > idx_high:
 127                         flag_h = False
 128                     else:
 129                         idx_lst.append(idx + inc)
 130                     if idx - inc < idx_low:
 131                         flag_l = False
 132                     else:
 133                         idx_lst.append(idx - inc)
 134                     inc += 1
 135
 136                 for i in idx_lst:
 137                     if i in in_data.keys():
 138                         data_dict[i] = in_data[i]
 139                         break
 140         else:
 141             data_dict[idx] = data
 142         idx -= period
 143
 144     return OrderedDict(sorted(data_dict.items(), key=lambda t: t[0]))
 145
 146
 147 def _evaluate_results(in_data, trimmed_data, window=10):
 148     """Evaluates if the sample value is regress, normal or progress compared to
 149     previous data within the window.
 150     We use the intervals defined as:
 151     - regress: less than median - 3 * stdev
 152     - normal: between median - 3 * stdev and median + 3 * stdev
 153     - progress: more than median + 3 * stdev
 154
 155     :param in_data: Full data set.
 156     :param trimmed_data: Full data set without the outliers.
 157     :param window: Window size used to calculate moving median and moving stdev.
 158     :type in_data: pandas.Series
 159     :type trimmed_data: pandas.Series
 160     :type window: int
 161     :returns: Evaluated results.
 162     :rtype: list
 163     """
 164
 165     if len(in_data) > 2:
 166         win_size = in_data.size if in_data.size < window else window
 167         results = [0.0, ] * win_size
 168         median = in_data.rolling(window=win_size).median()
 169         stdev_t = trimmed_data.rolling(window=win_size, min_periods=2).std()
 170         m_vals = median.values
 171         s_vals = stdev_t.values
 172         d_vals = in_data.values
 173         for day in range(win_size, in_data.size):
 174             if np.isnan(m_vals[day - 1]) or np.isnan(s_vals[day - 1]):
 175                 results.append(0.0)
 176             elif d_vals[day] < (m_vals[day - 1] - 3 * s_vals[day - 1]):
 177                 results.append(0.33)
 178             elif (m_vals[day - 1] - 3 * s_vals[day - 1]) <= d_vals[day] <= \
 179                     (m_vals[day - 1] + 3 * s_vals[day - 1]):
 180                 results.append(0.66)
 181             else:
 182                 results.append(1.0)
 183     else:
 184         results = [0.0, ]
 185         try:
 186             median = np.median(in_data)
 187             stdev = np.std(in_data)
 188             if in_data.values[-1] < (median - 3 * stdev):
 189                 results.append(0.33)
 190             elif (median - 3 * stdev) <= in_data.values[-1] <= (
 191                     median + 3 * stdev):
 192                 results.append(0.66)
 193             else:
 194                 results.append(1.0)
 195         except TypeError:
 196             results.append(None)
 197     return results
 198
 199
 200 def _generate_trending_traces(in_data, period, moving_win_size=10,
 201                               fill_missing=True, use_first=False,
 202                               show_moving_median=True, name="", color=""):
 203     """Generate the trending traces:
 204      - samples,
 205      - moving median (trending plot)
 206      - outliers, regress, progress
 207
 208     :param in_data: Full data set.
 209     :param period: Sampling period.
 210     :param moving_win_size: Window size.
 211     :param fill_missing: If the chosen sample is missing in the full set, its
 212     nearest neighbour is used.
 213     :param use_first: Use the first sample even though it is not chosen.
 214     :param show_moving_median: Show moving median (trending plot).
 215     :param name: Name of the plot
 216     :param color: Name of the color for the plot.
 217     :type in_data: OrderedDict
 218     :type period: int
 219     :type moving_win_size: int
 220     :type fill_missing: bool
 221     :type use_first: bool
 222     :type show_moving_median: bool
 223     :type name: str
 224     :type color: str
 225     :returns: Generated traces (list) and the evaluated result (float).
 226     :rtype: tuple(traces, result)
 227     """
 228
 229     if period > 1:
 230         in_data = _select_data(in_data, period,
 231                                fill_missing=fill_missing,
 232                                use_first=use_first)
 233
 234     data_x = [key for key in in_data.keys()]
 235     data_y = [val for val in in_data.values()]
 236     data_pd = pd.Series(data_y, index=data_x)
 237
 238     t_data, outliers = find_outliers(data_pd)
 239
 240     results = _evaluate_results(data_pd, t_data, window=moving_win_size)
 241
 242     anomalies = pd.Series()
 243     anomalies_res = list()
 244     for idx, item in enumerate(in_data.items()):
 245         item_pd = pd.Series([item[1], ], index=[item[0], ])
 246         if item[0] in outliers.keys():
 247             anomalies = anomalies.append(item_pd)
 248             anomalies_res.append(0.0)
 249         elif results[idx] in (0.33, 1.0):
 250             anomalies = anomalies.append(item_pd)
 251             anomalies_res.append(results[idx])
 252     anomalies_res.extend([0.0, 0.33, 0.66, 1.0])
 253
 254     # Create traces
 255     color_scale = [[0.00, "grey"],
 256                    [0.25, "grey"],
 257                    [0.25, "red"],
 258                    [0.50, "red"],
 259                    [0.50, "white"],
 260                    [0.75, "white"],
 261                    [0.75, "green"],
 262                    [1.00, "green"]]
 263
 264     trace_samples = plgo.Scatter(
 265         x=data_x,
 266         y=data_y,
 267         mode='markers',
 268         line={
 269             "width": 1
 270         },
 271         name="{name}-thput".format(name=name),
 272         marker={
 273             "size": 5,
 274             "color": color,
 275             "symbol": "circle",
 276         },
 277     )
 278     traces = [trace_samples, ]
 279
 280     trace_anomalies = plgo.Scatter(
 281         x=anomalies.keys(),
 282         y=anomalies.values,
 283         mode='markers',
 284         hoverinfo="none",
 285         showlegend=False,
 286         legendgroup=name,
 287         name="{name}: outliers".format(name=name),
 288         marker={
 289             "size": 15,
 290             "symbol": "circle-open",
 291             "color": anomalies_res,
 292             "colorscale": color_scale,
 293             "showscale": True,
 294             "line": {
 295                 "width": 2
 296             },
 297             "colorbar": {
 298                 "y": 0.5,
 299                 "len": 0.8,
 300                 "title": "Circles Marking Data Classification",
 301                 "titleside": 'right',
 302                 "titlefont": {
 303                     "size": 14
 304                 },
 305                 "tickmode": 'array',
 306                 "tickvals": [0.125, 0.375, 0.625, 0.875],
 307                 "ticktext": ["Outlier", "Regression", "Normal", "Progression"],
 308                 "ticks": "",
 309                 "ticklen": 0,
 310                 "tickangle": -90,
 311                 "thickness": 10
 312             }
 313         }
 314     )
 315     traces.append(trace_anomalies)
 316
 317     if show_moving_median:
 318         data_mean_y = pd.Series(data_y).rolling(
 319             window=moving_win_size, min_periods=2).median()
 320         trace_median = plgo.Scatter(
 321             x=data_x,
 322             y=data_mean_y,
 323             mode='lines',
 324             line={
 325                 "shape": "spline",
 326                 "width": 1,
 327                 "color": color,
 328             },
 329             name='{name}-trend'.format(name=name)
 330         )
 331         traces.append(trace_median)
 332
 333     return traces, results[-1]
 334
 335
 336 def _generate_chart(traces, layout, file_name):
 337     """Generates the whole chart using pre-generated traces.
 338
 339     :param traces: Traces for the chart.
 340     :param layout: Layout of the chart.
 341     :param file_name: File name for the generated chart.
 342     :type traces: list
 343     :type layout: dict
 344     :type file_name: str
 345     """
 346
 347     # Create plot
 348     logging.info("    Writing the file '{0}' ...".format(file_name))
 349     plpl = plgo.Figure(data=traces, layout=layout)
 350     try:
 351         ploff.plot(plpl, show_link=False, auto_open=False, filename=file_name)
 352     except plerr.PlotlyEmptyDataError:
 353         logging.warning(" No data for the plot. Skipped.")
 354
 355
 356 def _generate_all_charts(spec, input_data):
 357     """Generate all charts specified in the specification file.
 358
 359     :param spec: Specification.
 360     :param input_data: Full data set.
 361     :type spec: Specification
 362     :type input_data: InputData
 363     """
 364
 365     builds = spec.cpta["data"].values()[0]
 366     job_name = spec.cpta["data"].keys()[0]
 367     # builds_lst = [str(build) for build in range(builds[0], builds[-1] + 1)]
 368
 369     builds_lst = list()
 370     for build in range(builds[0], builds[-1] + 1):
 371         status = spec.input["builds"][job_name][build]["status"]
 372         if status != "failed" and status != "not found":
 373             builds_lst.append(str(build))
 374     print(builds_lst)
 375     # Get "build ID": "date" dict:
 376     build_dates = dict()
 377     for build in builds_lst:
 378         try:
 379             build_dates[build] = \
 380                 input_data.metadata(job_name, build)["generated"][:14]
 381         except KeyError:
 382             pass
 383
 384     # Create the header:
 385     csv_table = list()
 386     header = "Build Number:," + ",".join(builds_lst) + '\n'
 387     csv_table.append(header)
 388     header = "Build Date:," + ",".join(build_dates.values()) + '\n'
 389     csv_table.append(header)
 390
 391     results = list()
 392     for chart in spec.cpta["plots"]:
 393         logging.info("  Generating the chart '{0}' ...".
 394                      format(chart.get("title", "")))
 395
 396         # Transform the data
 397         data = input_data.filter_data(chart, continue_on_error=True)
 398         if data is None:
 399             logging.error("No data.")
 400             return
 401
 402         chart_data = dict()
 403         for job in data:
 404             for idx, build in job.items():
 405                 for test_name, test in build.items():
 406                     if chart_data.get(test_name, None) is None:
 407                         chart_data[test_name] = OrderedDict()
 408                     try:
 409                         chart_data[test_name][int(idx)] = \
 410                             test["result"]["throughput"]
 411                     except (KeyError, TypeError):
 412                         pass
 413
 414         # Add items to the csv table:
 415         for tst_name, tst_data in chart_data.items():
 416             tst_lst = list()
 417             for build in builds_lst:
 418                 item = tst_data.get(int(build), '')
 419                 tst_lst.append(str(item) if item else '')
 420             csv_table.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
 421
 422         for period in chart["periods"]:
 423             # Generate traces:
 424             traces = list()
 425             win_size = 10 if period == 1 else 5 if period < 20 else 3
 426             idx = 0
 427             for test_name, test_data in chart_data.items():
 428                 if not test_data:
 429                     logging.warning("No data for the test '{0}'".
 430                                     format(test_name))
 431                     continue
 432                 test_name = test_name.split('.')[-1]
 433                 trace, result = _generate_trending_traces(
 434                     test_data,
 435                     period=period,
 436                     moving_win_size=win_size,
 437                     fill_missing=True,
 438                     use_first=False,
 439                     name='-'.join(test_name.split('-')[3:-1]),
 440                     color=COLORS[idx])
 441                 traces.extend(trace)
 442                 results.append(result)
 443                 idx += 1
 444
 445             # Generate the chart:
 446             chart["layout"]["xaxis"]["title"] = \
 447                 chart["layout"]["xaxis"]["title"].format(job=job_name)
 448             _generate_chart(traces,
 449                             chart["layout"],
 450                             file_name="{0}-{1}-{2}{3}".format(
 451                                 spec.cpta["output-file"],
 452                                 chart["output-file-name"],
 453                                 period,
 454                                 spec.cpta["output-file-type"]))
 455
 456         logging.info("  Done.")
 457
 458     # Write the tables:
 459     file_name = spec.cpta["output-file"] + "-trending"
 460     with open("{0}.csv".format(file_name), 'w') as file_handler:
 461         file_handler.writelines(csv_table)
 462
 463     txt_table = None
 464     with open("{0}.csv".format(file_name), 'rb') as csv_file:
 465         csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
 466         line_nr = 0
 467         for row in csv_content:
 468             if txt_table is None:
 469                 txt_table = prettytable.PrettyTable(row)
 470             else:
 471                 if line_nr > 1:
 472                     for idx, item in enumerate(row):
 473                         try:
 474                             row[idx] = str(round(float(item) / 1000000, 2))
 475                         except ValueError:
 476                             pass
 477                 txt_table.add_row(row)
 478             line_nr += 1
 479         txt_table.align["Build Number:"] = "l"
 480     with open("{0}.txt".format(file_name), "w") as txt_file:
 481         txt_file.write(str(txt_table))
 482
 483     # Evaluate result:
 484     result = "PASS"
 485     for item in results:
 486         if item is None:
 487             result = "FAIL"
 488             break
 489         if item == 0.66 and result == "PASS":
 490             result = "PASS"
 491         elif item == 0.33 or item == 0.0:
 492             result = "FAIL"
 493
 494     logging.info("Partial results: {0}".format(results))
 495     logging.info("Result: {0}".format(result))
 496
 497     return result