CSIT-1104: Trending: Speed-up plots generation
[csit.git] / resources/tools/presentation/generator_CPTA.py
# Copyright (c) 2018 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generation of Continuous Performance Trending and Analysis.
"""

import multiprocessing
import os
import logging
import csv
import prettytable
import plotly.offline as ploff
import plotly.graph_objs as plgo
import plotly.exceptions as plerr
import numpy as np
import pandas as pd

from collections import OrderedDict
from datetime import datetime

from utils import split_outliers, archive_input_data, execute_command, Worker


# Command to build the html format of the report
HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
               '-b html -E ' \
               '-t html ' \
               '-D version="{date}" ' \
               '{working_dir} ' \
               '{build_dir}/'
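# For illustration only (hypothetical date and directories): the rendered
# command looks roughly like
#   sphinx-build -v -c conf_cpta -a -b html -E -t html \
#       -D version="06/18/2018 14:00 UTC" _tmp/src _build/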

# .css file for the html format of the report
THEME_OVERRIDES = """/* override table width restrictions */
.wy-nav-content {
    max-width: 1200px !important;
}
"""

COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
          "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
          "Violet", "Blue", "Yellow"]


def generate_cpta(spec, data):
    """Generate all formats and versions of the Continuous Performance Trending
    and Analysis.

    :param spec: Specification read from the specification file.
    :param data: Full data set.
    :type spec: Specification
    :type data: InputData
    :returns: Result of the generation: "PASS" or "FAIL".
    :rtype: str
    """

    logging.info("Generating the Continuous Performance Trending and Analysis "
                 "...")

    ret_code = _generate_all_charts(spec, data)

    cmd = HTML_BUILDER.format(
        date=datetime.utcnow().strftime('%m/%d/%Y %H:%M UTC'),
        working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
        build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
    execute_command(cmd)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    archive_input_data(spec)

    logging.info("Done.")

    return ret_code


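# Illustrative sketch only, unused by this module: the environment path keys
# that generate_cpta() reads from the specification.  The directory values
# below are hypothetical examples, not real configuration.
_EXAMPLE_CPTA_PATHS = {
    "DIR[WORKING,SRC]": "_tmp/src",
    "DIR[BUILD,HTML]": "_build",
    "DIR[CSS_PATCH_FILE]": "_build/_static/theme_overrides.css",
    "DIR[CSS_PATCH_FILE2]": "_build/_static/css/theme_overrides.css",
}

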
def _evaluate_results(trimmed_data, window=10):
    """Evaluate whether each sample value is a regression, normal, or a
    progression compared to the previous data within the window.
    The intervals are defined as:
    - regression: less than trimmed moving median - 3 * stdev
    - normal: between trimmed moving median - 3 * stdev and trimmed moving
      median + 3 * stdev
    - progression: more than trimmed moving median + 3 * stdev
    where stdev is the trimmed moving standard deviation.

    :param trimmed_data: Full data set with the outliers replaced by nan.
    :param window: Window size used to calculate the moving median and the
        moving stdev.
    :type trimmed_data: pandas.Series
    :type window: int
    :returns: Evaluated results.
    :rtype: list
    """

    if len(trimmed_data) > 2:
        win_size = trimmed_data.size if trimmed_data.size < window else window
        results = [0.66, ]
        tmm = trimmed_data.rolling(window=win_size, min_periods=2).median()
        tmstd = trimmed_data.rolling(window=win_size, min_periods=2).std()

        first = True
        for build_nr, value in trimmed_data.iteritems():
            if first:
                first = False
                continue
            if (np.isnan(value)
                    or np.isnan(tmm[build_nr])
                    or np.isnan(tmstd[build_nr])):
                results.append(0.0)
            elif value < (tmm[build_nr] - 3 * tmstd[build_nr]):
                results.append(0.33)
            elif value > (tmm[build_nr] + 3 * tmstd[build_nr]):
                results.append(1.0)
            else:
                results.append(0.66)
    else:
        results = [0.0, ]
        try:
            tmm = np.median(trimmed_data)
            tmstd = np.std(trimmed_data)
            if trimmed_data.values[-1] < (tmm - 3 * tmstd):
                results.append(0.33)
            elif (tmm - 3 * tmstd) <= trimmed_data.values[-1] <= (
                    tmm + 3 * tmstd):
                results.append(0.66)
            else:
                results.append(1.0)
        except TypeError:
            results.append(None)
    return results


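# A minimal usage sketch, not called anywhere in this module: it only shows
# how _evaluate_results() is meant to be fed with data already cleaned by
# split_outliers().  The sample values below are hypothetical.
def _example_evaluate_results():
    """Illustrate the classification codes returned by _evaluate_results():
    0.0 for samples replaced by nan (outliers), 0.33 for values below the
    trimmed moving median - 3 * stdev, 1.0 for values above the median
    + 3 * stdev and 0.66 for normal values.
    """
    samples = pd.Series([9.8, 10.1, 9.9, 10.0, 10.2, 9.9, 10.1, 10.0])
    trimmed, _ = split_outliers(samples, outlier_const=1.5, window=10)
    return _evaluate_results(trimmed, window=10)

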
def _generate_trending_traces(in_data, build_info, moving_win_size=10,
                              show_trend_line=True, name="", color=""):
    """Generate the trending traces:
     - samples,
     - trimmed moving median (trending line),
     - outliers, regressions, progressions.

    :param in_data: Full data set.
    :param build_info: Information about the builds.
    :param moving_win_size: Window size.
    :param show_trend_line: Show the moving median (trending line).
    :param name: Name of the plot.
    :param color: Name of the color for the plot.
    :type in_data: OrderedDict
    :type build_info: dict
    :type moving_win_size: int
    :type show_trend_line: bool
    :type name: str
    :type color: str
    :returns: Generated traces (list) and the evaluated result of the last
        sample.
    :rtype: tuple(traces, result)
    """

    data_x = list(in_data.keys())
    data_y = list(in_data.values())

    hover_text = list()
    xaxis = list()
    for idx in data_x:
        hover_text.append("vpp-ref: {0}<br>csit-ref: mrr-daily-build-{1}".
                          format(build_info[str(idx)][1].rsplit('~', 1)[0],
                                 idx))
        date = build_info[str(idx)][0]
        xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
                              int(date[9:11]), int(date[12:])))

    data_pd = pd.Series(data_y, index=xaxis)

    t_data, outliers = split_outliers(data_pd, outlier_const=1.5,
                                      window=moving_win_size)
    results = _evaluate_results(t_data, window=moving_win_size)

    anomalies = pd.Series()
    anomalies_res = list()
    for idx, item in enumerate(data_pd.items()):
        item_pd = pd.Series([item[1], ], index=[item[0], ])
        if item[0] in outliers.keys():
            anomalies = anomalies.append(item_pd)
            anomalies_res.append(0.0)
        elif results[idx] in (0.33, 1.0):
            anomalies = anomalies.append(item_pd)
            anomalies_res.append(results[idx])
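    # The four trailing class values below appear to be appended so that the
    # color array always spans the full 0.0-1.0 range, which keeps the
    # discrete colorscale of the anomaly markers mapped consistently even when
    # not every class occurs in the data.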
    anomalies_res.extend([0.0, 0.33, 0.66, 1.0])

    # Create traces
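    # Stepped colorscale with four constant bands matching the classification
    # codes: grey for outliers (0.0), red for regressions (0.33), white for
    # normal samples (0.66) and green for progressions (1.0).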
    color_scale = [[0.00, "grey"],
                   [0.25, "grey"],
                   [0.25, "red"],
                   [0.50, "red"],
                   [0.50, "white"],
                   [0.75, "white"],
                   [0.75, "green"],
                   [1.00, "green"]]

    trace_samples = plgo.Scatter(
        x=xaxis,
        y=data_y,
        mode='markers',
        line={
            "width": 1
        },
        legendgroup=name,
        name="{name}-thput".format(name=name),
        marker={
            "size": 5,
            "color": color,
            "symbol": "circle",
        },
        text=hover_text,
        hoverinfo="x+y+text+name"
    )
    traces = [trace_samples, ]

    trace_anomalies = plgo.Scatter(
        x=anomalies.keys(),
        y=anomalies.values,
        mode='markers',
        hoverinfo="none",
        showlegend=True,
        legendgroup=name,
        name="{name}-anomalies".format(name=name),
        marker={
            "size": 15,
            "symbol": "circle-open",
            "color": anomalies_res,
            "colorscale": color_scale,
            "showscale": True,
            "line": {
                "width": 2
            },
            "colorbar": {
                "y": 0.5,
                "len": 0.8,
                "title": "Circles Marking Data Classification",
                "titleside": 'right',
                "titlefont": {
                    "size": 14
                },
                "tickmode": 'array',
                "tickvals": [0.125, 0.375, 0.625, 0.875],
                "ticktext": ["Outlier", "Regression", "Normal", "Progression"],
                "ticks": "",
                "ticklen": 0,
                "tickangle": -90,
                "thickness": 10
            }
        }
    )
    traces.append(trace_anomalies)

    if show_trend_line:
        data_trend = t_data.rolling(window=moving_win_size,
                                    min_periods=2).median()
        trace_trend = plgo.Scatter(
            x=data_trend.keys(),
            y=data_trend.tolist(),
            mode='lines',
            line={
                "shape": "spline",
                "width": 1,
                "color": color,
            },
            legendgroup=name,
            name='{name}-trend'.format(name=name)
        )
        traces.append(trace_trend)

    return traces, results[-1]


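# A minimal usage sketch, not called anywhere in this module: it shows the
# shape of the inputs _generate_trending_traces() expects.  The build numbers,
# dates ("YYYYMMDD HH:MM") and VPP version strings are hypothetical examples.
def _example_generate_trending_traces():
    """Illustrate the expected in_data and build_info structures."""
    in_data = OrderedDict([(1, 9.2e6), (2, 9.3e6), (3, 9.1e6)])
    build_info = {
        "1": ("20180601 02:30", "18.07-rc0~100-gabcdef1~b1001"),
        "2": ("20180602 02:30", "18.07-rc0~101-gabcdef2~b1002"),
        "3": ("20180603 02:30", "18.07-rc0~102-gabcdef3~b1003"),
    }
    traces, result = _generate_trending_traces(
        in_data, build_info, moving_win_size=10,
        name="example-test", color=COLORS[0])
    return traces, result

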
def _generate_all_charts(spec, input_data):
    """Generate all charts specified in the specification file.

    :param spec: Specification.
    :param input_data: Full data set.
    :type spec: Specification
    :type input_data: InputData
    :returns: Overall result of the evaluation: "PASS" or "FAIL".
    :rtype: str
    """

    def _generate_chart(_, data_q, graph):
        """Generate the chart defined by the given graph specification and put
        the resulting csv rows, evaluation results and logs into the data_q
        queue.  This function is executed by the Worker processes.
        """

        logs = list()

        logging.info("  Generating the chart '{0}' ...".
                     format(graph.get("title", "")))
        logs.append(("INFO", "  Generating the chart '{0}' ...".
                     format(graph.get("title", ""))))

        job_name = spec.cpta["data"].keys()[0]

        csv_tbl = list()
        res = list()

        # Transform the data
        logs.append(("INFO", "    Creating the data set for the {0} '{1}'.".
                     format(graph.get("type", ""), graph.get("title", ""))))
        data = input_data.filter_data(graph, continue_on_error=True)
        if data is None:
            logging.error("No data.")
            return

        chart_data = dict()
        for job in data:
            for index, bld in job.items():
                for test_name, test in bld.items():
                    if chart_data.get(test_name, None) is None:
                        chart_data[test_name] = OrderedDict()
                    try:
                        chart_data[test_name][int(index)] = \
                            test["result"]["throughput"]
                    except (KeyError, TypeError):
                        pass

        # Add items to the csv table:
        for tst_name, tst_data in chart_data.items():
            tst_lst = list()
            for bld in builds_lst:
                itm = tst_data.get(int(bld), '')
                tst_lst.append(str(itm))
            csv_tbl.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
        # Generate traces:
        traces = list()
        win_size = 14
        index = 0
        for test_name, test_data in chart_data.items():
            if not test_data:
                logs.append(("WARNING", "No data for the test '{0}'".
                             format(test_name)))
                continue
            test_name = test_name.split('.')[-1]
            trace, rslt = _generate_trending_traces(
                test_data,
                build_info=build_info,
                moving_win_size=win_size,
                name='-'.join(test_name.split('-')[3:-1]),
                color=COLORS[index])
            traces.extend(trace)
            res.append(rslt)
            index += 1

        if traces:
            # Generate the chart:
            graph["layout"]["xaxis"]["title"] = \
                graph["layout"]["xaxis"]["title"].format(job=job_name)
            name_file = "{0}-{1}{2}".format(spec.cpta["output-file"],
                                            graph["output-file-name"],
                                            spec.cpta["output-file-type"])

            logs.append(("INFO", "    Writing the file '{0}' ...".
                         format(name_file)))
            plpl = plgo.Figure(data=traces, layout=graph["layout"])
            try:
                ploff.plot(plpl, show_link=False, auto_open=False,
                           filename=name_file)
            except plerr.PlotlyEmptyDataError:
                logs.append(("WARNING", "No data for the plot. Skipped."))

        logging.info("  Done.")

        data_out = {
            "csv_table": csv_tbl,
            "results": res,
            "logs": logs
        }
        data_q.put(data_out)

    job_name = spec.cpta["data"].keys()[0]

    builds_lst = list()
    for build in spec.input["builds"][job_name]:
        status = build["status"]
        if status != "failed" and status != "not found":
            builds_lst.append(str(build["build"]))

    # Get "build ID": "date" dict:
    build_info = OrderedDict()
    for build in builds_lst:
        try:
            build_info[build] = (
                input_data.metadata(job_name, build)["generated"][:14],
                input_data.metadata(job_name, build)["version"]
            )
        except KeyError:
            build_info[build] = ("", "")

    work_queue = multiprocessing.JoinableQueue()
    manager = multiprocessing.Manager()
    data_queue = manager.Queue()
    cpus = multiprocessing.cpu_count()

    workers = list()
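    # Spawn one worker process per CPU and pin each worker to its own core
    # with taskset so the charts are generated in parallel.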
    for cpu in range(cpus):
        worker = Worker(work_queue,
                        data_queue,
                        _generate_chart)
        worker.daemon = True
        worker.start()
        workers.append(worker)
        os.system("taskset -p -c {0} {1} > /dev/null 2>&1".
                  format(cpu, worker.pid))

    for chart in spec.cpta["plots"]:
        work_queue.put((chart, ))
    work_queue.join()

    results = list()

    # Create the header:
    csv_table = list()
    header = "Build Number:," + ",".join(builds_lst) + '\n'
    csv_table.append(header)
    build_dates = [x[0] for x in build_info.values()]
    header = "Build Date:," + ",".join(build_dates) + '\n'
    csv_table.append(header)
    vpp_versions = [x[1] for x in build_info.values()]
    header = "VPP Version:," + ",".join(vpp_versions) + '\n'
    csv_table.append(header)

    while not data_queue.empty():
        result = data_queue.get()

        results.extend(result["results"])
        csv_table.extend(result["csv_table"])

        for item in result["logs"]:
            if item[0] == "INFO":
                logging.info(item[1])
            elif item[0] == "ERROR":
                logging.error(item[1])
            elif item[0] == "DEBUG":
                logging.debug(item[1])
            elif item[0] == "CRITICAL":
                logging.critical(item[1])
            elif item[0] == "WARNING":
                logging.warning(item[1])

    del data_queue

    # Terminate all workers
    for worker in workers:
        worker.terminate()
        worker.join()

    # Write the tables:
    file_name = spec.cpta["output-file"] + "-trending"
    with open("{0}.csv".format(file_name), 'w') as file_handler:
        file_handler.writelines(csv_table)

    txt_table = None
    with open("{0}.csv".format(file_name), 'rb') as csv_file:
        csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
        line_nr = 0
        for row in csv_content:
            if txt_table is None:
                txt_table = prettytable.PrettyTable(row)
            else:
                if line_nr > 1:
                    for idx, item in enumerate(row):
                        try:
                            row[idx] = str(round(float(item) / 1000000, 2))
                        except ValueError:
                            pass
                try:
                    txt_table.add_row(row)
                except Exception as err:
                    logging.warning("Error occurred while generating TXT table:"
                                    "\n{0}".format(err))
            line_nr += 1
        txt_table.align["Build Number:"] = "l"
    with open("{0}.txt".format(file_name), "w") as txt_file:
        txt_file.write(str(txt_table))

    # Evaluate result:
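    # Anomalies coded 0.0 (outlier) or 0.33 (regression), as well as a missing
    # evaluation (None), turn the overall result into FAIL; 0.66 (normal) and
    # 1.0 (progression) leave a PASS untouched.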
    result = "PASS"
    for item in results:
        if item is None:
            result = "FAIL"
            break
        if item == 0.66 and result == "PASS":
            result = "PASS"
        elif item == 0.33 or item == 0.0:
            result = "FAIL"

    logging.info("Partial results: {0}".format(results))
    logging.info("Result: {0}".format(result))

    return result