CSIT-1133: Cosmetic improvements in trending plots
csit.git: resources/tools/presentation/generator_CPTA.py
# Copyright (c) 2018 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generation of Continuous Performance Trending and Analysis.
"""

import multiprocessing
import os
import logging
import csv
import prettytable
import plotly.offline as ploff
import plotly.graph_objs as plgo
import plotly.exceptions as plerr
import pandas as pd

from collections import OrderedDict
from datetime import datetime

from utils import split_outliers, archive_input_data, execute_command,\
    classify_anomalies, Worker


# Command to build the html format of the report
HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
               '-b html -E ' \
               '-t html ' \
               '-D version="{date}" ' \
               '{working_dir} ' \
               '{build_dir}/'
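# Example of a rendered command (illustrative values only; the real
# directories come from the specification file):
#   sphinx-build -v -c conf_cpta -a -b html -E -t html \
#       -D version="05/25/2018 13:07 UTC" <working_dir> <build_dir>/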

# .css file for the html format of the report
THEME_OVERRIDES = """/* override table width restrictions */
.wy-nav-content {
    max-width: 1200px !important;
}
"""

COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
          "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
          "Violet", "Blue", "Yellow"]


def generate_cpta(spec, data):
    """Generate all formats and versions of the Continuous Performance Trending
    and Analysis.

    :param spec: Specification read from the specification file.
    :param data: Full data set.
    :type spec: Specification
    :type data: InputData
    :returns: Result of the evaluation: "PASS" or "FAIL".
    :rtype: str
    """

    logging.info("Generating the Continuous Performance Trending and Analysis "
                 "...")

    ret_code = _generate_all_charts(spec, data)

    cmd = HTML_BUILDER.format(
        date=datetime.utcnow().strftime('%m/%d/%Y %H:%M UTC'),
        working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
        build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
    execute_command(cmd)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    archive_input_data(spec)

    logging.info("Done.")

    return ret_code


def _generate_trending_traces(in_data, job_name, build_info, moving_win_size=10,
                              show_trend_line=True, name="", color=""):
    """Generate the trending traces:
     - samples,
     - trimmed moving median (trend line),
     - anomalies (outliers, regressions, progressions).

    :param in_data: Full data set.
    :param job_name: The name of the job which generated the data.
    :param build_info: Information about the builds.
    :param moving_win_size: Window size.
    :param show_trend_line: Show moving median (trend line).
    :param name: Name of the plot.
    :param color: Name of the color for the plot.
    :type in_data: OrderedDict
    :type job_name: str
    :type build_info: dict
    :type moving_win_size: int
    :type show_trend_line: bool
    :type name: str
    :type color: str
    :returns: Generated traces (list) and the evaluated result.
    :rtype: tuple(traces, result)
    """

    data_x = list(in_data.keys())
    data_y = list(in_data.values())
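    # "in_data" maps build numbers to throughput samples, e.g. (values
    # illustrative only):
    #   OrderedDict([(152, 15163916.0), (153, 15203787.0), (154, 15188540.0)])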

    hover_text = list()
    xaxis = list()
    for idx in data_x:
        if "dpdk" in job_name:
            hover_text.append("dpdk-ref: {0}<br>csit-ref: mrr-weekly-build-{1}".
                              format(build_info[job_name][str(idx)][1].
                                     rsplit('~', 1)[0], idx))
        elif "vpp" in job_name:
            hover_text.append("vpp-ref: {0}<br>csit-ref: mrr-daily-build-{1}".
                              format(build_info[job_name][str(idx)][1].
                                     rsplit('~', 1)[0], idx))
        date = build_info[job_name][str(idx)][0]
        xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
                              int(date[9:11]), int(date[12:])))
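        # The "generated" metadata is assumed to be a "YYYYMMDD HH:MM" string,
        # so e.g. "20180525 13:07" parses to datetime(2018, 5, 25, 13, 7).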

    data_pd = pd.Series(data_y, index=xaxis)

    t_data, outliers = split_outliers(data_pd, outlier_const=1.5,
                                      window=moving_win_size)
    anomaly_classification = classify_anomalies(t_data, window=moving_win_size)

    anomalies = pd.Series()
    anomalies_colors = list()
    anomaly_color = {
        "outlier": 0.0,
        "regression": 0.33,
        "normal": 0.66,
        "progression": 1.0
    }
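    # These fractional values are not colors themselves; they are positions
    # on the normalized [0, 1] colorscale of the anomaly trace below, where
    # each class falls into its own color band.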
    if anomaly_classification:
        for idx, item in enumerate(data_pd.items()):
            if anomaly_classification[idx] in \
                    ("outlier", "regression", "progression"):
                anomalies = anomalies.append(pd.Series([item[1], ],
                                                       index=[item[0], ]))
                anomalies_colors.append(
                    anomaly_color[anomaly_classification[idx]])
        anomalies_colors.extend([0.0, 0.33, 0.66, 1.0])
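        # Appending all four scale positions ensures the color array always
        # spans the full [0, 1] range, so plotly's normalization maps each
        # class to the same band even when only one class occurs in the data
        # (assumed intent; the extra values have no matching data points).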

    # Create traces

    trace_samples = plgo.Scatter(
        x=xaxis,
        y=data_y,
        mode='markers',
        line={
            "width": 1
        },
        showlegend=True,
        legendgroup=name,
        name="{name}".format(name=name),
        marker={
            "size": 5,
            "color": color,
            "symbol": "circle",
        },
        text=hover_text,
        hoverinfo="x+y+text+name"
    )
    traces = [trace_samples, ]

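    # The duplicated break points in "colorscale" below create four discrete
    # bands instead of a gradient: [0.00, 0.25) grey (outlier), [0.25, 0.50)
    # red (regression), [0.50, 0.75) white (normal) and [0.75, 1.00] green
    # (progression). "tickvals" places each label in the middle of its band.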
    trace_anomalies = plgo.Scatter(
        x=anomalies.keys(),
        y=anomalies.values,
        mode='markers',
        hoverinfo="none",
        showlegend=False,
        legendgroup=name,
        name="{name}-anomalies".format(name=name),
        marker={
            "size": 15,
            "symbol": "circle-open",
            "color": anomalies_colors,
            "colorscale": [[0.00, "grey"],
                           [0.25, "grey"],
                           [0.25, "red"],
                           [0.50, "red"],
                           [0.50, "white"],
                           [0.75, "white"],
                           [0.75, "green"],
                           [1.00, "green"]],
            "showscale": True,
            "line": {
                "width": 2
            },
            "colorbar": {
                "y": 0.5,
                "len": 0.8,
                "title": "Circles Marking Data Classification",
                "titleside": 'right',
                "titlefont": {
                    "size": 14
                },
                "tickmode": 'array',
                "tickvals": [0.125, 0.375, 0.625, 0.875],
                "ticktext": ["Outlier", "Regression", "Normal", "Progression"],
                "ticks": "",
                "ticklen": 0,
                "tickangle": -90,
                "thickness": 10
            }
        }
    )
    traces.append(trace_anomalies)

    if show_trend_line:
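        # The trend line is a moving median over the outlier-trimmed series
        # (t_data), so isolated spikes do not bend the line; min_periods=2
        # lets the line start after the second sample.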
        data_trend = t_data.rolling(window=moving_win_size,
                                    min_periods=2).median()
        trace_trend = plgo.Scatter(
            x=data_trend.keys(),
            y=data_trend.tolist(),
            mode='lines',
            line={
                "shape": "spline",
                "width": 1,
                "color": color,
            },
            showlegend=False,
            legendgroup=name,
            name='{name}-trend'.format(name=name)
        )
        traces.append(trace_trend)

    if anomaly_classification:
        return traces, anomaly_classification[-1]
    else:
        return traces, None


def _generate_all_charts(spec, input_data):
    """Generate all charts specified in the specification file.

    :param spec: Specification.
    :param input_data: Full data set.
    :type spec: Specification
    :type input_data: InputData
    :returns: Overall result of the evaluation: "PASS" or "FAIL".
    :rtype: str
    """

    def _generate_chart(_, data_q, graph):
        """Generate one chart and put the results into the data queue.

        :param data_q: Queue where the generated data (logs, csv rows and
            partial results) are put.
        :param graph: Specification of the chart to generate.
        """

        logs = list()

        logging.info("  Generating the chart '{0}' ...".
                     format(graph.get("title", "")))
        logs.append(("INFO", "  Generating the chart '{0}' ...".
                     format(graph.get("title", ""))))

        job_name = graph["data"].keys()[0]
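        # Each chart is specified for a single job, so the first key is taken
        # (dict.keys() returns a list under Python 2, which this module
        # targets; see the iteritems() calls below).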

        csv_tbl = list()
        res = list()

        # Transform the data
        logs.append(("INFO", "    Creating the data set for the {0} '{1}'.".
                     format(graph.get("type", ""), graph.get("title", ""))))
        data = input_data.filter_data(graph, continue_on_error=True)
        if data is None:
            logging.error("No data.")
            return

        chart_data = dict()
        for job, job_data in data.iteritems():
            if job != job_name:
                continue
            for index, bld in job_data.items():
                for test_name, test in bld.items():
                    if chart_data.get(test_name, None) is None:
                        chart_data[test_name] = OrderedDict()
                    try:
                        chart_data[test_name][int(index)] = \
                            test["result"]["throughput"]
                    except (KeyError, TypeError):
                        pass

        # Add items to the csv table:
        for tst_name, tst_data in chart_data.items():
            tst_lst = list()
            for bld in builds_dict[job_name]:
                itm = tst_data.get(int(bld), '')
                tst_lst.append(str(itm))
            csv_tbl.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
        # Generate traces:
        traces = list()
        win_size = 14
        index = 0
        for test_name, test_data in chart_data.items():
            if not test_data:
                logs.append(("WARNING", "No data for the test '{0}'".
                             format(test_name)))
                continue
            test_name = test_name.split('.')[-1]
            trace, rslt = _generate_trending_traces(
                test_data,
                job_name=job_name,
                build_info=build_info,
                moving_win_size=win_size,
                name='-'.join(test_name.split('-')[3:-1]),
                color=COLORS[index])
            traces.extend(trace)
            res.append(rslt)
            index += 1

        if traces:
            # Generate the chart:
            graph["layout"]["xaxis"]["title"] = \
                graph["layout"]["xaxis"]["title"].format(job=job_name)
            name_file = "{0}-{1}{2}".format(spec.cpta["output-file"],
                                            graph["output-file-name"],
                                            spec.cpta["output-file-type"])

            logs.append(("INFO", "    Writing the file '{0}' ...".
                         format(name_file)))
            plpl = plgo.Figure(data=traces, layout=graph["layout"])
            try:
                ploff.plot(plpl, show_link=False, auto_open=False,
                           filename=name_file)
            except plerr.PlotlyEmptyDataError:
                logs.append(("WARNING", "No data for the plot. Skipped."))

        data_out = {
            "job_name": job_name,
            "csv_table": csv_tbl,
            "results": res,
            "logs": logs
        }
        data_q.put(data_out)

    builds_dict = dict()
    for job in spec.input["builds"].keys():
        if builds_dict.get(job, None) is None:
            builds_dict[job] = list()
        for build in spec.input["builds"][job]:
            status = build["status"]
            if status not in ("failed", "not found"):
                builds_dict[job].append(str(build["build"]))

    # Create a "build ID": (date, version) dict:
    build_info = dict()
    for job_name, job_data in builds_dict.items():
        if build_info.get(job_name, None) is None:
            build_info[job_name] = OrderedDict()
        for build in job_data:
            build_info[job_name][build] = (
                input_data.metadata(job_name, build).get("generated", ""),
                input_data.metadata(job_name, build).get("version", "")
            )
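    # Resulting structure (job name and values illustrative only):
    #   build_info = {
    #       "csit-vpp-perf-mrr-daily-master": OrderedDict([
    #           ("152", ("20180525 13:07", "18.07-rc0~123-g1234567~b22")),
    #           ...
    #       ])
    #   }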

    work_queue = multiprocessing.JoinableQueue()
    manager = multiprocessing.Manager()
    data_queue = manager.Queue()
    cpus = multiprocessing.cpu_count()

    workers = list()
    for cpu in range(cpus):
        worker = Worker(work_queue,
                        data_queue,
                        _generate_chart)
        worker.daemon = True
        worker.start()
        workers.append(worker)
        os.system("taskset -p -c {0} {1} > /dev/null 2>&1".
                  format(cpu, worker.pid))
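    # taskset pins each worker process to its own CPU core so the chart
    # generation does not pile up on a single core; the output is discarded
    # because the call is best-effort.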

    for chart in spec.cpta["plots"]:
        work_queue.put((chart, ))
    work_queue.join()

    anomaly_classifications = list()

    # Create the header:
    csv_tables = dict()
    for job_name in builds_dict.keys():
        if csv_tables.get(job_name, None) is None:
            csv_tables[job_name] = list()
        header = "Build Number:," + ",".join(builds_dict[job_name]) + '\n'
        csv_tables[job_name].append(header)
        build_dates = [x[0] for x in build_info[job_name].values()]
        header = "Build Date:," + ",".join(build_dates) + '\n'
        csv_tables[job_name].append(header)
        versions = [x[1] for x in build_info[job_name].values()]
        header = "Version:," + ",".join(versions) + '\n'
        csv_tables[job_name].append(header)
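    # The three header rows look like (values illustrative only):
    #   Build Number:,150,151,152
    #   Build Date:,20180523 13:05,20180524 13:06,20180525 13:07
    #   Version:,18.07-rc0~120,18.07-rc0~121,18.07-rc0~123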

    while not data_queue.empty():
        result = data_queue.get()

        anomaly_classifications.extend(result["results"])
        csv_tables[result["job_name"]].extend(result["csv_table"])

        for item in result["logs"]:
            if item[0] == "INFO":
                logging.info(item[1])
            elif item[0] == "ERROR":
                logging.error(item[1])
            elif item[0] == "DEBUG":
                logging.debug(item[1])
            elif item[0] == "CRITICAL":
                logging.critical(item[1])
            elif item[0] == "WARNING":
                logging.warning(item[1])

    del data_queue

    # Terminate all workers
    for worker in workers:
        worker.terminate()
        worker.join()

    # Write the tables:
    for job_name, csv_table in csv_tables.items():
        file_name = spec.cpta["output-file"] + "-" + job_name + "-trending"
        with open("{0}.csv".format(file_name), 'w') as file_handler:
            file_handler.writelines(csv_table)

        txt_table = None
        with open("{0}.csv".format(file_name), 'rb') as csv_file:
            csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
            line_nr = 0
            for row in csv_content:
                if txt_table is None:
                    txt_table = prettytable.PrettyTable(row)
                else:
                    if line_nr > 1:
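                        # Data rows (line_nr > 1) hold throughput samples,
                        # presumably in pps; scale them to Mpps, rounded to
                        # two decimal places, for the text table.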
                        for idx, item in enumerate(row):
                            try:
                                row[idx] = str(round(float(item) / 1000000, 2))
                            except ValueError:
                                pass
                    try:
                        txt_table.add_row(row)
                    except Exception as err:
                        logging.warning("Error occurred while generating TXT "
                                        "table:\n{0}".format(err))
                line_nr += 1
            txt_table.align["Build Number:"] = "l"
        with open("{0}.txt".format(file_name), "w") as txt_file:
            txt_file.write(str(txt_table))

    # Evaluate result:
    if anomaly_classifications:
        result = "PASS"
        for classification in anomaly_classifications:
            if classification in ("regression", "outlier"):
                result = "FAIL"
                break
    else:
        result = "FAIL"

    logging.info("Partial results: {0}".format(anomaly_classifications))
    logging.info("Result: {0}".format(result))

    return result