resources/tools/presentation/generator_CPTA.py

   1 # Copyright (c) 2018 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Generation of Continuous Performance Trending and Analysis.
  15 """
  16
  17 import multiprocessing
  18 import os
  19 import logging
  20 import csv
  21 import prettytable
  22 import plotly.offline as ploff
  23 import plotly.graph_objs as plgo
  24 import plotly.exceptions as plerr
  25 import pandas as pd
  26
  27 from collections import OrderedDict
  28 from datetime import datetime
  29
  30 from utils import archive_input_data, execute_command, \
  31     classify_anomalies, Worker
  32
  33
  34 # Command to build the html format of the report
  35 HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
  36                '-b html -E ' \
  37                '-t html ' \
  38                '-D version="{date}" ' \
  39                '{working_dir} ' \
  40                '{build_dir}/'
  41
  42 # .css file for the html format of the report
  43 THEME_OVERRIDES = """/* override table width restrictions */
  44 .wy-nav-content {
  45     max-width: 1200px !important;
  46 }
  47 """
  48
  49 COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
  50           "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
  51           "Violet", "Blue", "Yellow"]
  52
  53
  54 def generate_cpta(spec, data):
  55     """Generate all formats and versions of the Continuous Performance Trending
  56     and Analysis.
  57
  58     :param spec: Specification read from the specification file.
  59     :param data: Full data set.
  60     :type spec: Specification
  61     :type data: InputData
  62     """
  63
  64     logging.info("Generating the Continuous Performance Trending and Analysis "
  65                  "...")
  66
  67     ret_code = _generate_all_charts(spec, data)
  68
  69     cmd = HTML_BUILDER.format(
  70         date=datetime.utcnow().strftime('%m/%d/%Y %H:%M UTC'),
  71         working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
  72         build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
  73     execute_command(cmd)
  74
  75     with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
  76             css_file:
  77         css_file.write(THEME_OVERRIDES)
  78
  79     with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
  80             css_file:
  81         css_file.write(THEME_OVERRIDES)
  82
  83     archive_input_data(spec)
  84
  85     logging.info("Done.")
  86
  87     return ret_code
  88
  89
  90 def _generate_trending_traces(in_data, job_name, build_info,
  91                               show_trend_line=True, name="", color=""):
  92     """Generate the trending traces:
  93      - samples,
  94      - outliers, regress, progress
  95      - average of normal samples (trending line)
  96
  97     :param in_data: Full data set.
  98     :param job_name: The name of job which generated the data.
  99     :param build_info: Information about the builds.
 100     :param show_trend_line: Show moving median (trending plot).
 101     :param name: Name of the plot
 102     :param color: Name of the color for the plot.
 103     :type in_data: OrderedDict
 104     :type job_name: str
 105     :type build_info: dict
 106     :type show_trend_line: bool
 107     :type name: str
 108     :type color: str
 109     :returns: Generated traces (list) and the evaluated result.
 110     :rtype: tuple(traces, result)
 111     """
 112
 113     data_x = list(in_data.keys())
 114     data_y = list(in_data.values())
 115
 116     hover_text = list()
 117     xaxis = list()
 118     for idx in data_x:
 119         if "dpdk" in job_name:
 120             hover_text.append("dpdk-ref: {0}<br>csit-ref: mrr-weekly-build-{1}".
 121                               format(build_info[job_name][str(idx)][1].
 122                                      rsplit('~', 1)[0], idx))
 123         elif "vpp" in job_name:
 124             hover_text.append("vpp-ref: {0}<br>csit-ref: mrr-daily-build-{1}".
 125                               format(build_info[job_name][str(idx)][1].
 126                                      rsplit('~', 1)[0], idx))
 127         date = build_info[job_name][str(idx)][0]
 128         xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
 129                               int(date[9:11]), int(date[12:])))
 130
 131     data_pd = pd.Series(data_y, index=xaxis)
 132
 133     anomaly_classification, avgs = classify_anomalies(data_pd)
 134
 135     anomalies = pd.Series()
 136     anomalies_colors = list()
 137     anomalies_avgs = list()
 138     anomaly_color = {
 139         "regression": 0.0,
 140         "normal": 0.5,
 141         "progression": 1.0
 142     }
 143     if anomaly_classification:
 144         for idx, item in enumerate(data_pd.items()):
 145             if anomaly_classification[idx] in \
 146                     ("outlier", "regression", "progression"):
 147                 anomalies = anomalies.append(pd.Series([item[1], ],
 148                                                        index=[item[0], ]))
 149                 anomalies_colors.append(
 150                     anomaly_color[anomaly_classification[idx]])
 151                 anomalies_avgs.append(avgs[idx])
 152         anomalies_colors.extend([0.0, 0.5, 1.0])
 153
 154     # Create traces
 155
 156     trace_samples = plgo.Scatter(
 157         x=xaxis,
 158         y=data_y,
 159         mode='markers',
 160         line={
 161             "width": 1
 162         },
 163         showlegend=True,
 164         legendgroup=name,
 165         name="{name}".format(name=name),
 166         marker={
 167             "size": 5,
 168             "color": color,
 169             "symbol": "circle",
 170         },
 171         text=hover_text,
 172         hoverinfo="x+y+text+name"
 173     )
 174     traces = [trace_samples, ]
 175
 176     if show_trend_line:
 177         trace_trend = plgo.Scatter(
 178             x=xaxis,
 179             y=avgs,
 180             mode='lines',
 181             line={
 182                 "shape": "linear",
 183                 "width": 1,
 184                 "color": color,
 185             },
 186             showlegend=False,
 187             legendgroup=name,
 188             name='{name}-trend'.format(name=name)
 189         )
 190         traces.append(trace_trend)
 191
 192     trace_anomalies = plgo.Scatter(
 193         x=anomalies.keys(),
 194         y=anomalies_avgs,
 195         mode='markers',
 196         hoverinfo="none",
 197         showlegend=False,
 198         legendgroup=name,
 199         name="{name}-anomalies".format(name=name),
 200         marker={
 201             "size": 15,
 202             "symbol": "circle-open",
 203             "color": anomalies_colors,
 204             "colorscale": [[0.00, "red"],
 205                            [0.33, "red"],
 206                            [0.33, "white"],
 207                            [0.66, "white"],
 208                            [0.66, "green"],
 209                            [1.00, "green"]],
 210             "showscale": True,
 211             "line": {
 212                 "width": 2
 213             },
 214             "colorbar": {
 215                 "y": 0.5,
 216                 "len": 0.8,
 217                 "title": "Circles Marking Data Classification",
 218                 "titleside": 'right',
 219                 "titlefont": {
 220                     "size": 14
 221                 },
 222                 "tickmode": 'array',
 223                 "tickvals": [0.167, 0.500, 0.833],
 224                 "ticktext": ["Regression", "Normal", "Progression"],
 225                 "ticks": "",
 226                 "ticklen": 0,
 227                 "tickangle": -90,
 228                 "thickness": 10
 229             }
 230         }
 231     )
 232     traces.append(trace_anomalies)
 233
 234     if anomaly_classification:
 235         return traces, anomaly_classification[-1]
 236     else:
 237         return traces, None
 238
 239
 240 def _generate_all_charts(spec, input_data):
 241     """Generate all charts specified in the specification file.
 242
 243     :param spec: Specification.
 244     :param input_data: Full data set.
 245     :type spec: Specification
 246     :type input_data: InputData
 247     """
 248
 249     def _generate_chart(_, data_q, graph):
 250         """Generates the chart.
 251         """
 252
 253         logs = list()
 254
 255         logging.info("  Generating the chart '{0}' ...".
 256                      format(graph.get("title", "")))
 257         logs.append(("INFO", "  Generating the chart '{0}' ...".
 258                      format(graph.get("title", ""))))
 259
 260         job_name = graph["data"].keys()[0]
 261
 262         csv_tbl = list()
 263         res = list()
 264
 265         # Transform the data
 266         logs.append(("INFO", "    Creating the data set for the {0} '{1}'.".
 267                      format(graph.get("type", ""), graph.get("title", ""))))
 268         data = input_data.filter_data(graph, continue_on_error=True)
 269         if data is None:
 270             logging.error("No data.")
 271             return
 272
 273         chart_data = dict()
 274         for job, job_data in data.iteritems():
 275             if job != job_name:
 276                 continue
 277             for index, bld in job_data.items():
 278                 for test_name, test in bld.items():
 279                     if chart_data.get(test_name, None) is None:
 280                         chart_data[test_name] = OrderedDict()
 281                     try:
 282                         chart_data[test_name][int(index)] = \
 283                             test["result"]["throughput"]
 284                     except (KeyError, TypeError):
 285                         pass
 286
 287         # Add items to the csv table:
 288         for tst_name, tst_data in chart_data.items():
 289             tst_lst = list()
 290             for bld in builds_dict[job_name]:
 291                 itm = tst_data.get(int(bld), '')
 292                 tst_lst.append(str(itm))
 293             csv_tbl.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
 294         # Generate traces:
 295         traces = list()
 296         win_size = 14
 297         index = 0
 298         for test_name, test_data in chart_data.items():
 299             if not test_data:
 300                 logs.append(("WARNING", "No data for the test '{0}'".
 301                              format(test_name)))
 302                 continue
 303             test_name = test_name.split('.')[-1]
 304             trace, rslt = _generate_trending_traces(
 305                 test_data,
 306                 job_name=job_name,
 307                 build_info=build_info,
 308                 name='-'.join(test_name.split('-')[3:-1]),
 309                 color=COLORS[index])
 310             traces.extend(trace)
 311             res.append(rslt)
 312             index += 1
 313
 314         if traces:
 315             # Generate the chart:
 316             graph["layout"]["xaxis"]["title"] = \
 317                 graph["layout"]["xaxis"]["title"].format(job=job_name)
 318             name_file = "{0}-{1}{2}".format(spec.cpta["output-file"],
 319                                             graph["output-file-name"],
 320                                             spec.cpta["output-file-type"])
 321
 322             logs.append(("INFO", "    Writing the file '{0}' ...".
 323                          format(name_file)))
 324             plpl = plgo.Figure(data=traces, layout=graph["layout"])
 325             try:
 326                 ploff.plot(plpl, show_link=False, auto_open=False,
 327                            filename=name_file)
 328             except plerr.PlotlyEmptyDataError:
 329                 logs.append(("WARNING", "No data for the plot. Skipped."))
 330
 331         data_out = {
 332             "job_name": job_name,
 333             "csv_table": csv_tbl,
 334             "results": res,
 335             "logs": logs
 336         }
 337         data_q.put(data_out)
 338
 339     builds_dict = dict()
 340     for job in spec.input["builds"].keys():
 341         if builds_dict.get(job, None) is None:
 342             builds_dict[job] = list()
 343         for build in spec.input["builds"][job]:
 344             status = build["status"]
 345             if status != "failed" and status != "not found":
 346                 builds_dict[job].append(str(build["build"]))
 347
 348     # Create "build ID": "date" dict:
 349     build_info = dict()
 350     for job_name, job_data in builds_dict.items():
 351         if build_info.get(job_name, None) is None:
 352             build_info[job_name] = OrderedDict()
 353         for build in job_data:
 354             build_info[job_name][build] = (
 355                 input_data.metadata(job_name, build).get("generated", ""),
 356                 input_data.metadata(job_name, build).get("version", "")
 357             )
 358
 359     work_queue = multiprocessing.JoinableQueue()
 360     manager = multiprocessing.Manager()
 361     data_queue = manager.Queue()
 362     cpus = multiprocessing.cpu_count()
 363
 364     workers = list()
 365     for cpu in range(cpus):
 366         worker = Worker(work_queue,
 367                         data_queue,
 368                         _generate_chart)
 369         worker.daemon = True
 370         worker.start()
 371         workers.append(worker)
 372         os.system("taskset -p -c {0} {1} > /dev/null 2>&1".
 373                   format(cpu, worker.pid))
 374
 375     for chart in spec.cpta["plots"]:
 376         work_queue.put((chart, ))
 377     work_queue.join()
 378
 379     anomaly_classifications = list()
 380
 381     # Create the header:
 382     csv_tables = dict()
 383     for job_name in builds_dict.keys():
 384         if csv_tables.get(job_name, None) is None:
 385             csv_tables[job_name] = list()
 386         header = "Build Number:," + ",".join(builds_dict[job_name]) + '\n'
 387         csv_tables[job_name].append(header)
 388         build_dates = [x[0] for x in build_info[job_name].values()]
 389         header = "Build Date:," + ",".join(build_dates) + '\n'
 390         csv_tables[job_name].append(header)
 391         versions = [x[1] for x in build_info[job_name].values()]
 392         header = "Version:," + ",".join(versions) + '\n'
 393         csv_tables[job_name].append(header)
 394
 395     while not data_queue.empty():
 396         result = data_queue.get()
 397
 398         anomaly_classifications.extend(result["results"])
 399         csv_tables[result["job_name"]].extend(result["csv_table"])
 400
 401         for item in result["logs"]:
 402             if item[0] == "INFO":
 403                 logging.info(item[1])
 404             elif item[0] == "ERROR":
 405                 logging.error(item[1])
 406             elif item[0] == "DEBUG":
 407                 logging.debug(item[1])
 408             elif item[0] == "CRITICAL":
 409                 logging.critical(item[1])
 410             elif item[0] == "WARNING":
 411                 logging.warning(item[1])
 412
 413     del data_queue
 414
 415     # Terminate all workers
 416     for worker in workers:
 417         worker.terminate()
 418         worker.join()
 419
 420     # Write the tables:
 421     for job_name, csv_table in csv_tables.items():
 422         file_name = spec.cpta["output-file"] + "-" + job_name + "-trending"
 423         with open("{0}.csv".format(file_name), 'w') as file_handler:
 424             file_handler.writelines(csv_table)
 425
 426         txt_table = None
 427         with open("{0}.csv".format(file_name), 'rb') as csv_file:
 428             csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
 429             line_nr = 0
 430             for row in csv_content:
 431                 if txt_table is None:
 432                     txt_table = prettytable.PrettyTable(row)
 433                 else:
 434                     if line_nr > 1:
 435                         for idx, item in enumerate(row):
 436                             try:
 437                                 row[idx] = str(round(float(item) / 1000000, 2))
 438                             except ValueError:
 439                                 pass
 440                     try:
 441                         txt_table.add_row(row)
 442                     except Exception as err:
 443                         logging.warning("Error occurred while generating TXT "
 444                                         "table:\n{0}".format(err))
 445                 line_nr += 1
 446             txt_table.align["Build Number:"] = "l"
 447         with open("{0}.txt".format(file_name), "w") as txt_file:
 448             txt_file.write(str(txt_table))
 449
 450     # Evaluate result:
 451     if anomaly_classifications:
 452         result = "PASS"
 453         for classification in anomaly_classifications:
 454             if classification == "regression" or classification == "outlier":
 455                 result = "FAIL"
 456                 break
 457     else:
 458         result = "FAIL"
 459
 460     logging.info("Partial results: {0}".format(anomaly_classifications))
 461     logging.info("Result: {0}".format(result))
 462
 463     return result