CSIT-942: RCA - Option 1: Analysing Archived VPP Results
[csit.git] / resources / tools / presentation / generator_CPTA.py
# Copyright (c) 2018 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generation of Continuous Performance Trending and Analysis.
"""

import multiprocessing
import os
import logging
import csv
import prettytable
import plotly.offline as ploff
import plotly.graph_objs as plgo
import plotly.exceptions as plerr
import pandas as pd

from collections import OrderedDict
from datetime import datetime

from utils import split_outliers, archive_input_data, execute_command,\
    classify_anomalies, Worker


# Command to build the html format of the report
HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
               '-b html -E ' \
               '-t html ' \
               '-D version="{date}" ' \
               '{working_dir} ' \
               '{build_dir}/'

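# An illustrative rendering of HTML_BUILDER (the paths and the date are
# hypothetical; the real values come from the specification file):
#   sphinx-build -v -c conf_cpta -a -b html -E -t html \
#       -D version="01/01/2018 01:01 UTC" _tmp/src _build/
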
# .css file for the html format of the report
THEME_OVERRIDES = """/* override table width restrictions */
.wy-nav-content {
    max-width: 1200px !important;
}
"""

COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
          "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
          "Violet", "Blue", "Yellow"]


def generate_cpta(spec, data):
    """Generate all formats and versions of the Continuous Performance Trending
    and Analysis.

    :param spec: Specification read from the specification file.
    :param data: Full data set.
    :type spec: Specification
    :type data: InputData
    :returns: The result of the evaluation ("PASS" or "FAIL").
    :rtype: str
    """

    logging.info("Generating the Continuous Performance Trending and Analysis "
                 "...")

    ret_code = _generate_all_charts(spec, data)

    cmd = HTML_BUILDER.format(
        date=datetime.utcnow().strftime('%m/%d/%Y %H:%M UTC'),
        working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
        build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
    execute_command(cmd)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    archive_input_data(spec)

    logging.info("Done.")

    return ret_code


def _generate_trending_traces(in_data, job_name, build_info, moving_win_size=10,
                              show_trend_line=True, name="", color=""):
    """Generate the trending traces:
     - samples,
     - trimmed moving median (trending line),
     - outliers, regressions, progressions.

    :param in_data: Full data set.
    :param job_name: The name of the job which generated the data.
    :param build_info: Information about the builds.
    :param moving_win_size: Window size.
    :param show_trend_line: Show the moving median (trending) line.
    :param name: Name of the plot.
    :param color: Name of the color for the plot.
    :type in_data: OrderedDict
    :type job_name: str
    :type build_info: dict
    :type moving_win_size: int
    :type show_trend_line: bool
    :type name: str
    :type color: str
    :returns: Generated traces (list) and the evaluated result.
    :rtype: tuple(traces, result)
    """

    data_x = list(in_data.keys())
    data_y = list(in_data.values())

    hover_text = list()
    xaxis = list()
    for idx in data_x:
        if "dpdk" in job_name:
            hover_text.append("dpdk-ref: {0}<br>csit-ref: mrr-weekly-build-{1}".
                              format(build_info[job_name][str(idx)][1].
                                     rsplit('~', 1)[0], idx))
        elif "vpp" in job_name:
            hover_text.append("vpp-ref: {0}<br>csit-ref: mrr-daily-build-{1}".
                              format(build_info[job_name][str(idx)][1].
                                     rsplit('~', 1)[0], idx))
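        # The "generated" timestamp is assumed to be in the "YYYYMMDD HH:MM"
        # format implied by the slicing below.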
        date = build_info[job_name][str(idx)][0]
        xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
                              int(date[9:11]), int(date[12:])))

    data_pd = pd.Series(data_y, index=xaxis)

    # Split the samples into the trimmed data set and the outliers:
    t_data, outliers = split_outliers(data_pd, outlier_const=1.5,
                                      window=moving_win_size)
    anomaly_classification = classify_anomalies(t_data, window=moving_win_size)

    anomalies = pd.Series()
    anomalies_colors = list()
    anomaly_color = {
        "outlier": 0.0,
        "regression": 0.33,
        "normal": 0.66,
        "progression": 1.0
    }
    if anomaly_classification:
        for idx, item in enumerate(data_pd.items()):
            if anomaly_classification[idx] in \
                    ("outlier", "regression", "progression"):
                anomalies = anomalies.append(pd.Series([item[1], ],
                                                       index=[item[0], ]))
                anomalies_colors.append(
                    anomaly_color[anomaly_classification[idx]])
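        # Add one dummy value per class so the colorscale always covers the
        # full 0.0-1.0 range and the colorbar shows all four categories.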
        anomalies_colors.extend([0.0, 0.33, 0.66, 1.0])

    # Create traces

    trace_samples = plgo.Scatter(
        x=xaxis,
        y=data_y,
        mode='markers',
        line={
            "width": 1
        },
        legendgroup=name,
        name="{name}-thput".format(name=name),
        marker={
            "size": 5,
            "color": color,
            "symbol": "circle",
        },
        text=hover_text,
        hoverinfo="x+y+text+name"
    )
    traces = [trace_samples, ]

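    # The open-circle markers encode the classification of each anomaly:
    # grey = outlier, red = regression, white = normal, green = progression
    # (see the anomaly_color mapping above and the colorscale below).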
    trace_anomalies = plgo.Scatter(
        x=anomalies.keys(),
        y=anomalies.values,
        mode='markers',
        hoverinfo="none",
        showlegend=True,
        legendgroup=name,
        name="{name}-anomalies".format(name=name),
        marker={
            "size": 15,
            "symbol": "circle-open",
            "color": anomalies_colors,
            "colorscale": [[0.00, "grey"],
                           [0.25, "grey"],
                           [0.25, "red"],
                           [0.50, "red"],
                           [0.50, "white"],
                           [0.75, "white"],
                           [0.75, "green"],
                           [1.00, "green"]],
            "showscale": True,
            "line": {
                "width": 2
            },
            "colorbar": {
                "y": 0.5,
                "len": 0.8,
                "title": "Circles Marking Data Classification",
                "titleside": 'right',
                "titlefont": {
                    "size": 14
                },
                "tickmode": 'array',
                "tickvals": [0.125, 0.375, 0.625, 0.875],
                "ticktext": ["Outlier", "Regression", "Normal", "Progression"],
                "ticks": "",
                "ticklen": 0,
                "tickangle": -90,
                "thickness": 10
            }
        }
    )
    traces.append(trace_anomalies)

    if show_trend_line:
        data_trend = t_data.rolling(window=moving_win_size,
                                    min_periods=2).median()
        trace_trend = plgo.Scatter(
            x=data_trend.keys(),
            y=data_trend.tolist(),
            mode='lines',
            line={
                "shape": "spline",
                "width": 1,
                "color": color,
            },
            legendgroup=name,
            name='{name}-trend'.format(name=name)
        )
        traces.append(trace_trend)

    if anomaly_classification:
        return traces, anomaly_classification[-1]
    else:
        return traces, None


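# A minimal sketch of a _generate_trending_traces() call (all values are
# hypothetical):
#
#   in_data = OrderedDict([(1, 11250000.0), (2, 11310000.0)])
#   build_info = {"csit-vpp-perf-mrr-daily-master": {
#       "1": ("20180101 01:01", "18.01-release~g0000"),
#       "2": ("20180102 01:01", "18.01-release~g0001")}}
#   traces, result = _generate_trending_traces(
#       in_data, "csit-vpp-perf-mrr-daily-master", build_info,
#       name="64b-1t1c-l2xc", color=COLORS[0])
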
def _generate_all_charts(spec, input_data):
    """Generate all charts specified in the specification file.

    :param spec: Specification.
    :param input_data: Full data set.
    :type spec: Specification
    :type input_data: InputData
    :returns: The overall result: "PASS" or "FAIL".
    :rtype: str
    """

    def _generate_chart(_, data_q, graph):
        """Generate the chart defined by the given specification and put the
        resulting data to the queue.

        :param data_q: Queue used to pass the results back to the parent
            process.
        :param graph: Specification of the chart to generate.
        :type graph: dict
        """

        logs = list()

        logging.info("  Generating the chart '{0}' ...".
                     format(graph.get("title", "")))
        logs.append(("INFO", "  Generating the chart '{0}' ...".
                     format(graph.get("title", ""))))

        job_name = graph["data"].keys()[0]

        csv_tbl = list()
        res = list()

        # Transform the data
        logs.append(("INFO", "    Creating the data set for the {0} '{1}'.".
                     format(graph.get("type", ""), graph.get("title", ""))))
        data = input_data.filter_data(graph, continue_on_error=True)
        if data is None:
            logging.error("No data.")
            return

        chart_data = dict()
        for job, job_data in data.iteritems():
            if job != job_name:
                continue
            for index, bld in job_data.items():
                for test_name, test in bld.items():
                    if chart_data.get(test_name, None) is None:
                        chart_data[test_name] = OrderedDict()
                    try:
                        chart_data[test_name][int(index)] = \
                            test["result"]["throughput"]
                    except (KeyError, TypeError):
                        # Tests without a throughput result are skipped.
                        pass

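        # Each row of the csv table is one test name followed by its
        # throughput (empty if missing) for every build, e.g.
        # (illustrative values only):
        #   <test name>,11250000.0,,11310000.0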
        # Add items to the csv table:
        for tst_name, tst_data in chart_data.items():
            tst_lst = list()
            for bld in builds_dict[job_name]:
                itm = tst_data.get(int(bld), '')
                tst_lst.append(str(itm))
            csv_tbl.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
        # Generate traces:
        traces = list()
        win_size = 14
        index = 0
        for test_name, test_data in chart_data.items():
            if not test_data:
                logs.append(("WARNING", "No data for the test '{0}'".
                             format(test_name)))
                continue
            test_name = test_name.split('.')[-1]
            trace, rslt = _generate_trending_traces(
                test_data,
                job_name=job_name,
                build_info=build_info,
                moving_win_size=win_size,
                name='-'.join(test_name.split('-')[3:-1]),
                color=COLORS[index])
            traces.extend(trace)
            res.append(rslt)
            index += 1

        if traces:
            # Generate the chart:
            graph["layout"]["xaxis"]["title"] = \
                graph["layout"]["xaxis"]["title"].format(job=job_name)
            name_file = "{0}-{1}{2}".format(spec.cpta["output-file"],
                                            graph["output-file-name"],
                                            spec.cpta["output-file-type"])

            logs.append(("INFO", "    Writing the file '{0}' ...".
                         format(name_file)))
            plpl = plgo.Figure(data=traces, layout=graph["layout"])
            try:
                ploff.plot(plpl, show_link=False, auto_open=False,
                           filename=name_file)
            except plerr.PlotlyEmptyDataError:
                logs.append(("WARNING", "No data for the plot. Skipped."))

        data_out = {
            "job_name": job_name,
            "csv_table": csv_tbl,
            "results": res,
            "logs": logs
        }
        data_q.put(data_out)

    builds_dict = dict()
    for job in spec.input["builds"].keys():
        if builds_dict.get(job, None) is None:
            builds_dict[job] = list()
        for build in spec.input["builds"][job]:
            status = build["status"]
            if status not in ("failed", "not found"):
                builds_dict[job].append(str(build["build"]))

    # Create the "build ID": ("generated date", "version") dict:
    build_info = dict()
    for job_name, job_data in builds_dict.items():
        if build_info.get(job_name, None) is None:
            build_info[job_name] = OrderedDict()
        for build in job_data:
            build_info[job_name][build] = (
                input_data.metadata(job_name, build).get("generated", ""),
                input_data.metadata(job_name, build).get("version", "")
            )

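    # Illustrative shape of build_info (all values are hypothetical):
    #   {"csit-vpp-perf-mrr-daily-master":
    #       OrderedDict([("1", ("20180101 01:01", "18.01-release~g1234"))])}
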
    work_queue = multiprocessing.JoinableQueue()
    manager = multiprocessing.Manager()
    data_queue = manager.Queue()
    cpus = multiprocessing.cpu_count()

    workers = list()
    for cpu in range(cpus):
        worker = Worker(work_queue,
                        data_queue,
                        _generate_chart)
        worker.daemon = True
        worker.start()
        workers.append(worker)
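        # Pin each worker process to its own CPU core; any output or error
        # from taskset is deliberately discarded.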
        os.system("taskset -p -c {0} {1} > /dev/null 2>&1".
                  format(cpu, worker.pid))

    for chart in spec.cpta["plots"]:
        work_queue.put((chart, ))
    work_queue.join()

    anomaly_classifications = list()

    # Create the headers of the csv tables:
    csv_tables = dict()
    for job_name in builds_dict.keys():
        if csv_tables.get(job_name, None) is None:
            csv_tables[job_name] = list()
        header = "Build Number:," + ",".join(builds_dict[job_name]) + '\n'
        csv_tables[job_name].append(header)
        build_dates = [x[0] for x in build_info[job_name].values()]
        header = "Build Date:," + ",".join(build_dates) + '\n'
        csv_tables[job_name].append(header)
        versions = [x[1] for x in build_info[job_name].values()]
        header = "Version:," + ",".join(versions) + '\n'
        csv_tables[job_name].append(header)

    while not data_queue.empty():
        result = data_queue.get()

        anomaly_classifications.extend(result["results"])
        csv_tables[result["job_name"]].extend(result["csv_table"])

        for item in result["logs"]:
            if item[0] == "INFO":
                logging.info(item[1])
            elif item[0] == "ERROR":
                logging.error(item[1])
            elif item[0] == "DEBUG":
                logging.debug(item[1])
            elif item[0] == "CRITICAL":
                logging.critical(item[1])
            elif item[0] == "WARNING":
                logging.warning(item[1])

    del data_queue

    # Terminate all workers
    for worker in workers:
        worker.terminate()
        worker.join()

    # Write the tables:
    for job_name, csv_table in csv_tables.items():
        file_name = spec.cpta["output-file"] + "-" + job_name + "-trending"
        with open("{0}.csv".format(file_name), 'w') as file_handler:
            file_handler.writelines(csv_table)

        txt_table = None
        with open("{0}.csv".format(file_name), 'rb') as csv_file:
            csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
            line_nr = 0
            for row in csv_content:
                if txt_table is None:
                    txt_table = prettytable.PrettyTable(row)
                else:
                    if line_nr > 1:
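                        # Convert numeric cells (throughput in pps) to Mpps;
                        # non-numeric cells are left as they are.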
                        for idx, item in enumerate(row):
                            try:
                                row[idx] = str(round(float(item) / 1000000, 2))
                            except ValueError:
                                pass
                    try:
                        txt_table.add_row(row)
                    except Exception as err:
                        logging.warning("Error occurred while generating TXT "
                                        "table:\n{0}".format(err))
                line_nr += 1
        # Align and write the txt table only if it was actually created:
        if txt_table is not None:
            txt_table.align["Build Number:"] = "l"
            with open("{0}.txt".format(file_name), "w") as txt_file:
                txt_file.write(str(txt_table))

    # Evaluate the overall result: "FAIL" if any test's latest classification
    # is a regression or an outlier, or if no classification is available:
    if anomaly_classifications:
        result = "PASS"
        for classification in anomaly_classifications:
            if classification in ("regression", "outlier"):
                result = "FAIL"
                break
    else:
        result = "FAIL"

    logging.info("Partial results: {0}".format(anomaly_classifications))
    logging.info("Result: {0}".format(result))

    return result
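
# A minimal sketch of how this module is driven (hypothetical; in CSIT the
# presentation entry point builds the Specification and InputData objects
# from the specification file and the archived test results):
#
#   spec = Specification(<specification file>)
#   data = InputData(spec)                # archived VPP/DPDK MRR results
#   result = generate_cpta(spec, data)    # returns "PASS" or "FAIL"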