CSIT-1110: Cherry-pick edits into new detection
csit.git: resources/tools/presentation/new/generator_CPTA.py
# Copyright (c) 2018 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generation of Continuous Performance Trending and Analysis.
"""

import multiprocessing
import os
import logging
import csv
import prettytable
import plotly.offline as ploff
import plotly.graph_objs as plgo
import plotly.exceptions as plerr
import pandas as pd

from collections import OrderedDict
from datetime import datetime

from utils import archive_input_data, execute_command, \
    classify_anomalies, Worker


# Command to build the html format of the report
HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
               '-b html -E ' \
               '-t html ' \
               '-D version="{date}" ' \
               '{working_dir} ' \
               '{build_dir}/'
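# For illustration only, with hypothetical paths, the rendered command is:
#   sphinx-build -v -c conf_cpta -a -b html -E -t html \
#       -D version="06/01/2018 12:00 UTC" /tmp/src /tmp/build/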

# .css file for the html format of the report
THEME_OVERRIDES = """/* override table width restrictions */
.wy-nav-content {
    max-width: 1200px !important;
}
"""

COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
          "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
          "Violet", "Blue", "Yellow"]


def generate_cpta(spec, data):
    """Generate all formats and versions of the Continuous Performance Trending
    and Analysis.

    :param spec: Specification read from the specification file.
    :param data: Full data set.
    :type spec: Specification
    :type data: InputData
    :returns: Result of the generation, "PASS" or "FAIL".
    :rtype: str
    """

    logging.info("Generating the Continuous Performance Trending and Analysis "
                 "...")

    ret_code = _generate_all_charts(spec, data)

    cmd = HTML_BUILDER.format(
        date=datetime.utcnow().strftime('%m/%d/%Y %H:%M UTC'),
        working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
        build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
    execute_command(cmd)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    archive_input_data(spec)

    logging.info("Done.")

    return ret_code


def _generate_trending_traces(in_data, job_name, build_info,
                              show_trend_line=True, name="", color=""):
    """Generate the trending traces:
     - samples,
     - outliers, regressions, progressions,
     - average of normal samples (trending line).

    :param in_data: Full data set.
    :param job_name: The name of the job which generated the data.
    :param build_info: Information about the builds.
    :param show_trend_line: Show the trend line (average of normal samples).
    :param name: The name of the plot.
    :param color: The name of the color for the plot.
    :type in_data: OrderedDict
    :type job_name: str
    :type build_info: dict
    :type show_trend_line: bool
    :type name: str
    :type color: str
    :returns: Generated traces (list) and the evaluated result.
    :rtype: tuple(traces, result)
    """

    data_x = list(in_data.keys())
    data_y = list(in_data.values())

    hover_text = list()
    xaxis = list()
    for idx in data_x:
        if "dpdk" in job_name:
            hover_text.append("dpdk-ref: {0}<br>csit-ref: mrr-weekly-build-{1}".
                              format(build_info[job_name][str(idx)][1].
                                     rsplit('~', 1)[0], idx))
        elif "vpp" in job_name:
            hover_text.append("vpp-ref: {0}<br>csit-ref: mrr-daily-build-{1}".
                              format(build_info[job_name][str(idx)][1].
                                     rsplit('~', 1)[0], idx))
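        # The slicing below assumes the "generated" metadata timestamp is in
        # the form "YYYYMMDD HH:MM", e.g. "20180601 12:00".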
        date = build_info[job_name][str(idx)][0]
        xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
                              int(date[9:11]), int(date[12:])))

    data_pd = pd.Series(data_y, index=xaxis)

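    # classify_anomalies() returns one classification per sample ("normal",
    # "outlier", "regression" or "progression") together with the list of
    # trend averages used below for the trend line and the anomaly markers.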
    anomaly_classification, avgs = classify_anomalies(data_pd)

    anomalies = pd.Series()
    anomalies_colors = list()
    anomalies_avgs = list()
    anomaly_color = {
        "outlier": 0.0,
        "regression": 0.33,
        "normal": 0.66,
        "progression": 1.0
    }
    if anomaly_classification:
        for idx, item in enumerate(data_pd.items()):
            if anomaly_classification[idx] in \
                    ("outlier", "regression", "progression"):
                anomalies = anomalies.append(pd.Series([item[1], ],
                                                       index=[item[0], ]))
                anomalies_colors.append(
                    anomaly_color[anomaly_classification[idx]])
                anomalies_avgs.append(avgs[idx])
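        # A workaround, assuming Plotly scales marker colors to the range of
        # the supplied values: append one value per class so the colorscale
        # and the colorbar below always cover all four classes.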
        anomalies_colors.extend([0.0, 0.33, 0.66, 1.0])

    # Create traces

    trace_samples = plgo.Scatter(
        x=xaxis,
        y=data_y,
        mode='markers',
        line={
            "width": 1
        },
        legendgroup=name,
        name="{name}-thput".format(name=name),
        marker={
            "size": 5,
            "color": color,
            "symbol": "circle",
        },
        text=hover_text,
        hoverinfo="x+y+text+name"
    )
    traces = [trace_samples, ]

    if show_trend_line:
        trace_trend = plgo.Scatter(
            x=xaxis,
            y=avgs,
            mode='lines',
            line={
                "shape": "linear",
                "width": 1,
                "color": color,
            },
            legendgroup=name,
            name='{name}-trend'.format(name=name)
        )
        traces.append(trace_trend)

    trace_anomalies = plgo.Scatter(
        x=anomalies.keys(),
        y=anomalies_avgs,
        mode='markers',
        hoverinfo="none",
        showlegend=True,
        legendgroup=name,
        name="{name}-anomalies".format(name=name),
        marker={
            "size": 15,
            "symbol": "circle-open",
            "color": anomalies_colors,
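            # A stepped colorscale: each quarter of the range maps to a single
            # color, so the class values 0.0 / 0.33 / 0.66 / 1.0 render as
            # grey (outlier), red (regression), white (normal), green
            # (progression).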
            "colorscale": [[0.00, "grey"],
                           [0.25, "grey"],
                           [0.25, "red"],
                           [0.50, "red"],
                           [0.50, "white"],
                           [0.75, "white"],
                           [0.75, "green"],
                           [1.00, "green"]],
            "showscale": True,
            "line": {
                "width": 2
            },
            "colorbar": {
                "y": 0.5,
                "len": 0.8,
                "title": "Circles Marking Data Classification",
                "titleside": 'right',
                "titlefont": {
                    "size": 14
                },
                "tickmode": 'array',
                "tickvals": [0.125, 0.375, 0.625, 0.875],
                "ticktext": ["Outlier", "Regression", "Normal", "Progression"],
                "ticks": "",
                "ticklen": 0,
                "tickangle": -90,
                "thickness": 10
            }
        }
    )
    traces.append(trace_anomalies)

    if anomaly_classification:
        return traces, anomaly_classification[-1]
    else:
        return traces, None


def _generate_all_charts(spec, input_data):
    """Generate all charts specified in the specification file.

    :param spec: Specification.
    :param input_data: Full data set.
    :type spec: Specification
    :type input_data: InputData
    :returns: The overall result, "PASS" or "FAIL".
    :rtype: str
    """

    def _generate_chart(_, data_q, graph):
        """Generate one chart. Executed by a worker process; the produced
        data (csv table, results and logs) is put to the data queue.
        """

        logs = list()

        logging.info("  Generating the chart '{0}' ...".
                     format(graph.get("title", "")))
        logs.append(("INFO", "  Generating the chart '{0}' ...".
                     format(graph.get("title", ""))))

        job_name = graph["data"].keys()[0]

        csv_tbl = list()
        res = list()

        # Transform the data
        logs.append(("INFO", "    Creating the data set for the {0} '{1}'.".
                     format(graph.get("type", ""), graph.get("title", ""))))
        data = input_data.filter_data(graph, continue_on_error=True)
        if data is None:
            logging.error("No data.")
            return

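        # Build chart_data as {test name: {build number: throughput}}, the
        # inner dict ordered by build, e.g. (hypothetical test name and
        # values):
        #     chart_data["tests.x.64b-1t1c-eth-l2bdbase-mrr"][101] = 12.5e6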
        chart_data = dict()
        for job, job_data in data.iteritems():
            if job != job_name:
                continue
            for index, bld in job_data.items():
                for test_name, test in bld.items():
                    if chart_data.get(test_name, None) is None:
                        chart_data[test_name] = OrderedDict()
                    try:
                        chart_data[test_name][int(index)] = \
                            test["result"]["throughput"]
                    except (KeyError, TypeError):
                        pass

        # Add items to the csv table:
        for tst_name, tst_data in chart_data.items():
            tst_lst = list()
            for bld in builds_dict[job_name]:
                itm = tst_data.get(int(bld), '')
                tst_lst.append(str(itm))
            csv_tbl.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')
        # Generate traces:
        traces = list()
        index = 0
        for test_name, test_data in chart_data.items():
            if not test_data:
                logs.append(("WARNING", "No data for the test '{0}'".
                             format(test_name)))
                continue
            test_name = test_name.split('.')[-1]
            trace, rslt = _generate_trending_traces(
                test_data,
                job_name=job_name,
                build_info=build_info,
                name='-'.join(test_name.split('-')[3:-1]),
                color=COLORS[index])
            traces.extend(trace)
            res.append(rslt)
            index += 1

        if traces:
            # Generate the chart:
            graph["layout"]["xaxis"]["title"] = \
                graph["layout"]["xaxis"]["title"].format(job=job_name)
            name_file = "{0}-{1}{2}".format(spec.cpta["output-file"],
                                            graph["output-file-name"],
                                            spec.cpta["output-file-type"])

            logs.append(("INFO", "    Writing the file '{0}' ...".
                         format(name_file)))
            plpl = plgo.Figure(data=traces, layout=graph["layout"])
            try:
                ploff.plot(plpl, show_link=False, auto_open=False,
                           filename=name_file)
            except plerr.PlotlyEmptyDataError:
                logs.append(("WARNING", "No data for the plot. Skipped."))

        data_out = {
            "job_name": job_name,
            "csv_table": csv_tbl,
            "results": res,
            "logs": logs
        }
        data_q.put(data_out)

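    # Collect the builds per job which can be used for trending, i.e. those
    # whose status is neither "failed" nor "not found":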
    builds_dict = dict()
    for job in spec.input["builds"].keys():
        if builds_dict.get(job, None) is None:
            builds_dict[job] = list()
        for build in spec.input["builds"][job]:
            status = build["status"]
            if status != "failed" and status != "not found":
                builds_dict[job].append(str(build["build"]))

    # Create {"job name": {"build ID": ("date", "version")}} dict:
    build_info = dict()
    for job_name, job_data in builds_dict.items():
        if build_info.get(job_name, None) is None:
            build_info[job_name] = OrderedDict()
        for build in job_data:
            build_info[job_name][build] = (
                input_data.metadata(job_name, build).get("generated", ""),
                input_data.metadata(job_name, build).get("version", "")
            )

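    # At this point build_info looks like, e.g. (hypothetical job name,
    # build number, date and version):
    #     build_info["csit-vpp-perf-mrr-daily-master"]["123"] == \
    #         ("20180601 12:00", "18.07-rc0~123-g1a2b3c4~b123")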
    work_queue = multiprocessing.JoinableQueue()
    manager = multiprocessing.Manager()
    data_queue = manager.Queue()
    cpus = multiprocessing.cpu_count()

    workers = list()
    for cpu in range(cpus):
        worker = Worker(work_queue,
                        data_queue,
                        _generate_chart)
        worker.daemon = True
        worker.start()
        workers.append(worker)
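        # Pin each worker process to its own CPU core; taskset is
        # Linux-specific and its output (and any error) is discarded.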
        os.system("taskset -p -c {0} {1} > /dev/null 2>&1".
                  format(cpu, worker.pid))

    for chart in spec.cpta["plots"]:
        work_queue.put((chart, ))
    work_queue.join()

    anomaly_classifications = list()

    # Create the header:
    csv_tables = dict()
    for job_name in builds_dict.keys():
        if csv_tables.get(job_name, None) is None:
            csv_tables[job_name] = list()
        header = "Build Number:," + ",".join(builds_dict[job_name]) + '\n'
        csv_tables[job_name].append(header)
        build_dates = [x[0] for x in build_info[job_name].values()]
        header = "Build Date:," + ",".join(build_dates) + '\n'
        csv_tables[job_name].append(header)
        versions = [x[1] for x in build_info[job_name].values()]
        header = "Version:," + ",".join(versions) + '\n'
        csv_tables[job_name].append(header)

    while not data_queue.empty():
        result = data_queue.get()

        anomaly_classifications.extend(result["results"])
        csv_tables[result["job_name"]].extend(result["csv_table"])

        for item in result["logs"]:
            if item[0] == "INFO":
                logging.info(item[1])
            elif item[0] == "ERROR":
                logging.error(item[1])
            elif item[0] == "DEBUG":
                logging.debug(item[1])
            elif item[0] == "CRITICAL":
                logging.critical(item[1])
            elif item[0] == "WARNING":
                logging.warning(item[1])

    del data_queue

    # Terminate all workers
    for worker in workers:
        worker.terminate()
        worker.join()

    # Write the tables:
    for job_name, csv_table in csv_tables.items():
        file_name = spec.cpta["output-file"] + "-" + job_name + "-trending"
        with open("{0}.csv".format(file_name), 'w') as file_handler:
            file_handler.writelines(csv_table)

        txt_table = None
        with open("{0}.csv".format(file_name), 'rb') as csv_file:
            csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
            line_nr = 0
            for row in csv_content:
                if txt_table is None:
                    txt_table = prettytable.PrettyTable(row)
                else:
                    if line_nr > 1:
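                        # Scale the raw throughput values down by 1e6
                        # (pps -> Mpps) for the text table; non-numeric
                        # cells are left as they are.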
                        for idx, item in enumerate(row):
                            try:
                                row[idx] = str(round(float(item) / 1000000, 2))
                            except ValueError:
                                pass
                    try:
                        txt_table.add_row(row)
                    except Exception as err:
                        logging.warning("Error occurred while generating TXT "
                                        "table:\n{0}".format(err))
                line_nr += 1
            txt_table.align["Build Number:"] = "l"
        with open("{0}.txt".format(file_name), "w") as txt_file:
            txt_file.write(str(txt_table))

    # Evaluate the overall result: FAIL if any regression or outlier was
    # detected, or if there are no classified results at all.
    if anomaly_classifications:
        result = "PASS"
        for classification in anomaly_classifications:
            if classification == "regression" or classification == "outlier":
                result = "FAIL"
                break
    else:
        result = "FAIL"

    logging.info("Partial results: {0}".format(anomaly_classifications))
    logging.info("Result: {0}".format(result))

    return result