FIX: PAL: Keep increasing index on error
csit.git: resources/tools/presentation/generator_CPTA.py
# Copyright (c) 2018 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

14 """Generation of Continuous Performance Trending and Analysis.
15 """
16
17 import multiprocessing
18 import os
19 import logging
20 import csv
21 import prettytable
22 import plotly.offline as ploff
23 import plotly.graph_objs as plgo
24 import plotly.exceptions as plerr
25
26 from collections import OrderedDict
27 from datetime import datetime
28
29 from utils import archive_input_data, execute_command, \
30     classify_anomalies, Worker
31
32
33 # Command to build the html format of the report
34 HTML_BUILDER = 'sphinx-build -v -c conf_cpta -a ' \
35                '-b html -E ' \
36                '-t html ' \
37                '-D version="{date}" ' \
38                '{working_dir} ' \
39                '{build_dir}/'
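
# With hypothetical paths, the formatted command expands to something like
# (illustration only; the real values come from the specification file):
#
#     sphinx-build -v -c conf_cpta -a -b html -E -t html \
#         -D version="10/05/2018 12:00 UTC" _tmp/src _build/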

# .css file for the html format of the report
THEME_OVERRIDES = """/* override table width restrictions */
.wy-nav-content {
    max-width: 1200px !important;
}
"""

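# Colors are assigned to chart traces by test index; when a chart contains
# more tests than colors, COLORS[index] raises IndexError, which is handled
# (and logged) in _generate_all_charts.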
COLORS = ["SkyBlue", "Olive", "Purple", "Coral", "Indigo", "Pink",
          "Chocolate", "Brown", "Magenta", "Cyan", "Orange", "Black",
          "Violet", "Blue", "Yellow"]


def generate_cpta(spec, data):
    """Generate all formats and versions of the Continuous Performance Trending
    and Analysis.

    :param spec: Specification read from the specification file.
    :param data: Full data set.
    :type spec: Specification
    :type data: InputData
    :returns: The result of the trend evaluation, "PASS" or "FAIL".
    :rtype: str
    """

    logging.info("Generating the Continuous Performance Trending and Analysis "
                 "...")

    ret_code = _generate_all_charts(spec, data)

    cmd = HTML_BUILDER.format(
        date=datetime.utcnow().strftime('%m/%d/%Y %H:%M UTC'),
        working_dir=spec.environment["paths"]["DIR[WORKING,SRC]"],
        build_dir=spec.environment["paths"]["DIR[BUILD,HTML]"])
    execute_command(cmd)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    with open(spec.environment["paths"]["DIR[CSS_PATCH_FILE2]"], "w") as \
            css_file:
        css_file.write(THEME_OVERRIDES)

    archive_input_data(spec)

    logging.info("Done.")

    return ret_code


def _generate_trending_traces(in_data, job_name, build_info,
                              show_trend_line=True, name="", color=""):
    """Generate the trending traces:
     - samples,
     - outliers, regressions, progressions,
     - average of normal samples (trend line).

    :param in_data: Full data set.
    :param job_name: The name of the job which generated the data.
    :param build_info: Information about the builds.
    :param show_trend_line: Show the trend line (average of normal samples).
    :param name: Name of the plot.
    :param color: Name of the color for the plot.
    :type in_data: OrderedDict
    :type job_name: str
    :type build_info: dict
    :type show_trend_line: bool
    :type name: str
    :type color: str
    :returns: Generated traces (list) and the classification of the last
        sample.
    :rtype: tuple(traces, result)
    """

    data_x = list(in_data.keys())
    data_y = list(in_data.values())

    hover_text = list()
    xaxis = list()
    hover_str = ("date: {0}<br>"
                 "value: {1:,}<br>"
                 "{2}-ref: {3}<br>"
                 "csit-ref: mrr-{4}-build-{5}")
    for idx in data_x:
        date = build_info[job_name][str(idx)][0]
        if "dpdk" in job_name:
            hover_text.append(hover_str.format(
                date,
                int(in_data[idx].avg),
                "dpdk",
                build_info[job_name][str(idx)][1].rsplit('~', 1)[0],
                "weekly",
                idx))
        elif "vpp" in job_name:
            hover_text.append(hover_str.format(
                date,
                int(in_data[idx].avg),
                "vpp",
                build_info[job_name][str(idx)][1].rsplit('~', 1)[0],
                "daily",
                idx))

        # The "generated" timestamp has the format "YYYYMMDD HH:MM".
        xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
                              int(date[9:11]), int(date[12:])))

    data_pd = OrderedDict()
    for key, value in zip(xaxis, data_y):
        data_pd[key] = value

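    # classify_anomalies (from utils) is assumed to return one classification
    # per sample ("normal", "outlier", "regression" or "progression") plus
    # the moving average of the normal samples; both are consumed below.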
    anomaly_classification, avgs = classify_anomalies(data_pd)

    anomalies = OrderedDict()
    anomalies_colors = list()
    anomalies_avgs = list()
    anomaly_color = {
        "regression": 0.0,
        "normal": 0.5,
        "progression": 1.0
    }
    if anomaly_classification:
        for idx, (key, value) in enumerate(data_pd.iteritems()):
            if anomaly_classification[idx] in \
                    ("outlier", "regression", "progression"):
                anomalies[key] = value
                anomalies_colors.append(
                    anomaly_color[anomaly_classification[idx]])
                anomalies_avgs.append(avgs[idx])
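        # Appending the three reference values pins plotly's color
        # normalisation to the full 0.0-1.0 range even when only one anomaly
        # class occurs, keeping the marker colors stable across charts.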
        anomalies_colors.extend([0.0, 0.5, 1.0])

    # Create traces

    trace_samples = plgo.Scatter(
        x=xaxis,
        y=[y.avg for y in data_y],
        mode='markers',
        line={
            "width": 1
        },
        showlegend=True,
        legendgroup=name,
        name="{name}".format(name=name),
        marker={
            "size": 5,
            "color": color,
            "symbol": "circle",
        },
        text=hover_text,
        hoverinfo="text"
    )
    traces = [trace_samples, ]

    if show_trend_line:
        trace_trend = plgo.Scatter(
            x=xaxis,
            y=avgs,
            mode='lines',
            line={
                "shape": "linear",
                "width": 1,
                "color": color,
            },
            showlegend=False,
            legendgroup=name,
            name='{name}'.format(name=name),
            text=["trend: {0:,}".format(int(avg)) for avg in avgs],
            hoverinfo="text+name"
        )
        traces.append(trace_trend)

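    # The color scale below is split into thirds: values near 0.0 render red
    # (regression), near 0.5 white (normal) and near 1.0 green (progression),
    # matching the anomaly_color encoding above.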
    trace_anomalies = plgo.Scatter(
        x=anomalies.keys(),
        y=anomalies_avgs,
        mode='markers',
        hoverinfo="none",
        showlegend=False,
        legendgroup=name,
        name="{name}-anomalies".format(name=name),
        marker={
            "size": 15,
            "symbol": "circle-open",
            "color": anomalies_colors,
            "colorscale": [[0.00, "red"],
                           [0.33, "red"],
                           [0.33, "white"],
                           [0.66, "white"],
                           [0.66, "green"],
                           [1.00, "green"]],
            "showscale": True,
            "line": {
                "width": 2
            },
            "colorbar": {
                "y": 0.5,
                "len": 0.8,
                "title": "Circles Marking Data Classification",
                "titleside": 'right',
                "titlefont": {
                    "size": 14
                },
                "tickmode": 'array',
                "tickvals": [0.167, 0.500, 0.833],
                "ticktext": ["Regression", "Normal", "Progression"],
                "ticks": "",
                "ticklen": 0,
                "tickangle": -90,
                "thickness": 10
            }
        }
    )
    traces.append(trace_anomalies)

    if anomaly_classification:
        return traces, anomaly_classification[-1]
    else:
        return traces, None
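
# A minimal sketch of the input expected by _generate_trending_traces: an
# OrderedDict keyed by build number whose values expose an ``avg`` attribute
# (names below are hypothetical; the real objects come from InputData):
#
#     in_data = OrderedDict([(1700, rate_stats_a), (1701, rate_stats_b)])
#     traces, last_classification = _generate_trending_traces(
#         in_data, job_name=job_name, build_info=build_info,
#         name="64b-1t1c-l2bd", color=COLORS[0])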


def _generate_all_charts(spec, input_data):
    """Generate all charts specified in the specification file.

    :param spec: Specification.
    :param input_data: Full data set.
    :type spec: Specification
    :type input_data: InputData
    :returns: The result of the trend evaluation, "PASS" or "FAIL".
    :rtype: str
    """

    def _generate_chart(_, data_q, graph):
        """Generates the chart."""

        logs = list()

        logging.info("  Generating the chart '{0}' ...".
                     format(graph.get("title", "")))
        logs.append(("INFO", "  Generating the chart '{0}' ...".
                     format(graph.get("title", ""))))

        job_name = graph["data"].keys()[0]

        csv_tbl = list()
        res = list()

        # Transform the data
        logs.append(("INFO", "    Creating the data set for the {0} '{1}'.".
                     format(graph.get("type", ""), graph.get("title", ""))))
        data = input_data.filter_data(graph, continue_on_error=True)
        if data is None:
            logging.error("No data.")
            return

        chart_data = dict()
        for job, job_data in data.iteritems():
            if job != job_name:
                continue
            for index, bld in job_data.items():
                for test_name, test in bld.items():
                    if chart_data.get(test_name, None) is None:
                        chart_data[test_name] = OrderedDict()
                    try:
                        chart_data[test_name][int(index)] = \
                            test["result"]["receive-rate"]
                    except (KeyError, TypeError):
                        # Tests without a receive rate are skipped.
                        pass

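        # chart_data now maps each test name to an OrderedDict of build
        # number -> receive-rate statistics (objects exposing an ``avg``
        # attribute, used below).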
        # Add items to the csv table:
        for tst_name, tst_data in chart_data.items():
            tst_lst = list()
            for bld in builds_dict[job_name]:
                itm = tst_data.get(int(bld), '')
                if not isinstance(itm, str):
                    itm = itm.avg
                tst_lst.append(str(itm))
            csv_tbl.append("{0},".format(tst_name) + ",".join(tst_lst) + '\n')

        # Generate traces:
        traces = list()
        index = 0
        for test_name, test_data in chart_data.items():
            if not test_data:
                logs.append(("WARNING", "No data for the test '{0}'".
                             format(test_name)))
                continue
            message = "index: {index}, test: {test}".format(
                index=index, test=test_name)
            test_name = test_name.split('.')[-1]
            try:
                trace, rslt = _generate_trending_traces(
                    test_data,
                    job_name=job_name,
                    build_info=build_info,
                    name='-'.join(test_name.split('-')[2:-1]),
                    color=COLORS[index])
            except IndexError:
                message = "Out of colors: {}".format(message)
                logs.append(("ERROR", message))
                logging.error(message)
                # Keep increasing the index even on error, so the positions
                # reported for the remaining tests stay correct.
                index += 1
                continue
            traces.extend(trace)
            res.append(rslt)
            index += 1

        if traces:
            # Generate the chart:
            graph["layout"]["xaxis"]["title"] = \
                graph["layout"]["xaxis"]["title"].format(job=job_name)
            name_file = "{0}-{1}{2}".format(spec.cpta["output-file"],
                                            graph["output-file-name"],
                                            spec.cpta["output-file-type"])

            logs.append(("INFO", "    Writing the file '{0}' ...".
                         format(name_file)))
            plpl = plgo.Figure(data=traces, layout=graph["layout"])
            try:
                ploff.plot(plpl, show_link=False, auto_open=False,
                           filename=name_file)
            except plerr.PlotlyEmptyDataError:
                logs.append(("WARNING", "No data for the plot. Skipped."))

        data_out = {
            "job_name": job_name,
            "csv_table": csv_tbl,
            "results": res,
            "logs": logs
        }
        data_q.put(data_out)

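    # Collect, per job, only the builds whose status is neither "failed" nor
    # "not found":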
    builds_dict = dict()
    for job in spec.input["builds"].keys():
        if builds_dict.get(job, None) is None:
            builds_dict[job] = list()
        for build in spec.input["builds"][job]:
            status = build["status"]
            if status not in ("failed", "not found"):
                builds_dict[job].append(str(build["build"]))

    # Create a "build ID": ("build date", "version") dict:
    build_info = dict()
    for job_name, job_data in builds_dict.items():
        if build_info.get(job_name, None) is None:
            build_info[job_name] = OrderedDict()
        for build in job_data:
            build_info[job_name][build] = (
                input_data.metadata(job_name, build).get("generated", ""),
                input_data.metadata(job_name, build).get("version", "")
            )

    work_queue = multiprocessing.JoinableQueue()
    manager = multiprocessing.Manager()
    data_queue = manager.Queue()
    cpus = multiprocessing.cpu_count()

    workers = list()
    for cpu in range(cpus):
        worker = Worker(work_queue,
                        data_queue,
                        _generate_chart)
        worker.daemon = True
        worker.start()
        workers.append(worker)
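        # Pin each worker process to its own CPU core; taskset failures are
        # intentionally silenced.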
        os.system("taskset -p -c {0} {1} > /dev/null 2>&1".
                  format(cpu, worker.pid))

    for chart in spec.cpta["plots"]:
        work_queue.put((chart, ))
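    # Block until every queued chart has been processed: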
    work_queue.join()

    anomaly_classifications = list()

    # Create the header:
    csv_tables = dict()
    for job_name in builds_dict.keys():
        if csv_tables.get(job_name, None) is None:
            csv_tables[job_name] = list()
        header = "Build Number:," + ",".join(builds_dict[job_name]) + '\n'
        csv_tables[job_name].append(header)
        build_dates = [x[0] for x in build_info[job_name].values()]
        header = "Build Date:," + ",".join(build_dates) + '\n'
        csv_tables[job_name].append(header)
        versions = [x[1] for x in build_info[job_name].values()]
        header = "Version:," + ",".join(versions) + '\n'
        csv_tables[job_name].append(header)

    while not data_queue.empty():
        result = data_queue.get()

        anomaly_classifications.extend(result["results"])
        csv_tables[result["job_name"]].extend(result["csv_table"])

        # Replay the log messages recorded by the worker processes:
        for item in result["logs"]:
            if item[0] == "INFO":
                logging.info(item[1])
            elif item[0] == "ERROR":
                logging.error(item[1])
            elif item[0] == "DEBUG":
                logging.debug(item[1])
            elif item[0] == "CRITICAL":
                logging.critical(item[1])
            elif item[0] == "WARNING":
                logging.warning(item[1])

    del data_queue

    # Terminate all workers
    for worker in workers:
        worker.terminate()
        worker.join()

    # Write the tables:
    for job_name, csv_table in csv_tables.items():
        file_name = spec.cpta["output-file"] + "-" + job_name + "-trending"
        with open("{0}.csv".format(file_name), 'w') as file_handler:
            file_handler.writelines(csv_table)

        txt_table = None
        with open("{0}.csv".format(file_name), 'rb') as csv_file:
            csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
            line_nr = 0
            for row in csv_content:
                if txt_table is None:
                    txt_table = prettytable.PrettyTable(row)
                else:
                    if line_nr > 1:
                        # Scale the rates from pps to Mpps:
                        for idx, item in enumerate(row):
                            try:
                                row[idx] = str(round(float(item) / 1000000, 2))
                            except ValueError:
                                pass
                    try:
                        txt_table.add_row(row)
                    except Exception as err:
                        logging.warning("Error occurred while generating TXT "
                                        "table:\n{0}".format(err))
                line_nr += 1
            txt_table.align["Build Number:"] = "l"
        with open("{0}.txt".format(file_name), "w") as txt_file:
            txt_file.write(str(txt_table))

    # Evaluate result:
    if anomaly_classifications:
        result = "PASS"
        for classification in anomaly_classifications:
            if classification in ("regression", "outlier"):
                result = "FAIL"
                break
    else:
        result = "FAIL"

    logging.info("Partial results: {0}".format(anomaly_classifications))
    logging.info("Result: {0}".format(result))

    return result
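
# A minimal usage sketch (illustration only; in CSIT this module is driven
# by the PAL entry point, which builds the specification and input data;
# the constructor calls below are hypothetical):
#
#     spec = Specification(...)            # parsed specification file
#     data = InputData(...)                # full parsed data set
#     result = generate_cpta(spec, data)   # "PASS" or "FAIL"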