resources/tools/presentation/utils.py

   1 # Copyright (c) 2018 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """General purpose utilities.
  15 """
  16
  17 import multiprocessing
  18 import subprocess
  19 import math
  20 import numpy as np
  21 import logging
  22 import csv
  23 import prettytable
  24
  25 from os import walk, makedirs, environ
  26 from os.path import join, isdir
  27 from shutil import move, Error
  28 from datetime import datetime
  29 from pandas import Series
  30
  31 from errors import PresentationError
  32 from jumpavg.BitCountingClassifier import BitCountingClassifier
  33
  34
  35 def mean(items):
  36     """Calculate mean value from the items.
  37
  38     :param items: Mean value is calculated from these items.
  39     :type items: list
  40     :returns: MEan value.
  41     :rtype: float
  42     """
  43
  44     return float(sum(items)) / len(items)
  45
  46
  47 def stdev(items):
  48     """Calculate stdev from the items.
  49
  50     :param items: Stdev is calculated from these items.
  51     :type items: list
  52     :returns: Stdev.
  53     :rtype: float
  54     """
  55     return Series.std(Series(items))
  56
  57
  58 def relative_change(nr1, nr2):
  59     """Compute relative change of two values.
  60
  61     :param nr1: The first number.
  62     :param nr2: The second number.
  63     :type nr1: float
  64     :type nr2: float
  65     :returns: Relative change of nr1.
  66     :rtype: float
  67     """
  68
  69     return float(((nr2 - nr1) / nr1) * 100)
  70
  71
  72 def relative_change_stdev(mean1, mean2, std1, std2):
  73     """Compute relative standard deviation of change of two values.
  74
  75     The "1" values are the base for comparison.
  76     Results are returned as percentage (and percentual points for stdev).
  77     Linearized theory is used, so results are wrong for relatively large stdev.
  78
  79     :param mean1: Mean of the first number.
  80     :param mean2: Mean of the second number.
  81     :param std1: Standard deviation estimate of the first number.
  82     :param std2: Standard deviation estimate of the second number.
  83     :type mean1: float
  84     :type mean2: float
  85     :type std1: float
  86     :type std2: float
  87     :returns: Relative change and its stdev.
  88     :rtype: float
  89     """
  90     mean1, mean2 = float(mean1), float(mean2)
  91     quotient = mean2 / mean1
  92     first = std1 / mean1
  93     second = std2 / mean2
  94     std = quotient * math.sqrt(first * first + second * second)
  95     return (quotient - 1) * 100, std * 100
  96
  97
  98 def get_files(path, extension=None, full_path=True):
  99     """Generates the list of files to process.
 100
 101     :param path: Path to files.
 102     :param extension: Extension of files to process. If it is the empty string,
 103         all files will be processed.
 104     :param full_path: If True, the files with full path are generated.
 105     :type path: str
 106     :type extension: str
 107     :type full_path: bool
 108     :returns: List of files to process.
 109     :rtype: list
 110     """
 111
 112     file_list = list()
 113     for root, _, files in walk(path):
 114         for filename in files:
 115             if extension:
 116                 if filename.endswith(extension):
 117                     if full_path:
 118                         file_list.append(join(root, filename))
 119                     else:
 120                         file_list.append(filename)
 121             else:
 122                 file_list.append(join(root, filename))
 123
 124     return file_list
 125
 126
 127 def get_rst_title_char(level):
 128     """Return character used for the given title level in rst files.
 129
 130     :param level: Level of the title.
 131     :type: int
 132     :returns: Character used for the given title level in rst files.
 133     :rtype: str
 134     """
 135     chars = ('=', '-', '`', "'", '.', '~', '*', '+', '^')
 136     if level < len(chars):
 137         return chars[level]
 138     else:
 139         return chars[-1]
 140
 141
 142 def execute_command(cmd):
 143     """Execute the command in a subprocess and log the stdout and stderr.
 144
 145     :param cmd: Command to execute.
 146     :type cmd: str
 147     :returns: Return code of the executed command, stdout and stderr.
 148     :rtype: tuple(int, str, str)
 149     """
 150
 151     env = environ.copy()
 152     proc = subprocess.Popen(
 153         [cmd],
 154         stdout=subprocess.PIPE,
 155         stderr=subprocess.PIPE,
 156         shell=True,
 157         env=env)
 158
 159     stdout, stderr = proc.communicate()
 160
 161     if stdout:
 162         logging.info(stdout)
 163     if stderr:
 164         logging.info(stderr)
 165
 166     if proc.returncode != 0:
 167         logging.error("    Command execution failed.")
 168     return proc.returncode, stdout, stderr
 169
 170
 171 def get_last_successful_build_number(jenkins_url, job_name):
 172     """Get the number of the last successful build of the given job.
 173
 174     :param jenkins_url: Jenkins URL.
 175     :param job_name: Job name.
 176     :type jenkins_url: str
 177     :type job_name: str
 178     :returns: The build number as a string.
 179     :rtype: str
 180     """
 181
 182     url = "{}/{}/lastSuccessfulBuild/buildNumber".format(jenkins_url, job_name)
 183     cmd = "wget -qO- {url}".format(url=url)
 184
 185     return execute_command(cmd)
 186
 187
 188 def get_last_completed_build_number(jenkins_url, job_name):
 189     """Get the number of the last completed build of the given job.
 190
 191     :param jenkins_url: Jenkins URL.
 192     :param job_name: Job name.
 193     :type jenkins_url: str
 194     :type job_name: str
 195     :returns: The build number as a string.
 196     :rtype: str
 197     """
 198
 199     url = "{}/{}/lastCompletedBuild/buildNumber".format(jenkins_url, job_name)
 200     cmd = "wget -qO- {url}".format(url=url)
 201
 202     return execute_command(cmd)
 203
 204
 205 def get_build_timestamp(jenkins_url, job_name, build_nr):
 206     """Get the timestamp of the build of the given job.
 207
 208     :param jenkins_url: Jenkins URL.
 209     :param job_name: Job name.
 210     :param build_nr: Build number.
 211     :type jenkins_url: str
 212     :type job_name: str
 213     :type build_nr: int
 214     :returns: The timestamp.
 215     :rtype: datetime.datetime
 216     """
 217
 218     url = "{jenkins_url}/{job_name}/{build_nr}".format(jenkins_url=jenkins_url,
 219                                                        job_name=job_name,
 220                                                        build_nr=build_nr)
 221     cmd = "wget -qO- {url}".format(url=url)
 222
 223     timestamp = execute_command(cmd)
 224
 225     return datetime.fromtimestamp(timestamp/1000)
 226
 227
 228 def archive_input_data(spec):
 229     """Archive the report.
 230
 231     :param spec: Specification read from the specification file.
 232     :type spec: Specification
 233     :raises PresentationError: If it is not possible to archive the input data.
 234     """
 235
 236     logging.info("    Archiving the input data files ...")
 237
 238     extension = spec.input["file-format"]
 239     data_files = get_files(spec.environment["paths"]["DIR[WORKING,DATA]"],
 240                            extension=extension)
 241     dst = spec.environment["paths"]["DIR[STATIC,ARCH]"]
 242     logging.info("      Destination: {0}".format(dst))
 243
 244     try:
 245         if not isdir(dst):
 246             makedirs(dst)
 247
 248         for data_file in data_files:
 249             logging.info("      Moving the file: {0} ...".format(data_file))
 250             move(data_file, dst)
 251
 252     except (Error, OSError) as err:
 253         raise PresentationError("Not possible to archive the input data.",
 254                                 str(err))
 255
 256     logging.info("    Done.")
 257
 258
def classify_anomalies(data):
    """Process the data and return anomalies and trending values.

    Gather data into groups with average as trend value.
    Decorate values within groups to be normal,
    the first value of changed average as a regression, or a progression.

    Samples whose avg attribute is nan are reported as "outlier" and their
    (nan) avg is used as the trend value.

    :param data: Full data set with unavailable samples replaced by nan.
        Each value is expected to provide the attribute avg.
    :type data: OrderedDict
    :returns: Classification and trend values, one item per sample in data.
    :rtype: 2-tuple, list of strings and list of floats
    """
    # Nan means something went wrong.
    # Use 0.0 to cause that being reported as a severe regression.
    bare_data = [0.0 if np.isnan(sample.avg) else sample
                 for _, sample in data.iteritems()]
    # TODO: Put analogous iterator into jumpavg library.
    # The groups returned by the classifier are used below via their
    # attributes values, metadata.avg and metadata.classification.
    groups = BitCountingClassifier().classify(bare_data)
    groups.reverse()  # Just to use .pop() for FIFO.
    classification = []
    avgs = []
    active_group = None  # The group the currently processed samples belong to.
    values_left = 0  # Number of not yet processed values of the active group.
    avg = 0.0  # Trend value (average) of the active group.
    for _, sample in data.iteritems():
        if np.isnan(sample.avg):
            # NOTE(review): nan samples were passed to the classifier as 0.0
            # above, but they are skipped here without decrementing
            # values_left. If the classifier counts them in group.values, the
            # group boundaries drift for the following samples - confirm.
            classification.append("outlier")
            avgs.append(sample.avg)
            continue
        if values_left < 1 or active_group is None:
            # The active group is exhausted (or not set yet), so this sample
            # is the first one of the next non-empty group.
            values_left = 0
            while values_left < 1:  # Ignore empty groups (should not happen).
                active_group = groups.pop()
                values_left = len(active_group.values)
            avg = active_group.metadata.avg
            # Only the first sample of a group carries the classification of
            # the group, the remaining ones are marked as normal below.
            classification.append(active_group.metadata.classification)
            avgs.append(avg)
            values_left -= 1
            continue
        classification.append("normal")
        avgs.append(avg)
        values_left -= 1
    return classification, avgs
 302
 303
 304 def convert_csv_to_pretty_txt(csv_file, txt_file):
 305     """Convert the given csv table to pretty text table.
 306
 307     :param csv_file: The path to the input csv file.
 308     :param txt_file: The path to the output pretty text file.
 309     :type csv_file: str
 310     :type txt_file: str
 311     """
 312
 313     txt_table = None
 314     with open(csv_file, 'rb') as csv_file:
 315         csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
 316         for row in csv_content:
 317             if txt_table is None:
 318                 txt_table = prettytable.PrettyTable(row)
 319             else:
 320                 txt_table.add_row(row)
 321         txt_table.align["Test case"] = "l"
 322     if txt_table:
 323         with open(txt_file, "w") as txt_file:
 324             txt_file.write(str(txt_table))
 325
 326
 327 class Worker(multiprocessing.Process):
 328     """Worker class used to process tasks in separate parallel processes.
 329     """
 330
 331     def __init__(self, work_queue, data_queue, func):
 332         """Initialization.
 333
 334         :param work_queue: Queue with items to process.
 335         :param data_queue: Shared memory between processes. Queue which keeps
 336             the result data. This data is then read by the main process and used
 337             in further processing.
 338         :param func: Function which is executed by the worker.
 339         :type work_queue: multiprocessing.JoinableQueue
 340         :type data_queue: multiprocessing.Manager().Queue()
 341         :type func: Callable object
 342         """
 343         super(Worker, self).__init__()
 344         self._work_queue = work_queue
 345         self._data_queue = data_queue
 346         self._func = func
 347
 348     def run(self):
 349         """Method representing the process's activity.
 350         """
 351
 352         while True:
 353             try:
 354                 self.process(self._work_queue.get())
 355             finally:
 356                 self._work_queue.task_done()
 357
 358     def process(self, item_to_process):
 359         """Method executed by the runner.
 360
 361         :param item_to_process: Data to be processed by the function.
 362         :type item_to_process: tuple
 363         """
 364         self._func(self.pid, self._data_queue, *item_to_process)