resources/tools/presentation/utils.py

   1 # Copyright (c) 2018 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """General purpose utilities.
  15 """
  16
  17 import multiprocessing
  18 import subprocess
  19 import numpy as np
  20 import pandas as pd
  21 import logging
  22 import csv
  23 import prettytable
  24
  25 from os import walk, makedirs, environ
  26 from os.path import join, isdir
  27 from shutil import move, Error
  28 from math import sqrt
  29
  30 from errors import PresentationError
  31
  32
  33 def mean(items):
  34     """Calculate mean value from the items.
  35
  36     :param items: Mean value is calculated from these items.
  37     :type items: list
  38     :returns: MEan value.
  39     :rtype: float
  40     """
  41
  42     return float(sum(items)) / len(items)
  43
  44
  45 def stdev(items):
  46     """Calculate stdev from the items.
  47
  48     :param items: Stdev is calculated from these items.
  49     :type items: list
  50     :returns: Stdev.
  51     :rtype: float
  52     """
  53
  54     avg = mean(items)
  55     variance = [(x - avg) ** 2 for x in items]
  56     stddev = sqrt(mean(variance))
  57     return stddev
  58
  59
  60 def relative_change(nr1, nr2):
  61     """Compute relative change of two values.
  62
  63     :param nr1: The first number.
  64     :param nr2: The second number.
  65     :type nr1: float
  66     :type nr2: float
  67     :returns: Relative change of nr1.
  68     :rtype: float
  69     """
  70
  71     return float(((nr2 - nr1) / nr1) * 100)
  72
  73
  74 def remove_outliers(input_list, outlier_const=1.5, window=14):
  75     """Return list with outliers removed, using split_outliers.
  76
  77     :param input_list: Data from which the outliers will be removed.
  78     :param outlier_const: Outlier constant.
  79     :param window: How many preceding values to take into account.
  80     :type input_list: list of floats
  81     :type outlier_const: float
  82     :type window: int
  83     :returns: The input list without outliers.
  84     :rtype: list of floats
  85     """
  86
  87     data = np.array(input_list)
  88     upper_quartile = np.percentile(data, 75)
  89     lower_quartile = np.percentile(data, 25)
  90     iqr = (upper_quartile - lower_quartile) * outlier_const
  91     quartile_set = (lower_quartile - iqr, upper_quartile + iqr)
  92     result_lst = list()
  93     for y in input_list:
  94         if quartile_set[0] <= y <= quartile_set[1]:
  95             result_lst.append(y)
  96     return result_lst
  97
  98
  99 def split_outliers(input_series, outlier_const=1.5, window=14):
 100     """Go through the input data and generate two pandas series:
 101     - input data with outliers replaced by NAN
 102     - outliers.
 103     The function uses IQR to detect outliers.
 104
 105     :param input_series: Data to be examined for outliers.
 106     :param outlier_const: Outlier constant.
 107     :param window: How many preceding values to take into account.
 108     :type input_series: pandas.Series
 109     :type outlier_const: float
 110     :type window: int
 111     :returns: Input data with NAN outliers and Outliers.
 112     :rtype: (pandas.Series, pandas.Series)
 113     """
 114
 115     list_data = list(input_series.items())
 116     head_size = min(window, len(list_data))
 117     head_list = list_data[:head_size]
 118     trimmed_data = pd.Series()
 119     outliers = pd.Series()
 120     for item_x, item_y in head_list:
 121         item_pd = pd.Series([item_y, ], index=[item_x, ])
 122         trimmed_data = trimmed_data.append(item_pd)
 123     for index, (item_x, item_y) in list(enumerate(list_data))[head_size:]:
 124         y_rolling_list = [y for (x, y) in list_data[index - head_size:index]]
 125         y_rolling_array = np.array(y_rolling_list)
 126         q1 = np.percentile(y_rolling_array, 25)
 127         q3 = np.percentile(y_rolling_array, 75)
 128         iqr = (q3 - q1) * outlier_const
 129         low = q1 - iqr
 130         item_pd = pd.Series([item_y, ], index=[item_x, ])
 131         if low <= item_y:
 132             trimmed_data = trimmed_data.append(item_pd)
 133         else:
 134             outliers = outliers.append(item_pd)
 135             nan_pd = pd.Series([np.nan, ], index=[item_x, ])
 136             trimmed_data = trimmed_data.append(nan_pd)
 137
 138     return trimmed_data, outliers
 139
 140
 141 def get_files(path, extension=None, full_path=True):
 142     """Generates the list of files to process.
 143
 144     :param path: Path to files.
 145     :param extension: Extension of files to process. If it is the empty string,
 146         all files will be processed.
 147     :param full_path: If True, the files with full path are generated.
 148     :type path: str
 149     :type extension: str
 150     :type full_path: bool
 151     :returns: List of files to process.
 152     :rtype: list
 153     """
 154
 155     file_list = list()
 156     for root, _, files in walk(path):
 157         for filename in files:
 158             if extension:
 159                 if filename.endswith(extension):
 160                     if full_path:
 161                         file_list.append(join(root, filename))
 162                     else:
 163                         file_list.append(filename)
 164             else:
 165                 file_list.append(join(root, filename))
 166
 167     return file_list
 168
 169
 170 def get_rst_title_char(level):
 171     """Return character used for the given title level in rst files.
 172
 173     :param level: Level of the title.
 174     :type: int
 175     :returns: Character used for the given title level in rst files.
 176     :rtype: str
 177     """
 178     chars = ('=', '-', '`', "'", '.', '~', '*', '+', '^')
 179     if level < len(chars):
 180         return chars[level]
 181     else:
 182         return chars[-1]
 183
 184
 185 def execute_command(cmd):
 186     """Execute the command in a subprocess and log the stdout and stderr.
 187
 188     :param cmd: Command to execute.
 189     :type cmd: str
 190     :returns: Return code of the executed command.
 191     :rtype: int
 192     """
 193
 194     env = environ.copy()
 195     proc = subprocess.Popen(
 196         [cmd],
 197         stdout=subprocess.PIPE,
 198         stderr=subprocess.PIPE,
 199         shell=True,
 200         env=env)
 201
 202     stdout, stderr = proc.communicate()
 203
 204     if stdout:
 205         logging.info(stdout)
 206     if stderr:
 207         logging.info(stderr)
 208
 209     if proc.returncode != 0:
 210         logging.error("    Command execution failed.")
 211     return proc.returncode, stdout, stderr
 212
 213
 214 def get_last_successful_build_number(jenkins_url, job_name):
 215     """Get the number of the last successful build of the given job.
 216
 217     :param jenkins_url: Jenkins URL.
 218     :param job_name: Job name.
 219     :type jenkins_url: str
 220     :type job_name: str
 221     :returns: The build number as a string.
 222     :rtype: str
 223     """
 224
 225     url = "{}/{}/lastSuccessfulBuild/buildNumber".format(jenkins_url, job_name)
 226     cmd = "wget -qO- {url}".format(url=url)
 227
 228     return execute_command(cmd)
 229
 230
 231 def get_last_completed_build_number(jenkins_url, job_name):
 232     """Get the number of the last completed build of the given job.
 233
 234     :param jenkins_url: Jenkins URL.
 235     :param job_name: Job name.
 236     :type jenkins_url: str
 237     :type job_name: str
 238     :returns: The build number as a string.
 239     :rtype: str
 240     """
 241
 242     url = "{}/{}/lastCompletedBuild/buildNumber".format(jenkins_url, job_name)
 243     cmd = "wget -qO- {url}".format(url=url)
 244
 245     return execute_command(cmd)
 246
 247
 248 def archive_input_data(spec):
 249     """Archive the report.
 250
 251     :param spec: Specification read from the specification file.
 252     :type spec: Specification
 253     :raises PresentationError: If it is not possible to archive the input data.
 254     """
 255
 256     logging.info("    Archiving the input data files ...")
 257
 258     extension = spec.input["file-format"]
 259     data_files = get_files(spec.environment["paths"]["DIR[WORKING,DATA]"],
 260                            extension=extension)
 261     dst = spec.environment["paths"]["DIR[STATIC,ARCH]"]
 262     logging.info("      Destination: {0}".format(dst))
 263
 264     try:
 265         if not isdir(dst):
 266             makedirs(dst)
 267
 268         for data_file in data_files:
 269             logging.info("      Moving the file: {0} ...".format(data_file))
 270             move(data_file, dst)
 271
 272     except (Error, OSError) as err:
 273         raise PresentationError("Not possible to archive the input data.",
 274                                 str(err))
 275
 276     logging.info("    Done.")
 277
 278
 279 def classify_anomalies(data, window):
 280     """Evaluates if the sample value is an outlier, regression, normal or
 281     progression compared to the previous data within the window.
 282     We use the intervals defined as:
 283     - regress: less than trimmed moving median - 3 * stdev
 284     - normal: between trimmed moving median - 3 * stdev and median + 3 * stdev
 285     - progress: more than trimmed moving median + 3 * stdev
 286     where stdev is trimmed moving standard deviation.
 287
 288     :param data: Full data set with the outliers replaced by nan.
 289     :param window: Window size used to calculate moving average and moving
 290         stdev.
 291     :type data: pandas.Series
 292     :type window: int
 293     :returns: Evaluated results.
 294     :rtype: list
 295     """
 296
 297     if data.size < 3:
 298         return None
 299
 300     win_size = data.size if data.size < window else window
 301     tmm = data.rolling(window=win_size, min_periods=2).median()
 302     tmstd = data.rolling(window=win_size, min_periods=2).std()
 303
 304     classification = ["normal", ]
 305     first = True
 306     for build, value in data.iteritems():
 307         if first:
 308             first = False
 309             continue
 310         if np.isnan(value) or np.isnan(tmm[build]) or np.isnan(tmstd[build]):
 311             classification.append("outlier")
 312         elif value < (tmm[build] - 3 * tmstd[build]):
 313             classification.append("regression")
 314         elif value > (tmm[build] + 3 * tmstd[build]):
 315             classification.append("progression")
 316         else:
 317             classification.append("normal")
 318     return classification
 319
 320
 321 def convert_csv_to_pretty_txt(csv_file, txt_file):
 322     """Convert the given csv table to pretty text table.
 323
 324     :param csv_file: The path to the input csv file.
 325     :param txt_file: The path to the output pretty text file.
 326     :type csv_file: str
 327     :type txt_file: str
 328     """
 329
 330     txt_table = None
 331     with open(csv_file, 'rb') as csv_file:
 332         csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
 333         for row in csv_content:
 334             if txt_table is None:
 335                 txt_table = prettytable.PrettyTable(row)
 336             else:
 337                 txt_table.add_row(row)
 338         txt_table.align["Test case"] = "l"
 339     if txt_table:
 340         with open(txt_file, "w") as txt_file:
 341             txt_file.write(str(txt_table))
 342
 343
 344 class Worker(multiprocessing.Process):
 345     """Worker class used to process tasks in separate parallel processes.
 346     """
 347
 348     def __init__(self, work_queue, data_queue, func):
 349         """Initialization.
 350
 351         :param work_queue: Queue with items to process.
 352         :param data_queue: Shared memory between processes. Queue which keeps
 353             the result data. This data is then read by the main process and used
 354             in further processing.
 355         :param func: Function which is executed by the worker.
 356         :type work_queue: multiprocessing.JoinableQueue
 357         :type data_queue: multiprocessing.Manager().Queue()
 358         :type func: Callable object
 359         """
 360         super(Worker, self).__init__()
 361         self._work_queue = work_queue
 362         self._data_queue = data_queue
 363         self._func = func
 364
 365     def run(self):
 366         """Method representing the process's activity.
 367         """
 368
 369         while True:
 370             try:
 371                 self.process(self._work_queue.get())
 372             finally:
 373                 self._work_queue.task_done()
 374
 375     def process(self, item_to_process):
 376         """Method executed by the runner.
 377
 378         :param item_to_process: Data to be processed by the function.
 379         :type item_to_process: tuple
 380         """
 381         self._func(self.pid, self._data_queue, *item_to_process)