resources/tools/presentation/utils.py

   1 # Copyright (c) 2017 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """General purpose utilities.
  15 """
  16
  17 import subprocess
  18 import numpy as np
  19 import pandas as pd
  20 import logging
  21
  22 from os import walk, makedirs, environ
  23 from os.path import join, isdir
  24 from shutil import move, Error
  25 from math import sqrt
  26
  27 from errors import PresentationError
  28
  29
  30 def mean(items):
  31     """Calculate mean value from the items.
  32
  33     :param items: Mean value is calculated from these items.
  34     :type items: list
  35     :returns: MEan value.
  36     :rtype: float
  37     """
  38
  39     return float(sum(items)) / len(items)
  40
  41
  42 def stdev(items):
  43     """Calculate stdev from the items.
  44
  45     :param items: Stdev is calculated from these items.
  46     :type items: list
  47     :returns: Stdev.
  48     :rtype: float
  49     """
  50
  51     avg = mean(items)
  52     variance = [(x - avg) ** 2 for x in items]
  53     stddev = sqrt(mean(variance))
  54     return stddev
  55
  56
  57 def relative_change(nr1, nr2):
  58     """Compute relative change of two values.
  59
  60     :param nr1: The first number.
  61     :param nr2: The second number.
  62     :type nr1: float
  63     :type nr2: float
  64     :returns: Relative change of nr1.
  65     :rtype: float
  66     """
  67
  68     return float(((nr2 - nr1) / nr1) * 100)
  69
  70
  71 def remove_outliers(input_list, outlier_const=1.5, window=14):
  72     """Return list with outliers removed, using split_outliers.
  73
  74     :param input_list: Data from which the outliers will be removed.
  75     :param outlier_const: Outlier constant.
  76     :param window: How many preceding values to take into account.
  77     :type input_list: list of floats
  78     :type outlier_const: float
  79     :type window: int
  80     :returns: The input list without outliers.
  81     :rtype: list of floats
  82     """
  83
  84     data = np.array(input_list)
  85     upper_quartile = np.percentile(data, 75)
  86     lower_quartile = np.percentile(data, 25)
  87     iqr = (upper_quartile - lower_quartile) * outlier_const
  88     quartile_set = (lower_quartile - iqr, upper_quartile + iqr)
  89     result_lst = list()
  90     for y in input_list:
  91         if quartile_set[0] <= y <= quartile_set[1]:
  92             result_lst.append(y)
  93     return result_lst
  94
  95
  96 def split_outliers(input_series, outlier_const=1.5, window=14):
  97     """Go through the input data and generate two pandas series:
  98     - input data with outliers replaced by NAN
  99     - outliers.
 100     The function uses IQR to detect outliers.
 101
 102     :param input_series: Data to be examined for outliers.
 103     :param outlier_const: Outlier constant.
 104     :param window: How many preceding values to take into account.
 105     :type input_series: pandas.Series
 106     :type outlier_const: float
 107     :type window: int
 108     :returns: Input data with NAN outliers and Outliers.
 109     :rtype: (pandas.Series, pandas.Series)
 110     """
 111
 112     list_data = list(input_series.items())
 113     head_size = min(window, len(list_data))
 114     head_list = list_data[:head_size]
 115     trimmed_data = pd.Series()
 116     outliers = pd.Series()
 117     for item_x, item_y in head_list:
 118         item_pd = pd.Series([item_y, ], index=[item_x, ])
 119         trimmed_data = trimmed_data.append(item_pd)
 120     for index, (item_x, item_y) in list(enumerate(list_data))[head_size:]:
 121         y_rolling_list = [y for (x, y) in list_data[index - head_size:index]]
 122         y_rolling_array = np.array(y_rolling_list)
 123         q1 = np.percentile(y_rolling_array, 25)
 124         q3 = np.percentile(y_rolling_array, 75)
 125         iqr = (q3 - q1) * outlier_const
 126         low = q1 - iqr
 127         item_pd = pd.Series([item_y, ], index=[item_x, ])
 128         if low <= item_y:
 129             trimmed_data = trimmed_data.append(item_pd)
 130         else:
 131             outliers = outliers.append(item_pd)
 132             nan_pd = pd.Series([np.nan, ], index=[item_x, ])
 133             trimmed_data = trimmed_data.append(nan_pd)
 134
 135     return trimmed_data, outliers
 136
 137
 138 def get_files(path, extension=None, full_path=True):
 139     """Generates the list of files to process.
 140
 141     :param path: Path to files.
 142     :param extension: Extension of files to process. If it is the empty string,
 143         all files will be processed.
 144     :param full_path: If True, the files with full path are generated.
 145     :type path: str
 146     :type extension: str
 147     :type full_path: bool
 148     :returns: List of files to process.
 149     :rtype: list
 150     """
 151
 152     file_list = list()
 153     for root, _, files in walk(path):
 154         for filename in files:
 155             if extension:
 156                 if filename.endswith(extension):
 157                     if full_path:
 158                         file_list.append(join(root, filename))
 159                     else:
 160                         file_list.append(filename)
 161             else:
 162                 file_list.append(join(root, filename))
 163
 164     return file_list
 165
 166
 167 def get_rst_title_char(level):
 168     """Return character used for the given title level in rst files.
 169
 170     :param level: Level of the title.
 171     :type: int
 172     :returns: Character used for the given title level in rst files.
 173     :rtype: str
 174     """
 175     chars = ('=', '-', '`', "'", '.', '~', '*', '+', '^')
 176     if level < len(chars):
 177         return chars[level]
 178     else:
 179         return chars[-1]
 180
 181
 182 def execute_command(cmd):
 183     """Execute the command in a subprocess and log the stdout and stderr.
 184
 185     :param cmd: Command to execute.
 186     :type cmd: str
 187     :returns: Return code of the executed command.
 188     :rtype: int
 189     """
 190
 191     env = environ.copy()
 192     proc = subprocess.Popen(
 193         [cmd],
 194         stdout=subprocess.PIPE,
 195         stderr=subprocess.PIPE,
 196         shell=True,
 197         env=env)
 198
 199     stdout, stderr = proc.communicate()
 200
 201     logging.info(stdout)
 202     logging.info(stderr)
 203
 204     if proc.returncode != 0:
 205         logging.error("    Command execution failed.")
 206     return proc.returncode, stdout, stderr
 207
 208
 209 def get_last_successful_build_number(jenkins_url, job_name):
 210     """Get the number of the last successful build of the given job.
 211
 212     :param jenkins_url: Jenkins URL.
 213     :param job_name: Job name.
 214     :type jenkins_url: str
 215     :type job_name: str
 216     :returns: The build number as a string.
 217     :rtype: str
 218     """
 219
 220     url = "{}/{}/lastSuccessfulBuild/buildNumber".format(jenkins_url, job_name)
 221     cmd = "wget -qO- {url}".format(url=url)
 222
 223     return execute_command(cmd)
 224
 225
 226 def get_last_completed_build_number(jenkins_url, job_name):
 227     """Get the number of the last completed build of the given job.
 228
 229     :param jenkins_url: Jenkins URL.
 230     :param job_name: Job name.
 231     :type jenkins_url: str
 232     :type job_name: str
 233     :returns: The build number as a string.
 234     :rtype: str
 235     """
 236
 237     url = "{}/{}/lastCompletedBuild/buildNumber".format(jenkins_url, job_name)
 238     cmd = "wget -qO- {url}".format(url=url)
 239
 240     return execute_command(cmd)
 241
 242
 243 def archive_input_data(spec):
 244     """Archive the report.
 245
 246     :param spec: Specification read from the specification file.
 247     :type spec: Specification
 248     :raises PresentationError: If it is not possible to archive the input data.
 249     """
 250
 251     logging.info("    Archiving the input data files ...")
 252
 253     if spec.is_debug:
 254         extension = spec.debug["input-format"]
 255     else:
 256         extension = spec.input["file-format"]
 257     data_files = get_files(spec.environment["paths"]["DIR[WORKING,DATA]"],
 258                            extension=extension)
 259     dst = spec.environment["paths"]["DIR[STATIC,ARCH]"]
 260     logging.info("      Destination: {0}".format(dst))
 261
 262     try:
 263         if not isdir(dst):
 264             makedirs(dst)
 265
 266         for data_file in data_files:
 267             logging.info("      Moving the file: {0} ...".format(data_file))
 268             move(data_file, dst)
 269
 270     except (Error, OSError) as err:
 271         raise PresentationError("Not possible to archive the input data.",
 272                                 str(err))
 273
 274     logging.info("    Done.")