resources/tools/presentation/utils.py

   1 # Copyright (c) 2017 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """General purpose utilities.
  15 """
  16
  17 import subprocess
  18 import numpy as np
  19 import pandas as pd
  20 import logging
  21
  22 from os import walk, makedirs, environ
  23 from os.path import join, isdir
  24 from shutil import copy, Error
  25 from math import sqrt
  26
  27 from errors import PresentationError
  28
  29
  30 def mean(items):
  31     """Calculate mean value from the items.
  32
  33     :param items: Mean value is calculated from these items.
  34     :type items: list
  35     :returns: MEan value.
  36     :rtype: float
  37     """
  38
  39     return float(sum(items)) / len(items)
  40
  41
  42 def stdev(items):
  43     """Calculate stdev from the items.
  44
  45     :param items: Stdev is calculated from these items.
  46     :type items: list
  47     :returns: Stdev.
  48     :rtype: float
  49     """
  50
  51     avg = mean(items)
  52     variance = [(x - avg) ** 2 for x in items]
  53     stddev = sqrt(mean(variance))
  54     return stddev
  55
  56
  57 def relative_change(nr1, nr2):
  58     """Compute relative change of two values.
  59
  60     :param nr1: The first number.
  61     :param nr2: The second number.
  62     :type nr1: float
  63     :type nr2: float
  64     :returns: Relative change of nr1.
  65     :rtype: float
  66     """
  67
  68     return float(((nr2 - nr1) / nr1) * 100)
  69
  70 def remove_outliers(input_list, outlier_const=1.5, window=14):
  71     """Return list with outliers removed, using split_outliers.
  72
  73     :param input_list: Data from which the outliers will be removed.
  74     :param outlier_const: Outlier constant.
  75     :param window: How many preceding values to take into account.
  76     :type input_list: list of floats
  77     :type outlier_const: float
  78     :type window: int
  79     :returns: The input list without outliers.
  80     :rtype: list of floats
  81     """
  82
  83     input_series = pd.Series()
  84     for index, value in enumerate(input_list):
  85         item_pd = pd.Series([value, ], index=[index, ])
  86         input_series.append(item_pd)
  87     output_series, _ = split_outliers(input_series, outlier_const=outlier_const,
  88                                       window=window)
  89     output_list = [y for x, y in output_series.items() if not np.isnan(y)]
  90
  91     return output_list
  92
  93
  94 def split_outliers(input_series, outlier_const=1.5, window=14):
  95     """Go through the input data and generate two pandas series:
  96     - input data with outliers replaced by NAN
  97     - outliers.
  98     The function uses IQR to detect outliers.
  99
 100     :param input_series: Data to be examined for outliers.
 101     :param outlier_const: Outlier constant.
 102     :param window: How many preceding values to take into account.
 103     :type input_series: pandas.Series
 104     :type outlier_const: float
 105     :type window: int
 106     :returns: Input data with NAN outliers and Outliers.
 107     :rtype: (pandas.Series, pandas.Series)
 108     """
 109
 110     list_data = list(input_series.items())
 111     head_size = min(window, len(list_data))
 112     head_list = list_data[:head_size]
 113     trimmed_data = pd.Series()
 114     outliers = pd.Series()
 115     for item_x, item_y in head_list:
 116         item_pd = pd.Series([item_y, ], index=[item_x, ])
 117         trimmed_data = trimmed_data.append(item_pd)
 118     for index, (item_x, item_y) in list(enumerate(list_data))[head_size:]:
 119         y_rolling_list = [y for (x, y) in list_data[index - head_size:index]]
 120         y_rolling_array = np.array(y_rolling_list)
 121         q1 = np.percentile(y_rolling_array, 25)
 122         q3 = np.percentile(y_rolling_array, 75)
 123         iqr = (q3 - q1) * outlier_const
 124         low, high = q1 - iqr, q3 + iqr
 125         item_pd = pd.Series([item_y, ], index=[item_x, ])
 126         if low <= item_y <= high:
 127             trimmed_data = trimmed_data.append(item_pd)
 128         else:
 129             outliers = outliers.append(item_pd)
 130             nan_pd = pd.Series([np.nan, ], index=[item_x, ])
 131             trimmed_data = trimmed_data.append(nan_pd)
 132
 133     return trimmed_data, outliers
 134
 135
 136 def get_files(path, extension=None, full_path=True):
 137     """Generates the list of files to process.
 138
 139     :param path: Path to files.
 140     :param extension: Extension of files to process. If it is the empty string,
 141     all files will be processed.
 142     :param full_path: If True, the files with full path are generated.
 143     :type path: str
 144     :type extension: str
 145     :type full_path: bool
 146     :returns: List of files to process.
 147     :rtype: list
 148     """
 149
 150     file_list = list()
 151     for root, _, files in walk(path):
 152         for filename in files:
 153             if extension:
 154                 if filename.endswith(extension):
 155                     if full_path:
 156                         file_list.append(join(root, filename))
 157                     else:
 158                         file_list.append(filename)
 159             else:
 160                 file_list.append(join(root, filename))
 161
 162     return file_list
 163
 164
 165 def get_rst_title_char(level):
 166     """Return character used for the given title level in rst files.
 167
 168     :param level: Level of the title.
 169     :type: int
 170     :returns: Character used for the given title level in rst files.
 171     :rtype: str
 172     """
 173     chars = ('=', '-', '`', "'", '.', '~', '*', '+', '^')
 174     if level < len(chars):
 175         return chars[level]
 176     else:
 177         return chars[-1]
 178
 179
 180 def execute_command(cmd):
 181     """Execute the command in a subprocess and log the stdout and stderr.
 182
 183     :param cmd: Command to execute.
 184     :type cmd: str
 185     :returns: Return code of the executed command.
 186     :rtype: int
 187     """
 188
 189     env = environ.copy()
 190     proc = subprocess.Popen(
 191         [cmd],
 192         stdout=subprocess.PIPE,
 193         stderr=subprocess.PIPE,
 194         shell=True,
 195         env=env)
 196
 197     stdout, stderr = proc.communicate()
 198
 199     logging.info(stdout)
 200     logging.info(stderr)
 201
 202     if proc.returncode != 0:
 203         logging.error("    Command execution failed.")
 204     return proc.returncode, stdout, stderr
 205
 206
 207 def get_last_successful_build_number(jenkins_url, job_name):
 208     """Get the number of the last successful build of the given job.
 209
 210     :param jenkins_url: Jenkins URL.
 211     :param job_name: Job name.
 212     :type jenkins_url: str
 213     :type job_name: str
 214     :returns: The build number as a string.
 215     :rtype: str
 216     """
 217
 218     url = "{}/{}/lastSuccessfulBuild/buildNumber".format(jenkins_url, job_name)
 219     cmd = "wget -qO- {url}".format(url=url)
 220
 221     return execute_command(cmd)
 222
 223
 224 def get_last_completed_build_number(jenkins_url, job_name):
 225     """Get the number of the last completed build of the given job.
 226
 227     :param jenkins_url: Jenkins URL.
 228     :param job_name: Job name.
 229     :type jenkins_url: str
 230     :type job_name: str
 231     :returns: The build number as a string.
 232     :rtype: str
 233     """
 234
 235     url = "{}/{}/lastCompletedBuild/buildNumber".format(jenkins_url, job_name)
 236     cmd = "wget -qO- {url}".format(url=url)
 237
 238     return execute_command(cmd)
 239
 240
 241 def archive_input_data(spec):
 242     """Archive the report.
 243
 244     :param spec: Specification read from the specification file.
 245     :type spec: Specification
 246     :raises PresentationError: If it is not possible to archive the input data.
 247     """
 248
 249     logging.info("    Archiving the input data files ...")
 250
 251     if spec.is_debug:
 252         extension = spec.debug["input-format"]
 253     else:
 254         extension = spec.input["file-format"]
 255     data_files = get_files(spec.environment["paths"]["DIR[WORKING,DATA]"],
 256                            extension=extension)
 257     dst = spec.environment["paths"]["DIR[STATIC,ARCH]"]
 258     logging.info("      Destination: {0}".format(dst))
 259
 260     try:
 261         if not isdir(dst):
 262             makedirs(dst)
 263
 264         for data_file in data_files:
 265             logging.info("      Copying the file: {0} ...".format(data_file))
 266             copy(data_file, dst)
 267
 268     except (Error, OSError) as err:
 269         raise PresentationError("Not possible to archive the input data.",
 270                                 str(err))
 271
 272     logging.info("    Done.")