resources/tools/presentation/utils.py

   1 # Copyright (c) 2017 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """General purpose utilities.
  15 """
  16
  17 import subprocess
  18 import numpy as np
  19 import pandas as pd
  20 import logging
  21
  22 from os import walk, makedirs, environ
  23 from os.path import join, isdir
  24 from shutil import copy, Error
  25 from math import sqrt
  26
  27 from errors import PresentationError
  28
  29
  30 def mean(items):
  31     """Calculate mean value from the items.
  32
  33     :param items: Mean value is calculated from these items.
  34     :type items: list
  35     :returns: MEan value.
  36     :rtype: float
  37     """
  38
  39     if len(items):
  40         return float(sum(items)) / len(items)
  41     else:
  42         return None
  43
  44
  45 def stdev(items):
  46     """Calculate stdev from the items.
  47
  48     :param items: Stdev is calculated from these items.
  49     :type items: list
  50     :returns: Stdev.
  51     :rtype: float
  52     """
  53
  54     avg = mean(items)
  55     variance = [(x - avg) ** 2 for x in items]
  56     stddev = sqrt(mean(variance))
  57     return stddev
  58
  59
  60 def relative_change(nr1, nr2):
  61     """Compute relative change of two values.
  62
  63     :param nr1: The first number.
  64     :param nr2: The second number.
  65     :type nr1: float
  66     :type nr2: float
  67     :returns: Relative change of nr1.
  68     :rtype: float
  69     """
  70
  71     return float(((nr2 - nr1) / nr1) * 100)
  72
  73
  74 def remove_outliers(input_list, outlier_const=1.5, window=14):
  75     """Return list with outliers removed, using split_outliers.
  76
  77     :param input_list: Data from which the outliers will be removed.
  78     :param outlier_const: Outlier constant.
  79     :param window: How many preceding values to take into account.
  80     :type input_list: list of floats
  81     :type outlier_const: float
  82     :type window: int
  83     :returns: The input list without outliers.
  84     :rtype: list of floats
  85     """
  86
  87     input_series = pd.Series()
  88     for index, value in enumerate(input_list):
  89         item_pd = pd.Series([value, ], index=[index, ])
  90         input_series.append(item_pd)
  91     output_series, _ = split_outliers(input_series, outlier_const=outlier_const,
  92                                       window=window)
  93     output_list = [y for x, y in output_series.items() if not np.isnan(y)]
  94
  95     return output_list
  96
  97
  98 def split_outliers(input_series, outlier_const=1.5, window=14):
  99     """Go through the input data and generate two pandas series:
 100     - input data with outliers replaced by NAN
 101     - outliers.
 102     The function uses IQR to detect outliers.
 103
 104     :param input_series: Data to be examined for outliers.
 105     :param outlier_const: Outlier constant.
 106     :param window: How many preceding values to take into account.
 107     :type input_series: pandas.Series
 108     :type outlier_const: float
 109     :type window: int
 110     :returns: Input data with NAN outliers and Outliers.
 111     :rtype: (pandas.Series, pandas.Series)
 112     """
 113
 114     list_data = list(input_series.items())
 115     head_size = min(window, len(list_data))
 116     head_list = list_data[:head_size]
 117     trimmed_data = pd.Series()
 118     outliers = pd.Series()
 119     for item_x, item_y in head_list:
 120         item_pd = pd.Series([item_y, ], index=[item_x, ])
 121         trimmed_data = trimmed_data.append(item_pd)
 122     for index, (item_x, item_y) in list(enumerate(list_data))[head_size:]:
 123         y_rolling_list = [y for (x, y) in list_data[index - head_size:index]]
 124         y_rolling_array = np.array(y_rolling_list)
 125         q1 = np.percentile(y_rolling_array, 25)
 126         q3 = np.percentile(y_rolling_array, 75)
 127         iqr = (q3 - q1) * outlier_const
 128         low, high = q1 - iqr, q3 + iqr
 129         item_pd = pd.Series([item_y, ], index=[item_x, ])
 130         if low <= item_y <= high:
 131             trimmed_data = trimmed_data.append(item_pd)
 132         else:
 133             outliers = outliers.append(item_pd)
 134             nan_pd = pd.Series([np.nan, ], index=[item_x, ])
 135             trimmed_data = trimmed_data.append(nan_pd)
 136
 137     return trimmed_data, outliers
 138
 139
 140 def get_files(path, extension=None, full_path=True):
 141     """Generates the list of files to process.
 142
 143     :param path: Path to files.
 144     :param extension: Extension of files to process. If it is the empty string,
 145     all files will be processed.
 146     :param full_path: If True, the files with full path are generated.
 147     :type path: str
 148     :type extension: str
 149     :type full_path: bool
 150     :returns: List of files to process.
 151     :rtype: list
 152     """
 153
 154     file_list = list()
 155     for root, _, files in walk(path):
 156         for filename in files:
 157             if extension:
 158                 if filename.endswith(extension):
 159                     if full_path:
 160                         file_list.append(join(root, filename))
 161                     else:
 162                         file_list.append(filename)
 163             else:
 164                 file_list.append(join(root, filename))
 165
 166     return file_list
 167
 168
 169 def get_rst_title_char(level):
 170     """Return character used for the given title level in rst files.
 171
 172     :param level: Level of the title.
 173     :type: int
 174     :returns: Character used for the given title level in rst files.
 175     :rtype: str
 176     """
 177     chars = ('=', '-', '`', "'", '.', '~', '*', '+', '^')
 178     if level < len(chars):
 179         return chars[level]
 180     else:
 181         return chars[-1]
 182
 183
 184 def execute_command(cmd):
 185     """Execute the command in a subprocess and log the stdout and stderr.
 186
 187     :param cmd: Command to execute.
 188     :type cmd: str
 189     :returns: Return code of the executed command.
 190     :rtype: int
 191     """
 192
 193     env = environ.copy()
 194     proc = subprocess.Popen(
 195         [cmd],
 196         stdout=subprocess.PIPE,
 197         stderr=subprocess.PIPE,
 198         shell=True,
 199         env=env)
 200
 201     stdout, stderr = proc.communicate()
 202
 203     logging.info(stdout)
 204     logging.info(stderr)
 205
 206     if proc.returncode != 0:
 207         logging.error("    Command execution failed.")
 208     return proc.returncode, stdout, stderr
 209
 210
 211 def get_last_successful_build_number(jenkins_url, job_name):
 212     """Get the number of the last successful build of the given job.
 213
 214     :param jenkins_url: Jenkins URL.
 215     :param job_name: Job name.
 216     :type jenkins_url: str
 217     :type job_name: str
 218     :returns: The build number as a string.
 219     :rtype: str
 220     """
 221
 222     url = "{}/{}/lastSuccessfulBuild/buildNumber".format(jenkins_url, job_name)
 223     cmd = "wget -qO- {url}".format(url=url)
 224
 225     return execute_command(cmd)
 226
 227
 228 def get_last_completed_build_number(jenkins_url, job_name):
 229     """Get the number of the last completed build of the given job.
 230
 231     :param jenkins_url: Jenkins URL.
 232     :param job_name: Job name.
 233     :type jenkins_url: str
 234     :type job_name: str
 235     :returns: The build number as a string.
 236     :rtype: str
 237     """
 238
 239     url = "{}/{}/lastCompletedBuild/buildNumber".format(jenkins_url, job_name)
 240     cmd = "wget -qO- {url}".format(url=url)
 241
 242     return execute_command(cmd)
 243
 244
 245 def archive_input_data(spec):
 246     """Archive the report.
 247
 248     :param spec: Specification read from the specification file.
 249     :type spec: Specification
 250     :raises PresentationError: If it is not possible to archive the input data.
 251     """
 252
 253     logging.info("    Archiving the input data files ...")
 254
 255     if spec.is_debug:
 256         extension = spec.debug["input-format"]
 257     else:
 258         extension = spec.input["file-format"]
 259     data_files = get_files(spec.environment["paths"]["DIR[WORKING,DATA]"],
 260                            extension=extension)
 261     dst = spec.environment["paths"]["DIR[STATIC,ARCH]"]
 262     logging.info("      Destination: {0}".format(dst))
 263
 264     try:
 265         if not isdir(dst):
 266             makedirs(dst)
 267
 268         for data_file in data_files:
 269             logging.info("      Copying the file: {0} ...".format(data_file))
 270             copy(data_file, dst)
 271
 272     except (Error, OSError) as err:
 273         raise PresentationError("Not possible to archive the input data.",
 274                                 str(err))
 275
 276     logging.info("    Done.")