CSIT-1504: Soak tests - box plots

[csit.git] / resources / tools / presentation / utils.py
diff --git a/resources/tools/presentation/utils.py b/resources/tools/presentation/utils.py

index f32019d..c350fae 100644 (file)
--- a/resources/tools/presentation/utils.py
+++ b/resources/tools/presentation/utils.py
@@ -17,15 +17,18 @@
  import multiprocessing
  import subprocess
  import numpy as np
  import multiprocessing
  import subprocess
  import numpy as np
-import pandas as pd
  import logging
  import logging
+import csv
+import prettytable
  
  from os import walk, makedirs, environ
  from os.path import join, isdir
  from shutil import move, Error
  
  from os import walk, makedirs, environ
  from os.path import join, isdir
  from shutil import move, Error
-from math import sqrt
+from datetime import datetime
+from pandas import Series
  
  from errors import PresentationError
  
  from errors import PresentationError
+from jumpavg.BitCountingClassifier import BitCountingClassifier
  
  
  def mean(items):
  
  
  def mean(items):
@@ -48,11 +51,7 @@ def stdev(items):
      :returns: Stdev.
      :rtype: float
      """
      :returns: Stdev.
      :rtype: float
      """
-
-    avg = mean(items)
-    variance = [(x - avg) ** 2 for x in items]
-    stddev = sqrt(mean(variance))
-    return stddev
+    return Series.std(Series(items))
  
  
  def relative_change(nr1, nr2):
  
  
  def relative_change(nr1, nr2):
@@ -69,73 +68,6 @@ def relative_change(nr1, nr2):
      return float(((nr2 - nr1) / nr1) * 100)
  
  
      return float(((nr2 - nr1) / nr1) * 100)
  
  
-def remove_outliers(input_list, outlier_const=1.5, window=14):
-    """Return list with outliers removed, using split_outliers.
-
-    :param input_list: Data from which the outliers will be removed.
-    :param outlier_const: Outlier constant.
-    :param window: How many preceding values to take into account.
-    :type input_list: list of floats
-    :type outlier_const: float
-    :type window: int
-    :returns: The input list without outliers.
-    :rtype: list of floats
-    """
-
-    data = np.array(input_list)
-    upper_quartile = np.percentile(data, 75)
-    lower_quartile = np.percentile(data, 25)
-    iqr = (upper_quartile - lower_quartile) * outlier_const
-    quartile_set = (lower_quartile - iqr, upper_quartile + iqr)
-    result_lst = list()
-    for y in input_list:
-        if quartile_set[0] <= y <= quartile_set[1]:
-            result_lst.append(y)
-    return result_lst
-
-
-def split_outliers(input_series, outlier_const=1.5, window=14):
-    """Go through the input data and generate two pandas series:
-    - input data with outliers replaced by NAN
-    - outliers.
-    The function uses IQR to detect outliers.
-
-    :param input_series: Data to be examined for outliers.
-    :param outlier_const: Outlier constant.
-    :param window: How many preceding values to take into account.
-    :type input_series: pandas.Series
-    :type outlier_const: float
-    :type window: int
-    :returns: Input data with NAN outliers and Outliers.
-    :rtype: (pandas.Series, pandas.Series)
-    """
-
-    list_data = list(input_series.items())
-    head_size = min(window, len(list_data))
-    head_list = list_data[:head_size]
-    trimmed_data = pd.Series()
-    outliers = pd.Series()
-    for item_x, item_y in head_list:
-        item_pd = pd.Series([item_y, ], index=[item_x, ])
-        trimmed_data = trimmed_data.append(item_pd)
-    for index, (item_x, item_y) in list(enumerate(list_data))[head_size:]:
-        y_rolling_list = [y for (x, y) in list_data[index - head_size:index]]
-        y_rolling_array = np.array(y_rolling_list)
-        q1 = np.percentile(y_rolling_array, 25)
-        q3 = np.percentile(y_rolling_array, 75)
-        iqr = (q3 - q1) * outlier_const
-        low = q1 - iqr
-        item_pd = pd.Series([item_y, ], index=[item_x, ])
-        if low <= item_y:
-            trimmed_data = trimmed_data.append(item_pd)
-        else:
-            outliers = outliers.append(item_pd)
-            nan_pd = pd.Series([np.nan, ], index=[item_x, ])
-            trimmed_data = trimmed_data.append(nan_pd)
-
-    return trimmed_data, outliers
-
-
  def get_files(path, extension=None, full_path=True):
      """Generates the list of files to process.
  
  def get_files(path, extension=None, full_path=True):
      """Generates the list of files to process.
  
@@ -185,8 +117,8 @@ def execute_command(cmd):
  
      :param cmd: Command to execute.
      :type cmd: str
  
      :param cmd: Command to execute.
      :type cmd: str
-    :returns: Return code of the executed command.
-    :rtype: int
+    :returns: Return code of the executed command, stdout and stderr.
+    :rtype: tuple(int, str, str)
      """
  
      env = environ.copy()
      """
  
      env = environ.copy()
@@ -243,6 +175,29 @@ def get_last_completed_build_number(jenkins_url, job_name):
      return execute_command(cmd)
  
  
      return execute_command(cmd)
  
  
+def get_build_timestamp(jenkins_url, job_name, build_nr):
+    """Get the timestamp of the build of the given job.
+
+    The build URL is fetched with wget and the downloaded content is
+    interpreted as the number of milliseconds since the epoch.
+
+    :param jenkins_url: Jenkins URL.
+    :param job_name: Job name.
+    :param build_nr: Build number.
+    :type jenkins_url: str
+    :type job_name: str
+    :type build_nr: int
+    :returns: The timestamp.
+    :rtype: datetime.datetime
+    :raises RuntimeError: If the download command returns a non-zero code.
+    :raises ValueError: If the downloaded content is not a number.
+    """
+
+    url = "{jenkins_url}/{job_name}/{build_nr}".format(jenkins_url=jenkins_url,
+                                                       job_name=job_name,
+                                                       build_nr=build_nr)
+    cmd = "wget -qO- {url}".format(url=url)
+
+    # execute_command is documented to return the tuple
+    # (return code, stdout, stderr); dividing that tuple directly fails.
+    ret_code, stdout, stderr = execute_command(cmd)
+    if ret_code != 0:
+        raise RuntimeError("Command '{cmd}' failed with code {code}: {err}".
+                           format(cmd=cmd, code=ret_code, err=stderr))
+
+    # NOTE(review): this assumes the URL returns only the epoch time in
+    # milliseconds - confirm the endpoint; a plain build page is HTML.
+    # Float division keeps the sub-second part of the timestamp.
+    return datetime.fromtimestamp(float(stdout.strip()) / 1000)
+
+
  def archive_input_data(spec):
      """Archive the report.
  
  def archive_input_data(spec):
      """Archive the report.
  
@@ -274,6 +229,74 @@ def archive_input_data(spec):
      logging.info("    Done.")
  
  
      logging.info("    Done.")
  
  
+def classify_anomalies(data):
+    """Classify the samples and compute a trend value for each of them.
+
+    The samples are split into groups by BitCountingClassifier. The first
+    valid sample of each group gets the classification stored in the group
+    metadata, the remaining valid samples of the group are "normal". The
+    trend value of a valid sample is the average of its group. Samples whose
+    average is nan are reported as "outlier" with nan as the trend value.
+
+    :param data: Full data set; each value must provide the attribute avg,
+        which is nan for unavailable samples.
+    :type data: OrderedDict
+    :returns: Classification and trend values
+    :rtype: 2-tuple, list of strings and list of floats
+    """
+    # A nan average means something went wrong with the sample. It is passed
+    # to the classifier as 0.0 so it is reported as a severe regression.
+    classifier_input = [0.0 if np.isnan(item.avg) else item
+                        for _, item in data.iteritems()]
+    # TODO: Put analogous iterator into jumpavg library.
+    pending_groups = BitCountingClassifier().classify(classifier_input)
+    # Reversed so that .pop() hands the groups out in their original order.
+    pending_groups.reverse()
+    labels = list()
+    trend = list()
+    current_group = None
+    remaining = 0
+    trend_value = 0.0
+    # NOTE(review): nan samples are part of the classifier input but do not
+    # decrease the counter below - confirm this is the intended alignment.
+    for _, item in data.iteritems():
+        if np.isnan(item.avg):
+            labels.append("outlier")
+            trend.append(item.avg)
+            continue
+        if current_group is not None and remaining >= 1:
+            # Still inside the active group.
+            label = "normal"
+        else:
+            # Open the next group, skipping empty ones (should not happen).
+            remaining = 0
+            while remaining < 1:
+                current_group = pending_groups.pop()
+                remaining = len(current_group.values)
+            trend_value = current_group.metadata.avg
+            label = current_group.metadata.classification
+        labels.append(label)
+        trend.append(trend_value)
+        remaining -= 1
+    return labels, trend
+
+
+def convert_csv_to_pretty_txt(csv_file, txt_file):
+    """Convert the given csv table to pretty text table.
+
+    The first row of the csv file is used as the header of the table, all
+    following rows are added as data rows. If the csv file contains no rows,
+    no table is created and the output file is not written.
+
+    :param csv_file: The path to the input csv file.
+    :param txt_file: The path to the output pretty text file.
+    :type csv_file: str
+    :type txt_file: str
+    """
+
+    txt_table = None
+    # Separate names for the file handles, so the path arguments are not
+    # shadowed.
+    with open(csv_file, 'rb') as file_in:
+        csv_content = csv.reader(file_in, delimiter=',', quotechar='"')
+        for row in csv_content:
+            if txt_table is None:
+                txt_table = prettytable.PrettyTable(row)
+            else:
+                txt_table.add_row(row)
+    if txt_table:
+        # The alignment is set only when a table exists; with an empty csv
+        # file txt_table stays None and must not be accessed.
+        txt_table.align["Test case"] = "l"
+        with open(txt_file, "w") as file_out:
+            file_out.write(str(txt_table))
+
+
  class Worker(multiprocessing.Process):
      """Worker class used to process tasks in separate parallel processes.
      """
  class Worker(multiprocessing.Process):
      """Worker class used to process tasks in separate parallel processes.
      """