Use Jumpavg 0.2.0 in PAL

[csit.git] / resources / tools / presentation / utils.py
diff --git a/resources/tools/presentation/utils.py b/resources/tools/presentation/utils.py

index 3fdec85..3bd5a71 100644 (file)
--- a/resources/tools/presentation/utils.py
+++ b/resources/tools/presentation/utils.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 Cisco and/or its affiliates.
+# Copyright (c) 2019 Cisco and/or its affiliates.
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at:
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at:
@@ -16,6 +16,7 @@
  
  import multiprocessing
  import subprocess
  
  import multiprocessing
  import subprocess
+import math
  import numpy as np
  import logging
  import csv
  import numpy as np
  import logging
  import csv
@@ -24,11 +25,11 @@ import prettytable
  from os import walk, makedirs, environ
  from os.path import join, isdir
  from shutil import move, Error
  from os import walk, makedirs, environ
  from os.path import join, isdir
  from shutil import move, Error
-from math import sqrt
  from datetime import datetime
  from datetime import datetime
+from pandas import Series
  
  
+from resources.libraries.python import jumpavg
  from errors import PresentationError
  from errors import PresentationError
-from jumpavg.BitCountingClassifier import BitCountingClassifier
  
  
  def mean(items):
  
  
  def mean(items):
@@ -51,11 +52,7 @@ def stdev(items):
      :returns: Stdev.
      :rtype: float
      """
      :returns: Stdev.
      :rtype: float
      """
-
-    avg = mean(items)
-    variance = [(x - avg) ** 2 for x in items]
-    stddev = sqrt(mean(variance))
-    return stddev
+    return Series.std(Series(items))
  
  
  def relative_change(nr1, nr2):
  
  
  def relative_change(nr1, nr2):
@@ -72,6 +69,32 @@ def relative_change(nr1, nr2):
      return float(((nr2 - nr1) / nr1) * 100)
  
  
      return float(((nr2 - nr1) / nr1) * 100)
  
  
+def relative_change_stdev(mean1, mean2, std1, std2):
+    """Compute relative change of two values and its standard deviation.
+
+    The "1" values are the base for comparison.
+    Results are returned as percentages (and percentage points for stdev).
+    Linearized theory is used, so results are inaccurate for large stdev.
+
+    :param mean1: Mean of the first number.
+    :param mean2: Mean of the second number.
+    :param std1: Standard deviation estimate of the first number.
+    :param std2: Standard deviation estimate of the second number.
+    :type mean1: float
+    :type mean2: float
+    :type std1: float
+    :type std2: float
+    :returns: Relative change and its stdev.
+    :rtype: 2-tuple of floats
+    """
+    mean1, mean2 = float(mean1), float(mean2)
+    quotient = mean2 / mean1
+    first = std1 / mean1
+    second = std2 / mean2
+    std = quotient * math.sqrt(first * first + second * second)
+    return (quotient - 1) * 100, std * 100
+
+
  def get_files(path, extension=None, full_path=True):
      """Generates the list of files to process.
  
  def get_files(path, extension=None, full_path=True):
      """Generates the list of files to process.
  
@@ -212,9 +235,11 @@ def archive_input_data(spec):
  
      logging.info("    Archiving the input data files ...")
  
  
      logging.info("    Archiving the input data files ...")
  
-    extension = spec.input["file-format"]
-    data_files = get_files(spec.environment["paths"]["DIR[WORKING,DATA]"],
-                           extension=extension)
+    extension = spec.input["arch-file-format"]
+    data_files = list()
+    for ext in extension:
+        data_files.extend(get_files(
+            spec.environment["paths"]["DIR[WORKING,DATA]"], extension=ext))
      dst = spec.environment["paths"]["DIR[STATIC,ARCH]"]
      logging.info("      Destination: {0}".format(dst))
  
      dst = spec.environment["paths"]["DIR[STATIC,ARCH]"]
      logging.info("      Destination: {0}".format(dst))
  
@@ -245,30 +270,30 @@ def classify_anomalies(data):
      :returns: Classification and trend values
      :rtype: 2-tuple, list of strings and list of floats
      """
      :returns: Classification and trend values
      :rtype: 2-tuple, list of strings and list of floats
      """
-    # Nan mean something went wrong.
+    # Nan means something went wrong.
      # Use 0.0 to cause that being reported as a severe regression.
      # Use 0.0 to cause that being reported as a severe regression.
-    bare_data = [0.0 if np.isnan(sample.avg) else sample
-                 for _, sample in data.iteritems()]
-    # TODO: Put analogous iterator into jumpavg library.
-    groups = BitCountingClassifier().classify(bare_data)
-    groups.reverse()  # Just to use .pop() for FIFO.
+    bare_data = [0.0 if np.isnan(sample) else sample
+                 for sample in data.itervalues()]
+    # TODO: Make BitCountingGroupList a subclass of list again?
+    group_list = jumpavg.classify(bare_data).group_list
+    group_list.reverse()  # Just to use .pop() for FIFO.
      classification = []
      avgs = []
      active_group = None
      values_left = 0
      avg = 0.0
      classification = []
      avgs = []
      active_group = None
      values_left = 0
      avg = 0.0
-    for _, sample in data.iteritems():
-        if np.isnan(sample.avg):
+    for sample in data.itervalues():
+        if np.isnan(sample):
              classification.append("outlier")
              classification.append("outlier")
-            avgs.append(sample.avg)
+            avgs.append(sample)
              continue
          if values_left < 1 or active_group is None:
              values_left = 0
              while values_left < 1:  # Ignore empty groups (should not happen).
              continue
          if values_left < 1 or active_group is None:
              values_left = 0
              while values_left < 1:  # Ignore empty groups (should not happen).
-                active_group = groups.pop()
-                values_left = len(active_group.values)
-            avg = active_group.metadata.avg
-            classification.append(active_group.metadata.classification)
+                active_group = group_list.pop()
+                values_left = len(active_group.run_list)
+            avg = active_group.stats.avg
+            classification.append(active_group.comment)
              avgs.append(avg)
              values_left -= 1
              continue
              avgs.append(avg)
              values_left -= 1
              continue