rls1804 report: updates to performance release notes and methodology.

[csit.git] / resources / tools / presentation / utils.py
diff --git a/resources/tools/presentation/utils.py b/resources/tools/presentation/utils.py

index bc62268..df543c1 100644 (file)
--- a/resources/tools/presentation/utils.py
+++ b/resources/tools/presentation/utils.py
@@ -67,6 +67,7 @@ def relative_change(nr1, nr2):
  
      return float(((nr2 - nr1) / nr1) * 100)
  
+
  def remove_outliers(input_list, outlier_const=1.5, window=14):
      """Return list with outliers removed, using split_outliers.
  
@@ -80,15 +81,16 @@ def remove_outliers(input_list, outlier_const=1.5, window=14):
      :rtype: list of floats
      """
  
-    input_series = pd.Series()
-    for index, value in enumerate(input_list):
-        item_pd = pd.Series([value, ], index=[index, ])
-        input_series.append(item_pd)
-    output_series, _ = split_outliers(input_series, outlier_const=outlier_const,
-                                      window=window)
-    output_list = [y for x, y in output_series.items() if not np.isnan(y)]
-
-    return output_list
+    data = np.array(input_list)
+    upper_quartile = np.percentile(data, 75)
+    lower_quartile = np.percentile(data, 25)
+    iqr = (upper_quartile - lower_quartile) * outlier_const
+    quartile_set = (lower_quartile - iqr, upper_quartile + iqr)
+    result_lst = list()
+    for y in input_list:
+        if quartile_set[0] <= y <= quartile_set[1]:
+            result_lst.append(y)
+    return result_lst
  
  
  def split_outliers(input_series, outlier_const=1.5, window=14):
@@ -121,9 +123,9 @@ def split_outliers(input_series, outlier_const=1.5, window=14):
          q1 = np.percentile(y_rolling_array, 25)
          q3 = np.percentile(y_rolling_array, 75)
          iqr = (q3 - q1) * outlier_const
-        low, high = q1 - iqr, q3 + iqr
+        low = q1 - iqr
          item_pd = pd.Series([item_y, ], index=[item_x, ])
-        if low <= item_y <= high:
+        if low <= item_y:
              trimmed_data = trimmed_data.append(item_pd)
          else:
              outliers = outliers.append(item_pd)
@@ -138,7 +140,7 @@ def get_files(path, extension=None, full_path=True):
  
      :param path: Path to files.
      :param extension: Extension of files to process. If it is the empty string,
-    all files will be processed.
+        all files will be processed.
      :param full_path: If True, the files with full path are generated.
      :type path: str
      :type extension: str