X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=resources%2Ftools%2Fpresentation%2Futils.py;h=59dbfa634bb4532be404adfae669a77416bb02f9;hb=5646509aea6b43ef1efb282aad908289cc005b26;hp=2fbf70cadcf6c648bd47a1e31fbbef7616de145c;hpb=7cfdac0cf07e3a7d9a8b53b7621f8b7500fd1515;p=csit.git diff --git a/resources/tools/presentation/utils.py b/resources/tools/presentation/utils.py index 2fbf70cadc..59dbfa634b 100644 --- a/resources/tools/presentation/utils.py +++ b/resources/tools/presentation/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2017 Cisco and/or its affiliates. +# Copyright (c) 2018 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -21,7 +21,7 @@ import logging from os import walk, makedirs, environ from os.path import join, isdir -from shutil import copy, Error +from shutil import move, Error from math import sqrt from errors import PresentationError @@ -81,15 +81,16 @@ def remove_outliers(input_list, outlier_const=1.5, window=14): :rtype: list of floats """ - input_series = pd.Series() - for index, value in enumerate(input_list): - item_pd = pd.Series([value, ], index=[index, ]) - input_series.append(item_pd) - output_series, _ = split_outliers(input_series, outlier_const=outlier_const, - window=window) - output_list = [y for x, y in output_series.items() if not np.isnan(y)] - - return output_list + data = np.array(input_list) + upper_quartile = np.percentile(data, 75) + lower_quartile = np.percentile(data, 25) + iqr = (upper_quartile - lower_quartile) * outlier_const + quartile_set = (lower_quartile - iqr, upper_quartile + iqr) + result_lst = list() + for y in input_list: + if quartile_set[0] <= y <= quartile_set[1]: + result_lst.append(y) + return result_lst def split_outliers(input_series, outlier_const=1.5, window=14): @@ -122,9 +123,9 @@ def split_outliers(input_series, outlier_const=1.5, window=14): q1 = np.percentile(y_rolling_array, 25) q3 = np.percentile(y_rolling_array, 75) iqr = (q3 - q1) * outlier_const - low, high = q1 - iqr, q3 + iqr + low = q1 - iqr item_pd = pd.Series([item_y, ], index=[item_x, ]) - if low <= item_y <= high: + if low <= item_y: trimmed_data = trimmed_data.append(item_pd) else: outliers = outliers.append(item_pd) @@ -139,7 +140,7 @@ def get_files(path, extension=None, full_path=True): :param path: Path to files. :param extension: Extension of files to process. If it is the empty string, - all files will be processed. + all files will be processed. :param full_path: If True, the files with full path are generated. :type path: str :type extension: str @@ -197,8 +198,8 @@ def execute_command(cmd): stdout, stderr = proc.communicate() - logging.info(stdout) - logging.info(stderr) + logging.debug(stdout) + logging.debug(stderr) if proc.returncode != 0: logging.error(" Command execution failed.") @@ -249,10 +250,7 @@ def archive_input_data(spec): logging.info(" Archiving the input data files ...") - if spec.is_debug: - extension = spec.debug["input-format"] - else: - extension = spec.input["file-format"] + extension = spec.input["file-format"] data_files = get_files(spec.environment["paths"]["DIR[WORKING,DATA]"], extension=extension) dst = spec.environment["paths"]["DIR[STATIC,ARCH]"] @@ -263,8 +261,8 @@ def archive_input_data(spec): makedirs(dst) for data_file in data_files: - logging.info(" Copying the file: {0} ...".format(data_file)) - copy(data_file, dst) + logging.info(" Moving the file: {0} ...".format(data_file)) + move(data_file, dst) except (Error, OSError) as err: raise PresentationError("Not possible to archive the input data.",