+ return float(((nr2 - nr1) / nr1) * 100)
+
+
def remove_outliers(input_list, outlier_const=1.5, window=14):
    """Return the input values with IQR-based outliers removed.

    The quartiles are computed once over the whole input. A value is kept
    when it lies inside the closed interval
    ``[Q1 - outlier_const * IQR, Q3 + outlier_const * IQR]``, where
    ``IQR = Q3 - Q1``. The relative order of the kept values is preserved.

    :param input_list: Data from which the outliers will be removed.
    :param outlier_const: Outlier constant; multiplier applied to the IQR.
    :param window: Not used by this function; accepted only so that the
        signature stays compatible with existing callers.
    :type input_list: list of floats
    :type outlier_const: float
    :type window: int
    :returns: The input list without outliers; an empty list for empty
        input.
    :rtype: list of floats
    """

    data = np.array(input_list)
    # Percentiles of an empty array are undefined (an exception or NaN with
    # a warning, depending on the NumPy version), so short-circuit here.
    if data.size == 0:
        return []

    lower_quartile, upper_quartile = np.percentile(data, [25, 75])
    margin = (upper_quartile - lower_quartile) * outlier_const
    low, high = lower_quartile - margin, upper_quartile + margin
    return [value for value in data.tolist() if low <= value <= high]
+
+
def _series_from_lists(values, labels):
    """Build a pandas Series from parallel lists of values and index labels."""
    # An explicit dtype for the empty case avoids the pandas warning about
    # the default dtype of an empty Series; otherwise let pandas infer it.
    dtype = None if values else "float64"
    return pd.Series(values, index=labels, dtype=dtype)


def split_outliers(input_series, outlier_const=1.5, window=14):
    """Go through the input data and generate two pandas series:
    - input data with outliers replaced by NAN
    - outliers.
    The function uses IQR to detect outliers.

    The first ``window`` items (or all of them when the input is shorter)
    are always kept. Every later item is compared against the quartiles of
    the ``window`` items immediately preceding it in the *original* data
    (earlier outliers are not excluded from that window). An item is kept
    when it lies inside the closed interval
    ``[Q1 - outlier_const * IQR, Q3 + outlier_const * IQR]``; any item that
    fails this comparison (which includes NaN) is reported as an outlier.

    :param input_series: Data to be examined for outliers.
    :param outlier_const: Outlier constant; multiplier applied to the IQR.
    :param window: How many preceding values to take into account.
    :type input_series: pandas.Series
    :type outlier_const: float
    :type window: int
    :returns: Input data with NAN outliers and Outliers.
    :rtype: (pandas.Series, pandas.Series)
    :raises ValueError: If window is smaller than 1.
    """

    if window < 1:
        # A non-positive window would give an empty (or negatively sliced)
        # rolling sample, for which the quartiles are meaningless.
        raise ValueError("window must be a positive integer")

    items = list(input_series.items())
    labels = [label for label, _ in items]
    values = [value for _, value in items]
    head_size = min(window, len(items))

    # The head has no full window of predecessors, so it is kept as is.
    trimmed_values = values[:head_size]
    outlier_labels = []
    outlier_values = []

    # Collect results in plain lists and build each Series once at the end;
    # Series.append copied the data on every call and no longer exists in
    # pandas 2.x.
    for position, value in enumerate(values[head_size:], start=head_size):
        q1, q3 = np.percentile(values[position - head_size:position], [25, 75])
        margin = (q3 - q1) * outlier_const
        if q1 - margin <= value <= q3 + margin:
            trimmed_values.append(value)
        else:
            outlier_labels.append(labels[position])
            outlier_values.append(value)
            # Keep the position in the trimmed data, but blank the value.
            trimmed_values.append(np.nan)

    trimmed_data = _series_from_lists(trimmed_values, labels)
    outliers = _series_from_lists(outlier_values, outlier_labels)
    return trimmed_data, outliers