From: Vratko Polak Date: Thu, 28 Jun 2018 11:07:14 +0000 (+0200) Subject: Fix jumpavg: No negative variance from rounding X-Git-Url: https://gerrit.fd.io/r/gitweb?p=csit.git;a=commitdiff_plain;h=a60792260765e15cebae10d2c54fb2c3aef75d77 Fix jumpavg: No negative variance from rounding The algorithm for computing stdev has been changed to a more stable one, based on Welford's algorithm. Change-Id: I51e02d9c5c26cda0d4e998381d5011aa793e6483 Signed-off-by: Vratko Polak --- diff --git a/PyPI/jumpavg/jumpavg/AvgStdevMetadataFactory.py b/PyPI/jumpavg/jumpavg/AvgStdevMetadataFactory.py index 6d2e967a88..25bc600aeb 100644 --- a/PyPI/jumpavg/jumpavg/AvgStdevMetadataFactory.py +++ b/PyPI/jumpavg/jumpavg/AvgStdevMetadataFactory.py @@ -30,22 +30,25 @@ class AvgStdevMetadataFactory(object): :returns: The metadata matching the values. :rtype: AvgStdevMetadata """ - sum_0 = 0 - sum_1 = 0.0 - sum_2 = 0.0 + # Using Welford method to be more resistant to rounding errors. + # Adapted from code for sample standard deviation at: + # https://www.johndcook.com/blog/standard_deviation/ + # The logic of plus operator is taken from + # https://www.johndcook.com/blog/skewness_kurtosis/ + size = 0 + avg = 0.0 + moment_2 = 0.0 for value in values: - if isinstance(value, AvgStdevMetadata): - sum_0 += value.size - sum_1 += value.avg * value.size - sum_2 += value.stdev * value.stdev * value.size - sum_2 += value.avg * value.avg * value.size - else: # The value is assumed to be float. - sum_0 += 1 - sum_1 += value - sum_2 += value * value - if sum_0 < 1: + if not isinstance(value, AvgStdevMetadata): + value = AvgStdevMetadata(size=1, avg=value) + old_size = size + delta = value.avg - avg + size += value.size + avg += delta * value.size / size + moment_2 += value.stdev * value.stdev * value.size + moment_2 += delta * delta * old_size * value.size / size + if size < 1: return AvgStdevMetadata() - avg = sum_1 / sum_0 - stdev = math.sqrt(sum_2 / sum_0 - avg * avg) - ret_obj = AvgStdevMetadata(size=sum_0, avg=avg, stdev=stdev) + stdev = math.sqrt(moment_2 / size) + ret_obj = AvgStdevMetadata(size=size, avg=avg, stdev=stdev) return ret_obj