Fix jumpavg: No negative variance from rounding 76/13276/2
authorVratko Polak <vrpolak@cisco.com>
Thu, 28 Jun 2018 11:07:14 +0000 (13:07 +0200)
committerTibor Frank <tifrank@cisco.com>
Thu, 28 Jun 2018 12:07:43 +0000 (12:07 +0000)
The algorithm for computing stdev has been changed
to a more stable one, based on Welford's algorithm.

Change-Id: I51e02d9c5c26cda0d4e998381d5011aa793e6483
Signed-off-by: Vratko Polak <vrpolak@cisco.com>
PyPI/jumpavg/jumpavg/AvgStdevMetadataFactory.py

index 6d2e967..25bc600 100644 (file)
@@ -30,22 +30,25 @@ class AvgStdevMetadataFactory(object):
         :returns: The metadata matching the values.
         :rtype: AvgStdevMetadata
         """
-        sum_0 = 0
-        sum_1 = 0.0
-        sum_2 = 0.0
+        # Using Welford method to be more resistant to rounding errors.
+        # Adapted from code for sample standard deviation at:
+        # https://www.johndcook.com/blog/standard_deviation/
+        # The logic of plus operator is taken from
+        # https://www.johndcook.com/blog/skewness_kurtosis/
+        size = 0
+        avg = 0.0
+        moment_2 = 0.0
         for value in values:
-            if isinstance(value, AvgStdevMetadata):
-                sum_0 += value.size
-                sum_1 += value.avg * value.size
-                sum_2 += value.stdev * value.stdev * value.size
-                sum_2 += value.avg * value.avg * value.size
-            else:  # The value is assumed to be float.
-                sum_0 += 1
-                sum_1 += value
-                sum_2 += value * value
-        if sum_0 < 1:
+            if not isinstance(value, AvgStdevMetadata):
+                value = AvgStdevMetadata(size=1, avg=value)
+            old_size = size
+            delta = value.avg - avg
+            size += value.size
+            avg += delta * value.size / size
+            moment_2 += value.stdev * value.stdev * value.size
+            moment_2 += delta * delta * old_size * value.size / size
+        if size < 1:
             return AvgStdevMetadata()
-        avg = sum_1 / sum_0
-        stdev = math.sqrt(sum_2 / sum_0 - avg * avg)
-        ret_obj = AvgStdevMetadata(size=sum_0, avg=avg, stdev=stdev)
+        stdev = math.sqrt(moment_2 / size)
+        ret_obj = AvgStdevMetadata(size=size, avg=avg, stdev=stdev)
         return ret_obj