From 942a580ecb880a0a8b886bc247b40ca43c96abb9 Mon Sep 17 00:00:00 2001 From: Vratko Polak Date: Thu, 13 Jul 2023 14:19:37 +0200 Subject: [PATCH] fix(jumpavg): penalize stdev also for size=2 This fix is mainly needed for bisection using PDR values. The impact on trending is smaller but still beneficial, as this fix should reduce the number of false anomalies for two-band and other unstable tests. + Update metadata for the 0.4.1 release to PyPI. Change-Id: Iabab4df50f4c4ad034362820904a237c507fa710 Signed-off-by: Vratko Polak --- PyPI/jumpavg/README.md | 2 ++ PyPI/jumpavg/pyproject.toml | 2 +- resources/libraries/python/jumpavg/bit_counting_stats.py | 11 +++++++---- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/PyPI/jumpavg/README.md b/PyPI/jumpavg/README.md index e3cae0d924..e93e4dc13b 100644 --- a/PyPI/jumpavg/README.md +++ b/PyPI/jumpavg/README.md @@ -23,6 +23,8 @@ TODO. TODO: Move into a separate file? ++ 0.4.1: Fixed a bug where a large stdev was not penalized enough (and not at all for size 2 stats). + + 0.4.0: Added "unit" and "sbps" parameters so information content is reasonable even if sample values are below one. diff --git a/PyPI/jumpavg/pyproject.toml b/PyPI/jumpavg/pyproject.toml index 275482ecad..ee6b4cabed 100644 --- a/PyPI/jumpavg/pyproject.toml +++ b/PyPI/jumpavg/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "jumpavg" -version = "0.4.0" +version = "0.4.1" description = "Library for locating changes in time series by grouping results." authors = [ { name = "Cisco Systems Inc. 
and/or its affiliates", email = "csit-dev@lists.fd.io" }, diff --git a/resources/libraries/python/jumpavg/bit_counting_stats.py b/resources/libraries/python/jumpavg/bit_counting_stats.py index caece2c8ca..3d1cb8aef0 100644 --- a/resources/libraries/python/jumpavg/bit_counting_stats.py +++ b/resources/libraries/python/jumpavg/bit_counting_stats.py @@ -97,10 +97,13 @@ class BitCountingStats(AvgStdevStats): if self.size < 2: return stdev = self.stdev / self.unit - # Stdev is considered to be uniformly distributed - # from zero to max_value. That is quite a bad expectation, - # but resilient to negative samples etc. - self.bits += math.log(max_value + 1, 2) + # Stdev can be anything between zero and max value. + # For size==2, sphere surface is 2 points regardless of radius, + # so we need to penalize large stdev already when encoding the stdev. + # The simplest way is to use the same distribution as with size... + self.bits += math.log((stdev + 1) * (stdev + 2), 2) + # ... just with added normalization from the max value cut-off. + self.bits += math.log(1 - 1 / (max_value + 2), 2) # Now we know the samples lie on sphere in size-1 dimensions. # So it is (size-2)-sphere, with radius^2 == stdev^2 * size. # https://en.wikipedia.org/wiki/N-sphere -- 2.16.6