From: Vratko Polak
Date: Thu, 13 Jul 2023 12:19:37 +0000 (+0200)
Subject: fix(jumpavg): penalize stdev also for size=2
X-Git-Url: https://gerrit.fd.io/r/gitweb?a=commitdiff_plain;ds=sidebyside;h=942a580ecb880a0a8b886bc247b40ca43c96abb9;p=csit.git

fix(jumpavg): penalize stdev also for size=2

This fix is mainly needed for bisection using PDR values.
The impact on trending is smaller but still beneficial,
as this fix should reduce the amount of false anomalies
for two-band and other unstable tests.

+ Update metadata for 0.4.1 release into PyPI.

Change-Id: Iabab4df50f4c4ad034362820904a237c507fa710
Signed-off-by: Vratko Polak
---

diff --git a/PyPI/jumpavg/README.md b/PyPI/jumpavg/README.md
index e3cae0d924..e93e4dc13b 100644
--- a/PyPI/jumpavg/README.md
+++ b/PyPI/jumpavg/README.md
@@ -23,6 +23,8 @@ TODO.
 
 TODO: Move into a separate file?
 
++ 0.4.1: Fixed bug of not penalizing large stdev enough (at all for size 2 stats).
+
 + 0.4.0: Added "unit" and "sbps" parameters so information content
   is reasonable even if sample values are below one.
 
diff --git a/PyPI/jumpavg/pyproject.toml b/PyPI/jumpavg/pyproject.toml
index 275482ecad..ee6b4cabed 100644
--- a/PyPI/jumpavg/pyproject.toml
+++ b/PyPI/jumpavg/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "jumpavg"
-version = "0.4.0"
+version = "0.4.1"
 description = "Library for locating changes in time series by grouping results."
 authors = [
     { name = "Cisco Systems Inc. and/or its affiliates", email = "csit-dev@lists.fd.io" },
diff --git a/resources/libraries/python/jumpavg/bit_counting_stats.py b/resources/libraries/python/jumpavg/bit_counting_stats.py
index caece2c8ca..3d1cb8aef0 100644
--- a/resources/libraries/python/jumpavg/bit_counting_stats.py
+++ b/resources/libraries/python/jumpavg/bit_counting_stats.py
@@ -97,10 +97,13 @@ class BitCountingStats(AvgStdevStats):
         if self.size < 2:
             return
         stdev = self.stdev / self.unit
-        # Stdev is considered to be uniformly distributed
-        # from zero to max_value. That is quite a bad expectation,
-        # but resilient to negative samples etc.
-        self.bits += math.log(max_value + 1, 2)
+        # Stdev can be anything between zero and max value.
+        # For size==2, sphere surface is 2 points regardless of radius,
+        # we need to penalize large stdev already when encoding the stdev.
+        # The simplest way is to use the same distribution as with size...
+        self.bits += math.log((stdev + 1) * (stdev + 2), 2)
+        # .. just with added normalization from the max value cut-off.
+        self.bits += math.log(1 - 1 / (max_value + 2), 2)
         # Now we know the samples lie on sphere in size-1 dimensions.
         # So it is (size-2)-sphere, with radius^2 == stdev^2 * size.
         # https://en.wikipedia.org/wiki/N-sphere