From 079c390e0903a98182781ff5c2af2bba9902b4ed Mon Sep 17 00:00:00 2001 From: Vratko Polak Date: Fri, 2 Jun 2023 14:44:47 +0200 Subject: [PATCH] feat(jumpavg): support small values via unit param Previously, Jumpavg was known to give wrong results when the data contained values of order one or smaller. This change introduces a new "unit" parameter, which changes how the information content is calculated. For example, if the data values are multiples of 0.01, the unit parameter should be set to 0.01 to compensate. For callers not knowing their correct unit value, another parameter is introduced, called "sbps" (meaning Significant Bits Per Sample). The maximal sample should be as many units as the binary integer consisting of this many ones. This way jumpavg computes the corresponding "unit" value to use. If neither "unit" nor "sbps" are given, the "sbps" value of 12 is applied. + Rename files to conform to snake_case naming. + Fix normalization for the "opposite triangle" distribution. + Simplify logic, all groups now start as "normal", not "unknown". + Minor style improvements as suggested by pylint. + From user perspective, this change should be backward compatible. - The normalization fix is a behavior change, but it is a bugfix and the new behavior should be better. 
Change-Id: I5a5ca11757f087fff13faf1d0b8e34a741400258 Signed-off-by: Vratko Polak --- resources/libraries/python/jumpavg/__init__.py | 10 ++--- .../{AvgStdevStats.py => avg_stdev_stats.py} | 2 +- .../{BitCountingGroup.py => bit_counting_group.py} | 20 ++++++---- ...tingGroupList.py => bit_counting_group_list.py} | 18 ++++++--- .../{BitCountingStats.py => bit_counting_stats.py} | 38 +++++++++++-------- resources/libraries/python/jumpavg/classify.py | 44 ++++++++++++++++------ resources/libraries/python/model/ExportJson.py | 2 +- 7 files changed, 88 insertions(+), 46 deletions(-) rename resources/libraries/python/jumpavg/{AvgStdevStats.py => avg_stdev_stats.py} (98%) rename resources/libraries/python/jumpavg/{BitCountingGroup.py => bit_counting_group.py} (90%) rename resources/libraries/python/jumpavg/{BitCountingGroupList.py => bit_counting_group_list.py} (93%) rename resources/libraries/python/jumpavg/{BitCountingStats.py => bit_counting_stats.py} (85%) diff --git a/resources/libraries/python/jumpavg/__init__.py b/resources/libraries/python/jumpavg/__init__.py index 4fa696c538..7f63b5ee39 100644 --- a/resources/libraries/python/jumpavg/__init__.py +++ b/resources/libraries/python/jumpavg/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 Cisco and/or its affiliates. +# Copyright (c) 2023 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -15,8 +15,8 @@ __init__ file for "jumpavg" Python package. 
""" -from .AvgStdevStats import AvgStdevStats -from .BitCountingStats import BitCountingStats -from .BitCountingGroup import BitCountingGroup -from .BitCountingGroupList import BitCountingGroupList +from .avg_stdev_stats import AvgStdevStats +from .bit_counting_stats import BitCountingStats +from .bit_counting_group import BitCountingGroup +from .bit_counting_group_list import BitCountingGroupList from .classify import classify diff --git a/resources/libraries/python/jumpavg/AvgStdevStats.py b/resources/libraries/python/jumpavg/avg_stdev_stats.py similarity index 98% rename from resources/libraries/python/jumpavg/AvgStdevStats.py rename to resources/libraries/python/jumpavg/avg_stdev_stats.py index d40b316bf1..3d6a834919 100644 --- a/resources/libraries/python/jumpavg/AvgStdevStats.py +++ b/resources/libraries/python/jumpavg/avg_stdev_stats.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 Cisco and/or its affiliates. +# Copyright (c) 2023 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: diff --git a/resources/libraries/python/jumpavg/BitCountingGroup.py b/resources/libraries/python/jumpavg/bit_counting_group.py similarity index 90% rename from resources/libraries/python/jumpavg/BitCountingGroup.py rename to resources/libraries/python/jumpavg/bit_counting_group.py index 48bea086f4..22c9337532 100644 --- a/resources/libraries/python/jumpavg/BitCountingGroup.py +++ b/resources/libraries/python/jumpavg/bit_counting_group.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 Cisco and/or its affiliates. +# Copyright (c) 2023 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: @@ -17,8 +17,8 @@ import collections import dataclasses import typing -from .AvgStdevStats import AvgStdevStats -from .BitCountingStats import BitCountingStats +from .avg_stdev_stats import AvgStdevStats +from .bit_counting_stats import BitCountingStats @dataclasses.dataclass @@ -46,7 +46,9 @@ class BitCountingGroup(collections.abc.Sequence): so the caller should clone it to avoid unexpected muations.""" max_value: float """Maximal sample value to expect.""" - comment: str = "unknown" + unit: float = 1.0 + """Typical resolution of the values""" + comment: str = "normal" """Any string giving more info, e.g. "regression".""" prev_avg: typing.Optional[float] = None """Average of the previous group, if any.""" @@ -64,7 +66,7 @@ class BitCountingGroup(collections.abc.Sequence): e.g. whether the stats and bits values reflect the runs. """ if self.stats is None: - self.stats = AvgStdevStats.for_runs(self.run_list) + self.stats = AvgStdevStats.for_runs(runs=self.run_list) @property def bits(self) -> float: @@ -76,8 +78,11 @@ class BitCountingGroup(collections.abc.Sequence): :rtype: float """ if self.cached_bits is None: - self.cached_bits = BitCountingStats.for_runs( - [self.stats], self.max_value, self.prev_avg + self.cached_bits = BitCountingStats.for_runs_and_params( + runs=[self.stats], + max_value=self.max_value, + unit=self.unit, + prev_avg=self.prev_avg, ).bits return self.cached_bits @@ -115,6 +120,7 @@ class BitCountingGroup(collections.abc.Sequence): stats=stats, cached_bits=self.cached_bits, max_value=self.max_value, + unit=self.unit, prev_avg=self.prev_avg, comment=self.comment, ) diff --git a/resources/libraries/python/jumpavg/BitCountingGroupList.py b/resources/libraries/python/jumpavg/bit_counting_group_list.py similarity index 93% rename from resources/libraries/python/jumpavg/BitCountingGroupList.py rename to resources/libraries/python/jumpavg/bit_counting_group_list.py index 468e79b236..e4d33b53a2 100644 --- 
a/resources/libraries/python/jumpavg/BitCountingGroupList.py +++ b/resources/libraries/python/jumpavg/bit_counting_group_list.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 Cisco and/or its affiliates. +# Copyright (c) 2023 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -17,8 +17,8 @@ import collections import dataclasses import typing -from .AvgStdevStats import AvgStdevStats # Just for type hints. -from .BitCountingGroup import BitCountingGroup +from .avg_stdev_stats import AvgStdevStats # Just for type hints. +from .bit_counting_group import BitCountingGroup @dataclasses.dataclass @@ -46,6 +46,8 @@ class BitCountingGroupList(collections.abc.Sequence): max_value: float """Maximal sample value to base bits computation on.""" + unit: float = 1.0 + """Typical resolution of the values.""" group_list: typing.List[BitCountingGroup] = None """List of groups to compose this group list. Init also accepts None standing for an empty list. @@ -62,7 +64,7 @@ class BitCountingGroupList(collections.abc.Sequence): e.g. whether the cached bits values (and bits_except_last) make sense. """ if self.group_list is None: - self.group_list = list() + self.group_list = [] def __getitem__(self, index: int) -> BitCountingGroup: """Return the group at the index. @@ -90,6 +92,7 @@ class BitCountingGroupList(collections.abc.Sequence): """ return self.__class__( max_value=self.max_value, + unit=self.unit, group_list=[group.copy() for group in self.group_list], bits_except_last=self.bits_except_last, ) @@ -114,6 +117,7 @@ class BitCountingGroupList(collections.abc.Sequence): # for users with many samples. 
return self.__class__( max_value=self.max_value, + unit=self.unit, group_list=group_list, bits_except_last=self.bits_except_last, ) @@ -152,11 +156,15 @@ class BitCountingGroupList(collections.abc.Sequence): # It is faster to avoid stats recalculation. new_group = runs.copy() new_group.max_value = self.max_value + # Unit is common. new_group.prev_avg = prev_avg new_group.cached_bits = None else: new_group = BitCountingGroup( - run_list=runs, max_value=self.max_value, prev_avg=prev_avg + run_list=runs, + max_value=self.max_value, + unit=self.unit, + prev_avg=prev_avg, ) self.bits_except_last = self.bits self.group_list.append(new_group) diff --git a/resources/libraries/python/jumpavg/BitCountingStats.py b/resources/libraries/python/jumpavg/bit_counting_stats.py similarity index 85% rename from resources/libraries/python/jumpavg/BitCountingStats.py rename to resources/libraries/python/jumpavg/bit_counting_stats.py index 524ac952c8..caece2c8ca 100644 --- a/resources/libraries/python/jumpavg/BitCountingStats.py +++ b/resources/libraries/python/jumpavg/bit_counting_stats.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 Cisco and/or its affiliates. +# Copyright (c) 2023 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -17,7 +17,7 @@ import dataclasses import math import typing -from .AvgStdevStats import AvgStdevStats +from .avg_stdev_stats import AvgStdevStats @dataclasses.dataclass @@ -40,6 +40,8 @@ class BitCountingStats(AvgStdevStats): """Maximal sample value (real or estimated). 
Default value is there just for argument ordering reasons, leaving None leads to exceptions.""" + unit: float = 1.0 + """Typical resolution of the values.""" prev_avg: typing.Optional[float] = None """Population average of the previous group (if any).""" bits: float = None @@ -74,6 +76,8 @@ class BitCountingStats(AvgStdevStats): return if self.max_value <= 0.0: raise ValueError(f"Invalid max value: {self!r}") + max_value = self.max_value / self.unit + avg = self.avg / self.unit # Length of the sequence must be also counted in bits, # otherwise the message would not be decodable. # Model: probability of k samples is 1/k - 1/(k+1) == 1/k/(k+1) @@ -82,36 +86,37 @@ class BitCountingStats(AvgStdevStats): if self.prev_avg is None: # Avg is considered to be uniformly distributed # from zero to max_value. - self.bits += math.log(self.max_value + 1.0, 2) + self.bits += math.log(max_value + 1, 2) else: # Opposite triangle distribution with minimum. - self.bits += math.log( - (self.max_value * (self.max_value + 1)) - / (abs(self.avg - self.prev_avg) + 1), - 2, - ) + prev_avg = self.prev_avg / self.unit + norm = prev_avg * prev_avg + norm -= (prev_avg - 1) * max_value + norm += max_value * max_value / 2 + self.bits -= math.log((abs(avg - prev_avg) + 1) / norm, 2) if self.size < 2: return + stdev = self.stdev / self.unit # Stdev is considered to be uniformly distributed # from zero to max_value. That is quite a bad expectation, # but resilient to negative samples etc. - self.bits += math.log(self.max_value + 1.0, 2) + self.bits += math.log(max_value + 1, 2) # Now we know the samples lie on sphere in size-1 dimensions. # So it is (size-2)-sphere, with radius^2 == stdev^2 * size. 
# https://en.wikipedia.org/wiki/N-sphere sphere_area_ln = math.log(2) - sphere_area_ln += math.log(math.pi) * ((self.size - 1) / 2.0) - sphere_area_ln -= math.lgamma((self.size - 1) / 2.0) - sphere_area_ln += math.log(self.stdev + 1.0) * (self.size - 2) - sphere_area_ln += math.log(self.size) * ((self.size - 2) / 2.0) + sphere_area_ln += math.log(math.pi) * ((self.size - 1) / 2) + sphere_area_ln -= math.lgamma((self.size - 1) / 2) + sphere_area_ln += math.log(stdev + 1) * (self.size - 2) + sphere_area_ln += math.log(self.size) * ((self.size - 2) / 2) self.bits += sphere_area_ln / math.log(2) - # TODO: Rename, so pylint stops complaining about signature change. @classmethod - def for_runs( + def for_runs_and_params( cls, runs: typing.Iterable[typing.Union[float, AvgStdevStats]], max_value: float, + unit: float = 1.0, prev_avg: typing.Optional[float] = None, ): """Return new stats instance describing the sequence of runs. @@ -131,9 +136,11 @@ class BitCountingStats(AvgStdevStats): :param runs: Sequence of data to describe by the new metadata. :param max_value: Maximal expected value. + :param unit: Typical resolution of the values. :param prev_avg: Population average of the previous group, if any. :type runs: Iterable[Union[float, AvgStdevStats]] :type max_value: Union[float, NoneType] + :type unit: float :type prev_avg: Union[float, NoneType] :returns: The new stats instance. :rtype: cls @@ -144,6 +151,7 @@ class BitCountingStats(AvgStdevStats): avg=asd.avg, stdev=asd.stdev, max_value=max_value, + unit=unit, prev_avg=prev_avg, ) return ret_obj diff --git a/resources/libraries/python/jumpavg/classify.py b/resources/libraries/python/jumpavg/classify.py index 87d2502037..cc3cdcceed 100644 --- a/resources/libraries/python/jumpavg/classify.py +++ b/resources/libraries/python/jumpavg/classify.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 Cisco and/or its affiliates. +# Copyright (c) 2023 Cisco and/or its affiliates. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -13,21 +13,23 @@ """Module holding the classify function -Classification os one of primary purposes of this package. +Classification is one of primary purposes of this package. Minimal message length principle is used for grouping results into the list of groups, assuming each group is a population of different Gaussian distribution. """ -import typing +from typing import Iterable, Optional, Union -from .AvgStdevStats import AvgStdevStats -from .BitCountingGroupList import BitCountingGroupList +from .avg_stdev_stats import AvgStdevStats +from .bit_counting_group_list import BitCountingGroupList def classify( - values: typing.Iterable[typing.Union[float, typing.Iterable[float]]] + values: Iterable[Union[float, Iterable[float]]], + unit: Optional[float] = None, + sbps: Optional[float] = None, ) -> BitCountingGroupList: """Return the values in groups of optimal bit count. @@ -38,12 +40,27 @@ def classify( Internally, such sequence is replaced by AvgStdevStats after maximal value is found. + If the values are smaller than expected (below one unit), + the underlying assumptions break down and the classification is wrong. + Use the "unit" parameter to hint at what the input resolution is. + + If the correct value of unit is not known beforehand, + the argument "sbps" (Significant Bits Per Sample) can be used + to set unit such that maximal sample value is this many ones in binary. + If neither "unit" nor "sbps" are given, "sbps" of 12 is used by default. + :param values: Sequence of runs to classify. + :param unit: Typical resolution of the values. + Zero and None mean no unit given. + :param sbps: Significant Bits Per Sample. None or zero means 12. + If unit is not set, this is used to compute unit from max sample value. 
:type values: Iterable[Union[float, Iterable[float]]] + :type unit: Optional[float] + :type sbps: Optional[float] :returns: Classified group list. :rtype: BitCountingGroupList """ - processed_values = list() + processed_values = [] max_value = 0.0 for value in values: if isinstance(value, (float, int)): @@ -55,9 +72,14 @@ def classify( if subvalue > max_value: max_value = subvalue processed_values.append(AvgStdevStats.for_runs(value)) + if not unit: + if not sbps: + sbps = 12.0 + max_in_units = pow(2.0, sbps + 1.0) - 1.0 + unit = max_value / max_in_units # Glist means group list (BitCountingGroupList). - open_glists = list() - record_glist = BitCountingGroupList(max_value=max_value) + open_glists = [] + record_glist = BitCountingGroupList(max_value=max_value, unit=unit) for value in processed_values: new_open_glist = record_glist.copy_fast().append_group_of_runs([value]) record_glist = new_open_glist @@ -68,9 +90,7 @@ def classify( open_glists.append(new_open_glist) previous_average = record_glist[0].stats.avg for group in record_glist: - if group.stats.avg == previous_average: - group.comment = "normal" - elif group.stats.avg < previous_average: + if group.stats.avg < previous_average: group.comment = "regression" elif group.stats.avg > previous_average: group.comment = "progression" diff --git a/resources/libraries/python/model/ExportJson.py b/resources/libraries/python/model/ExportJson.py index 478b3abb60..de8874dc52 100644 --- a/resources/libraries/python/model/ExportJson.py +++ b/resources/libraries/python/model/ExportJson.py @@ -30,7 +30,7 @@ from robot.libraries.BuiltIn import BuiltIn from zlib import compress from resources.libraries.python.Constants import Constants -from resources.libraries.python.jumpavg.AvgStdevStats import AvgStdevStats +from resources.libraries.python.jumpavg import AvgStdevStats from resources.libraries.python.model.ExportResult import ( export_dut_type_and_version, export_tg_type_and_version ) -- 2.16.6