Previously, Jumpavg was known to give wrong results
when the data contains values of order one or smaller.
This change introduces a new "unit" parameter,
which changes how the information content is calculated.
For example, if the data values are multiples of 0.01,
the unit parameter should be set to 0.01 to compensate.
For callers not knowing their correct unit value,
another parameter is introduced, called "sbps"
(meaning Significant Bits Per Sample).
The maximal sample value, expressed in units,
should be a binary integer number with this many ones.
This way jumpavg computes the corresponding "unit" value to use.
If neither "unit" nor "sbps" are given,
the "sbps" value of 12 is applied.
+ Rename files to conform to snake_style naming.
+ Fix normalization for the "opposite triangle" distribution.
+ Simplify logic, all groups now start as "normal", not "unknown".
+ Minor style improvements as suggested by pylint.
+ From user perspective, this change should be backward compatible.
- The normalization fix is a behavior change,
but it is a bugfix and the new behavior should be better.
Change-Id: I5a5ca11757f087fff13faf1d0b8e34a741400258
Signed-off-by: Vratko Polak <vrpolak@cisco.com>
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
__init__ file for "jumpavg" Python package.
"""
__init__ file for "jumpavg" Python package.
"""
-from .AvgStdevStats import AvgStdevStats
-from .BitCountingStats import BitCountingStats
-from .BitCountingGroup import BitCountingGroup
-from .BitCountingGroupList import BitCountingGroupList
+from .avg_stdev_stats import AvgStdevStats
+from .bit_counting_stats import BitCountingStats
+from .bit_counting_group import BitCountingGroup
+from .bit_counting_group_list import BitCountingGroupList
from .classify import classify
from .classify import classify
-# Copyright (c) 2022 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
-# Copyright (c) 2022 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
import dataclasses
import typing
import dataclasses
import typing
-from .AvgStdevStats import AvgStdevStats
-from .BitCountingStats import BitCountingStats
+from .avg_stdev_stats import AvgStdevStats
+from .bit_counting_stats import BitCountingStats
so the caller should clone it to avoid unexpected muations."""
max_value: float
"""Maximal sample value to expect."""
so the caller should clone it to avoid unexpected muations."""
max_value: float
"""Maximal sample value to expect."""
- comment: str = "unknown"
+ unit: float = 1.0
+ """Typical resolution of the values"""
+ comment: str = "normal"
"""Any string giving more info, e.g. "regression"."""
prev_avg: typing.Optional[float] = None
"""Average of the previous group, if any."""
"""Any string giving more info, e.g. "regression"."""
prev_avg: typing.Optional[float] = None
"""Average of the previous group, if any."""
e.g. whether the stats and bits values reflect the runs.
"""
if self.stats is None:
e.g. whether the stats and bits values reflect the runs.
"""
if self.stats is None:
- self.stats = AvgStdevStats.for_runs(self.run_list)
+ self.stats = AvgStdevStats.for_runs(runs=self.run_list)
@property
def bits(self) -> float:
@property
def bits(self) -> float:
:rtype: float
"""
if self.cached_bits is None:
:rtype: float
"""
if self.cached_bits is None:
- self.cached_bits = BitCountingStats.for_runs(
- [self.stats], self.max_value, self.prev_avg
+ self.cached_bits = BitCountingStats.for_runs_and_params(
+ runs=[self.stats],
+ max_value=self.max_value,
+ unit=self.unit,
+ prev_avg=self.prev_avg,
).bits
return self.cached_bits
).bits
return self.cached_bits
stats=stats,
cached_bits=self.cached_bits,
max_value=self.max_value,
stats=stats,
cached_bits=self.cached_bits,
max_value=self.max_value,
prev_avg=self.prev_avg,
comment=self.comment,
)
prev_avg=self.prev_avg,
comment=self.comment,
)
-# Copyright (c) 2022 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
import dataclasses
import typing
import dataclasses
import typing
-from .AvgStdevStats import AvgStdevStats # Just for type hints.
-from .BitCountingGroup import BitCountingGroup
+from .avg_stdev_stats import AvgStdevStats # Just for type hints.
+from .bit_counting_group import BitCountingGroup
max_value: float
"""Maximal sample value to base bits computation on."""
max_value: float
"""Maximal sample value to base bits computation on."""
+ unit: float = 1.0
+ """Typical resolution of the values."""
group_list: typing.List[BitCountingGroup] = None
"""List of groups to compose this group list.
Init also accepts None standing for an empty list.
group_list: typing.List[BitCountingGroup] = None
"""List of groups to compose this group list.
Init also accepts None standing for an empty list.
e.g. whether the cached bits values (and bits_except_last) make sense.
"""
if self.group_list is None:
e.g. whether the cached bits values (and bits_except_last) make sense.
"""
if self.group_list is None:
- self.group_list = list()
def __getitem__(self, index: int) -> BitCountingGroup:
"""Return the group at the index.
def __getitem__(self, index: int) -> BitCountingGroup:
"""Return the group at the index.
"""
return self.__class__(
max_value=self.max_value,
"""
return self.__class__(
max_value=self.max_value,
group_list=[group.copy() for group in self.group_list],
bits_except_last=self.bits_except_last,
)
group_list=[group.copy() for group in self.group_list],
bits_except_last=self.bits_except_last,
)
# for users with many samples.
return self.__class__(
max_value=self.max_value,
# for users with many samples.
return self.__class__(
max_value=self.max_value,
group_list=group_list,
bits_except_last=self.bits_except_last,
)
group_list=group_list,
bits_except_last=self.bits_except_last,
)
# It is faster to avoid stats recalculation.
new_group = runs.copy()
new_group.max_value = self.max_value
# It is faster to avoid stats recalculation.
new_group = runs.copy()
new_group.max_value = self.max_value
new_group.prev_avg = prev_avg
new_group.cached_bits = None
else:
new_group = BitCountingGroup(
new_group.prev_avg = prev_avg
new_group.cached_bits = None
else:
new_group = BitCountingGroup(
- run_list=runs, max_value=self.max_value, prev_avg=prev_avg
+ run_list=runs,
+ max_value=self.max_value,
+ unit=self.unit,
+ prev_avg=prev_avg,
)
self.bits_except_last = self.bits
self.group_list.append(new_group)
)
self.bits_except_last = self.bits
self.group_list.append(new_group)
-# Copyright (c) 2022 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
import math
import typing
import math
import typing
-from .AvgStdevStats import AvgStdevStats
+from .avg_stdev_stats import AvgStdevStats
"""Maximal sample value (real or estimated).
Default value is there just for argument ordering reasons,
leaving None leads to exceptions."""
"""Maximal sample value (real or estimated).
Default value is there just for argument ordering reasons,
leaving None leads to exceptions."""
+ unit: float = 1.0
+ """Typical resolution of the values."""
prev_avg: typing.Optional[float] = None
"""Population average of the previous group (if any)."""
bits: float = None
prev_avg: typing.Optional[float] = None
"""Population average of the previous group (if any)."""
bits: float = None
return
if self.max_value <= 0.0:
raise ValueError(f"Invalid max value: {self!r}")
return
if self.max_value <= 0.0:
raise ValueError(f"Invalid max value: {self!r}")
+ max_value = self.max_value / self.unit
+ avg = self.avg / self.unit
# Length of the sequence must be also counted in bits,
# otherwise the message would not be decodable.
# Model: probability of k samples is 1/k - 1/(k+1) == 1/k/(k+1)
# Length of the sequence must be also counted in bits,
# otherwise the message would not be decodable.
# Model: probability of k samples is 1/k - 1/(k+1) == 1/k/(k+1)
if self.prev_avg is None:
# Avg is considered to be uniformly distributed
# from zero to max_value.
if self.prev_avg is None:
# Avg is considered to be uniformly distributed
# from zero to max_value.
- self.bits += math.log(self.max_value + 1.0, 2)
+ self.bits += math.log(max_value + 1, 2)
else:
# Opposite triangle distribution with minimum.
else:
# Opposite triangle distribution with minimum.
- self.bits += math.log(
- (self.max_value * (self.max_value + 1))
- / (abs(self.avg - self.prev_avg) + 1),
- 2,
- )
+ prev_avg = self.prev_avg / self.unit
+ norm = prev_avg * prev_avg
+ norm -= (prev_avg - 1) * max_value
+ norm += max_value * max_value / 2
+ self.bits -= math.log((abs(avg - prev_avg) + 1) / norm, 2)
+ stdev = self.stdev / self.unit
# Stdev is considered to be uniformly distributed
# from zero to max_value. That is quite a bad expectation,
# but resilient to negative samples etc.
# Stdev is considered to be uniformly distributed
# from zero to max_value. That is quite a bad expectation,
# but resilient to negative samples etc.
- self.bits += math.log(self.max_value + 1.0, 2)
+ self.bits += math.log(max_value + 1, 2)
# Now we know the samples lie on sphere in size-1 dimensions.
# So it is (size-2)-sphere, with radius^2 == stdev^2 * size.
# https://en.wikipedia.org/wiki/N-sphere
sphere_area_ln = math.log(2)
# Now we know the samples lie on sphere in size-1 dimensions.
# So it is (size-2)-sphere, with radius^2 == stdev^2 * size.
# https://en.wikipedia.org/wiki/N-sphere
sphere_area_ln = math.log(2)
- sphere_area_ln += math.log(math.pi) * ((self.size - 1) / 2.0)
- sphere_area_ln -= math.lgamma((self.size - 1) / 2.0)
- sphere_area_ln += math.log(self.stdev + 1.0) * (self.size - 2)
- sphere_area_ln += math.log(self.size) * ((self.size - 2) / 2.0)
+ sphere_area_ln += math.log(math.pi) * ((self.size - 1) / 2)
+ sphere_area_ln -= math.lgamma((self.size - 1) / 2)
+ sphere_area_ln += math.log(stdev + 1) * (self.size - 2)
+ sphere_area_ln += math.log(self.size) * ((self.size - 2) / 2)
self.bits += sphere_area_ln / math.log(2)
self.bits += sphere_area_ln / math.log(2)
- # TODO: Rename, so pylint stops complaining about signature change.
+ def for_runs_and_params(
cls,
runs: typing.Iterable[typing.Union[float, AvgStdevStats]],
max_value: float,
cls,
runs: typing.Iterable[typing.Union[float, AvgStdevStats]],
max_value: float,
prev_avg: typing.Optional[float] = None,
):
"""Return new stats instance describing the sequence of runs.
prev_avg: typing.Optional[float] = None,
):
"""Return new stats instance describing the sequence of runs.
:param runs: Sequence of data to describe by the new metadata.
:param max_value: Maximal expected value.
:param runs: Sequence of data to describe by the new metadata.
:param max_value: Maximal expected value.
+ :param unit: Typical resolution of the values.
:param prev_avg: Population average of the previous group, if any.
:type runs: Iterable[Union[float, AvgStdevStats]]
:type max_value: Union[float, NoneType]
:param prev_avg: Population average of the previous group, if any.
:type runs: Iterable[Union[float, AvgStdevStats]]
:type max_value: Union[float, NoneType]
:type prev_avg: Union[float, NoneType]
:returns: The new stats instance.
:rtype: cls
:type prev_avg: Union[float, NoneType]
:returns: The new stats instance.
:rtype: cls
avg=asd.avg,
stdev=asd.stdev,
max_value=max_value,
avg=asd.avg,
stdev=asd.stdev,
max_value=max_value,
prev_avg=prev_avg,
)
return ret_obj
prev_avg=prev_avg,
)
return ret_obj
-# Copyright (c) 2022 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
"""Module holding the classify function
"""Module holding the classify function
-Classification os one of primary purposes of this package.
+Classification is one of primary purposes of this package.
Minimal message length principle is used
for grouping results into the list of groups,
assuming each group is a population of different Gaussian distribution.
"""
Minimal message length principle is used
for grouping results into the list of groups,
assuming each group is a population of different Gaussian distribution.
"""
+from typing import Iterable, Optional, Union
-from .AvgStdevStats import AvgStdevStats
-from .BitCountingGroupList import BitCountingGroupList
+from .avg_stdev_stats import AvgStdevStats
+from .bit_counting_group_list import BitCountingGroupList
- values: typing.Iterable[typing.Union[float, typing.Iterable[float]]]
+ values: Iterable[Union[float, Iterable[float]]],
+ unit: Optional[float] = None,
+ sbps: Optional[float] = None,
) -> BitCountingGroupList:
"""Return the values in groups of optimal bit count.
) -> BitCountingGroupList:
"""Return the values in groups of optimal bit count.
Internally, such sequence is replaced by AvgStdevStats
after maximal value is found.
Internally, such sequence is replaced by AvgStdevStats
after maximal value is found.
+ If the values are smaller than expected (below one unit),
+ the underlying assumption break down and the classification is wrong.
+ Use the "unit" parameter to hint at what the input resolution is.
+
+ If the correct value of unit is not known beforehand,
+ the argument "sbps" (Significant Bits Per Sample) can be used
+ to set unit such that maximal sample value is this many ones in binary.
+ If neither "unit" nor "sbps" are given, "sbps" of 12 is used by default.
+
:param values: Sequence of runs to classify.
:param values: Sequence of runs to classify.
+ :param unit: Typical resolution of the values.
+ Zero and None means no unit given.
+ :param sbps: Significant Bits Per Sample. None on zero means 12.
+ If units is not set, this is used to compute unit from max sample value.
:type values: Iterable[Union[float, Iterable[float]]]
:type values: Iterable[Union[float, Iterable[float]]]
+ :type unit: Optional[float]
+ :type sbps: Optional[float]
:returns: Classified group list.
:rtype: BitCountingGroupList
"""
:returns: Classified group list.
:rtype: BitCountingGroupList
"""
- processed_values = list()
max_value = 0.0
for value in values:
if isinstance(value, (float, int)):
max_value = 0.0
for value in values:
if isinstance(value, (float, int)):
if subvalue > max_value:
max_value = subvalue
processed_values.append(AvgStdevStats.for_runs(value))
if subvalue > max_value:
max_value = subvalue
processed_values.append(AvgStdevStats.for_runs(value))
+ if not unit:
+ if not sbps:
+ sbps = 12.0
+ max_in_units = pow(2.0, sbps + 1.0) - 1.0
+ unit = max_value / max_in_units
# Glist means group list (BitCountingGroupList).
# Glist means group list (BitCountingGroupList).
- open_glists = list()
- record_glist = BitCountingGroupList(max_value=max_value)
+ open_glists = []
+ record_glist = BitCountingGroupList(max_value=max_value, unit=unit)
for value in processed_values:
new_open_glist = record_glist.copy_fast().append_group_of_runs([value])
record_glist = new_open_glist
for value in processed_values:
new_open_glist = record_glist.copy_fast().append_group_of_runs([value])
record_glist = new_open_glist
open_glists.append(new_open_glist)
previous_average = record_glist[0].stats.avg
for group in record_glist:
open_glists.append(new_open_glist)
previous_average = record_glist[0].stats.avg
for group in record_glist:
- if group.stats.avg == previous_average:
- group.comment = "normal"
- elif group.stats.avg < previous_average:
+ if group.stats.avg < previous_average:
group.comment = "regression"
elif group.stats.avg > previous_average:
group.comment = "progression"
group.comment = "regression"
elif group.stats.avg > previous_average:
group.comment = "progression"
from zlib import compress
from resources.libraries.python.Constants import Constants
from zlib import compress
from resources.libraries.python.Constants import Constants
-from resources.libraries.python.jumpavg.AvgStdevStats import AvgStdevStats
+from resources.libraries.python.jumpavg import AvgStdevStats
from resources.libraries.python.model.ExportResult import (
export_dut_type_and_version, export_tg_type_and_version
)
from resources.libraries.python.model.ExportResult import (
export_dut_type_and_version, export_tg_type_and_version
)