feat(jumpavg): support small values via unit param

author Vratko Polak <vrpolak@cisco.com>

Fri, 2 Jun 2023 12:44:47 +0000 (14:44 +0200)

committer Tibor Frank <tifrank@cisco.com>

Wed, 7 Jun 2023 05:53:55 +0000 (05:53 +0000)
author Vratko Polak <vrpolak@cisco.com>
Fri, 2 Jun 2023 12:44:47 +0000 (14:44 +0200)
committer Tibor Frank <tifrank@cisco.com>
Wed, 7 Jun 2023 05:53:55 +0000 (05:53 +0000)
diff --git a/resources/libraries/python/jumpavg/__init__.py b/resources/libraries/python/jumpavg/__init__.py

index 4fa696c..7f63b5e 100644 (file)
--- a/resources/libraries/python/jumpavg/__init__.py
+++ b/resources/libraries/python/jumpavg/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at:
@@ -15,8 +15,8 @@
  __init__ file for "jumpavg" Python package.
  """
  
-from .AvgStdevStats import AvgStdevStats
-from .BitCountingStats import BitCountingStats
-from .BitCountingGroup import BitCountingGroup
-from .BitCountingGroupList import BitCountingGroupList
+from .avg_stdev_stats import AvgStdevStats
+from .bit_counting_stats import BitCountingStats
+from .bit_counting_group import BitCountingGroup
+from .bit_counting_group_list import BitCountingGroupList
  from .classify import classify
diff --git a/resources/libraries/python/jumpavg/AvgStdevStats.py b/resources/libraries/python/jumpavg/avg_stdev_stats.py

similarity index 98%

rename from resources/libraries/python/jumpavg/AvgStdevStats.py

rename to resources/libraries/python/jumpavg/avg_stdev_stats.py

index d40b316..3d6a834 100644 (file)
--- a/resources/libraries/python/jumpavg/AvgStdevStats.py
+++ b/resources/libraries/python/jumpavg/avg_stdev_stats.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at:
diff --git a/resources/libraries/python/jumpavg/BitCountingGroup.py b/resources/libraries/python/jumpavg/bit_counting_group.py

similarity index 90%

rename from resources/libraries/python/jumpavg/BitCountingGroup.py

rename to resources/libraries/python/jumpavg/bit_counting_group.py

index 48bea08..22c9337 100644 (file)
--- a/resources/libraries/python/jumpavg/BitCountingGroup.py
+++ b/resources/libraries/python/jumpavg/bit_counting_group.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at:
@@ -17,8 +17,8 @@ import collections
  import dataclasses
  import typing
  
-from .AvgStdevStats import AvgStdevStats
-from .BitCountingStats import BitCountingStats
+from .avg_stdev_stats import AvgStdevStats
+from .bit_counting_stats import BitCountingStats
  
  
  @dataclasses.dataclass
@@ -46,7 +46,9 @@ class BitCountingGroup(collections.abc.Sequence):
      so the caller should clone it to avoid unexpected muations."""
      max_value: float
      """Maximal sample value to expect."""
-    comment: str = "unknown"
+    unit: float = 1.0
+    """Typical resolution of the values."""
+    comment: str = "normal"
      """Any string giving more info, e.g. "regression"."""
      prev_avg: typing.Optional[float] = None
      """Average of the previous group, if any."""
@@ -64,7 +66,7 @@ class BitCountingGroup(collections.abc.Sequence):
          e.g. whether the stats and bits values reflect the runs.
          """
          if self.stats is None:
-            self.stats = AvgStdevStats.for_runs(self.run_list)
+            self.stats = AvgStdevStats.for_runs(runs=self.run_list)
  
      @property
      def bits(self) -> float:
@@ -76,8 +78,11 @@ class BitCountingGroup(collections.abc.Sequence):
          :rtype: float
          """
          if self.cached_bits is None:
-            self.cached_bits = BitCountingStats.for_runs(
-                [self.stats], self.max_value, self.prev_avg
+            self.cached_bits = BitCountingStats.for_runs_and_params(
+                runs=[self.stats],
+                max_value=self.max_value,
+                unit=self.unit,
+                prev_avg=self.prev_avg,
              ).bits
          return self.cached_bits
  
@@ -115,6 +120,7 @@ class BitCountingGroup(collections.abc.Sequence):
              stats=stats,
              cached_bits=self.cached_bits,
              max_value=self.max_value,
+            unit=self.unit,
              prev_avg=self.prev_avg,
              comment=self.comment,
          )
diff --git a/resources/libraries/python/jumpavg/BitCountingGroupList.py b/resources/libraries/python/jumpavg/bit_counting_group_list.py

similarity index 93%

rename from resources/libraries/python/jumpavg/BitCountingGroupList.py

rename to resources/libraries/python/jumpavg/bit_counting_group_list.py

index 468e79b..e4d33b5 100644 (file)
--- a/resources/libraries/python/jumpavg/BitCountingGroupList.py
+++ b/resources/libraries/python/jumpavg/bit_counting_group_list.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at:
@@ -17,8 +17,8 @@ import collections
  import dataclasses
  import typing
  
-from .AvgStdevStats import AvgStdevStats  # Just for type hints.
-from .BitCountingGroup import BitCountingGroup
+from .avg_stdev_stats import AvgStdevStats  # Just for type hints.
+from .bit_counting_group import BitCountingGroup
  
  
  @dataclasses.dataclass
@@ -46,6 +46,8 @@ class BitCountingGroupList(collections.abc.Sequence):
  
      max_value: float
      """Maximal sample value to base bits computation on."""
+    unit: float = 1.0
+    """Typical resolution of the values."""
      group_list: typing.List[BitCountingGroup] = None
      """List of groups to compose this group list.
      Init also accepts None standing for an empty list.
@@ -62,7 +64,7 @@ class BitCountingGroupList(collections.abc.Sequence):
          e.g. whether the cached bits values (and bits_except_last) make sense.
          """
          if self.group_list is None:
-            self.group_list = list()
+            self.group_list = []
  
      def __getitem__(self, index: int) -> BitCountingGroup:
          """Return the group at the index.
@@ -90,6 +92,7 @@ class BitCountingGroupList(collections.abc.Sequence):
          """
          return self.__class__(
              max_value=self.max_value,
+            unit=self.unit,
              group_list=[group.copy() for group in self.group_list],
              bits_except_last=self.bits_except_last,
          )
@@ -114,6 +117,7 @@ class BitCountingGroupList(collections.abc.Sequence):
              # for users with many samples.
          return self.__class__(
              max_value=self.max_value,
+            unit=self.unit,
              group_list=group_list,
              bits_except_last=self.bits_except_last,
          )
@@ -152,11 +156,15 @@ class BitCountingGroupList(collections.abc.Sequence):
              # It is faster to avoid stats recalculation.
              new_group = runs.copy()
              new_group.max_value = self.max_value
+            # Unit is common.
              new_group.prev_avg = prev_avg
              new_group.cached_bits = None
          else:
              new_group = BitCountingGroup(
-                run_list=runs, max_value=self.max_value, prev_avg=prev_avg
+                run_list=runs,
+                max_value=self.max_value,
+                unit=self.unit,
+                prev_avg=prev_avg,
              )
          self.bits_except_last = self.bits
          self.group_list.append(new_group)
diff --git a/resources/libraries/python/jumpavg/BitCountingStats.py b/resources/libraries/python/jumpavg/bit_counting_stats.py

similarity index 85%

rename from resources/libraries/python/jumpavg/BitCountingStats.py

rename to resources/libraries/python/jumpavg/bit_counting_stats.py

index 524ac95..caece2c 100644 (file)
--- a/resources/libraries/python/jumpavg/BitCountingStats.py
+++ b/resources/libraries/python/jumpavg/bit_counting_stats.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at:
@@ -17,7 +17,7 @@ import dataclasses
  import math
  import typing
  
-from .AvgStdevStats import AvgStdevStats
+from .avg_stdev_stats import AvgStdevStats
  
  
  @dataclasses.dataclass
@@ -40,6 +40,8 @@ class BitCountingStats(AvgStdevStats):
      """Maximal sample value (real or estimated).
      Default value is there just for argument ordering reasons,
      leaving None leads to exceptions."""
+    unit: float = 1.0
+    """Typical resolution of the values."""
      prev_avg: typing.Optional[float] = None
      """Population average of the previous group (if any)."""
      bits: float = None
@@ -74,6 +76,8 @@ class BitCountingStats(AvgStdevStats):
              return
          if self.max_value <= 0.0:
              raise ValueError(f"Invalid max value: {self!r}")
+        max_value = self.max_value / self.unit
+        avg = self.avg / self.unit
          # Length of the sequence must be also counted in bits,
          # otherwise the message would not be decodable.
          # Model: probability of k samples is 1/k - 1/(k+1) == 1/k/(k+1)
@@ -82,36 +86,37 @@ class BitCountingStats(AvgStdevStats):
          if self.prev_avg is None:
              # Avg is considered to be uniformly distributed
              # from zero to max_value.
-            self.bits += math.log(self.max_value + 1.0, 2)
+            self.bits += math.log(max_value + 1, 2)
          else:
              # Opposite triangle distribution with minimum.
-            self.bits += math.log(
-                (self.max_value * (self.max_value + 1))
-                / (abs(self.avg - self.prev_avg) + 1),
-                2,
-            )
+            prev_avg = self.prev_avg / self.unit
+            norm = prev_avg * prev_avg
+            norm -= (prev_avg - 1) * max_value
+            norm += max_value * max_value / 2
+            self.bits -= math.log((abs(avg - prev_avg) + 1) / norm, 2)
          if self.size < 2:
              return
+        stdev = self.stdev / self.unit
          # Stdev is considered to be uniformly distributed
          # from zero to max_value. That is quite a bad expectation,
          # but resilient to negative samples etc.
-        self.bits += math.log(self.max_value + 1.0, 2)
+        self.bits += math.log(max_value + 1, 2)
          # Now we know the samples lie on sphere in size-1 dimensions.
          # So it is (size-2)-sphere, with radius^2 == stdev^2 * size.
          # https://en.wikipedia.org/wiki/N-sphere
          sphere_area_ln = math.log(2)
-        sphere_area_ln += math.log(math.pi) * ((self.size - 1) / 2.0)
-        sphere_area_ln -= math.lgamma((self.size - 1) / 2.0)
-        sphere_area_ln += math.log(self.stdev + 1.0) * (self.size - 2)
-        sphere_area_ln += math.log(self.size) * ((self.size - 2) / 2.0)
+        sphere_area_ln += math.log(math.pi) * ((self.size - 1) / 2)
+        sphere_area_ln -= math.lgamma((self.size - 1) / 2)
+        sphere_area_ln += math.log(stdev + 1) * (self.size - 2)
+        sphere_area_ln += math.log(self.size) * ((self.size - 2) / 2)
          self.bits += sphere_area_ln / math.log(2)
  
-    # TODO: Rename, so pylint stops complaining about signature change.
      @classmethod
-    def for_runs(
+    def for_runs_and_params(
          cls,
          runs: typing.Iterable[typing.Union[float, AvgStdevStats]],
          max_value: float,
+        unit: float = 1.0,
          prev_avg: typing.Optional[float] = None,
      ):
          """Return new stats instance describing the sequence of runs.
@@ -131,9 +136,11 @@ class BitCountingStats(AvgStdevStats):
  
          :param runs: Sequence of data to describe by the new metadata.
          :param max_value: Maximal expected value.
+        :param unit: Typical resolution of the values.
          :param prev_avg: Population average of the previous group, if any.
          :type runs: Iterable[Union[float, AvgStdevStats]]
          :type max_value: Union[float, NoneType]
+        :type unit: float
          :type prev_avg: Union[float, NoneType]
          :returns: The new stats instance.
          :rtype: cls
@@ -144,6 +151,7 @@ class BitCountingStats(AvgStdevStats):
              avg=asd.avg,
              stdev=asd.stdev,
              max_value=max_value,
+            unit=unit,
              prev_avg=prev_avg,
          )
          return ret_obj
diff --git a/resources/libraries/python/jumpavg/classify.py b/resources/libraries/python/jumpavg/classify.py

index 87d2502..cc3cdcc 100644 (file)
--- a/resources/libraries/python/jumpavg/classify.py
+++ b/resources/libraries/python/jumpavg/classify.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at:
@@ -13,21 +13,23 @@
  
  """Module holding the classify function
  
-Classification os one of primary purposes of this package.
+Classification is one of primary purposes of this package.
  
  Minimal message length principle is used
  for grouping results into the list of groups,
  assuming each group is a population of different Gaussian distribution.
  """
  
-import typing
+from typing import Iterable, Optional, Union
  
-from .AvgStdevStats import AvgStdevStats
-from .BitCountingGroupList import BitCountingGroupList
+from .avg_stdev_stats import AvgStdevStats
+from .bit_counting_group_list import BitCountingGroupList
  
  
  def classify(
-    values: typing.Iterable[typing.Union[float, typing.Iterable[float]]]
+    values: Iterable[Union[float, Iterable[float]]],
+    unit: Optional[float] = None,
+    sbps: Optional[float] = None,
  ) -> BitCountingGroupList:
      """Return the values in groups of optimal bit count.
  
@@ -38,12 +40,27 @@ def classify(
      Internally, such sequence is replaced by AvgStdevStats
      after maximal value is found.
  
+    If the values are smaller than expected (below one unit),
+    the underlying assumptions break down and the classification is wrong.
+    Use the "unit" parameter to hint at what the input resolution is.
+
+    If the correct value of unit is not known beforehand,
+    the argument "sbps" (Significant Bits Per Sample) can be used
+    to set unit such that maximal sample value is this many ones in binary.
+    If neither "unit" nor "sbps" is given, "sbps" of 12 is used by default.
+
      :param values: Sequence of runs to classify.
+    :param unit: Typical resolution of the values.
+        Zero or None means no unit is given.
+    :param sbps: Significant Bits Per Sample. None or zero means 12.
+        If unit is not set, this is used to compute unit from max sample value.
      :type values: Iterable[Union[float, Iterable[float]]]
+    :type unit: Optional[float]
+    :type sbps: Optional[float]
      :returns: Classified group list.
      :rtype: BitCountingGroupList
      """
-    processed_values = list()
+    processed_values = []
      max_value = 0.0
      for value in values:
          if isinstance(value, (float, int)):
@@ -55,9 +72,14 @@ def classify(
                  if subvalue > max_value:
                      max_value = subvalue
              processed_values.append(AvgStdevStats.for_runs(value))
+    if not unit:
+        if not sbps:
+            sbps = 12.0
+        max_in_units = pow(2.0, sbps + 1.0) - 1.0
+        unit = max_value / max_in_units
      # Glist means group list (BitCountingGroupList).
-    open_glists = list()
-    record_glist = BitCountingGroupList(max_value=max_value)
+    open_glists = []
+    record_glist = BitCountingGroupList(max_value=max_value, unit=unit)
      for value in processed_values:
          new_open_glist = record_glist.copy_fast().append_group_of_runs([value])
          record_glist = new_open_glist
@@ -68,9 +90,7 @@ def classify(
          open_glists.append(new_open_glist)
      previous_average = record_glist[0].stats.avg
      for group in record_glist:
-        if group.stats.avg == previous_average:
-            group.comment = "normal"
-        elif group.stats.avg < previous_average:
+        if group.stats.avg < previous_average:
              group.comment = "regression"
          elif group.stats.avg > previous_average:
              group.comment = "progression"
diff --git a/resources/libraries/python/model/ExportJson.py b/resources/libraries/python/model/ExportJson.py

index 478b3ab..de8874d 100644 (file)
--- a/resources/libraries/python/model/ExportJson.py
+++ b/resources/libraries/python/model/ExportJson.py
@@ -30,7 +30,7 @@ from robot.libraries.BuiltIn import BuiltIn
  from zlib import compress
  
  from resources.libraries.python.Constants import Constants
-from resources.libraries.python.jumpavg.AvgStdevStats import AvgStdevStats
+from resources.libraries.python.jumpavg import AvgStdevStats
  from resources.libraries.python.model.ExportResult import (
      export_dut_type_and_version, export_tg_type_and_version
  )
author	Vratko Polak <vrpolak@cisco.com>
	Fri, 2 Jun 2023 12:44:47 +0000 (14:44 +0200)
committer	Tibor Frank <tifrank@cisco.com>
	Wed, 7 Jun 2023 05:53:55 +0000 (05:53 +0000)
resources/libraries/python/jumpavg/__init__.py		patch \| blob \| history
resources/libraries/python/jumpavg/avg_stdev_stats.py	[moved from resources/libraries/python/jumpavg/AvgStdevStats.py with 98% similarity]	patch \| blob \| history
resources/libraries/python/jumpavg/bit_counting_group.py	[moved from resources/libraries/python/jumpavg/BitCountingGroup.py with 90% similarity]	patch \| blob \| history
resources/libraries/python/jumpavg/bit_counting_group_list.py	[moved from resources/libraries/python/jumpavg/BitCountingGroupList.py with 93% similarity]	patch \| blob \| history
resources/libraries/python/jumpavg/bit_counting_stats.py	[moved from resources/libraries/python/jumpavg/BitCountingStats.py with 85% similarity]	patch \| blob \| history
resources/libraries/python/jumpavg/classify.py		patch \| blob \| history
resources/libraries/python/model/ExportJson.py		patch \| blob \| history