X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=resources%2Flibraries%2Fpython%2FPLRsearch%2Fstat_trackers.py;h=e598fd840ef15ddf0c49b7f12239b884f1d1fd5a;hb=HEAD;hp=58ad98fd2e36e53cfc67985c596d9dd484d5b504;hpb=fbbc47359e3f7b59bbd5a84d85c673374933a50a;p=csit.git diff --git a/resources/libraries/python/PLRsearch/stat_trackers.py b/resources/libraries/python/PLRsearch/stat_trackers.py index 58ad98fd2e..e598fd840e 100644 --- a/resources/libraries/python/PLRsearch/stat_trackers.py +++ b/resources/libraries/python/PLRsearch/stat_trackers.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 Cisco and/or its affiliates. +# Copyright (c) 2024 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -32,7 +32,7 @@ import numpy from .log_plus import log_plus, safe_exp -class ScalarStatTracker(object): +class ScalarStatTracker: """Class for tracking one-dimensional samples. Variance of one-dimensional data cannot be negative, @@ -61,13 +61,13 @@ class ScalarStatTracker(object): def __repr__(self): """Return string, which interpreted constructs state of self. - :returns: Expression contructing an equivalent instance. + :returns: Expression constructing an equivalent instance. :rtype: str """ - return ("ScalarStatTracker(log_sum_weight={lsw!r},average={a!r}," - "log_variance={lv!r})".format( - lsw=self.log_sum_weight, a=self.average, - lv=self.log_variance)) + return ( + f"ScalarStatTracker(log_sum_weight={self.log_sum_weight!r}," + f"average={self.average!r},log_variance={self.log_variance!r})" + ) def copy(self): """Return new ScalarStatTracker instance with the same state as self. @@ -79,7 +79,8 @@ class ScalarStatTracker(object): :rtype: ScalarStatTracker """ return ScalarStatTracker( - self.log_sum_weight, self.average, self.log_variance) + self.log_sum_weight, self.average, self.log_variance + ) def add(self, scalar_value, log_weight=0.0): """Return updated stats corresponding to addition of another sample. @@ -111,7 +112,8 @@ class ScalarStatTracker(object): if absolute_shift > 0.0: log_square_shift = 2 * math.log(absolute_shift) log_variance = log_plus( - log_variance, log_square_shift + log_sample_ratio) + log_variance, log_square_shift + log_sample_ratio + ) if log_variance is not None: log_variance += old_log_sum_weight - new_log_sum_weight self.log_sum_weight = new_log_sum_weight @@ -136,9 +138,15 @@ class ScalarDualStatTracker(ScalarStatTracker): """ def __init__( - self, log_sum_weight=None, average=0.0, log_variance=None, - log_sum_secondary_weight=None, secondary_average=0.0, - log_secondary_variance=None, max_log_weight=None): + self, + log_sum_weight=None, + average=0.0, + log_variance=None, + log_sum_secondary_weight=None, + secondary_average=0.0, + log_secondary_variance=None, + max_log_weight=None, + ): """Initialize new tracker instance, empty by default. :param log_sum_weight: Natural logarithm of sum of weights @@ -168,7 +176,8 @@ class ScalarDualStatTracker(ScalarStatTracker): # so in case of diamond inheritance mismatch would be probable. ScalarStatTracker.__init__(self, log_sum_weight, average, log_variance) self.secondary = ScalarStatTracker( - log_sum_secondary_weight, secondary_average, log_secondary_variance) + log_sum_secondary_weight, secondary_average, log_secondary_variance + ) self.max_log_weight = max_log_weight def __repr__(self): @@ -179,13 +188,13 @@ class ScalarDualStatTracker(ScalarStatTracker): """ sec = self.secondary return ( - "ScalarDualStatTracker(log_sum_weight={lsw!r},average={a!r}," - "log_variance={lv!r},log_sum_secondary_weight={lssw!r}," - "secondary_average={sa!r},log_secondary_variance={lsv!r}," - "max_log_weight={mlw!r})".format( - lsw=self.log_sum_weight, a=self.average, lv=self.log_variance, - lssw=sec.log_sum_weight, sa=sec.average, lsv=sec.log_variance, - mlw=self.max_log_weight)) + f"ScalarDualStatTracker(log_sum_weight={self.log_sum_weight!r}," + f"average={self.average!r},log_variance={self.log_variance!r}," + f"log_sum_secondary_weight={sec.log_sum_weight!r}," + f"secondary_average={sec.average!r}," + f"log_secondary_variance={sec.log_variance!r}," + f"max_log_weight={self.max_log_weight!r})" + ) def add(self, scalar_value, log_weight=0.0): """Return updated both stats after addition of another sample. @@ -200,7 +209,7 @@ class ScalarDualStatTracker(ScalarStatTracker): """ # Using super() as copy() and add() are not expected to change # signature, so this way diamond inheritance will be supported. - primary = super(ScalarDualStatTracker, self) + primary = super() if self.max_log_weight is None or log_weight >= self.max_log_weight: self.max_log_weight = log_weight self.secondary = primary.copy() @@ -209,7 +218,6 @@ class ScalarDualStatTracker(ScalarStatTracker): primary.add(scalar_value, log_weight) return self - def get_pessimistic_variance(self): """Return estimate of variance reflecting weight effects. @@ -231,7 +239,7 @@ class ScalarDualStatTracker(ScalarStatTracker): return var_combined -class VectorStatTracker(object): +class VectorStatTracker: """Class for tracking multi-dimensional samples. Contrary to one-dimensional data, multi-dimensional covariance matrix @@ -246,13 +254,17 @@ class VectorStatTracker(object): """ def __init__( - self, dimension=2, log_sum_weight=None, averages=None, - covariance_matrix=None): - """Initialize new tracker instance, two-dimenstional empty by default. + self, + dimension=2, + log_sum_weight=None, + averages=None, + covariance_matrix=None, + ): + """Initialize new tracker instance, two-dimensional empty by default. If any of latter two arguments is None, it means the tracker state is invalid. Use reset method - to create empty tracker of constructed dimentionality. + to create empty tracker of constructed dimensionality. :param dimension: Number of scalar components of samples. :param log_sum_weight: Natural logarithm of sum of weights @@ -273,14 +285,15 @@ class VectorStatTracker(object): def __repr__(self): """Return string, which interpreted constructs state of self. - :returns: Expression contructing an equivalent instance. + :returns: Expression constructing an equivalent instance. :rtype: str """ return ( - "VectorStatTracker(dimension={d!r},log_sum_weight={lsw!r}," - "averages={a!r},covariance_matrix={cm!r})".format( - d=self.dimension, lsw=self.log_sum_weight, a=self.averages, - cm=self.covariance_matrix)) + f"VectorStatTracker(dimension={self.dimension!r}," + f"log_sum_weight={self.log_sum_weight!r}," + f"averages={self.averages!r}," + f"covariance_matrix={self.covariance_matrix!r})" + ) def copy(self): """Return new instance with the same state as self. @@ -292,8 +305,11 @@ class VectorStatTracker(object): :rtype: VectorStatTracker """ return VectorStatTracker( - self.dimension, self.log_sum_weight, self.averages[:], - copy.deepcopy(self.covariance_matrix)) + self.dimension, + self.log_sum_weight, + self.averages[:], + copy.deepcopy(self.covariance_matrix), + ) def reset(self): """Return state set to empty data of proper dimensionality. @@ -303,8 +319,9 @@ class VectorStatTracker(object): """ self.averages = [0.0 for _ in range(self.dimension)] # TODO: Examine whether we can gain speed by tracking triangle only. - self.covariance_matrix = [[0.0 for _ in range(self.dimension)] - for _ in range(self.dimension)] + self.covariance_matrix = [ + [0.0 for _ in range(self.dimension)] for _ in range(self.dimension) + ] # TODO: In Python3, list comprehensions are generators, # so they are not indexable. Put list() when converting. return self @@ -338,10 +355,12 @@ class VectorStatTracker(object): old_log_sum_weight = self.log_sum_weight old_averages = self.averages if not old_averages: - shift = [0.0 for index in range(dimension)] + shift = [0.0 for _ in range(dimension)] else: - shift = [vector_value[index] - old_averages[index] - for index in range(dimension)] + shift = [ + vector_value[index] - old_averages[index] + for index in range(dimension) + ] if old_log_sum_weight is None: # First sample. self.log_sum_weight = log_weight @@ -352,8 +371,10 @@ class VectorStatTracker(object): new_log_sum_weight = log_plus(old_log_sum_weight, log_weight) data_ratio = math.exp(old_log_sum_weight - new_log_sum_weight) sample_ratio = math.exp(log_weight - new_log_sum_weight) - new_averages = [old_averages[index] + shift[index] * sample_ratio - for index in range(dimension)] + new_averages = [ + old_averages[index] + shift[index] * sample_ratio + for index in range(dimension) + ] # It is easier to update covariance matrix in-place. for second in range(dimension): for first in range(dimension): @@ -375,7 +396,7 @@ class VectorStatTracker(object): If the weight of the incoming sample is far bigger than the weight of all the previous data together, - convariance matrix would suffer from underflows. + covariance matrix would suffer from underflow. To avoid that, this method manipulates both weights before calling add().