-# Copyright (c) 2019 Cisco and/or its affiliates.
+# Copyright (c) 2021 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
# TODO: Teach FD.io CSIT to use multiple dirs in PYTHONPATH,
# then switch to absolute imports within PLRsearch package.
# Current usage of relative imports is just a short term workaround.
-from log_plus import log_plus # pylint: disable=relative-import
+from .log_plus import log_plus, safe_exp
-class ScalarStatTracker(object):
+class ScalarStatTracker:
"""Class for tracking one-dimensional samples.
Variance of one-dimensional data cannot be negative,
self.log_variance = log_variance
def __repr__(self):
- """Return string, which interpreted constructs state of self."""
- return ("ScalarStatTracker(log_sum_weight={lsw!r},average={a!r},"
- "log_variance={lv!r})".format(
- lsw=self.log_sum_weight, a=self.average,
- lv=self.log_variance))
+ """Return string, which interpreted constructs state of self.
+
+ :returns: Expression constructing an equivalent instance.
+ :rtype: str
+ """
+ return f"ScalarStatTracker(log_sum_weight={self.log_sum_weight!r}," \
+ f"average={self.average!r},log_variance={self.log_variance!r})"
def copy(self):
"""Return new ScalarStatTracker instance with the same state as self.
:rtype: ScalarStatTracker
"""
return ScalarStatTracker(
- self.log_sum_weight, self.average, self.log_variance)
+ self.log_sum_weight, self.average, self.log_variance
+ )
def add(self, scalar_value, log_weight=0.0):
"""Return updated stats corresponding to addition of another sample.
One typical use is for Monte Carlo integrator to decide whether
the partial sums so far are reliable enough.
"""
-
def __init__(
self, log_sum_weight=None, average=0.0, log_variance=None,
log_sum_secondary_weight=None, secondary_average=0.0,
# so in case of diamond inheritance mismatch would be probable.
ScalarStatTracker.__init__(self, log_sum_weight, average, log_variance)
self.secondary = ScalarStatTracker(
- log_sum_secondary_weight, secondary_average, log_secondary_variance)
+ log_sum_secondary_weight, secondary_average, log_secondary_variance
+ )
self.max_log_weight = max_log_weight
def __repr__(self):
- """Return string, which interpreted constructs state of self."""
+ """Return string, which interpreted constructs state of self.
+
+ :returns: Expression constructing an equivalent instance.
+ :rtype: str
+ """
sec = self.secondary
- return (
- "ScalarDualStatTracker(log_sum_weight={lsw!r},average={a!r},"
- "log_variance={lv!r},log_sum_secondary_weight={lssw!r},"
- "secondary_average={sa!r},log_secondary_variance={lsv!r},"
- "max_log_weight={mlw!r})".format(
- lsw=self.log_sum_weight, a=self.average, lv=self.log_variance,
- lssw=sec.log_sum_weight, sa=sec.average, lsv=sec.log_variance,
- mlw=self.max_log_weight))
+ return f"ScalarDualStatTracker(log_sum_weight={self.log_sum_weight!r},"\
+ f"average={self.average!r},log_variance={self.log_variance!r}," \
+ f"log_sum_secondary_weight={sec.log_sum_weight!r}," \
+ f"secondary_average={sec.average!r}," \
+ f"log_secondary_variance={sec.log_variance!r}," \
+ f"max_log_weight={self.max_log_weight!r})"
def add(self, scalar_value, log_weight=0.0):
"""Return updated both stats after addition of another sample.
primary.add(scalar_value, log_weight)
return self
+ def get_pessimistic_variance(self):
+ """Return estimate of variance reflecting weight effects.
+
+ Typical scenario is the primary tracker dominated by a single sample.
+ In the worst case, the secondary tracker is also dominated by
+ a single (but different) sample.
-class VectorStatTracker(object):
+ Current implementation simply returns variance of average
+ of the two trackers, as if they were independent.
+
+ :returns: Pessimistic estimate of variance (not stdev, no log).
+ :rtype: float
+ """
+ var_primary = safe_exp(self.log_variance)
+ var_secondary = safe_exp(self.secondary.log_variance)
+ var_combined = (var_primary + var_secondary) / 2
+ avg_half_diff = (self.average - self.secondary.average) / 2
+ var_combined += avg_half_diff * avg_half_diff
+ return var_combined
+
+
+class VectorStatTracker:
"""Class for tracking multi-dimensional samples.
Contrary to one-dimensional data, multi-dimensional covariance matrix
def __init__(
self, dimension=2, log_sum_weight=None, averages=None,
covariance_matrix=None):
- """Initialize new tracker instance, two-dimenstional empty by default.
+ """Initialize new tracker instance, two-dimensional empty by default.
If any of latter two arguments is None, it means
the tracker state is invalid. Use reset method
- to create empty tracker of constructed dimentionality.
+ to create empty tracker of constructed dimensionality.
:param dimension: Number of scalar components of samples.
:param log_sum_weight: Natural logarithm of sum of weights
def __repr__(self):
"""Return string, which interpreted constructs state of self.
- :returns: Expression contructing an equivalent instance.
- :rtype: str"""
- return (
- "VectorStatTracker(dimension={d!r},log_sum_weight={lsw!r},"
- "averages={a!r},covariance_matrix={cm!r})".format(
- d=self.dimension, lsw=self.log_sum_weight, a=self.averages,
- cm=self.covariance_matrix))
+ :returns: Expression constructing an equivalent instance.
+ :rtype: str
+ """
+ return f"VectorStatTracker(dimension={self.dimension!r}," \
+ f"log_sum_weight={self.log_sum_weight!r}," \
+ f"averages={self.averages!r}," \
+ f"covariance_matrix={self.covariance_matrix!r})"
def copy(self):
"""Return new instance with the same state as self.
:rtype: VectorStatTracker
"""
return VectorStatTracker(
- self.dimension, self.log_sum_weight, self.averages,
- self.covariance_matrix)
+ self.dimension, self.log_sum_weight, self.averages[:],
+ copy.deepcopy(self.covariance_matrix)
+ )
def reset(self):
"""Return state set to empty data of proper dimensionality.
"""
self.averages = [0.0 for _ in range(self.dimension)]
# TODO: Examine whether we can gain speed by tracking triangle only.
- self.covariance_matrix = [[0.0 for _ in range(self.dimension)]
- for _ in range(self.dimension)]
+ self.covariance_matrix = [
+ [0.0 for _ in range(self.dimension)] for _ in range(self.dimension)
+ ]
# TODO: In Python3, list comprehensions are generators,
# so they are not indexable. Put list() when converting.
return self
self.reset()
for index in range(self.dimension):
self.covariance_matrix[index][index] = 1.0
+ return self
def add_get_shift(self, vector_value, log_weight=0.0):
"""Return shift and update state to addition of another sample.
Default: 0.0 (as log of 1.0).
:type vector_value: iterable of float
:type log_weight: float
- :returns: Updated self.
- :rtype: VectorStatTracker
+ :returns: Shift vector
+ :rtype: list of float
"""
dimension = self.dimension
old_log_sum_weight = self.log_sum_weight
old_averages = self.averages
if not old_averages:
- shift = [0.0 for index in range(dimension)]
+ shift = [0.0 for _ in range(dimension)]
else:
- shift = [vector_value[index] - old_averages[index]
- for index in range(dimension)]
+ shift = [
+ vector_value[index] - old_averages[index]
+ for index in range(dimension)
+ ]
if old_log_sum_weight is None:
# First sample.
self.log_sum_weight = log_weight
new_log_sum_weight = log_plus(old_log_sum_weight, log_weight)
data_ratio = math.exp(old_log_sum_weight - new_log_sum_weight)
sample_ratio = math.exp(log_weight - new_log_sum_weight)
- new_averages = [old_averages[index] + shift[index] * sample_ratio
- for index in range(dimension)]
+ new_averages = [
+ old_averages[index] + shift[index] * sample_ratio
+ for index in range(dimension)
+ ]
# It is easier to update covariance matrix in-place.
for second in range(dimension):
for first in range(dimension):
If the weight of the incoming sample is far bigger
than the weight of all the previous data together,
- convariance matrix would suffer from underflows.
+ covariance matrix would suffer from underflow.
To avoid that, this method manipulates both weights
before calling add().