[csit.git] / resources / libraries / python / PLRsearch / stat_trackers.py
index 168b09a..2a7a05c 100644
@@ -29,10 +29,10 @@ import numpy
 # TODO: Teach FD.io CSIT to use multiple dirs in PYTHONPATH,
 # then switch to absolute imports within PLRsearch package.
 # Current usage of relative imports is just a short term workaround.
-from log_plus import log_plus  # pylint: disable=relative-import
+from .log_plus import log_plus, safe_exp
 
 
-class ScalarStatTracker(object):
+class ScalarStatTracker:
     """Class for tracking one-dimensional samples.
 
     Variance of one-dimensional data cannot be negative,
     """Class for tracking one-dimensional samples.
 
     Variance of one-dimensional data cannot be negative,
@@ -59,11 +59,13 @@ class ScalarStatTracker(object):
         self.log_variance = log_variance
 
     def __repr__(self):
-        """Return string, which interpreted constructs state of self."""
-        return ("ScalarStatTracker(log_sum_weight={lsw!r},average={a!r},"
-                "log_variance={lv!r})".format(
-                    lsw=self.log_sum_weight, a=self.average,
-                    lv=self.log_variance))
+        """Return string, which interpreted constructs state of self.
+
+        :returns: Expression constructing an equivalent instance.
+        :rtype: str
+        """
+        return f"ScalarStatTracker(log_sum_weight={self.log_sum_weight!r}," \
+            f"average={self.average!r},log_variance={self.log_variance!r})"
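The rewritten __repr__ above is meant to round-trip through eval(), as its docstring says. A minimal usage sketch (illustrative only, not part of the patch; written so it works whether add() mutates in place or returns a fresh tracker):

    tracker = ScalarStatTracker()
    tracker = tracker.add(2.0)        # first sample, default log_weight=0.0
    tracker = tracker.add(4.0)        # second sample
    clone = eval(repr(tracker))       # "interpreted constructs state of self"
    assert repr(clone) == repr(tracker)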
 
     def copy(self):
         """Return new ScalarStatTracker instance with the same state as self.
@@ -75,7 +77,8 @@ class ScalarStatTracker(object):
         :rtype: ScalarStatTracker
         """
         return ScalarStatTracker(
-            self.log_sum_weight, self.average, self.log_variance)
+            self.log_sum_weight, self.average, self.log_variance
+        )
 
     def add(self, scalar_value, log_weight=0.0):
         """Return updated stats corresponding to addition of another sample.
@@ -130,7 +133,6 @@ class ScalarDualStatTracker(ScalarStatTracker):
     One typical use is for Monte Carlo integrator to decide whether
     the partial sums so far are reliable enough.
     """
-
     def __init__(
             self, log_sum_weight=None, average=0.0, log_variance=None,
             log_sum_secondary_weight=None, secondary_average=0.0,
@@ -164,20 +166,23 @@ class ScalarDualStatTracker(ScalarStatTracker):
         # so in case of diamond inheritance mismatch would be probable.
         ScalarStatTracker.__init__(self, log_sum_weight, average, log_variance)
         self.secondary = ScalarStatTracker(
-            log_sum_secondary_weight, secondary_average, log_secondary_variance)
+            log_sum_secondary_weight, secondary_average, log_secondary_variance
+        )
         self.max_log_weight = max_log_weight
 
     def __repr__(self):
-        """Return string, which interpreted constructs state of self."""
+        """Return string, which interpreted constructs state of self.
+
+        :returns: Expression constructing an equivalent instance.
+        :rtype: str
+        """
         sec = self.secondary
-        return (
-            "ScalarDualStatTracker(log_sum_weight={lsw!r},average={a!r},"
-            "log_variance={lv!r},log_sum_secondary_weight={lssw!r},"
-            "secondary_average={sa!r},log_secondary_variance={lsv!r},"
-            "max_log_weight={mlw!r})".format(
-                lsw=self.log_sum_weight, a=self.average, lv=self.log_variance,
-                lssw=sec.log_sum_weight, sa=sec.average, lsv=sec.log_variance,
-                mlw=self.max_log_weight))
+        return f"ScalarDualStatTracker(log_sum_weight={self.log_sum_weight!r},"\
+            f"average={self.average!r},log_variance={self.log_variance!r}," \
+            f"log_sum_secondary_weight={sec.log_sum_weight!r}," \
+            f"secondary_average={sec.average!r}," \
+            f"log_secondary_variance={sec.log_variance!r}," \
+            f"max_log_weight={self.max_log_weight!r})"
 
     def add(self, scalar_value, log_weight=0.0):
         """Return updated both stats after addition of another sample.
@@ -201,8 +206,28 @@ class ScalarDualStatTracker(ScalarStatTracker):
         primary.add(scalar_value, log_weight)
         return self
 
+    def get_pessimistic_variance(self):
+        """Return estimate of variance reflecting weight effects.
+
+        Typical scenario is the primary tracker dominated by a single sample.
+        In the worst case, the secondary tracker is also dominated by
+        a single (but different) sample.
 
 
-class VectorStatTracker(object):
+        Current implementation simply returns variance of average
+        of the two trackers, as if they were independent.
+
+        :returns: Pessimistic estimate of variance (not stdev, no log).
+        :rtype: float
+        """
+        var_primary = safe_exp(self.log_variance)
+        var_secondary = safe_exp(self.secondary.log_variance)
+        var_combined = (var_primary + var_secondary) / 2
+        avg_half_diff = (self.average - self.secondary.average) / 2
+        var_combined += avg_half_diff * avg_half_diff
+        return var_combined
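A quick worked example of the computation above (illustrative numbers, not part of the patch); the result is the variance of an equal-weight mixture of the two tracked distributions, i.e. the mean of the two variances plus the squared half-difference of the two averages:

    var_primary, var_secondary = 4.0, 2.0     # safe_exp() of the log variances
    avg_primary, avg_secondary = 10.0, 6.0
    var_combined = (var_primary + var_secondary) / 2           # 3.0
    var_combined += ((avg_primary - avg_secondary) / 2) ** 2   # 3.0 + 4.0 = 7.0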
+
+
+class VectorStatTracker:
     """Class for tracking multi-dimensional samples.
 
     Contrary to one-dimensional data, multi-dimensional covariance matrix
     """Class for tracking multi-dimensional samples.
 
     Contrary to one-dimensional data, multi-dimensional covariance matrix
@@ -219,11 +244,11 @@ class VectorStatTracker(object):
     def __init__(
             self, dimension=2, log_sum_weight=None, averages=None,
             covariance_matrix=None):
-        """Initialize new tracker instance, two-dimenstional empty by default.
+        """Initialize new tracker instance, two-dimensional empty by default.
 
         If any of latter two arguments is None, it means
         the tracker state is invalid. Use reset method
-        to create empty tracker of constructed dimentionality.
+        to create empty tracker of constructed dimensionality.
 
         :param dimension: Number of scalar components of samples.
         :param log_sum_weight: Natural logarithm of sum of weights
@@ -244,13 +269,13 @@ class VectorStatTracker(object):
     def __repr__(self):
         """Return string, which interpreted constructs state of self.
 
-        :returns: Expression contructing an equivalent instance.
-        :rtype: str"""
-        return (
-            "VectorStatTracker(dimension={d!r},log_sum_weight={lsw!r},"
-            "averages={a!r},covariance_matrix={cm!r})".format(
-                d=self.dimension, lsw=self.log_sum_weight, a=self.averages,
-                cm=self.covariance_matrix))
+        :returns: Expression constructing an equivalent instance.
+        :rtype: str
+        """
+        return f"VectorStatTracker(dimension={self.dimension!r}," \
+            f"log_sum_weight={self.log_sum_weight!r}," \
+            f"averages={self.averages!r}," \
+            f"covariance_matrix={self.covariance_matrix!r})"
 
     def copy(self):
         """Return new instance with the same state as self.
@@ -262,8 +287,9 @@ class VectorStatTracker(object):
         :rtype: VectorStatTracker
         """
         return VectorStatTracker(
-            self.dimension, self.log_sum_weight, self.averages,
-            self.covariance_matrix)
+            self.dimension, self.log_sum_weight, self.averages[:],
+            copy.deepcopy(self.covariance_matrix)
+        )
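The switch to averages[:] and copy.deepcopy() above matters because Python lists are shared by reference: with the previous shallow copy, mutating the clone's covariance_matrix would silently corrupt the original tracker. A minimal sketch (illustrative only; assumes the module imports the standard copy module, which the deepcopy call requires):

    original = VectorStatTracker(dimension=2).reset()
    clone = original.copy()
    clone.covariance_matrix[0][0] = 42.0
    # With the deep copies above, the original is unaffected.
    assert original.covariance_matrix[0][0] == 0.0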
 
     def reset(self):
         """Return state set to empty data of proper dimensionality.
@@ -273,8 +299,9 @@ class VectorStatTracker(object):
         """
         self.averages = [0.0 for _ in range(self.dimension)]
         # TODO: Examine whether we can gain speed by tracking triangle only.
         """
         self.averages = [0.0 for _ in range(self.dimension)]
         # TODO: Examine whether we can gain speed by tracking triangle only.
-        self.covariance_matrix = [[0.0 for _ in range(self.dimension)]
-                                  for _ in range(self.dimension)]
+        self.covariance_matrix = [
+            [0.0 for _ in range(self.dimension)] for _ in range(self.dimension)
+        ]
         # TODO: In Python3, list comprehensions are generators,
         # so they are not indexable. Put list() when converting.
         return self
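Because reset() returns self (the return above), it can be chained with construction; for dimension=2 the resulting empty state looks like this (illustrative only):

    tracker = VectorStatTracker(dimension=2).reset()
    # tracker.averages          == [0.0, 0.0]
    # tracker.covariance_matrix == [[0.0, 0.0], [0.0, 0.0]]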
@@ -288,6 +315,7 @@ class VectorStatTracker(object):
         self.reset()
         for index in range(self.dimension):
             self.covariance_matrix[index][index] = 1.0
+        return self
 
     def add_get_shift(self, vector_value, log_weight=0.0):
         """Return shift and update state to addition of another sample.
@@ -300,17 +328,19 @@ class VectorStatTracker(object):
             Default: 0.0 (as log of 1.0).
         :type vector_value: iterable of float
         :type log_weight: float
-        :returns: Updated self.
-        :rtype: VectorStatTracker
+        :returns: Shift vector.
+        :rtype: list of float
         """
         dimension = self.dimension
         old_log_sum_weight = self.log_sum_weight
         old_averages = self.averages
         if not old_averages:
         """
         dimension = self.dimension
         old_log_sum_weight = self.log_sum_weight
         old_averages = self.averages
         if not old_averages:
-            shift = [0.0 for index in range(dimension)]
+            shift = [0.0 for _ in range(dimension)]
         else:
-            shift = [vector_value[index] - old_averages[index]
-                     for index in range(dimension)]
+            shift = [
+                vector_value[index] - old_averages[index]
+                for index in range(dimension)
+            ]
         if old_log_sum_weight is None:
             # First sample.
             self.log_sum_weight = log_weight
@@ -321,8 +351,10 @@ class VectorStatTracker(object):
         new_log_sum_weight = log_plus(old_log_sum_weight, log_weight)
         data_ratio = math.exp(old_log_sum_weight - new_log_sum_weight)
         sample_ratio = math.exp(log_weight - new_log_sum_weight)
-        new_averages = [old_averages[index] + shift[index] * sample_ratio
-                        for index in range(dimension)]
+        new_averages = [
+            old_averages[index] + shift[index] * sample_ratio
+            for index in range(dimension)
+        ]
         # It is easier to update covariance matrix in-place.
         for second in range(dimension):
             for first in range(dimension):
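A worked example of the averaging step in the hunk above (illustrative numbers only, not part of the patch):

    import math
    # Previous data: total weight 3.0, old average 2.0 in some component.
    # Incoming sample value 5.0 with weight 1.0 (log_weight = 0.0).
    old_log_sum_weight = math.log(3.0)
    new_log_sum_weight = math.log(4.0)                  # log_plus(ln 3, 0.0)
    sample_ratio = math.exp(0.0 - new_log_sum_weight)   # 0.25
    shift = 5.0 - 2.0                                    # sample minus old average
    new_average = 2.0 + shift * sample_ratio             # 2.75 == (3*2 + 1*5) / 4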
@@ -344,7 +376,7 @@ class VectorStatTracker(object):
 
         If the weight of the incoming sample is far bigger
         than the weight of all the previous data together,
-        convariance matrix would suffer from underflows.
+        covariance matrix would suffer from underflow.
         To avoid that, this method manipulates both weights
         before calling add().
 
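To make the underflow concrete (an illustrative sketch, not part of the patch): data_ratio in add_get_shift is exp(old_log_sum_weight - new_log_sum_weight), so an incoming sample whose log weight exceeds the accumulated log weight by more than about 750 drives that factor to exactly 0.0 and wipes out the old statistics' contribution:

    import math
    old_log_sum_weight = 0.0      # all previous data: total weight 1.0
    log_weight = 800.0            # incoming sample is vastly heavier
    new_log_sum_weight = 800.0    # log_plus(0.0, 800.0), to double precision
    data_ratio = math.exp(old_log_sum_weight - new_log_sum_weight)
    print(data_ratio)             # 0.0 -- old contribution underflows away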