resources/libraries/python/MLRsearch/target_stat.py

   1 # Copyright (c) 2023 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Module defining TargetStat class."""
  15
  16 from dataclasses import dataclass, field
  17 from typing import Dict, Tuple
  18
  19 from .target_spec import TargetSpec
  20 from .discrete_result import DiscreteResult
  21
  22
  23 @dataclass
  24 class TargetStat:
  25     """Class for aggregating trial results for a single load and target.
  26
  27     Reference to the target is included for convenience.
  28
  29     The main usage is for load classification, done in estimates method.
  30     If both estimates agree, the load is classified as either a lower bound
  31     or an upper bound. For additional logic for dealing with loss inversion
  32     see MeasurementDatabase.
  33
  34     Also, data needed for conditional throughput is gathered here,
  35     exposed only as a pessimistic loss ratio
  36     (as the load value is not stored here).
  37     """
  38
  39     target: TargetSpec = field(repr=False)
  40     """The target for which this instance is aggregating results."""
  41     good_long: float = 0.0
  42     """Sum of durations of long enough trials satisfying target loss ratio."""
  43     bad_long: float = 0.0
  44     """Sum of durations of long trials not satisfying target loss ratio."""
  45     good_short: float = 0.0
  46     """Sum of durations of shorter trials satisfying target loss ratio."""
  47     bad_short: float = 0.0
  48     """Sum of durations of shorter trials not satisfying target loss ratio."""
  49     long_losses: Dict[float, float] = field(repr=False, default_factory=dict)
  50     """If a loss ratio value occured in a long trial, map it to duration sum."""
  51
  52     def __str__(self) -> str:
  53         """Convert into a short human-readable string.
  54
  55         :returns: The short string.
  56         :rtype: str
  57         """
  58         return (
  59             f"gl={self.good_long},bl={self.bad_long}"
  60             f",gs={self.good_short},bs={self.bad_short}"
  61         )
  62
  63     def add(self, result: DiscreteResult) -> None:
  64         """Take into account one more trial result.
  65
  66         Use intended duration for deciding between long and short trials,
  67         but use offered duation (with overheads) to increase the duration sums.
  68
  69         :param result: The trial result to add to the stats.
  70         :type result: DiscreteResult
  71         """
  72         dwo = result.duration_with_overheads
  73         rlr = result.loss_ratio
  74         if result.intended_duration >= self.target.trial_duration:
  75             if rlr not in self.long_losses:
  76                 self.long_losses[rlr] = 0.0
  77                 self.long_losses = dict(sorted(self.long_losses.items()))
  78             self.long_losses[rlr] += dwo
  79             if rlr > self.target.loss_ratio:
  80                 self.bad_long += dwo
  81             else:
  82                 self.good_long += dwo
  83         else:
  84             if rlr > self.target.loss_ratio:
  85                 self.bad_short += dwo
  86             else:
  87                 self.good_short += dwo
  88
  89     def estimates(self) -> Tuple[bool, bool]:
  90         """Return whether this load can become a lower bound.
  91
  92         This returns two estimates, hence the weird nonverb name of this method.
  93         One estimate assumes all following results will satisfy the loss ratio,
  94         the other assumes all results will not satisfy the loss ratio.
  95         The sum of durations of the assumed results
  96         is the minimum to reach target duration sum, or zero if already reached.
  97
  98         If both estimates are the same, it means the load is a definite bound.
  99         This may happen even when the sum of durations of already
 100         measured trials is less than the target, when the missing measurements
 101         cannot change the classification.
 102
 103         :returns: Tuple of two estimates whether the load can be a lower bound.
 104             (True, False) means more trial results are needed.
 105         :rtype: Tuple[bool, bool]
 106         """
 107         coeff = self.target.exceed_ratio
 108         decrease = self.good_short * coeff / (1.0 - coeff)
 109         short_excess = self.bad_short - decrease
 110         effective_excess = self.bad_long + max(0.0, short_excess)
 111         effective_dursum = max(
 112             self.good_long + effective_excess,
 113             self.target.duration_sum,
 114         )
 115         limit_dursum = effective_dursum * self.target.exceed_ratio
 116         optimistic = effective_excess <= limit_dursum
 117         pessimistic = (effective_dursum - self.good_long) <= limit_dursum
 118         return optimistic, pessimistic
 119
 120     @property
 121     def pessimistic_loss_ratio(self) -> float:
 122         """Return the loss ratio for conditional throughput computation.
 123
 124         It adds missing dursum as full-loss trials to long_losses
 125         and returns a quantile corresponding to exceed ratio.
 126         In case of tie (as in median for even number of samples),
 127         this returns the lower value (as being equal to goal exceed ratio
 128         is allowed).
 129
 130         For loads classified as a lower bound, the return value
 131         ends up being no larger than the target loss ratio.
 132         This is because the excess short bad trials would only come
 133         after the quantile in question (as would full-loss missing trials).
 134         For other loads, anything can happen, but conditional throughput
 135         should not be computed for those anyway.
 136         Those two facts allow the logic here be simpler than in estimates().
 137
 138         :returns: Effective loss ratio based on long trial results.
 139         :rtype: float
 140         """
 141         all_long = max(self.target.duration_sum, self.good_long + self.bad_long)
 142         remaining = all_long * (1.0 - self.target.exceed_ratio)
 143         ret = None
 144         for ratio, dursum in self.long_losses.items():
 145             if ret is None or remaining > 0.0:
 146                 ret = ratio
 147                 remaining -= dursum
 148             else:
 149                 break
 150         else:
 151             if remaining > 0.0:
 152                 ret = 1.0
 153         return ret