resources/libraries/python/MLRsearch/multiple_loss_ratio_search.py

   1 # Copyright (c) 2023 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Module defining MultipleLossRatioSearch class."""
  15
  16 import logging
  17 import time
  18
  19 from dataclasses import dataclass
  20 from typing import Callable, Optional, Tuple
  21
  22 from .candidate import Candidate
  23 from .config import Config
  24 from .dataclass import secondary_field
  25 from .discrete_load import DiscreteLoad
  26 from .discrete_result import DiscreteResult
  27 from .expander import GlobalWidth
  28 from .goal_result import GoalResult
  29 from .limit_handler import LimitHandler
  30 from .load_rounding import LoadRounding
  31 from .measurement_database import MeasurementDatabase
  32 from .pep3140 import Pep3140Dict
  33 from .search_goal import SearchGoal
  34 from .selector import Selector
  35 from .target_scaling import TargetScaling
  36 from .trial_measurement import AbstractMeasurer
  37
  38
  39 @dataclass
  40 class MultipleLossRatioSearch:
  41     """Implementation of the controller part of MLRsearch algorithm.
  42
  43     The manager part is creating and calling this,
  44     the measurer part is injected.
  45
  46     Traditional binary search algorithm needs initial interval
  47     (lower and upper bound), and returns final narrow bounds
  48     (related to its search goal) after bisecting
  49     (until some exit condition is met).
  50     The exit condition is usually related to the interval width,
  51     (upper bound value minus lower bound value).
  52
  53     The optimized algorithm in this class contains several improvements
  54     aimed to reduce overall search time.
  55
  56     One improvement is searching for bounds for multiple search goals at once.
  57     Specifically, the trial measurement results influence bounds for all goals,
  58     even though the selection of trial inputs for next measurement
  59     focuses only on one goal. The focus can switch between goals frequently.
  60
  61     Next improvement is that results of trial measurements
  62     with small trial duration can be used to find a reasonable starting interval
  63     for full trial duration search.
  64     This results in more trials performed, but smaller overall duration
  65     in general.
  66     Internally, such shorter trials come from "preceding targets",
  67     handled in a same way as a search goal "final target".
  68     Related improvement is that the "current" interval does not need to be valid
  69     (e.g. one of the bounds is missing).
  70     In that case, this algorithm will move and expand the interval,
  71     in a process called external search. Only when both bounds are found,
  72     the interval bisection (called internal search) starts making it narrow.
  73
  74     Next improvement is bisecting in logarithmic quantities,
  75     so that target relative width is independent of measurement units.
  76
  77     Next improvement is basing the initial interval on forwarding rates
  78     of few initial measurements, starting at max load and using forwarding rates
  79     seen so far.
  80
  81     Next improvement is to allow the use of multiple shorter trials
  82     instead one big trial, allowing a percentage of trials
  83     to exceed the loss ratio target.
  84     This makes the result more stable in practice.
  85     Conservative behavior (single long trial, zero exceed ratio)
  86     is still available using corresponding goal definitions.
  87
  88     Final improvement is exiting early if the minimal load
  89     is not a valid lower bound (at final duration)
  90     and also exiting if the overall search duration is too long.
  91
  92     There are also subtle optimizations related to candidate selection
  93     and uneven splitting of intervals, too numerous to list here.
  94
  95     The return values describe performance at the relevant lower bound
  96     as "conditional throughput", which is based on loss ratio of one of trials
  97     selected as a quantile based on exceed ratio parameter.
  98     Usually this value may be quite pessimistic, as MLRsearch stops
  99     measuring a load as soon as it becomes a lower bound,
 100     so conditional throughput is usually based on forwarding rate
 101     of the worst on the good long trials.
 102     """
 103
 104     config: Config
 105     """Arguments required at construction time."""
 106     # End of fields required at intance creation.
 107     measurer: AbstractMeasurer = secondary_field()
 108     """Measurer to use, set at calling search()."""
 109     debug: Callable[[str], None] = secondary_field()
 110     """Object to call for logging, None means logging.debug."""
 111     # Fields below are computed from data above
 112     rounding: LoadRounding = secondary_field()
 113     """Derived from goals. Instance to use for intended load rounding."""
 114     from_float: Callable[[float], DiscreteLoad] = secondary_field()
 115     """Conversion method from float [tps] intended load values."""
 116     limit_handler: LimitHandler = secondary_field()
 117     """Load post-processing utility based on config and rounding."""
 118     scaling: TargetScaling = secondary_field()
 119     """Utility for creating target chains for search goals."""
 120     database: MeasurementDatabase = secondary_field()
 121     """Storage for (stats of) measurement results so far."""
 122     stop_time: float = secondary_field()
 123     """Monotonic time value at which the search should end with failure."""
 124
 125     def search(
 126         self,
 127         measurer: AbstractMeasurer,
 128         debug: Optional[Callable[[str], None]] = None,
 129     ) -> Pep3140Dict[SearchGoal, GoalResult]:
 130         """Perform initial trials, create state object, proceed with main loop.
 131
 132         Stateful arguments (measurer and debug) are stored.
 133         Derived objects are constructed from config.
 134
 135         :param measurer: Measurement provider to use by this search object.
 136         :param debug: Callable to optionally use instead of logging.debug().
 137         :type measurer: AbstractMeasurer
 138         :type debug: Optional[Callable[[str], None]]
 139         :returns: Structure containing conditional throughputs and other stats,
 140             one for each search goal. If a value is None it means there is
 141             no lower bound (min load turned out to be an upper bound).
 142         :rtype: Pep3140Dict[SearchGoal, GoalResult]
 143         :raises RuntimeError: If total duration is larger than timeout,
 144             or if min load becomes an upper bound for a search goal
 145             that has fail fast true.
 146         """
 147         self.measurer = measurer
 148         self.debug = logging.debug if debug is None else debug
 149         self.rounding = LoadRounding(
 150             min_load=self.config.min_load,
 151             max_load=self.config.max_load,
 152             float_goals=[goal.relative_width for goal in self.config.goals],
 153         )
 154         self.from_float = DiscreteLoad.float_conver(rounding=self.rounding)
 155         self.limit_handler = LimitHandler(
 156             rounding=self.rounding,
 157             debug=self.debug,
 158         )
 159         self.scaling = TargetScaling(
 160             goals=self.config.goals,
 161             rounding=self.rounding,
 162         )
 163         self.database = MeasurementDatabase(self.scaling.targets)
 164         self.stop_time = time.monotonic() + self.config.search_duration_max
 165         result0, result1 = self.run_initial_trials()
 166         self.main_loop(result0.discrete_load, result1.discrete_load)
 167         ret_dict = Pep3140Dict()
 168         for goal in self.config.goals:
 169             target = self.scaling.goal_to_final_target[goal]
 170             bounds = self.database.get_relevant_bounds(target=target)
 171             ret_dict[goal] = GoalResult.from_bounds(bounds=bounds)
 172         return ret_dict
 173
 174     def measure(self, duration: float, load: DiscreteLoad) -> DiscreteResult:
 175         """Call measurer and put the result to appropriate form in database.
 176
 177         Also check the argument types and load roundness,
 178         and return the result to the caller.
 179
 180         :param duration: Intended duration for the trial measurement.
 181         :param load: Intended load for the trial measurement:
 182         :type duration: float
 183         :type load: DiscreteLoad
 184         :returns: The trial results.
 185         :rtype: DiscreteResult
 186         :raises RuntimeError: If an argument doed not have the required type.
 187         """
 188         if not isinstance(duration, float):
 189             raise RuntimeError(f"Duration has to be float: {duration!r}")
 190         if not isinstance(load, DiscreteLoad):
 191             raise RuntimeError(f"Load has to be discrete: {load!r}")
 192         if not load.is_round:
 193             raise RuntimeError(f"Told to measure unrounded: {load!r}")
 194         self.debug(f"Measuring at d={duration},il={int(load)}")
 195         result = self.measurer.measure(
 196             intended_duration=duration,
 197             intended_load=float(load),
 198         )
 199         self.debug(f"Measured lr={result.loss_ratio}")
 200         result = DiscreteResult.with_load(result=result, load=load)
 201         self.database.add(result)
 202         return result
 203
 204     def run_initial_trials(self) -> Tuple[DiscreteResult, DiscreteResult]:
 205         """Perform trials to get enough data to start the selectors.
 206
 207         Measurements are done with all initial targets in mind,
 208         based on smallest target loss ratio, largest initial trial duration,
 209         and largest initial target width.
 210
 211         Forwarding rate is used as a hint for next intended load.
 212         The relative quantity is used, as load can use different units.
 213         When the smallest target loss ratio is non-zero, a correction is needed
 214         (forwarding rate is only a good hint for zero loss ratio load).
 215         The correction is conservative (all increase in load turns to losses).
 216
 217         Also, warmup trial (if configured) is performed,
 218         all other trials are added to the database.
 219
 220         This could return the initial width, but from implementation perspective
 221         it is easier to return two measurements (or the same one twice) here
 222         and compute width later. The "one value twice" happens when max load
 223         has small loss, or when min load has big loss.
 224
 225         :returns: Two last measured values, in any order. Or one value twice.
 226         :rtype: Tuple[DiscreteResult, DiscreteResult]
 227         """
 228         max_load = self.limit_handler.max_load
 229         ratio, duration, width = None, None, None
 230         for target in self.scaling.targets:
 231             if target.preceding:
 232                 continue
 233             if ratio is None or ratio > target.loss_ratio:
 234                 ratio = target.loss_ratio
 235             if not duration or duration < target.trial_duration:
 236                 duration = target.trial_duration
 237             if not width or width < target.discrete_width:
 238                 width = target.discrete_width
 239         self.debug(f"Init ratio {ratio} duration {duration} width {width}")
 240         if self.config.warmup_duration:
 241             self.debug("Warmup trial.")
 242             self.measure(self.config.warmup_duration, max_load)
 243             # Warmup should not affect the real results, reset the database.
 244             self.database = MeasurementDatabase(self.scaling.targets)
 245         self.debug(f"First trial at max rate: {max_load}")
 246         result0 = self.measure(duration, max_load)
 247         rfr = result0.relative_forwarding_rate
 248         corrected_rfr = (self.from_float(rfr) / (1.0 - ratio)).rounded_down()
 249         if corrected_rfr >= max_load:
 250             self.debug("Small loss, no other initial trials are needed.")
 251             return result0, result0
 252         mrr = self.limit_handler.handle(corrected_rfr, width, None, max_load)
 253         self.debug(f"Second trial at (corrected) mrr: {mrr}")
 254         result1 = self.measure(duration, mrr)
 255         # Attempt to get narrower width.
 256         result_ratio = result1.loss_ratio
 257         if result_ratio > ratio:
 258             rfr2 = result1.relative_forwarding_rate
 259             crfr2 = (self.from_float(rfr2) / (1.0 - ratio)).rounded_down()
 260             mrr2 = self.limit_handler.handle(crfr2, width, None, mrr)
 261         else:
 262             mrr2 = mrr + width
 263             mrr2 = self.limit_handler.handle(mrr2, width, mrr, max_load)
 264         if not mrr2:
 265             self.debug("Close enough, measuring at mrr2 is not needed.")
 266             return result1, result1
 267         self.debug(f"Third trial at (corrected) mrr2: {mrr2}")
 268         result2 = self.measure(duration, mrr2)
 269         return result1, result2
 270
 271     def main_loop(self, load0: DiscreteLoad, load1: DiscreteLoad) -> None:
 272         """Initialize selectors and keep measuring the winning candidate.
 273
 274         Selectors are created, the two input loads are useful starting points.
 275
 276         The search ends when no selector nominates any candidate,
 277         or if the search takes too long (or if a selector raises).
 278
 279         Winner is selected according to ordering defined in Candidate class.
 280         In case of a tie, selectors for earlier goals are preferred.
 281
 282         As a selector is only allowed to update current width as the winner,
 283         the update is done here explicitly.
 284
 285         :param load0: Discrete load of one of results from run_initial_trials.
 286         :param load1: Discrete load of other of results from run_initial_trials.
 287         :type load0: DiscreteLoad
 288         :type load1: DiscreteLoad
 289         :raises RuntimeError: If the search takes too long,
 290             or if min load becomes an upper bound for any search goal
 291         """
 292         if load1 < load0:
 293             load0, load1 = load1, load0
 294         global_width = GlobalWidth.from_loads(load0, load1)
 295         selectors = []
 296         for target in self.scaling.goal_to_final_target.values():
 297             selector = Selector(
 298                 final_target=target,
 299                 global_width=global_width,
 300                 initial_lower_load=load0,
 301                 initial_upper_load=load1,
 302                 database=self.database,
 303                 handler=self.limit_handler,
 304                 debug=self.debug,
 305             )
 306             selectors.append(selector)
 307         while time.monotonic() < self.stop_time:
 308             winner = Candidate()
 309             for selector in selectors:
 310                 # Order of arguments is important
 311                 # when two targets nominate the same candidate.
 312                 winner = min(Candidate.nomination_from(selector), winner)
 313             if not winner:
 314                 break
 315             # We do not check duration versus stop_time here,
 316             # as some measurers can be unpredictably faster
 317             # than their intended duration suggests.
 318             self.measure(duration=winner.duration, load=winner.load)
 319             # Delayed updates.
 320             if winner.width:
 321                 global_width.width = winner.width
 322             winner.won()
 323         else:
 324             raise RuntimeError("Optimized search takes too long.")
 325         self.debug("Search done.")