1 # Copyright (c) 2023 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
14 """Module defining MultipleLossRatioSearch class."""
19 from dataclasses import dataclass
20 from typing import Callable, Optional, Tuple
22 from .candidate import Candidate
23 from .config import Config
24 from .dataclass import secondary_field
25 from .discrete_load import DiscreteLoad
26 from .discrete_result import DiscreteResult
27 from .expander import GlobalWidth
28 from .goal_result import GoalResult
29 from .limit_handler import LimitHandler
30 from .load_rounding import LoadRounding
31 from .measurement_database import MeasurementDatabase
32 from .pep3140 import Pep3140Dict
33 from .search_goal import SearchGoal
34 from .selector import Selector
35 from .target_scaling import TargetScaling
36 from .trial_measurement import AbstractMeasurer
40 class MultipleLossRatioSearch:
41 """Implementation of the controller part of MLRsearch algorithm.
43 The manager part is creating and calling this,
44 the measurer part is injected.
46 Traditional binary search algorithm needs initial interval
47 (lower and upper bound), and returns final narrow bounds
48 (related to its search goal) after bisecting
49 (until some exit condition is met).
50 The exit condition is usually related to the interval width,
51 (upper bound value minus lower bound value).
53 The optimized algorithm in this class contains several improvements
54 aimed to reduce overall search time.
56 One improvement is searching for bounds for multiple search goals at once.
57 Specifically, the trial measurement results influence bounds for all goals,
58 even though the selection of trial inputs for next measurement
59 focuses only on one goal. The focus can switch between goals frequently.
61 Next improvement is that results of trial measurements
62 with small trial duration can be used to find a reasonable starting interval
63 for full trial duration search.
64 This results in more trials performed, but smaller overall duration
66 Internally, such shorter trials come from "preceding targets",
67 handled in a same way as a search goal "final target".
68 Related improvement is that the "current" interval does not need to be valid
69 (e.g. one of the bounds is missing).
70 In that case, this algorithm will move and expand the interval,
71 in a process called external search. Only when both bounds are found,
72 the interval bisection (called internal search) starts making it narrow.
74 Next improvement is bisecting in logarithmic quantities,
75 so that target relative width is independent of measurement units.
77 Next improvement is basing the initial interval on forwarding rates
78 of few initial measurements, starting at max load and using forwarding rates
81 Next improvement is to allow the use of multiple shorter trials
82 instead one big trial, allowing a percentage of trials
83 to exceed the loss ratio target.
84 This makes the result more stable in practice.
85 Conservative behavior (single long trial, zero exceed ratio)
86 is still available using corresponding goal definitions.
88 Final improvement is exiting early if the minimal load
89 is not a valid lower bound (at final duration)
90 and also exiting if the overall search duration is too long.
92 There are also subtle optimizations related to candidate selection
93 and uneven splitting of intervals, too numerous to list here.
95 The return values describe performance at the relevant lower bound
96 as "conditional throughput", which is based on loss ratio of one of trials
97 selected as a quantile based on exceed ratio parameter.
98 Usually this value may be quite pessimistic, as MLRsearch stops
99 measuring a load as soon as it becomes a lower bound,
100 so conditional throughput is usually based on forwarding rate
101 of the worst on the good long trials.
105 """Arguments required at construction time."""
106 # End of fields required at intance creation.
107 measurer: AbstractMeasurer = secondary_field()
108 """Measurer to use, set at calling search()."""
109 debug: Callable[[str], None] = secondary_field()
110 """Object to call for logging, None means logging.debug."""
111 # Fields below are computed from data above
112 rounding: LoadRounding = secondary_field()
113 """Derived from goals. Instance to use for intended load rounding."""
114 from_float: Callable[[float], DiscreteLoad] = secondary_field()
115 """Conversion method from float [tps] intended load values."""
116 limit_handler: LimitHandler = secondary_field()
117 """Load post-processing utility based on config and rounding."""
118 scaling: TargetScaling = secondary_field()
119 """Utility for creating target chains for search goals."""
120 database: MeasurementDatabase = secondary_field()
121 """Storage for (stats of) measurement results so far."""
122 stop_time: float = secondary_field()
123 """Monotonic time value at which the search should end with failure."""
127 measurer: AbstractMeasurer,
128 debug: Optional[Callable[[str], None]] = None,
129 ) -> Pep3140Dict[SearchGoal, GoalResult]:
130 """Perform initial trials, create state object, proceed with main loop.
132 Stateful arguments (measurer and debug) are stored.
133 Derived objects are constructed from config.
135 :param measurer: Measurement provider to use by this search object.
136 :param debug: Callable to optionally use instead of logging.debug().
137 :type measurer: AbstractMeasurer
138 :type debug: Optional[Callable[[str], None]]
139 :returns: Structure containing conditional throughputs and other stats,
140 one for each search goal. If a value is None it means there is
141 no lower bound (min load turned out to be an upper bound).
142 :rtype: Pep3140Dict[SearchGoal, GoalResult]
143 :raises RuntimeError: If total duration is larger than timeout,
144 or if min load becomes an upper bound for a search goal
145 that has fail fast true.
147 self.measurer = measurer
148 self.debug = logging.debug if debug is None else debug
149 self.rounding = LoadRounding(
150 min_load=self.config.min_load,
151 max_load=self.config.max_load,
152 float_goals=[goal.relative_width for goal in self.config.goals],
154 self.from_float = DiscreteLoad.float_conver(rounding=self.rounding)
155 self.limit_handler = LimitHandler(
156 rounding=self.rounding,
159 self.scaling = TargetScaling(
160 goals=self.config.goals,
161 rounding=self.rounding,
163 self.database = MeasurementDatabase(self.scaling.targets)
164 self.stop_time = time.monotonic() + self.config.search_duration_max
165 result0, result1 = self.run_initial_trials()
166 self.main_loop(result0.discrete_load, result1.discrete_load)
167 ret_dict = Pep3140Dict()
168 for goal in self.config.goals:
169 target = self.scaling.goal_to_final_target[goal]
170 bounds = self.database.get_relevant_bounds(target=target)
171 ret_dict[goal] = GoalResult.from_bounds(bounds=bounds)
174 def measure(self, duration: float, load: DiscreteLoad) -> DiscreteResult:
175 """Call measurer and put the result to appropriate form in database.
177 Also check the argument types and load roundness,
178 and return the result to the caller.
180 :param duration: Intended duration for the trial measurement.
181 :param load: Intended load for the trial measurement:
182 :type duration: float
183 :type load: DiscreteLoad
184 :returns: The trial results.
185 :rtype: DiscreteResult
186 :raises RuntimeError: If an argument doed not have the required type.
188 if not isinstance(duration, float):
189 raise RuntimeError(f"Duration has to be float: {duration!r}")
190 if not isinstance(load, DiscreteLoad):
191 raise RuntimeError(f"Load has to be discrete: {load!r}")
192 if not load.is_round:
193 raise RuntimeError(f"Told to measure unrounded: {load!r}")
194 self.debug(f"Measuring at d={duration},il={int(load)}")
195 result = self.measurer.measure(
196 intended_duration=duration,
197 intended_load=float(load),
199 self.debug(f"Measured lr={result.loss_ratio}")
200 result = DiscreteResult.with_load(result=result, load=load)
201 self.database.add(result)
204 def run_initial_trials(self) -> Tuple[DiscreteResult, DiscreteResult]:
205 """Perform trials to get enough data to start the selectors.
207 Measurements are done with all initial targets in mind,
208 based on smallest target loss ratio, largest initial trial duration,
209 and largest initial target width.
211 Forwarding rate is used as a hint for next intended load.
212 The relative quantity is used, as load can use different units.
213 When the smallest target loss ratio is non-zero, a correction is needed
214 (forwarding rate is only a good hint for zero loss ratio load).
215 The correction is conservative (all increase in load turns to losses).
217 Also, warmup trial (if configured) is performed,
218 all other trials are added to the database.
220 This could return the initial width, but from implementation perspective
221 it is easier to return two measurements (or the same one twice) here
222 and compute width later. The "one value twice" happens when max load
223 has small loss, or when min load has big loss.
225 :returns: Two last measured values, in any order. Or one value twice.
226 :rtype: Tuple[DiscreteResult, DiscreteResult]
228 max_load = self.limit_handler.max_load
229 ratio, duration, width = None, None, None
230 for target in self.scaling.targets:
233 if ratio is None or ratio > target.loss_ratio:
234 ratio = target.loss_ratio
235 if not duration or duration < target.trial_duration:
236 duration = target.trial_duration
237 if not width or width < target.discrete_width:
238 width = target.discrete_width
239 self.debug(f"Init ratio {ratio} duration {duration} width {width}")
240 if self.config.warmup_duration:
241 self.debug("Warmup trial.")
242 self.measure(self.config.warmup_duration, max_load)
243 # Warmup should not affect the real results, reset the database.
244 self.database = MeasurementDatabase(self.scaling.targets)
245 self.debug(f"First trial at max rate: {max_load}")
246 result0 = self.measure(duration, max_load)
247 rfr = result0.relative_forwarding_rate
248 corrected_rfr = (self.from_float(rfr) / (1.0 - ratio)).rounded_down()
249 if corrected_rfr >= max_load:
250 self.debug("Small loss, no other initial trials are needed.")
251 return result0, result0
252 mrr = self.limit_handler.handle(corrected_rfr, width, None, max_load)
253 self.debug(f"Second trial at (corrected) mrr: {mrr}")
254 result1 = self.measure(duration, mrr)
255 # Attempt to get narrower width.
256 result_ratio = result1.loss_ratio
257 if result_ratio > ratio:
258 rfr2 = result1.relative_forwarding_rate
259 crfr2 = (self.from_float(rfr2) / (1.0 - ratio)).rounded_down()
260 mrr2 = self.limit_handler.handle(crfr2, width, None, mrr)
263 mrr2 = self.limit_handler.handle(mrr2, width, mrr, max_load)
265 self.debug("Close enough, measuring at mrr2 is not needed.")
266 return result1, result1
267 self.debug(f"Third trial at (corrected) mrr2: {mrr2}")
268 result2 = self.measure(duration, mrr2)
269 return result1, result2
271 def main_loop(self, load0: DiscreteLoad, load1: DiscreteLoad) -> None:
272 """Initialize selectors and keep measuring the winning candidate.
274 Selectors are created, the two input loads are useful starting points.
276 The search ends when no selector nominates any candidate,
277 or if the search takes too long (or if a selector raises).
279 Winner is selected according to ordering defined in Candidate class.
280 In case of a tie, selectors for earlier goals are preferred.
282 As a selector is only allowed to update current width as the winner,
283 the update is done here explicitly.
285 :param load0: Discrete load of one of results from run_initial_trials.
286 :param load1: Discrete load of other of results from run_initial_trials.
287 :type load0: DiscreteLoad
288 :type load1: DiscreteLoad
289 :raises RuntimeError: If the search takes too long,
290 or if min load becomes an upper bound for any search goal
293 load0, load1 = load1, load0
294 global_width = GlobalWidth.from_loads(load0, load1)
296 for target in self.scaling.goal_to_final_target.values():
299 global_width=global_width,
300 initial_lower_load=load0,
301 initial_upper_load=load1,
302 database=self.database,
303 handler=self.limit_handler,
306 selectors.append(selector)
307 while time.monotonic() < self.stop_time:
309 for selector in selectors:
310 # Order of arguments is important
311 # when two targets nominate the same candidate.
312 winner = min(Candidate.nomination_from(selector), winner)
315 # We do not check duration versus stop_time here,
316 # as some measurers can be unpredictably faster
317 # than their intended duration suggests.
318 self.measure(duration=winner.duration, load=winner.load)
321 global_width.width = winner.width
324 raise RuntimeError("Optimized search takes too long.")
325 self.debug("Search done.")