CSIT-1178: Prepare for bursty MRR
[csit.git] / resources / libraries / python / search / MultipleLossRatioSearch.py
1 # Copyright (c) 2018 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
5 #
6 #     http://www.apache.org/licenses/LICENSE-2.0
7 #
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13
14 """Module defining MultipleLossRatioSearch class."""
15
16 import logging
17 import math
18 import time
19
20 from AbstractSearchAlgorithm import AbstractSearchAlgorithm
21 from NdrPdrResult import NdrPdrResult
22 from ReceiveRateInterval import ReceiveRateInterval
23
24
25 class MultipleLossRatioSearch(AbstractSearchAlgorithm):
26     """Optimized binary search algorithm for finding NDR and PDR bounds.
27
28     Traditional binary search algorithm needs initial interval
29     (lower and upper bound), and returns final interval after bisecting
30     (until some exit condition is met).
31     The exit condition is usually related to the interval width,
32     (upper bound value minus lower bound value).
33
34     The optimized algorithm contains several improvements
35     aimed to reduce overall search time.
36
37     One improvement is searching for two intervals at once.
38     The intervals are for NDR (No Drop Rate) and PDR (Partial Drop Rate).
39
40     Next improvement is that the initial interval does not need to be valid.
41     Imagine initial interval (10, 11) where 11 is smaller
42     than the searched value.
43     The algorithm will try (11, 13) interval next, and if 13 is still smaller,
44     (13, 17) and so on, doubling width until the upper bound is valid.
45     The part when interval expands is called external search,
46     the part when interval is bisected is called internal search.
47
48     Next improvement is that trial measurements at small trial duration
49     can be used to find a reasonable interval for full trial duration search.
50     This results in more trials performed, but smaller overall duration
51     in general.
52
53     Next improvement is bisecting in logarithmic quantities,
54     so that exit criteria can be independent of measurement units.
55
56     Next improvement is basing the initial interval on receive rates.
57
58     Final improvement is exiting early if the minimal value
59     is not a valid lower bound.
60
61     The complete search consist of several phases,
62     each phase performing several trial measurements.
63     Initial phase creates initial interval based on receive rates
64     at maximum rate and at maximum receive rate (MRR).
65     Final phase and preceding intermediate phases are performing
66     external and internal search steps,
67     each resulting interval is the starting point for the next phase.
68     The resulting interval of final phase is the result of the whole algorithm.
69
70     Each non-initial phase uses its own trial duration and width goal.
71     Any non-initial phase stops searching (for NDR or PDR independently)
72     when minimum is not a valid lower bound (at current duration),
73     or all of the following is true:
74     Both bounds are valid, bound bounds are measured at the current phase
75     trial duration, interval width is less than the width goal
76     for current phase.
77
78     TODO: Review and update this docstring according to rst docs.
79     TODO: Support configurable number of Packet Loss Ratios.
80     """
81
82     class ProgressState(object):
83         """Structure containing data to be passed around in recursion."""
84
85         def __init__(
86                 self, result, phases, duration, width_goal, packet_loss_ratio,
87                 minimum_transmit_rate, maximum_transmit_rate):
88             """Convert and store the argument values.
89
90             :param result: Current measured NDR and PDR intervals.
91             :param phases: How many intermediate phases to perform
92                 before the current one.
93             :param duration: Trial duration to use in the current phase [s].
94             :param width_goal: The goal relative width for the curreent phase.
95             :param packet_loss_ratio: PDR fraction for the current search.
96             :param minimum_transmit_rate: Minimum target transmit rate
97                 for the current search [pps].
98             :param maximum_transmit_rate: Maximum target transmit rate
99                 for the current search [pps].
100             :type result: NdrPdrResult
101             :type phases: int
102             :type duration: float
103             :type width_goal: float
104             :type packet_loss_ratio: float
105             :type minimum_transmit_rate: float
106             :type maximum_transmit_rate: float
107             """
108             self.result = result
109             self.phases = int(phases)
110             self.duration = float(duration)
111             self.width_goal = float(width_goal)
112             self.packet_loss_ratio = float(packet_loss_ratio)
113             self.minimum_transmit_rate = float(minimum_transmit_rate)
114             self.maximum_transmit_rate = float(maximum_transmit_rate)
115
116     def __init__(self, measurer, final_relative_width=0.005,
117                  final_trial_duration=30.0, initial_trial_duration=1.0,
118                  number_of_intermediate_phases=2, timeout=600.0):
119         """Store the measurer object and additional arguments.
120
121         :param measurer: Rate provider to use by this search object.
122         :param final_relative_width: Final lower bound transmit rate
123             cannot be more distant that this multiple of upper bound [1].
124         :param final_trial_duration: Trial duration for the final phase [s].
125         :param initial_trial_duration: Trial duration for the initial phase
126             and also for the first intermediate phase [s].
127         :param number_of_intermediate_phases: Number of intermediate phases
128             to perform before the final phase [1].
129         :param timeout: The search will fail itself when not finished
130             before this overall time [s].
131         :type measurer: AbstractMeasurer
132         :type final_relative_width: float
133         :type final_trial_duration: float
134         :type initial_trial_duration: int
135         :type number_of_intermediate_phases: int
136         :type timeout: float
137         """
138         super(MultipleLossRatioSearch, self).__init__(measurer)
139         self.final_trial_duration = float(final_trial_duration)
140         self.final_relative_width = float(final_relative_width)
141         self.number_of_intermediate_phases = int(number_of_intermediate_phases)
142         self.initial_trial_duration = float(initial_trial_duration)
143         self.timeout = float(timeout)
144
145
146     @staticmethod
147     def double_relative_width(relative_width):
148         """Return relative width corresponding to double logarithmic width.
149
150         :param relative_width: The base relative width to double.
151         :type relative_width: float
152         :returns: The relative width of double logarithmic size.
153         :rtype: float
154         """
155         return 1.999 * relative_width - relative_width * relative_width
156         # The number should be 2.0, but we want to avoid rounding errors,
157         # and ensure half of double is not larger than the original value.
158
159     @staticmethod
160     def double_step_down(relative_width, current_bound):
161         """Return rate of double logarithmic width below.
162
163         :param relative_width: The base relative width to double.
164         :param current_bound: The current target transmit rate to move [pps].
165         :type relative_width: float
166         :type current_bound: float
167         :returns: Transmit rate smaller by logarithmically double width [pps].
168         :rtype: float
169         """
170         return current_bound * (
171             1.0 - MultipleLossRatioSearch.double_relative_width(
172                 relative_width))
173
174     @staticmethod
175     def double_step_up(relative_width, current_bound):
176         """Return rate of double logarithmic width above.
177
178         :param relative_width: The base relative width to double.
179         :param current_bound: The current target transmit rate to move [pps].
180         :type relative_width: float
181         :type current_bound: float
182         :returns: Transmit rate larger by logarithmically double width [pps].
183         :rtype: float
184         """
185         return current_bound / (
186             1.0 - MultipleLossRatioSearch.double_relative_width(
187                 relative_width))
188
189     @staticmethod
190     def half_relative_width(relative_width):
191         """Return relative width corresponding to half logarithmic width.
192
193         :param relative_width: The base relative width to halve.
194         :type relative_width: float
195         :returns: The relative width of half logarithmic size.
196         :rtype: float
197         """
198         return 1.0 - math.sqrt(1.0 - relative_width)
199
200     @staticmethod
201     def half_step_up(relative_width, current_bound):
202         """Return rate of half logarithmic width above.
203
204         :param relative_width: The base relative width to halve.
205         :param current_bound: The current target transmit rate to move [pps].
206         :type relative_width: float
207         :type current_bound: float
208         :returns: Transmit rate larger by logarithmically half width [pps].
209         :rtype: float
210         """
211         return current_bound / (
212             1.0 - MultipleLossRatioSearch.half_relative_width(relative_width))
213
214     def narrow_down_ndr_and_pdr(
215             self, minimum_transmit_rate, maximum_transmit_rate,
216             packet_loss_ratio):
217         """Perform initial phase, create state object, proceed with next phases.
218
219         :param minimum_transmit_rate: Minimal target transmit rate [pps].
220         :param maximum_transmit_rate: Maximal target transmit rate [pps].
221         :param packet_loss_ratio: Fraction of packets lost, for PDR [1].
222         :type minimum_transmit_rate: float
223         :type maximum_transmit_rate: float
224         :type packet_loss_ratio: float
225         :returns: Structure containing narrowed down intervals
226             and their measurements.
227         :rtype: NdrPdrResult
228         :raises RuntimeError: If total duration is larger than timeout.
229         """
230         minimum_transmit_rate = float(minimum_transmit_rate)
231         maximum_transmit_rate = float(maximum_transmit_rate)
232         packet_loss_ratio = float(packet_loss_ratio)
233         line_measurement = self.measurer.measure(
234             self.initial_trial_duration, maximum_transmit_rate)
235         initial_width_goal = self.final_relative_width
236         for _ in range(self.number_of_intermediate_phases):
237             initial_width_goal = self.double_relative_width(initial_width_goal)
238         max_lo = maximum_transmit_rate * (1.0 - initial_width_goal)
239         mrr = max(
240             minimum_transmit_rate,
241             min(max_lo, line_measurement.receive_rate))
242         mrr_measurement = self.measurer.measure(
243             self.initial_trial_duration, mrr)
244         # Attempt to get narrower width.
245         if mrr_measurement.loss_fraction > 0.0:
246             max2_lo = mrr * (1.0 - initial_width_goal)
247             mrr2 = min(max2_lo, mrr_measurement.receive_rate)
248         else:
249             mrr2 = mrr / (1.0 - initial_width_goal)
250         if mrr2 > minimum_transmit_rate and mrr2 < maximum_transmit_rate:
251             line_measurement = mrr_measurement
252             mrr_measurement = self.measurer.measure(
253                 self.initial_trial_duration, mrr2)
254             if mrr2 > mrr:
255                 buf = line_measurement
256                 line_measurement = mrr_measurement
257                 mrr_measurement = buf
258         starting_interval = ReceiveRateInterval(
259             mrr_measurement, line_measurement)
260         starting_result = NdrPdrResult(starting_interval, starting_interval)
261         state = self.ProgressState(
262             starting_result, self.number_of_intermediate_phases,
263             self.final_trial_duration, self.final_relative_width,
264             packet_loss_ratio, minimum_transmit_rate, maximum_transmit_rate)
265         state = self.ndrpdr(state)
266         return state.result
267
268     def _measure_and_update_state(self, state, transmit_rate):
269         """Perform trial measurement, update bounds, return new state.
270
271         :param state: State before this measurement.
272         :param transmit_rate: Target transmit rate for this measurement [pps].
273         :type state: ProgressState
274         :type transmit_rate: float
275         :returns: State after the measurement.
276         :rtype: ProgressState
277         """
278         # TODO: Implement https://stackoverflow.com/a/24683360
279         # to avoid the string manipulation if log verbosity is too low.
280         logging.info("result before update: %s", state.result)
281         logging.debug(
282             "relative widths in goals: %s", state.result.width_in_goals(
283                 self.final_relative_width))
284         measurement = self.measurer.measure(state.duration, transmit_rate)
285         ndr_interval = self._new_interval(
286             state.result.ndr_interval, measurement, 0.0)
287         pdr_interval = self._new_interval(
288             state.result.pdr_interval, measurement, state.packet_loss_ratio)
289         state.result = NdrPdrResult(ndr_interval, pdr_interval)
290         return state
291
292     @staticmethod
293     def _new_interval(old_interval, measurement, packet_loss_ratio):
294         """Return new interval with bounds updated according to the measurement.
295
296         :param old_interval: The current interval before the measurement.
297         :param measurement: The new meaqsurement to take into account.
298         :param packet_loss_ratio: Fraction for PDR (or zero for NDR).
299         :type old_interval: ReceiveRateInterval
300         :type measurement: ReceiveRateMeasurement
301         :type packet_loss_ratio: float
302         :returns: The updated interval.
303         :rtype: ReceiveRateInterval
304         """
305         old_lo, old_hi = old_interval.measured_low, old_interval.measured_high
306         # Priority zero: direct replace if the target Tr is the same.
307         if measurement.target_tr in (old_lo.target_tr, old_hi.target_tr):
308             if measurement.target_tr == old_lo.target_tr:
309                 return ReceiveRateInterval(measurement, old_hi)
310             else:
311                 return ReceiveRateInterval(old_lo, measurement)
312         # Priority one: invalid lower bound allows only one type of update.
313         if old_lo.loss_fraction > packet_loss_ratio:
314             # We can only expand down, old bound becomes valid upper one.
315             if measurement.target_tr < old_lo.target_tr:
316                 return ReceiveRateInterval(measurement, old_lo)
317             else:
318                 return old_interval
319         # Lower bound is now valid.
320         # Next priorities depend on target Tr.
321         if measurement.target_tr < old_lo.target_tr:
322             # Lower external measurement, relevant only
323             # if the new measurement has high loss rate.
324             if measurement.loss_fraction > packet_loss_ratio:
325                 # Returning the broader interval as old_lo
326                 # would be invalid upper bound.
327                 return ReceiveRateInterval(measurement, old_hi)
328         elif measurement.target_tr > old_hi.target_tr:
329             # Upper external measurement, only relevant for invalid upper bound.
330             if old_hi.loss_fraction <= packet_loss_ratio:
331                 # Old upper bound becomes valid new lower bound.
332                 return ReceiveRateInterval(old_hi, measurement)
333         else:
334             # Internal measurement, replaced boundary
335             # depends on measured loss fraction.
336             if measurement.loss_fraction > packet_loss_ratio:
337                 # We have found a narrow valid interval,
338                 # regardless of whether old upper bound was valid.
339                 return ReceiveRateInterval(old_lo, measurement)
340             else:
341                 # In ideal world, we would not want to shrink interval
342                 # if upper bound is not valid.
343                 # In the real world, we want to shrink it for
344                 # "invalid upper bound at maximal rate" case.
345                 return ReceiveRateInterval(measurement, old_hi)
346         # Fallback, the interval is unchanged by the measurement.
347         return old_interval
348
349     def ndrpdr(self, state):
350         """Pefrom trials for this phase. Return the new state when done.
351
352         :param state: State before this phase.
353         :type state: ProgressState
354         :returns: The updated state.
355         :rtype: ProgressState
356         :raises RuntimeError: If total duration is larger than timeout.
357         """
358         start_time = time.time()
359         if state.phases > 0:
360             # We need to finish preceding intermediate phases first.
361             saved_phases = state.phases
362             state.phases -= 1
363             # Preceding phases have shorter duration.
364             saved_duration = state.duration
365             duration_multiplier = state.duration / self.initial_trial_duration
366             phase_exponent = float(state.phases) / saved_phases
367             state.duration = self.initial_trial_duration * math.pow(
368                 duration_multiplier, phase_exponent)
369             # Shorter durations do not need that narrow widths.
370             saved_width = state.width_goal
371             state.width_goal = self.double_relative_width(state.width_goal)
372             # Recurse.
373             state = self.ndrpdr(state)
374             # Restore the state for current phase.
375             state.duration = saved_duration
376             state.width_goal = saved_width
377             state.phases = saved_phases  # Not needed, but just in case.
378         logging.info(
379             "starting iterations with duration %s and relative width goal %s",
380             state.duration, state.width_goal)
381         while 1:
382             if time.time() > start_time + self.timeout:
383                 raise RuntimeError("Optimized search takes too long.")
384             # Order of priorities: invalid bounds (nl, pl, nh, ph),
385             # then narrowing relative Tr widths.
386             # Durations are not priorities yet,
387             # they will settle on their own hopefully.
388             ndr_lo = state.result.ndr_interval.measured_low
389             ndr_hi = state.result.ndr_interval.measured_high
390             pdr_lo = state.result.pdr_interval.measured_low
391             pdr_hi = state.result.pdr_interval.measured_high
392             ndr_rel_width = max(
393                 state.width_goal, state.result.ndr_interval.rel_tr_width)
394             pdr_rel_width = max(
395                 state.width_goal, state.result.pdr_interval.rel_tr_width)
396             # If we are hitting maximal or minimal rate, we cannot shift,
397             # but we can re-measure.
398             if ndr_lo.loss_fraction > 0.0:
399                 if ndr_lo.target_tr > state.minimum_transmit_rate:
400                     new_tr = max(
401                         state.minimum_transmit_rate,
402                         self.double_step_down(ndr_rel_width, ndr_lo.target_tr))
403                     logging.info("ndr lo external %s", new_tr)
404                     state = self._measure_and_update_state(state, new_tr)
405                     continue
406                 elif ndr_lo.duration < state.duration:
407                     logging.info("ndr lo minimal re-measure")
408                     state = self._measure_and_update_state(
409                         state, state.minimum_transmit_rate)
410                     continue
411             if pdr_lo.loss_fraction > state.packet_loss_ratio:
412                 if pdr_lo.target_tr > state.minimum_transmit_rate:
413                     new_tr = max(
414                         state.minimum_transmit_rate,
415                         self.double_step_down(pdr_rel_width, pdr_lo.target_tr))
416                     logging.info("pdr lo external %s", new_tr)
417                     state = self._measure_and_update_state(state, new_tr)
418                     continue
419                 elif pdr_lo.duration < state.duration:
420                     logging.info("pdr lo minimal re-measure")
421                     state = self._measure_and_update_state(
422                         state, state.minimum_transmit_rate)
423                     continue
424             if ndr_hi.loss_fraction <= 0.0:
425                 if ndr_hi.target_tr < state.maximum_transmit_rate:
426                     new_tr = min(
427                         state.maximum_transmit_rate,
428                         self.double_step_up(ndr_rel_width, ndr_hi.target_tr))
429                     logging.info("ndr hi external %s", new_tr)
430                     state = self._measure_and_update_state(state, new_tr)
431                     continue
432                 elif ndr_hi.duration < state.duration:
433                     logging.info("ndr hi maximal re-measure")
434                     state = self._measure_and_update_state(
435                         state, state.maximum_transmit_rate)
436                     continue
437             if pdr_hi.loss_fraction <= state.packet_loss_ratio:
438                 if pdr_hi.target_tr < state.maximum_transmit_rate:
439                     new_tr = min(
440                         state.maximum_transmit_rate,
441                         self.double_step_up(pdr_rel_width, pdr_hi.target_tr))
442                     logging.info("pdr hi external %s", new_tr)
443                     state = self._measure_and_update_state(state, new_tr)
444                     continue
445                 elif pdr_hi.duration < state.duration:
446                     logging.info("ndr hi maximal re-measure")
447                     state = self._measure_and_update_state(
448                         state, state.maximum_transmit_rate)
449                     continue
450             # If we are hitting maximum_transmit_rate,
451             # it is still worth narrowing width,
452             # hoping large enough loss fraction will happen.
453             # But if we are hitting the minimal rate (at current duration),
454             # no additional measurement will help with that,
455             # so we can stop narrowing in this phase.
456             if (ndr_lo.target_tr <= state.minimum_transmit_rate
457                     and ndr_lo.loss_fraction > 0.0):
458                 ndr_rel_width = 0.0
459             if (pdr_lo.target_tr <= state.minimum_transmit_rate
460                     and pdr_lo.loss_fraction > state.packet_loss_ratio):
461                 pdr_rel_width = 0.0
462             if ndr_rel_width > state.width_goal:
463                 # We have to narrow NDR width first, as NDR internal search
464                 # can invalidate PDR (but not vice versa).
465                 new_tr = self.half_step_up(ndr_rel_width, ndr_lo.target_tr)
466                 logging.info("Bisecting for NDR at %s", new_tr)
467                 state = self._measure_and_update_state(state, new_tr)
468                 continue
469             if pdr_rel_width > state.width_goal:
470                 # PDR iternal search.
471                 new_tr = self.half_step_up(pdr_rel_width, pdr_lo.target_tr)
472                 logging.info("Bisecting for PDR at %s", new_tr)
473                 state = self._measure_and_update_state(state, new_tr)
474                 continue
475             # We do not need to improve width, but there still might be
476             # some measurements with smaller duration.
477             # We need to re-measure with full duration, possibly
478             # creating invalid bounds to resolve (thus broadening width).
479             if ndr_lo.duration < state.duration:
480                 logging.info("re-measuring NDR lower bound")
481                 self._measure_and_update_state(state, ndr_lo.target_tr)
482                 continue
483             if pdr_lo.duration < state.duration:
484                 logging.info("re-measuring PDR lower bound")
485                 self._measure_and_update_state(state, pdr_lo.target_tr)
486                 continue
487             # Except when lower bounds have high loss fraction, in that case
488             # we do not need to re-measure _upper_ bounds.
489             if ndr_hi.duration < state.duration and ndr_rel_width > 0.0:
490                 logging.info("re-measuring NDR upper bound")
491                 self._measure_and_update_state(state, ndr_hi.target_tr)
492                 continue
493             if pdr_hi.duration < state.duration and pdr_rel_width > 0.0:
494                 logging.info("re-measuring PDR upper bound")
495                 self._measure_and_update_state(state, pdr_hi.target_tr)
496                 continue
497             # Widths are narrow (or lower bound minimal), bound measurements
498             # are long enough, we can return.
499             logging.info("phase done")
500             break
501         return state