resources/libraries/python/search/OptimizedSearchAlgorithm.py

   1 # Copyright (c) 2018 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """Module defining OptimizedSearchAlgorithm class."""
  15
  16 import logging
  17 import math
  18 import time
  19
  20 from .AbstractSearchAlgorithm import AbstractSearchAlgorithm
  21 from .NdrPdrResult import NdrPdrResult
  22 from .ReceiveRateInterval import ReceiveRateInterval
  23
  24
  25 class OptimizedSearchAlgorithm(AbstractSearchAlgorithm):
  26     """Optimized binary search algorithm for finding NDR and PDR bounds.
  27
  28     Traditional binary search algorithm needs initial interval
  29     (lower and upper bound), and returns final interval after bisecting
  30     (until some exit condition is met).
  31     The exit condition is usually related to the interval width,
  32     (upper bound value minus lower bound value).
  33
  34     The optimized algorithm contains several improvements
  35     aimed to reduce overall search time.
  36
  37     One improvement is searching for two intervals at once.
  38     The intervals are for NDR (No Drop Rate) and PDR (Partial Drop Rate).
  39
  40     Next improvement is that the initial interval does need to be valid.
  41     Imagine initial interval (10, 11) where 11 is smaller
  42     than the searched value.
  43     The algorithm will try (11, 13) interval next, and if 13 is still smaller,
  44     (13, 17) and so on, doubling width until the upper bound is valid.
  45     The part when interval expands is called external search,
  46     the part when interval is bisected is called internal search.
  47
  48     Next improvement is that trial measurements at small trial duration
  49     can be used to find a reasonable interval for full trial duration search.
  50     This results in more trials performed, but smaller overall duration
  51     in general.
  52
  53     Next improvement is bisecting in logarithmic quantities,
  54     so that exit criteria can be independent of measurement units.
  55
  56     Next improvement is basing the initial interval on receive rates.
  57
  58     Final improvement is exiting early if the minimal value
  59     is not a valid lower bound.
  60
  61     The complete search consist of several phases,
  62     each phase performing several trial measurements.
  63     Initial phase creates initial interval based on receive rates
  64     at maximum rate and at maximum receive rate (MRR).
  65     Final phase and preceding intermediate phases are performing
  66     external and internal search steps,
  67     each resulting interval is the starting point for the next phase.
  68     The resulting interval of final phase is the result of the whole algorithm.
  69
  70     Each non-initial phase uses its own trial duration and width goal.
  71     Any non-initial phase stops searching (for NDR or PDR independently)
  72     when minimum is not a valid lower bound (at current duration),
  73     or all of the following is true:
  74     Both bounds are valid, bound bounds are measured at the current phase
  75     trial duration, interval width is less than the width goal
  76     for current phase."""
  77
  78     class ProgressState(object):
  79         """Structure containing data to be passed around in recursion."""
  80
  81         def __init__(self, result, phases, duration, width_goal,
  82                      allowed_drop_fraction, fail_rate, line_rate):
  83             """Convert and store the argument values.
  84
  85             :param result: Current measured NDR and PDR intervals.
  86             :param phases: How many intermediate phases to perform
  87                 before the current one.
  88             :param duration: Trial duration to use in the current phase [s].
  89             :param width_goal: The goal relative width for the curreent phase.
  90             :param allowed_drop_fraction: PDR fraction for the current search.
  91             :param fail_rate: Minimum target transmit rate
  92                 for the current search [pps].
  93             :param line_rate: Maximum target transmit rate
  94                 for the current search [pps].
  95             :type result: NdrPdrResult
  96             :type phases: int
  97             :type duration: float
  98             :type width_goal: float
  99             :type allowed_drop_fraction: float
 100             :type fail_rate: float
 101             :type line_rate: float
 102             """
 103             self.result = result
 104             self.phases = int(phases)
 105             self.duration = float(duration)
 106             self.width_goal = float(width_goal)
 107             self.allowed_drop_fraction = float(allowed_drop_fraction)
 108             self.fail_rate = float(fail_rate)
 109             self.line_rate = float(line_rate)
 110
 111     def __init__(self, rate_provider, final_relative_width=0.005,
 112                  final_trial_duration=30.0, initial_trial_duration=1.0,
 113                  intermediate_phases=2, timeout=600.0):
 114         """Store rate provider and additional arguments.
 115
 116         :param rate_provider: Rate provider to use by this search object.
 117         :param final_relative_width: Final lower bound transmit rate
 118             cannot be more distant that this multiple of upper bound [1].
 119         :param final_trial_duration: Trial duration for the final phase [s].
 120         :param initial_trial_duration: Trial duration for the initial phase
 121             and also for the first intermediate phase [s].
 122         :param intermediate_phases: Number of intermediate phases to perform
 123             before the final phase [1].
 124         :param timeout: The search will fail itself when not finished
 125             before this overall time [s].
 126         :type rate_provider: AbstractRateProvider
 127         :type final_relative_width: float
 128         :type final_trial_duration: float
 129         :type initial_trial_duration: int
 130         :type intermediate_phases: int
 131         :type timeout: float
 132         """
 133         super(OptimizedSearchAlgorithm, self).__init__(rate_provider)
 134         self.final_trial_duration = float(final_trial_duration)
 135         self.final_relative_width = float(final_relative_width)
 136         self.intermediate_phases = int(intermediate_phases)
 137         self.initial_trial_duration = float(initial_trial_duration)
 138         self.timeout = float(timeout)
 139
 140     def narrow_down_ndr_and_pdr(
 141             self, fail_rate, line_rate, allowed_drop_fraction):
 142         """Perform initial phase, create state object, proceed with next phases.
 143
 144         :param fail_rate: Minimal target transmit rate [pps].
 145         :param line_rate: Maximal target transmit rate [pps].
 146         :param allowed_drop_fraction: Fraction of dropped packets for PDR [1].
 147         :type fail_rate: float
 148         :type line_rate: float
 149         :type allowed_drop_fraction: float
 150         :returns: Structure containing narrowed down intervals
 151             and their measurements.
 152         :rtype: NdrPdrResult
 153         :raises RuntimeError: If total duration is larger than timeout.
 154         """
 155         fail_rate = float(fail_rate)
 156         line_rate = float(line_rate)
 157         allowed_drop_fraction = float(allowed_drop_fraction)
 158         line_measurement = self.rate_provider.measure(
 159             self.initial_trial_duration, line_rate)
 160         # 0.999 is to avoid rounding errors which make
 161         # the subsequent logic think the width is too broad.
 162         max_lo = max(
 163             fail_rate, line_rate * (1.0 - 0.999 * self.final_relative_width))
 164         mrr = min(max_lo, max(fail_rate, line_measurement.receive_rate))
 165         mrr_measurement = self.rate_provider.measure(
 166             self.initial_trial_duration, mrr)
 167         # Attempt to get narrower width.
 168         max2_lo = max(
 169             fail_rate, mrr * (1.0 - 0.999 * self.final_relative_width))
 170         mrr2 = min(max2_lo, mrr_measurement.receive_rate)
 171         if mrr2 > fail_rate:
 172             line_measurement = mrr_measurement
 173             mrr_measurement = self.rate_provider.measure(
 174                 self.initial_trial_duration, mrr2)
 175         starting_interval = ReceiveRateInterval(
 176             mrr_measurement, line_measurement)
 177         starting_result = NdrPdrResult(starting_interval, starting_interval)
 178         state = self.ProgressState(
 179             starting_result, self.intermediate_phases,
 180             self.final_trial_duration, self.final_relative_width,
 181             allowed_drop_fraction, fail_rate, line_rate)
 182         state = self.ndrpdr(state)
 183         return state.result
 184
 185     def _measure_and_update_state(self, state, transmit_rate):
 186         """Perform trial measurement, update bounds, return new state.
 187
 188         :param state: State before this measurement.
 189         :param transmit_rate: Target transmit rate for this measurement [pps].
 190         :type state: ProgressState
 191         :type transmit_rate: float
 192         :returns: State after the measurement.
 193         :rtype: ProgressState
 194         """
 195         # TODO: Implement https://stackoverflow.com/a/24683360
 196         # to avoid the string manipulation if log verbosity is too low.
 197         logging.info("result before update: %s", state.result)
 198         logging.debug(
 199             "relative widths in goals: %s", state.result.width_in_goals(
 200                 self.final_relative_width))
 201         measurement = self.rate_provider.measure(state.duration, transmit_rate)
 202         ndr_interval = self._new_interval(
 203             state.result.ndr_interval, measurement, 0.0)
 204         pdr_interval = self._new_interval(
 205             state.result.pdr_interval, measurement, state.allowed_drop_fraction)
 206         state.result = NdrPdrResult(ndr_interval, pdr_interval)
 207         return state
 208
 209     @staticmethod
 210     def _new_interval(old_interval, measurement, allowed_drop_fraction):
 211         """Return new interval with bounds updated according to the measurement.
 212
 213         :param old_interval: The current interval before the measurement.
 214         :param measurement: The new meaqsurement to take into account.
 215         :param allowed_drop_fraction: Fraction for PDR (or zero for NDR).
 216         :type old_interval: ReceiveRateInterval
 217         :type measurement: ReceiveRateMeasurement
 218         :type allowed_drop_fraction: float
 219         :returns: The updated interval.
 220         :rtype: ReceiveRateInterval
 221         """
 222         old_lo, old_hi = old_interval.measured_low, old_interval.measured_high
 223         # Priority zero: direct replace if the target Tr is the same.
 224         if measurement.target_tr in (old_lo.target_tr, old_hi.target_tr):
 225             if measurement.target_tr == old_lo.target_tr:
 226                 return ReceiveRateInterval(measurement, old_hi)
 227             else:
 228                 return ReceiveRateInterval(old_lo, measurement)
 229         # Priority one: invalid lower bound allows only one type of update.
 230         if old_lo.drop_fraction > allowed_drop_fraction:
 231             # We can only expand down, old bound becomes valid upper one.
 232             if measurement.target_tr < old_lo.target_tr:
 233                 return ReceiveRateInterval(measurement, old_lo)
 234             else:
 235                 return old_interval
 236         # Lower bound is now valid.
 237         # Next priorities depend on target Tr.
 238         if measurement.target_tr < old_lo.target_tr:
 239             # Lower external measurement, relevant only
 240             # if the new measurement has high drop rate.
 241             if measurement.drop_fraction > allowed_drop_fraction:
 242                 # Returning the broader interval as old_lo
 243                 # would be invalid upper bound.
 244                 return ReceiveRateInterval(measurement, old_hi)
 245         elif measurement.target_tr > old_hi.target_tr:
 246             # Upper external measurement, only relevant for invalid upper bound.
 247             if old_hi.drop_fraction <= allowed_drop_fraction:
 248                 # Old upper bound becomes valid new lower bound.
 249                 return ReceiveRateInterval(old_hi, measurement)
 250         else:
 251             # Internal measurement, replaced boundary
 252             # depends on measured drop fraction.
 253             if measurement.drop_fraction > allowed_drop_fraction:
 254                 # We have found a narrow valid interval,
 255                 # regardless of whether old upper bound was valid.
 256                 return ReceiveRateInterval(old_lo, measurement)
 257             else:
 258                 # In ideal world, we would not want to shrink interval
 259                 # if upper bound is not valid.
 260                 # In the real world, we want to shrink it for
 261                 # "invalid upper bound at line rate" case.
 262                 return ReceiveRateInterval(measurement, old_hi)
 263         # Fallback, the interval is unchanged by the measurement.
 264         return old_interval
 265
 266     @staticmethod
 267     def double_relative_width(relative_width):
 268         """Return relative width corresponding to double logarithmic width.
 269
 270         :param relative_width: The base relative width to double.
 271         :type relative_width: float
 272         :returns: The relative width of double logarithmic size.
 273         :rtype: float
 274         """
 275         return 1.999 * relative_width - relative_width * relative_width
 276         # The number should be 2.0, but we want to avoid rounding errors,
 277         # and ensure half of double is not larger than the original value.
 278
 279     @staticmethod
 280     def double_step_down(relative_width, current_bound):
 281         """Return rate of double logarithmic width below.
 282
 283         :param relative_width: The base relative width to double.
 284         :param current_bound: The current target transmit rate to move [pps].
 285         :type relative_width: float
 286         :type current_bound: float
 287         :returns: Transmit rate smaller by logarithmically double width [pps].
 288         :rtype: float
 289         """
 290         return current_bound * (
 291             1.0 - OptimizedSearchAlgorithm.double_relative_width(
 292                 relative_width))
 293
 294     @staticmethod
 295     def double_step_up(relative_width, current_bound):
 296         """Return rate of double logarithmic width above.
 297
 298         :param relative_width: The base relative width to double.
 299         :param current_bound: The current target transmit rate to move [pps].
 300         :type relative_width: float
 301         :type current_bound: float
 302         :returns: Transmit rate larger by logarithmically double width [pps].
 303         :rtype: float
 304         """
 305         return current_bound / (
 306             1.0 - OptimizedSearchAlgorithm.double_relative_width(
 307                 relative_width))
 308
 309     @staticmethod
 310     def half_relative_width(relative_width):
 311         """Return relative width corresponding to half logarithmic width.
 312
 313         :param relative_width: The base relative width to halve.
 314         :type relative_width: float
 315         :returns: The relative width of half logarithmic size.
 316         :rtype: float
 317         """
 318         return 1.0 - math.sqrt(1.0 - relative_width)
 319
 320     @staticmethod
 321     def half_step_up(relative_width, current_bound):
 322         """Return rate of half logarithmic width above.
 323
 324         :param relative_width: The base relative width to halve.
 325         :param current_bound: The current target transmit rate to move [pps].
 326         :type relative_width: float
 327         :type current_bound: float
 328         :returns: Transmit rate larger by logarithmically half width [pps].
 329         :rtype: float
 330         """
 331         return current_bound / (
 332             1.0 - OptimizedSearchAlgorithm.half_relative_width(relative_width))
 333
 334     def ndrpdr(self, state):
 335         """Pefrom trials for this phase. Return the new state when done.
 336
 337         :param state: State before this phase.
 338         :type state: ProgressState
 339         :returns: The updates state.
 340         :rtype: ProgressState
 341         :raises RuntimeError: If total duration is larger than timeout.
 342         """
 343         if state.phases > 0:
 344             # We need to finish preceding intermediate phases first.
 345             saved_phases = state.phases
 346             state.phases -= 1
 347             # Preceding phases have shorter duration.
 348             saved_duration = state.duration
 349             duration_multiplier = state.duration / self.initial_trial_duration
 350             phase_exponent = float(state.phases) / saved_phases
 351             state.duration = self.initial_trial_duration * math.pow(
 352                 duration_multiplier, phase_exponent)
 353             # Shorter durations do not need that narrow widths.
 354             saved_width = state.width_goal
 355             state.width_goal = self.double_relative_width(state.width_goal)
 356             # Recurse.
 357             state = self.ndrpdr(state)
 358             # Restore the state for current phase.
 359             state.duration = saved_duration
 360             state.width_goal = saved_width
 361             state.phases = saved_phases  # Not needed, but just in case.
 362         logging.info(
 363             "starting iterations with duration %s and relative width goal %s",
 364             state.duration, state.width_goal)
 365         start_time = time.time()
 366         while 1:
 367             if time.time() > start_time + self.timeout:
 368                 raise RuntimeError("Optimized search takes too long.")
 369             # Order of priorities: improper bounds (nl, pl, nh, ph),
 370             # then narrowing relative Tr widths.
 371             # Durations are not priorities yet,
 372             # they will settle on their own hopefully.
 373             ndr_lo = state.result.ndr_interval.measured_low
 374             ndr_hi = state.result.ndr_interval.measured_high
 375             pdr_lo = state.result.pdr_interval.measured_low
 376             pdr_hi = state.result.pdr_interval.measured_high
 377             ndr_rel_width = max(
 378                 state.width_goal, state.result.ndr_interval.rel_tr_width)
 379             pdr_rel_width = max(
 380                 state.width_goal, state.result.pdr_interval.rel_tr_width)
 381             # If we are hitting line or fail rate, we cannot shift,
 382             # but we can re-measure.
 383             if ndr_lo.drop_fraction > 0.0:
 384                 if ndr_lo.target_tr > state.fail_rate:
 385                     new_tr = max(state.fail_rate, self.double_step_down(
 386                         ndr_rel_width, ndr_lo.target_tr))
 387                     logging.info("ndr lo external %s", new_tr)
 388                     state = self._measure_and_update_state(state, new_tr)
 389                     continue
 390                 elif ndr_lo.duration < state.duration:
 391                     logging.info("ndr lo fail re-measure")
 392                     state = self._measure_and_update_state(
 393                         state, state.fail_rate)
 394                     continue
 395             if pdr_lo.drop_fraction > state.allowed_drop_fraction:
 396                 if pdr_lo.target_tr > state.fail_rate:
 397                     new_tr = max(state.fail_rate, self.double_step_down(
 398                         pdr_rel_width, pdr_lo.target_tr))
 399                     logging.info("pdr lo external %s", new_tr)
 400                     state = self._measure_and_update_state(state, new_tr)
 401                     continue
 402                 elif pdr_lo.duration < state.duration:
 403                     logging.info("pdr lo fail re-measure")
 404                     state = self._measure_and_update_state(
 405                         state, state.fail_rate)
 406                     continue
 407             if ndr_hi.drop_fraction <= 0.0:
 408                 if ndr_hi.target_tr < state.line_rate:
 409                     new_tr = min(state.line_rate, self.double_step_up(
 410                         ndr_rel_width, ndr_hi.target_tr))
 411                     logging.info("ndr hi external %s", new_tr)
 412                     state = self._measure_and_update_state(state, new_tr)
 413                     continue
 414                 elif ndr_hi.duration < state.duration:
 415                     logging.info("ndr hi line re-measure")
 416                     state = self._measure_and_update_state(
 417                         state, state.line_rate)
 418                     continue
 419             if pdr_hi.drop_fraction <= state.allowed_drop_fraction:
 420                 if pdr_hi.target_tr < state.line_rate:
 421                     new_tr = min(state.line_rate, self.double_step_up(
 422                         pdr_rel_width, pdr_hi.target_tr))
 423                     logging.info("pdr hi external %s", new_tr)
 424                     state = self._measure_and_update_state(state, new_tr)
 425                     continue
 426                 elif pdr_hi.duration < state.duration:
 427                     logging.info("ndr hi line re-measure")
 428                     state = self._measure_and_update_state(
 429                         state, state.line_rate)
 430                     continue
 431             # If we are hitting line_rate, it is still worth narrowing width,
 432             # hoping large enough Df will happen.
 433             # But if we are hitting fail rate (at current duration),
 434             # no additional measurement will help with that,
 435             # so we can stop narrowing in this phase.
 436             if (ndr_lo.target_tr <= state.fail_rate
 437                     and ndr_lo.drop_fraction > 0.0):
 438                 ndr_rel_width = 0.0
 439             if (pdr_lo.target_tr <= state.fail_rate
 440                     and pdr_lo.drop_fraction > state.allowed_drop_fraction):
 441                 pdr_rel_width = 0.0
 442             if max(ndr_rel_width, pdr_rel_width) > state.width_goal:
 443                 # We have to narrow some width.
 444                 if ndr_rel_width >= pdr_rel_width:
 445                     new_tr = self.half_step_up(ndr_rel_width, ndr_lo.target_tr)
 446                     logging.info("Bisecting for NDR at %s", new_tr)
 447                     state = self._measure_and_update_state(state, new_tr)
 448                     continue
 449                 else:
 450                     new_tr = self.half_step_up(pdr_rel_width, pdr_lo.target_tr)
 451                     logging.info("Bisecting for PDR at %s", new_tr)
 452                     state = self._measure_and_update_state(state, new_tr)
 453                     continue
 454             # We do not need to improve width, but there still might be
 455             # some measurements with smaller duration.
 456             # We need to re-measure with full duration, possibly
 457             # creating invalid bounds to resolve (thus broadening width).
 458             if ndr_lo.duration < state.duration:
 459                 logging.info("re-measuring NDR lower bound")
 460                 self._measure_and_update_state(state, ndr_lo.target_tr)
 461                 continue
 462             if pdr_lo.duration < state.duration:
 463                 logging.info("re-measuring PDR lower bound")
 464                 self._measure_and_update_state(state, pdr_lo.target_tr)
 465                 continue
 466             # Except when lower bounds have high Df, in that case
 467             # we do not need to re-measure _upper_ bounds.
 468             if ndr_hi.duration < state.duration and ndr_rel_width > 0.0:
 469                 logging.info("re-measuring NDR upper bound")
 470                 self._measure_and_update_state(state, ndr_hi.target_tr)
 471                 continue
 472             if pdr_hi.duration < state.duration and pdr_rel_width > 0.0:
 473                 logging.info("re-measuring PDR upper bound")
 474                 self._measure_and_update_state(state, pdr_hi.target_tr)
 475                 continue
 476             # Widths are narrow (or failing), bound measurements
 477             # are long enough, we can return.
 478             logging.info("phase done")
 479             break
 480         return state