-# Copyright (c) 2020 Cisco and/or its affiliates.
+# Copyright (c) 2021 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
"""Performance testing traffic generator library."""
+import math
import time
from robot.api import logger
self.sleep_till_duration = None
self.transaction_type = None
self.duration_limit = None
+ self.ramp_up_start = None
+ self.ramp_up_stop = None
+ self.ramp_up_rate = None
+ self.ramp_up_duration = None
+ self.state_timeout = None
# Transient data needed for async measurements.
self._xstats = (None, None)
# TODO: Rename "xstats" to something opaque, so T-Rex is not privileged?
f"EOF'",
sudo=True, message=u"T-Rex config generation!"
)
+
+ if Constants.TREX_RX_DESCRIPTORS_COUNT != 0:
+ exec_cmd_no_error(
+ self._node,
+ f"sh -c 'cat << EOF >> /etc/trex_cfg.yaml\n"
+ f" rx_desc: {Constants.TREX_RX_DESCRIPTORS_COUNT}\n"
+ f"EOF'",
+ sudo=True, message=u"T-Rex rx_desc modification!"
+ )
+
+ if Constants.TREX_TX_DESCRIPTORS_COUNT != 0:
+ exec_cmd_no_error(
+ self._node,
+ f"sh -c 'cat << EOF >> /etc/trex_cfg.yaml\n"
+ f" tx_desc: {Constants.TREX_TX_DESCRIPTORS_COUNT}\n"
+ f"EOF'",
+ sudo=True, message=u"T-Rex tx_desc modification!"
+ )
else:
raise ValueError(u"Unknown OSI layer!")
raise RuntimeError(u"Start TRex failed!")
# Test T-Rex API responsiveness.
- cmd = u"python3"
- cmd += f" {Constants.REMOTE_FW_DIR}/GPL/tools/trex/"
+ cmd = f"python3 {Constants.REMOTE_FW_DIR}/GPL/tools/trex/"
if osi_layer in (u"L2", u"L3"):
- cmd += f"trex_stl_assert.py"
+ cmd += u"trex_stl_assert.py"
elif osi_layer == u"L7":
- cmd += f"trex_astf_assert.py"
+ cmd += u"trex_astf_assert.py"
else:
raise ValueError(u"Unknown OSI layer!")
try:
raise ValueError(u"Unsupported T-Rex traffic profile!")
self._stop_time = time.monotonic()
- return self.get_measurement_result()
+ return self._get_measurement_result()
+
def _compute_duration(self, duration, multiplier):
    """Translate a requested trial duration into the one the driver gets.

    Two values previously stored on self take part:
    a truthy transaction_scale replaces the requested duration
    with transaction_scale divided by multiplier,
    and a truthy duration_limit caps whatever duration came out of that.

    A negative duration (the async start marker) is passed through
    untouched and never counts as complete.

    The second returned value tells the ramp-up logic whether the trial
    runs for its whole expected duration, i.e. it is False when
    the duration limit shortened the trial or when the trial is async.

    :param duration: Time expressed in seconds for how long to send traffic.
    :param multiplier: Traffic rate in transactions per second.
    :type duration: float
    :type multiplier: float
    :returns: New duration and whether it was a complete ramp-up candidate.
    :rtype: float, bool
    """
    if duration < 0.0:
        # Async marker, hand it back unchanged.
        return duration, False
    if self.transaction_scale:
        expected = self.transaction_scale / multiplier
        # Logged so the value can be compared with the real duration
        # reported by telemetry.
        logger.debug(f"Expected duration {expected}")
    else:
        expected = duration
    cap = self.duration_limit
    if not cap:
        # No limit configured, the trial always runs in full.
        return expected, True
    capped = min(expected, cap)
    # Equal values mean the limit did not shorten the trial.
    return capped, capped == expected
def trex_astf_start_remote_exec(
self, duration, multiplier, async_call=False):
if not isinstance(duration, (float, int)):
duration = float(duration)
- # Duration logic.
- computed_duration = duration
- if duration > 0.0:
- if self.transaction_scale:
- computed_duration = self.transaction_scale / multiplier
- # Log the computed duration,
- # so we can compare with what telemetry suggests
- # the real duration was.
- logger.debug(f"Expected duration {computed_duration}")
- computed_duration += 0.1115
- # Else keep -1.
- if self.duration_limit:
- computed_duration = min(computed_duration, self.duration_limit)
+ # TODO: Refactor the code so duration is computed only once,
+ # and both the initial and the computed durations are logged.
+ computed_duration, _ = self._compute_duration(duration, multiplier)
command_line = OptionString().add(u"python3")
dirname = f"{Constants.REMOTE_FW_DIR}/GPL/tools/trex"
command_line.add_if(u"async_start", async_call)
command_line.add_if(u"latency", self.use_latency)
command_line.add_if(u"force", Constants.TREX_SEND_FORCE)
+ command_line.add_with_value(
+ u"delay", Constants.PERF_TRIAL_ASTF_DELAY
+ )
self._start_time = time.monotonic()
self._rate = multiplier
p_0, p_1 = (1, 0) if self._ifaces_reordered else (0, 1)
if not isinstance(duration, (float, int)):
duration = float(duration)
- if self.duration_limit:
- duration = min(duration, self.duration_limit)
+
+ # TODO: Refactor the code so duration is computed only once,
+ # and both the initial and the computed durations are logged.
+ duration, _ = self._compute_duration(duration=duration, multiplier=rate)
command_line = OptionString().add(u"python3")
dirname = f"{Constants.REMOTE_FW_DIR}/GPL/tools/trex"
command_line.add_if(u"async_start", async_call)
command_line.add_if(u"latency", self.use_latency)
command_line.add_if(u"force", Constants.TREX_SEND_FORCE)
+ command_line.add_with_value(u"delay", Constants.PERF_TRIAL_STL_DELAY)
# TODO: This is ugly. Handle parsing better.
self._start_time = time.monotonic()
transaction_type=u"packet",
duration_limit=0.0,
use_latency=False,
+ ramp_up_rate=None,
+ ramp_up_duration=None,
+ state_timeout=300.0,
+ ramp_up_only=False,
):
"""Send traffic from all configured interfaces on TG.
Bidirectional STL profiles are treated as transactions with two packets.
+ The return value is None for async.
+
:param duration: Duration of test traffic generation in seconds.
:param rate: Traffic rate in transactions per second.
:param frame_size: Frame size (L2) in Bytes.
duration.
:param use_latency: Whether to measure latency during the trial.
Default: False.
+ :param ramp_up_rate: Rate to use in ramp-up trials [pps].
+ :param ramp_up_duration: Duration of ramp-up trials [s].
+ :param state_timeout: Time of life of DUT state [s].
+ :param ramp_up_only: If true, do not perform main trial measurement.
:type duration: float
:type rate: float
:type frame_size: str
:type transaction_type: str
:type duration_limit: float
:type use_latency: bool
+ :type ramp_up_rate: float
+ :type ramp_up_duration: float
+ :type state_timeout: float
+ :type ramp_up_only: bool
:returns: TG results.
- :rtype: str
+ :rtype: ReceiveRateMeasurement or None
:raises ValueError: If TG traffic profile is not supported.
"""
self.set_rate_provider_defaults(
transaction_type=transaction_type,
duration_limit=duration_limit,
use_latency=use_latency,
+ ramp_up_rate=ramp_up_rate,
+ ramp_up_duration=ramp_up_duration,
+ state_timeout=state_timeout,
+ )
+ return self._send_traffic_on_tg_with_ramp_up(
+ duration=duration,
+ rate=rate,
+ async_call=async_call,
+ ramp_up_only=ramp_up_only,
)
- self._send_traffic_on_tg_internal(duration, rate, async_call)
- def _send_traffic_on_tg_internal(self, duration, rate, async_call=False):
+ def _send_traffic_on_tg_internal(
+ self, duration, rate, async_call=False):
"""Send traffic from all configured interfaces on TG.
This is an internal function, it assumes set_rate_provider_defaults
need to specify their own values, and we do not want the measure call
to overwrite them with defaults.
+ This function is used both for automated ramp-up trials
+ and for explicitly called trials.
+
:param duration: Duration of test traffic generation in seconds.
:param rate: Traffic rate in transactions per second.
:param async_call: Async mode.
:type rate: float
:type async_call: bool
:returns: TG results.
- :rtype: str
+ :rtype: ReceiveRateMeasurement or None
:raises ValueError: If TG traffic profile is not supported.
"""
subtype = check_subtype(self._node)
else:
raise ValueError(u"Unsupported T-Rex traffic profile!")
- return self._result
+ return None if async_call else self._get_measurement_result()
+
def _send_traffic_on_tg_with_ramp_up(
        self, duration, rate, async_call=False, ramp_up_only=False):
    """Run a trial on TG, preceded by a ramp-up trial when one is due.

    This is an internal function, it relies on set_rate_provider_defaults
    having been called already, so the remembered values
    (ramp-up rate and duration, state timeout) are available on self.
    It is kept separate from the function calling
    set_rate_provider_defaults, because some search algorithms
    set their own values and the measure call must not overwrite them.

    Ramp-up tracking is active only when self.ramp_up_rate is truthy.
    In that case a ramp-up trial (at the remembered ramp-up rate
    and duration) is inserted if no ramp-up is remembered yet,
    if ramp_up_only is set, or if the time elapsed since the remembered
    ramp-up (including the duration of the upcoming trial)
    reaches the state timeout.

    With ramp_up_only set, the main trial is not performed at all.
    This is useful when self remembers a ramp-up trial
    which belongs to a different test (phase).

    A main trial which was complete and had zero loss count
    is remembered as the most recent ramp-up.

    Return None if trial is async or ramp-up only.

    :param duration: Duration of test traffic generation in seconds.
    :param rate: Traffic rate in transactions per second.
    :param async_call: Async mode.
    :param ramp_up_only: If true, do not perform main trial measurement.
    :type duration: float
    :type rate: float
    :type async_call: bool
    :type ramp_up_only: bool
    :returns: TG results.
    :rtype: ReceiveRateMeasurement or None
    :raises ValueError: If TG traffic profile is not supported.
    """
    is_complete = False
    if self.ramp_up_rate:
        # Decide whether a ramp-up trial has to go first.
        # TODO: Give up on async_call=True?
        insert_ramp_up = ramp_up_only or self.ramp_up_start is None
        if not insert_ramp_up:
            # A ramp-up is remembered, check whether it is still fresh.
            # One second is added as a safety overhead.
            now_padded = time.monotonic() + 1.0
            main_duration, is_complete = self._compute_duration(
                duration=duration,
                multiplier=rate,
            )
            # Early sessions may be expiring already ...
            since_start = now_padded - self.ramp_up_start
            # ... or late sessions may expire before the trial ends.
            since_stop = now_padded - self.ramp_up_stop + main_duration
            insert_ramp_up = (
                max(since_start, since_stop) >= self.state_timeout
            )
        if insert_ramp_up:
            logger.debug(
                u"State may time out during next real trial, "
                u"inserting a ramp-up trial."
            )
            self.ramp_up_start = time.monotonic()
            self._send_traffic_on_tg_internal(
                duration=self.ramp_up_duration,
                rate=self.ramp_up_rate,
                async_call=async_call,
            )
            self.ramp_up_stop = time.monotonic()
            logger.debug(u"Ramp-up done.")
        else:
            logger.debug(
                u"State will probably not time out during next real trial, "
                u"no ramp-up trial needed just yet."
            )
    if ramp_up_only:
        return None
    started = time.monotonic()
    measurement = self._send_traffic_on_tg_internal(
        duration=duration,
        rate=rate,
        async_call=async_call,
    )
    finished = time.monotonic()
    if not self.ramp_up_rate:
        # No ramp-up tracking, nothing to remember.
        return measurement
    # Optimization: A complete trial without loss is as good as a ramp-up.
    refreshes_state = (
        is_complete
        and measurement is not None
        and measurement.loss_count == 0
    )
    if refreshes_state:
        logger.debug(u"Good trial acts as a ramp-up")
        self.ramp_up_start = started
        self.ramp_up_stop = finished
    else:
        logger.debug(u"Loss or incomplete, does not act as a ramp-up.")
    return measurement
def no_traffic_loss_occurred(self):
"""Fail if loss occurred in traffic run.
"""
if self._received is None:
raise RuntimeError(u"The traffic generation has not been issued")
- if self._received == u"0":
+ if self._received == 0:
raise RuntimeError(u"No traffic forwarded")
def partial_traffic_loss_accepted(
self._l7_data[u"server"][u"tcp"][u"rx_bytes"] = \
int(self._result.get(u"server_tcp_rx_bytes", 0))
- def get_measurement_result(self):
+ def _get_measurement_result(self):
"""Return the result of last measurement as ReceiveRateMeasurement.
Separate function, as measurements can end either by time
if not target_duration:
target_duration = approximated_duration
transmit_rate = self._rate
+ unsent = 0
if self.transaction_type == u"packet":
partial_attempt_count = self._sent
- expected_attempt_count = self._sent
- fail_count = self._loss
+ packet_rate = transmit_rate * self.ppta
+ # We have a float. TRex way of rounding it is not obvious.
+ # The biggest source of mismatch is Inter Stream Gap.
+ # So the code tolerates 10 usec of missing packets.
+ expected_attempt_count = (target_duration - 1e-5) * packet_rate
+ expected_attempt_count = math.ceil(expected_attempt_count)
+ # TRex can send more.
+ expected_attempt_count = max(expected_attempt_count, self._sent)
+ unsent = expected_attempt_count - self._sent
+ pass_count = self._received
+ fail_count = expected_attempt_count - pass_count
elif self.transaction_type == u"udp_cps":
if not self.transaction_scale:
raise RuntimeError(u"Add support for no-limit udp_cps.")
partial_attempt_count = self._l7_data[u"client"][u"sent"]
# We do not care whether TG is slow, it should have attempted all.
expected_attempt_count = self.transaction_scale
+ unsent = expected_attempt_count - partial_attempt_count
pass_count = self._l7_data[u"client"][u"received"]
fail_count = expected_attempt_count - pass_count
elif self.transaction_type == u"tcp_cps":
partial_attempt_count = ctca
# We do not care whether TG is slow, it should have attempted all.
expected_attempt_count = self.transaction_scale
+ unsent = expected_attempt_count - partial_attempt_count
# From TCP point of view, server/connects counts full connections,
# but we are testing NAT session so client/connects counts that
# (half connections from TCP point of view).
raise RuntimeError(u"Add support for no-limit udp_pps.")
partial_attempt_count = self._sent
expected_attempt_count = self.transaction_scale * self.ppta
- fail_count = self._loss + (expected_attempt_count - self._sent)
+ unsent = expected_attempt_count - self._sent
+ fail_count = self._loss + unsent
elif self.transaction_type == u"tcp_pps":
if not self.transaction_scale:
raise RuntimeError(u"Add support for no-limit tcp_pps.")
# A simple workaround is to add absolute difference.
# Probability of retransmissions exactly cancelling
# packets unsent due to duration stretching is quite low.
- fail_count = self._loss + abs(expected_attempt_count - self._sent)
+ unsent = abs(expected_attempt_count - self._sent)
+ fail_count = self._loss + unsent
else:
raise RuntimeError(f"Unknown parsing {self.transaction_type!r}")
+ if unsent and isinstance(self._approximated_duration, float):
+ # Do not report unsent for "manual".
+ logger.debug(f"Unsent packets/transactions: {unsent}")
if fail_count < 0 and not self.negative_loss:
fail_count = 0
measurement = ReceiveRateMeasurement(
time_stop = time_start + duration
if self.resetter:
self.resetter()
- self._send_traffic_on_tg_internal(
+ result = self._send_traffic_on_tg_with_ramp_up(
duration=duration,
rate=transmit_rate,
async_call=False,
)
- result = self.get_measurement_result()
logger.debug(f"trial measurement result: {result!r}")
# In PLRsearch, computation needs the specified time to complete.
if self.sleep_till_duration:
negative_loss=True,
sleep_till_duration=False,
use_latency=False,
+ ramp_up_rate=None,
+ ramp_up_duration=None,
+ state_timeout=300.0,
):
"""Store values accessed by measure().
sleep until it matches duration. Needed for PLRsearch.
:param use_latency: Whether to measure latency during the trial.
Default: False.
+ :param ramp_up_rate: Rate to use in ramp-up trials [pps].
+ :param ramp_up_duration: Duration of ramp-up trials [s].
+ :param state_timeout: Time of life of DUT state [s].
:type frame_size: str or int
:type traffic_profile: str
:type ppta: int
:type negative_loss: bool
:type sleep_till_duration: bool
:type use_latency: bool
+ :type ramp_up_rate: float
+ :type ramp_up_duration: float
+ :type state_timeout: float
"""
self.frame_size = frame_size
self.traffic_profile = str(traffic_profile)
self.negative_loss = bool(negative_loss)
self.sleep_till_duration = bool(sleep_till_duration)
self.use_latency = bool(use_latency)
# None is the declared default for both ramp-up arguments and means
# "ramp-up not configured". float(None) raises TypeError,
# so None is stored as is; truthiness checks of ramp_up_rate
# then treat it as ramp-up tracking being disabled.
self.ramp_up_rate = (
    None if ramp_up_rate is None else float(ramp_up_rate)
)
self.ramp_up_duration = (
    None if ramp_up_duration is None else float(ramp_up_duration)
)
self.state_timeout = float(state_timeout)
class OptimizedSearch:
initial_trial_duration=1.0,
number_of_intermediate_phases=2,
timeout=720.0,
- doublings=1,
ppta=1,
resetter=None,
traffic_directions=2,
transaction_scale=0,
transaction_type=u"packet",
use_latency=False,
+ ramp_up_rate=None,
+ ramp_up_duration=None,
+ state_timeout=300.0,
+ expansion_coefficient=4.0,
):
"""Setup initialized TG, perform optimized search, return intervals.
- If transaction_scale is nonzero, all non-init trial durations
- are set to 2.0 (as they do not affect the real trial duration)
+ If transaction_scale is nonzero, all init and non-init trial durations
+ are set to 1.0 (as they do not affect the real trial duration)
and zero intermediate phases are used.
- The initial phase still uses 1.0 seconds, to force remeasurement.
- That makes initial phase act as a warmup.
+ This way no re-measurement happens.
+ Warmup has to be handled via resetter or ramp-up mechanisms.
:param frame_size: Frame size identifier or value [B].
:param traffic_profile: Module name as a traffic profile identifier.
See GPL/traffic_profiles/trex for implemented modules.
:param minimum_transmit_rate: Minimal load in transactions per second.
:param maximum_transmit_rate: Maximal load in transactions per second.
- :param packet_loss_ratio: Fraction of packets lost, for PDR [1].
+ :param packet_loss_ratio: Ratio of packets lost, for PDR [1].
:param final_relative_width: Final lower bound transmit rate
cannot be more distant than this multiple of upper bound [1].
:param final_trial_duration: Trial duration for the final phase [s].
to perform before the final phase [1].
:param timeout: The search will fail itself when not finished
before this overall time [s].
- :param doublings: How many doublings to do in external search step.
- Default 1 is suitable for fairly stable tests,
- less stable tests might get better overal duration with 2 or more.
:param ppta: Packets per transaction, aggregated over directions.
Needed for udp_pps which does not have a good transaction counter,
so we need to compute expected number of packets.
transactions. Default: "packet".
:param use_latency: Whether to measure latency during the trial.
Default: False.
+ :param ramp_up_rate: Rate to use in ramp-up trials [pps].
+ :param ramp_up_duration: Duration of ramp-up trials [s].
+ :param state_timeout: Time of life of DUT state [s].
+ :param expansion_coefficient: In external search multiply width by this.
:type frame_size: str or int
:type traffic_profile: str
:type minimum_transmit_rate: float
:type initial_trial_duration: float
:type number_of_intermediate_phases: int
:type timeout: float
- :type doublings: int
:type ppta: int
:type resetter: Optional[Callable[[], None]]
:type traffic_directions: int
:type transaction_scale: int
:type transaction_type: str
:type use_latency: bool
+ :type ramp_up_rate: float
+ :type ramp_up_duration: float
+ :type state_timeout: float
+ :type expansion_coefficient: float
:returns: Structure containing narrowed down NDR and PDR intervals
and their measurements.
- :rtype: NdrPdrResult
+ :rtype: List[ReceiveRateInterval]
:raises RuntimeError: If total duration is larger than timeout.
"""
# we need instance of TrafficGenerator instantiated by Robot Framework
# even though this is surprising for log readers.
if transaction_scale:
initial_trial_duration = 1.0
- final_trial_duration = 2.0
+ final_trial_duration = 1.0
number_of_intermediate_phases = 0
- timeout = 3600.0
+ timeout += transaction_scale * 3e-4
tg_instance.set_rate_provider_defaults(
frame_size=frame_size,
traffic_profile=traffic_profile,
transaction_scale=transaction_scale,
transaction_type=transaction_type,
use_latency=use_latency,
+ ramp_up_rate=ramp_up_rate,
+ ramp_up_duration=ramp_up_duration,
+ state_timeout=state_timeout,
)
algorithm = MultipleLossRatioSearch(
measurer=tg_instance,
number_of_intermediate_phases=number_of_intermediate_phases,
initial_trial_duration=initial_trial_duration,
timeout=timeout,
- doublings=doublings,
+ debug=logger.debug,
+ expansion_coefficient=expansion_coefficient,
)
- result = algorithm.narrow_down_ndr_and_pdr(
+ if packet_loss_ratio:
+ packet_loss_ratios = [0.0, packet_loss_ratio]
+ else:
+ # Happens in reconf tests.
+ packet_loss_ratios = [packet_loss_ratio]
+ results = algorithm.narrow_down_intervals(
min_rate=minimum_transmit_rate,
max_rate=maximum_transmit_rate,
- packet_loss_ratio=packet_loss_ratio,
+ packet_loss_ratios=packet_loss_ratios,
)
- return result
+ return results
@staticmethod
def perform_soak_search(
transaction_scale=0,
transaction_type=u"packet",
use_latency=False,
+ ramp_up_rate=None,
+ ramp_up_duration=None,
+ state_timeout=300.0,
):
"""Setup initialized TG, perform soak search, return avg and stdev.
See GPL/traffic_profiles/trex for implemented modules.
:param minimum_transmit_rate: Minimal load in transactions per second.
:param maximum_transmit_rate: Maximal load in transactions per second.
- :param plr_target: Fraction of packets lost to achieve [1].
+ :param plr_target: Ratio of packets lost to achieve [1].
:param tdpt: Trial duration per trial.
The algorithm linearly increases trial duration with trial number,
this is the increment between successive trials, in seconds.
transactions. Default: "packet".
:param use_latency: Whether to measure latency during the trial.
Default: False.
+ :param ramp_up_rate: Rate to use in ramp-up trials [pps].
+ :param ramp_up_duration: Duration of ramp-up trials [s].
+ :param state_timeout: Time of life of DUT state [s].
:type frame_size: str or int
:type traffic_profile: str
:type minimum_transmit_rate: float
:type transaction_scale: int
:type transaction_type: str
:type use_latency: bool
+ :type ramp_up_rate: float
+ :type ramp_up_duration: float
+ :type state_timeout: float
:returns: Average and stdev of estimated aggregate rate giving PLR.
:rtype: 2-tuple of float
"""
# TODO: Move to robot code? We have a single call site
# but MLRsearch has two and we want the two to be used similarly.
if transaction_scale:
+ # TODO: What is a good value for max scale?
+ # TODO: Scale the timeout with transaction scale.
timeout = 7200.0
tg_instance.set_rate_provider_defaults(
frame_size=frame_size,
transaction_scale=transaction_scale,
transaction_type=transaction_type,
use_latency=use_latency,
+ ramp_up_rate=ramp_up_rate,
+ ramp_up_duration=ramp_up_duration,
+ state_timeout=state_timeout,
)
algorithm = PLRsearch(
measurer=tg_instance,