X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=resources%2Flibraries%2Fpython%2FTrafficGenerator.py;h=b0c64907e067096db233713f410f1c53a3b8e763;hb=92ed218a4277796211d57cbe3b1592f09270b09e;hp=9a9519e2483259ed33c569ba833ad0c8df44af67;hpb=b6fbffad32515ccf94404680cb5280c2cb561af5;p=csit.git diff --git a/resources/libraries/python/TrafficGenerator.py b/resources/libraries/python/TrafficGenerator.py index 9a9519e248..b0c64907e0 100644 --- a/resources/libraries/python/TrafficGenerator.py +++ b/resources/libraries/python/TrafficGenerator.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 Cisco and/or its affiliates. +# Copyright (c) 2023 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -13,13 +13,13 @@ """Performance testing traffic generator library.""" +import math import time from robot.api import logger from robot.libraries.BuiltIn import BuiltIn from .Constants import Constants -from .CpuUtils import CpuUtils from .DropRateSearch import DropRateSearch from .MLRsearch.AbstractMeasurer import AbstractMeasurer from .MLRsearch.MultipleLossRatioSearch import MultipleLossRatioSearch @@ -30,6 +30,8 @@ from .ssh import exec_cmd_no_error, exec_cmd from .topology import NodeType from .topology import NodeSubTypeTG from .topology import Topology +from .TRexConfigGenerator import TrexConfig +from .DUTSetup import DUTSetup as DS __all__ = [u"TGDropRateSearchImpl", u"TrafficGenerator", u"OptimizedSearch"] @@ -127,18 +129,13 @@ class TrexMode: STL = u"STL" -# TODO: Pylint says too-many-instance-attributes. class TrafficGenerator(AbstractMeasurer): """Traffic Generator.""" - # TODO: Remove "trex" from lines which could work with other TGs. - # Use one instance of TrafficGenerator for all tests in test suite ROBOT_LIBRARY_SCOPE = u"TEST SUITE" def __init__(self): - # TODO: Separate into few dataclasses/dicts. - # Pylint dislikes large unstructured state, and it is right. self._node = None self._mode = None # TG interface order mapping @@ -177,8 +174,7 @@ class TrafficGenerator(AbstractMeasurer): self.ramp_up_duration = None self.state_timeout = None # Transient data needed for async measurements. - self._xstats = (None, None) - # TODO: Rename "xstats" to something opaque, so T-Rex is not privileged? + self._xstats = () @property def node(self): @@ -251,101 +247,97 @@ class TrafficGenerator(AbstractMeasurer): f"{self._node[u'subtype']} not running in {expected_mode} mode!" ) - # TODO: pylint says disable=too-many-locals. - def initialize_traffic_generator( - self, tg_node, tg_if1, tg_if2, tg_if1_adj_node, tg_if1_adj_if, - tg_if2_adj_node, tg_if2_adj_if, osi_layer, tg_if1_dst_mac=None, - tg_if2_dst_mac=None): - """TG initialization. + @staticmethod + def get_tg_type(tg_node): + """Log and return the installed traffic generator type. + + :param tg_node: Node from topology file. + :type tg_node: dict + :returns: Traffic generator type string. + :rtype: str + :raises RuntimeError: If command returns nonzero return code. + """ + return str(check_subtype(tg_node)) - TODO: Document why do we need (and how do we use) _ifaces_reordered. + @staticmethod + def get_tg_version(tg_node): + """Log and return the installed traffic generator version. - :param tg_node: Traffic generator node. - :param tg_if1: TG - name of first interface. - :param tg_if2: TG - name of second interface. - :param tg_if1_adj_node: TG if1 adjecent node. - :param tg_if1_adj_if: TG if1 adjecent interface. - :param tg_if2_adj_node: TG if2 adjecent node. - :param tg_if2_adj_if: TG if2 adjecent interface. - :param osi_layer: 'L2', 'L3' or 'L7' - OSI Layer testing type. - :param tg_if1_dst_mac: Interface 1 destination MAC address. - :param tg_if2_dst_mac: Interface 2 destination MAC address. + :param tg_node: Node from topology file. :type tg_node: dict - :type tg_if1: str - :type tg_if2: str - :type tg_if1_adj_node: dict - :type tg_if1_adj_if: str - :type tg_if2_adj_node: dict - :type tg_if2_adj_if: str - :type osi_layer: str - :type tg_if1_dst_mac: str - :type tg_if2_dst_mac: str - :returns: nothing - :raises RuntimeError: In case of issue during initialization. + :returns: Traffic generator version string. + :rtype: str + :raises RuntimeError: If command returns nonzero return code. """ subtype = check_subtype(tg_node) if subtype == NodeSubTypeTG.TREX: - self._node = tg_node - self._mode = TrexMode.ASTF if osi_layer == u"L7" else TrexMode.STL - if1 = dict() - if2 = dict() - if1[u"pci"] = Topology().get_interface_pci_addr(self._node, tg_if1) - if2[u"pci"] = Topology().get_interface_pci_addr(self._node, tg_if2) - if1[u"addr"] = Topology().get_interface_mac(self._node, tg_if1) - if2[u"addr"] = Topology().get_interface_mac(self._node, tg_if2) - - if osi_layer == u"L2": - if1[u"adj_addr"] = if2[u"addr"] - if2[u"adj_addr"] = if1[u"addr"] - elif osi_layer in (u"L3", u"L7"): - if1[u"adj_addr"] = Topology().get_interface_mac( - tg_if1_adj_node, tg_if1_adj_if - ) - if2[u"adj_addr"] = Topology().get_interface_mac( - tg_if2_adj_node, tg_if2_adj_if - ) - else: - raise ValueError(u"Unknown OSI layer!") + command = f"cat {Constants.TREX_INSTALL_DIR}/VERSION" + message = u"Get T-Rex version failed!" + stdout, _ = exec_cmd_no_error(tg_node, command, message=message) + return stdout.strip() + else: + return "none" - # in case of switched environment we can override MAC addresses - if tg_if1_dst_mac is not None and tg_if2_dst_mac is not None: - if1[u"adj_addr"] = tg_if1_dst_mac - if2[u"adj_addr"] = tg_if2_dst_mac + def initialize_traffic_generator(self, osi_layer, parallel_links=1): + """TG initialization. - if min(if1[u"pci"], if2[u"pci"]) != if1[u"pci"]: - if1, if2 = if2, if1 - self._ifaces_reordered = True + :param osi_layer: 'L2', 'L3' or 'L7' - OSI Layer testing type. + :param parallel_links: Number of parallel links to configure. + :type osi_layer: str + :type parallel_links: int + :raises ValueError: If OSI layer is unknown. + """ + if osi_layer not in ("L2", "L3", "L7"): + raise ValueError("Unknown OSI layer!") - master_thread_id, latency_thread_id, socket, threads = \ - CpuUtils.get_affinity_trex( - self._node, tg_if1, tg_if2, - tg_dtc=Constants.TREX_CORE_COUNT) + topology = BuiltIn().get_variable_value("&{topology_info}") + self._node = topology["TG"] + subtype = check_subtype(self._node) - if osi_layer in (u"L2", u"L3", u"L7"): - exec_cmd_no_error( - self._node, - f"sh -c 'cat << EOF > /etc/trex_cfg.yaml\n" - f"- version: 2\n" - f" c: {len(threads)}\n" - f" limit_memory: {Constants.TREX_LIMIT_MEMORY}\n" - f" interfaces: [\"{if1[u'pci']}\",\"{if2[u'pci']}\"]\n" - f" port_info:\n" - f" - dest_mac: \'{if1[u'adj_addr']}\'\n" - f" src_mac: \'{if1[u'addr']}\'\n" - f" - dest_mac: \'{if2[u'adj_addr']}\'\n" - f" src_mac: \'{if2[u'addr']}\'\n" - f" platform :\n" - f" master_thread_id: {master_thread_id}\n" - f" latency_thread_id: {latency_thread_id}\n" - f" dual_if:\n" - f" - socket: {socket}\n" - f" threads: {threads}\n" - f"EOF'", - sudo=True, message=u"T-Rex config generation!" - ) - else: - raise ValueError(u"Unknown OSI layer!") + if subtype == NodeSubTypeTG.TREX: + trex_topology = list() + self._mode = TrexMode.ASTF if osi_layer == "L7" else TrexMode.STL + + for l in range(1, parallel_links*2, 2): + tg_if1_adj_addr = topology[f"TG_pf{l+1}_mac"][0] + tg_if2_adj_addr = topology[f"TG_pf{l}_mac"][0] + if osi_layer in ("L3", "L7") and "DUT1" in topology.keys(): + ifl = BuiltIn().get_variable_value("${int}") + last = topology["duts_count"] + tg_if1_adj_addr = Topology().get_interface_mac( + topology["DUT1"], + BuiltIn().get_variable_value( + f"${{DUT1_{ifl}{l}}}[0]" + ) + ) + tg_if2_adj_addr = Topology().get_interface_mac( + topology[f"DUT{last}"], + BuiltIn().get_variable_value( + f"${{DUT{last}_{ifl}{l+1}}}[0]" + ) + ) + trex_topology.append( + dict( + interface=topology[f"TG_pf{l}"][0], + dst_mac=tg_if1_adj_addr + ) + ) + trex_topology.append( + dict( + interface=topology[f"TG_pf{l+1}"][0], + dst_mac=tg_if2_adj_addr + ) + ) + if1_pci = topology[f"TG_pf{l}_pci"][0] + if2_pci = topology[f"TG_pf{l+1}_pci"][0] + if min(if1_pci, if2_pci) != if1_pci: + self._ifaces_reordered = True + trex_topology.reverse() + + TrexConfig.add_startup_configuration( + self._node, trex_topology + ) TrafficGenerator.startup_trex( self._node, osi_layer, subtype=subtype ) @@ -373,18 +365,27 @@ class TrafficGenerator(AbstractMeasurer): tg_node, cmd, sudo=True, message=u"Kill TRex failed!" ) - # Configure TRex. - ports = '' + # Prepare interfaces for TRex. + tg_port_drv = Constants.TREX_PORT_DRIVER + mlx_driver = u"" for port in tg_node[u"interfaces"].values(): - if u'Mellanox' not in port.get(u'model'): - ports += f" {port.get(u'pci_address')}" - - cmd = f"sh -c \"cd {Constants.TREX_INSTALL_DIR}/scripts/ && " \ - f"./dpdk_nic_bind.py -u {ports} || true\"" - exec_cmd_no_error( - tg_node, cmd, sudo=True, - message=u"Unbind PCI ports from driver failed!" - ) + if u"Mellanox" in port.get(u"model"): + mlx_driver = port.get(u"driver") + pci_addr = port.get(u'pci_address') + cur_driver = DS.get_pci_dev_driver(tg_node, pci_addr) + if cur_driver == mlx_driver: + pass + elif not cur_driver: + DS.pci_driver_bind(tg_node, pci_addr, mlx_driver) + else: + DS.pci_driver_unbind(tg_node, pci_addr) + DS.pci_driver_bind(tg_node, pci_addr, mlx_driver) + else: + pci_addr = port.get(u'pci_address') + cur_driver = DS.get_pci_dev_driver(tg_node, pci_addr) + if cur_driver: + DS.pci_driver_unbind(tg_node, pci_addr) + DS.pci_driver_bind(tg_node, pci_addr, tg_port_drv) # Start TRex. cd_cmd = f"cd '{Constants.TREX_INSTALL_DIR}/scripts/'" @@ -498,11 +499,11 @@ class TrafficGenerator(AbstractMeasurer): command_line = OptionString().add(u"python3") dirname = f"{Constants.REMOTE_FW_DIR}/GPL/tools/trex" command_line.add(f"'{dirname}/trex_stl_stop.py'") - command_line.change_prefix(u"--") + command_line.add("--xstat") for index, value in enumerate(self._xstats): if value is not None: - value = value.replace(u"'", u"\"") - command_line.add_equals(f"xstat{index}", f"'{value}'") + value = value.replace("'", "\"") + command_line.add(f"'{value}'") stdout, _ = exec_cmd_no_error( node, command_line, message=u"T-Rex STL runtime error!" @@ -559,7 +560,6 @@ class TrafficGenerator(AbstractMeasurer): # so we can compare with what telemetry suggests # the real duration was. logger.debug(f"Expected duration {computed_duration}") - computed_duration += 0.1115 if not self.duration_limit: return computed_duration, True limited_duration = min(computed_duration, self.duration_limit) @@ -611,8 +611,6 @@ class TrafficGenerator(AbstractMeasurer): if not isinstance(duration, (float, int)): duration = float(duration) - # TODO: Refactor the code so duration is computed only once, - # and both the initial and the computed durations are logged. computed_duration, _ = self._compute_duration(duration, multiplier) command_line = OptionString().add(u"python3") @@ -625,6 +623,9 @@ class TrafficGenerator(AbstractMeasurer): ) command_line.add_with_value(u"duration", f"{computed_duration!r}") command_line.add_with_value(u"frame_size", self.frame_size) + command_line.add_with_value( + u"n_data_frames", Constants.ASTF_N_DATA_FRAMES + ) command_line.add_with_value(u"multiplier", multiplier) command_line.add_with_value(u"port_0", p_0) command_line.add_with_value(u"port_1", p_1) @@ -634,6 +635,9 @@ class TrafficGenerator(AbstractMeasurer): command_line.add_if(u"async_start", async_call) command_line.add_if(u"latency", self.use_latency) command_line.add_if(u"force", Constants.TREX_SEND_FORCE) + command_line.add_with_value( + u"delay", Constants.PERF_TRIAL_ASTF_DELAY + ) self._start_time = time.monotonic() self._rate = multiplier @@ -650,7 +654,7 @@ class TrafficGenerator(AbstractMeasurer): self._sent = None self._loss = None self._latency = None - xstats = [None, None] + xstats = [] self._l7_data = dict() self._l7_data[u"client"] = dict() self._l7_data[u"client"][u"active_flows"] = None @@ -683,10 +687,8 @@ class TrafficGenerator(AbstractMeasurer): index = 0 for line in stdout.splitlines(): if f"Xstats snapshot {index}: " in line: - xstats[index] = line[19:] + xstats.append(line[19:]) index += 1 - if index == 2: - break self._xstats = tuple(xstats) else: self._target_duration = duration @@ -718,8 +720,6 @@ class TrafficGenerator(AbstractMeasurer): if not isinstance(duration, (float, int)): duration = float(duration) - # TODO: Refactor the code so duration is computed only once, - # and both the initial and the computed durations are logged. duration, _ = self._compute_duration(duration=duration, multiplier=rate) command_line = OptionString().add(u"python3") @@ -741,8 +741,8 @@ class TrafficGenerator(AbstractMeasurer): command_line.add_if(u"async_start", async_call) command_line.add_if(u"latency", self.use_latency) command_line.add_if(u"force", Constants.TREX_SEND_FORCE) + command_line.add_with_value(u"delay", Constants.PERF_TRIAL_STL_DELAY) - # TODO: This is ugly. Handle parsing better. self._start_time = time.monotonic() self._rate = float(rate[:-3]) if u"pps" in rate else float(rate) stdout, _ = exec_cmd_no_error( @@ -759,14 +759,12 @@ class TrafficGenerator(AbstractMeasurer): self._loss = None self._latency = None - xstats = [None, None] + xstats = [] index = 0 for line in stdout.splitlines(): if f"Xstats snapshot {index}: " in line: - xstats[index] = line[19:] + xstats.append(line[19:]) index += 1 - if index == 2: - break self._xstats = tuple(xstats) else: self._target_duration = duration @@ -789,7 +787,7 @@ class TrafficGenerator(AbstractMeasurer): use_latency=False, ramp_up_rate=None, ramp_up_duration=None, - state_timeout=300.0, + state_timeout=240.0, ramp_up_only=False, ): """Send traffic from all configured interfaces on TG. @@ -914,7 +912,6 @@ class TrafficGenerator(AbstractMeasurer): ) elif u"trex-stl" in self.traffic_profile: unit_rate_str = str(rate) + u"pps" - # TODO: Suport transaction_scale et al? self.trex_stl_start_remote_exec( duration, unit_rate_str, async_call ) @@ -961,7 +958,6 @@ class TrafficGenerator(AbstractMeasurer): complete = False if self.ramp_up_rate: # Figure out whether we need to insert a ramp-up trial. - # TODO: Give up on async_call=True? if ramp_up_only or self.ramp_up_start is None: # We never ramped up yet (at least not in this test case). ramp_up_needed = True @@ -1033,8 +1029,6 @@ class TrafficGenerator(AbstractMeasurer): def fail_if_no_traffic_forwarded(self): """Fail if no traffic forwarded. - TODO: Check number of passed transactions instead. - :returns: nothing :raises Exception: If no traffic forwarded. """ @@ -1193,9 +1187,7 @@ class TrafficGenerator(AbstractMeasurer): The target_tr field of ReceiveRateMeasurement is in transactions per second. Transmit count and loss count units depend on the transaction type. Usually they are in transactions - per second, or aggregate packets per second. - - TODO: Fail on running or already reported measurement. + per second, or aggregated packets per second. :returns: Structure containing the result of the measurement. :rtype: ReceiveRateMeasurement @@ -1226,16 +1218,27 @@ class TrafficGenerator(AbstractMeasurer): if not target_duration: target_duration = approximated_duration transmit_rate = self._rate + unsent = 0 if self.transaction_type == u"packet": partial_attempt_count = self._sent - expected_attempt_count = self._sent - fail_count = self._loss + packet_rate = transmit_rate * self.ppta + # We have a float. TRex way of rounding it is not obvious. + # The biggest source of mismatch is Inter Stream Gap. + # So the code tolerates 10 usec of missing packets. + expected_attempt_count = (target_duration - 1e-5) * packet_rate + expected_attempt_count = math.ceil(expected_attempt_count) + # TRex can send more. + expected_attempt_count = max(expected_attempt_count, self._sent) + unsent = expected_attempt_count - self._sent + pass_count = self._received + fail_count = expected_attempt_count - pass_count elif self.transaction_type == u"udp_cps": if not self.transaction_scale: raise RuntimeError(u"Add support for no-limit udp_cps.") partial_attempt_count = self._l7_data[u"client"][u"sent"] # We do not care whether TG is slow, it should have attempted all. expected_attempt_count = self.transaction_scale + unsent = expected_attempt_count - partial_attempt_count pass_count = self._l7_data[u"client"][u"received"] fail_count = expected_attempt_count - pass_count elif self.transaction_type == u"tcp_cps": @@ -1245,6 +1248,7 @@ class TrafficGenerator(AbstractMeasurer): partial_attempt_count = ctca # We do not care whether TG is slow, it should have attempted all. expected_attempt_count = self.transaction_scale + unsent = expected_attempt_count - partial_attempt_count # From TCP point of view, server/connects counts full connections, # but we are testing NAT session so client/connects counts that # (half connections from TCP point of view). @@ -1255,7 +1259,8 @@ class TrafficGenerator(AbstractMeasurer): raise RuntimeError(u"Add support for no-limit udp_pps.") partial_attempt_count = self._sent expected_attempt_count = self.transaction_scale * self.ppta - fail_count = self._loss + (expected_attempt_count - self._sent) + unsent = expected_attempt_count - self._sent + fail_count = self._loss + unsent elif self.transaction_type == u"tcp_pps": if not self.transaction_scale: raise RuntimeError(u"Add support for no-limit tcp_pps.") @@ -1268,9 +1273,13 @@ class TrafficGenerator(AbstractMeasurer): # A simple workaround is to add absolute difference. # Probability of retransmissions exactly cancelling # packets unsent due to duration stretching is quite low. - fail_count = self._loss + abs(expected_attempt_count - self._sent) + unsent = abs(expected_attempt_count - self._sent) + fail_count = self._loss + unsent else: raise RuntimeError(f"Unknown parsing {self.transaction_type!r}") + if unsent and isinstance(self._approximated_duration, float): + # Do not report unsent for "manual". + logger.debug(f"Unsent packets/transactions: {unsent}") if fail_count < 0 and not self.negative_loss: fail_count = 0 measurement = ReceiveRateMeasurement( @@ -1322,8 +1331,6 @@ class TrafficGenerator(AbstractMeasurer): if self.sleep_till_duration: sleeptime = time_stop - time.monotonic() if sleeptime > 0.0: - # TODO: Sometimes we have time to do additional trials here, - # adapt PLRsearch to accept all the results. time.sleep(sleeptime) return result @@ -1343,7 +1350,7 @@ class TrafficGenerator(AbstractMeasurer): use_latency=False, ramp_up_rate=None, ramp_up_duration=None, - state_timeout=300.0, + state_timeout=240.0, ): """Store values accessed by measure(). @@ -1364,7 +1371,6 @@ class TrafficGenerator(AbstractMeasurer): :param transaction_type: An identifier specifying which counters and formulas to use when computing attempted and failed transactions. Default: "packet". - TODO: Does this also specify parsing for the measured duration? :param duration_limit: Zero or maximum limit for computed (or given) duration. :param negative_loss: If false, negative loss is reported as zero loss. @@ -1412,7 +1418,7 @@ class OptimizedSearch: """Class to be imported as Robot Library, containing search keywords. Aside of setting up measurer and forwarding arguments, - the main business is to translate min/max rate from unidir to aggregate. + the main business is to translate min/max rate from unidir to aggregated. """ @staticmethod @@ -1426,7 +1432,7 @@ class OptimizedSearch: final_trial_duration=30.0, initial_trial_duration=1.0, number_of_intermediate_phases=2, - timeout=720.0, + timeout=1200.0, ppta=1, resetter=None, traffic_directions=2, @@ -1436,7 +1442,7 @@ class OptimizedSearch: use_latency=False, ramp_up_rate=None, ramp_up_duration=None, - state_timeout=300.0, + state_timeout=240.0, expansion_coefficient=4.0, ): """Setup initialized TG, perform optimized search, return intervals. @@ -1513,8 +1519,6 @@ class OptimizedSearch: u"resources.libraries.python.TrafficGenerator" ) # Overrides for fixed transaction amount. - # TODO: Move to robot code? We have two call sites, so this saves space, - # even though this is surprising for log readers. if transaction_scale: initial_trial_duration = 1.0 final_trial_duration = 1.0 @@ -1577,7 +1581,7 @@ class OptimizedSearch: use_latency=False, ramp_up_rate=None, ramp_up_duration=None, - state_timeout=300.0, + state_timeout=240.0, ): """Setup initialized TG, perform soak search, return avg and stdev. @@ -1635,18 +1639,14 @@ class OptimizedSearch: :type ramp_up_rate: float :type ramp_up_duration: float :type state_timeout: float - :returns: Average and stdev of estimated aggregate rate giving PLR. + :returns: Average and stdev of estimated aggregated rate giving PLR. :rtype: 2-tuple of float """ tg_instance = BuiltIn().get_library_instance( u"resources.libraries.python.TrafficGenerator" ) # Overrides for fixed transaction amount. - # TODO: Move to robot code? We have a single call site - # but MLRsearch has two and we want the two to be used similarly. if transaction_scale: - # TODO: What is a good value for max scale? - # TODO: Scale the timeout with transaction scale. timeout = 7200.0 tg_instance.set_rate_provider_defaults( frame_size=frame_size,