feat(core): Multilink TRex Sync mode I.
[csit.git] / resources / libraries / python / TrafficGenerator.py
index a26d0fa..fa645c3 100644 (file)
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -20,7 +20,6 @@ from robot.api import logger
 from robot.libraries.BuiltIn import BuiltIn
 
 from .Constants import Constants
-from .CpuUtils import CpuUtils
 from .DropRateSearch import DropRateSearch
 from .MLRsearch.AbstractMeasurer import AbstractMeasurer
 from .MLRsearch.MultipleLossRatioSearch import MultipleLossRatioSearch
@@ -31,6 +30,8 @@ from .ssh import exec_cmd_no_error, exec_cmd
 from .topology import NodeType
 from .topology import NodeSubTypeTG
 from .topology import Topology
+from .TRexConfigGenerator import TrexConfig
+from .DUTSetup import DUTSetup as DS
 
 __all__ = [u"TGDropRateSearchImpl", u"TrafficGenerator", u"OptimizedSearch"]
 
@@ -128,18 +129,13 @@ class TrexMode:
     STL = u"STL"
 
 
-# TODO: Pylint says too-many-instance-attributes.
 class TrafficGenerator(AbstractMeasurer):
     """Traffic Generator."""
 
-    # TODO: Remove "trex" from lines which could work with other TGs.
-
     # Use one instance of TrafficGenerator for all tests in test suite
     ROBOT_LIBRARY_SCOPE = u"TEST SUITE"
 
     def __init__(self):
-        # TODO: Separate into few dataclasses/dicts.
-        #       Pylint dislikes large unstructured state, and it is right.
         self._node = None
         self._mode = None
         # TG interface order mapping
@@ -178,8 +174,7 @@ class TrafficGenerator(AbstractMeasurer):
         self.ramp_up_duration = None
         self.state_timeout = None
         # Transient data needed for async measurements.
-        self._xstats = (None, None)
-        # TODO: Rename "xstats" to something opaque, so T-Rex is not privileged?
+        self._xstats = ()
 
     @property
     def node(self):
@@ -252,119 +247,96 @@ class TrafficGenerator(AbstractMeasurer):
             f"{self._node[u'subtype']} not running in {expected_mode} mode!"
         )
 
-    # TODO: pylint says disable=too-many-locals.
-    def initialize_traffic_generator(
-            self, tg_node, tg_if1, tg_if2, tg_if1_adj_node, tg_if1_adj_if,
-            tg_if2_adj_node, tg_if2_adj_if, osi_layer, tg_if1_dst_mac=None,
-            tg_if2_dst_mac=None):
-        """TG initialization.
+    @staticmethod
+    def get_tg_type(tg_node):
+        """Return the traffic generator subtype of the node as a string.
+
+        :param tg_node: Node from topology file.
+        :type tg_node: dict
+        :returns: str() of the value returned by check_subtype.
+        :rtype: str
+        :raises RuntimeError: Presumably raised by check_subtype (confirm).
+        """
+        return str(check_subtype(tg_node))
 
-        TODO: Document why do we need (and how do we use) _ifaces_reordered.
+    @staticmethod
+    def get_tg_version(tg_node):
+        """Return the installed traffic generator version.
 
-        :param tg_node: Traffic generator node.
-        :param tg_if1: TG - name of first interface.
-        :param tg_if2: TG - name of second interface.
-        :param tg_if1_adj_node: TG if1 adjecent node.
-        :param tg_if1_adj_if: TG if1 adjecent interface.
-        :param tg_if2_adj_node: TG if2 adjecent node.
-        :param tg_if2_adj_if: TG if2 adjecent interface.
-        :param osi_layer: 'L2', 'L3' or 'L7' - OSI Layer testing type.
-        :param tg_if1_dst_mac: Interface 1 destination MAC address.
-        :param tg_if2_dst_mac: Interface 2 destination MAC address.
+        :param tg_node: Node from topology file.
         :type tg_node: dict
-        :type tg_if1: str
-        :type tg_if2: str
-        :type tg_if1_adj_node: dict
-        :type tg_if1_adj_if: str
-        :type tg_if2_adj_node: dict
-        :type tg_if2_adj_if: str
-        :type osi_layer: str
-        :type tg_if1_dst_mac: str
-        :type tg_if2_dst_mac: str
-        :returns: nothing
-        :raises RuntimeError: In case of issue during initialization.
+        :returns: Stripped VERSION file content for T-Rex, else "none".
+        :rtype: str
+        :raises RuntimeError: If command returns nonzero return code.
         """
         subtype = check_subtype(tg_node)
         if subtype == NodeSubTypeTG.TREX:
-            self._node = tg_node
-            self._mode = TrexMode.ASTF if osi_layer == u"L7" else TrexMode.STL
-            if1 = dict()
-            if2 = dict()
-            if1[u"pci"] = Topology().get_interface_pci_addr(self._node, tg_if1)
-            if2[u"pci"] = Topology().get_interface_pci_addr(self._node, tg_if2)
-            if1[u"addr"] = Topology().get_interface_mac(self._node, tg_if1)
-            if2[u"addr"] = Topology().get_interface_mac(self._node, tg_if2)
-
-            if osi_layer == u"L2":
-                if1[u"adj_addr"] = if2[u"addr"]
-                if2[u"adj_addr"] = if1[u"addr"]
-            elif osi_layer in (u"L3", u"L7"):
-                if1[u"adj_addr"] = Topology().get_interface_mac(
-                    tg_if1_adj_node, tg_if1_adj_if
-                )
-                if2[u"adj_addr"] = Topology().get_interface_mac(
-                    tg_if2_adj_node, tg_if2_adj_if
-                )
-            else:
-                raise ValueError(u"Unknown OSI layer!")
-
-            # in case of switched environment we can override MAC addresses
-            if tg_if1_dst_mac is not None and tg_if2_dst_mac is not None:
-                if1[u"adj_addr"] = tg_if1_dst_mac
-                if2[u"adj_addr"] = tg_if2_dst_mac
+            command = f"cat {Constants.TREX_INSTALL_DIR}/VERSION"
+            message = u"Get T-Rex version failed!"
+            stdout, _ = exec_cmd_no_error(tg_node, command, message=message)
+            return stdout.strip()
+        return "none"
 
-            if min(if1[u"pci"], if2[u"pci"]) != if1[u"pci"]:
-                if1, if2 = if2, if1
-                self._ifaces_reordered = True
+    def initialize_traffic_generator(self, osi_layer, parallel_links=1):
+        """Initialize TG: collect per-link ports and MACs, then start T-Rex.
 
-            master_thread_id, latency_thread_id, socket, threads = \
-                CpuUtils.get_affinity_trex(
-                    self._node, tg_if1, tg_if2,
-                    tg_dtc=Constants.TREX_CORE_COUNT)
+        :param osi_layer: 'L2', 'L3' or 'L7' - OSI Layer testing type.
+        :param parallel_links: Number of parallel links to configure.
+        :type osi_layer: str
+        :type parallel_links: int
+        :raises ValueError: If OSI layer is unknown.
+        """
+        if osi_layer not in ("L2", "L3", "L7"):
+            raise ValueError("Unknown OSI layer!")
 
-            if osi_layer in (u"L2", u"L3", u"L7"):
-                exec_cmd_no_error(
-                    self._node,
-                    f"sh -c 'cat << EOF > /etc/trex_cfg.yaml\n"
-                    f"- version: 2\n"
-                    f"  c: {len(threads)}\n"
-                    f"  limit_memory: {Constants.TREX_LIMIT_MEMORY}\n"
-                    f"  interfaces: [\"{if1[u'pci']}\",\"{if2[u'pci']}\"]\n"
-                    f"  port_info:\n"
-                    f"      - dest_mac: \'{if1[u'adj_addr']}\'\n"
-                    f"        src_mac: \'{if1[u'addr']}\'\n"
-                    f"      - dest_mac: \'{if2[u'adj_addr']}\'\n"
-                    f"        src_mac: \'{if2[u'addr']}\'\n"
-                    f"  platform :\n"
-                    f"      master_thread_id: {master_thread_id}\n"
-                    f"      latency_thread_id: {latency_thread_id}\n"
-                    f"      dual_if:\n"
-                    f"          - socket: {socket}\n"
-                    f"            threads: {threads}\n"
-                    f"EOF'",
-                    sudo=True, message=u"T-Rex config generation!"
-                )
+        topology = BuiltIn().get_variable_value("&{topology_info}")
+        self._node = topology["TG"]
+        subtype = check_subtype(self._node)
 
-                if Constants.TREX_RX_DESCRIPTORS_COUNT != 0:
-                    exec_cmd_no_error(
-                        self._node,
-                        f"sh -c 'cat << EOF >> /etc/trex_cfg.yaml\n"
-                        f"  rx_desc: {Constants.TREX_RX_DESCRIPTORS_COUNT}\n"
-                        f"EOF'",
-                        sudo=True, message=u"T-Rex rx_desc modification!"
+        if subtype == NodeSubTypeTG.TREX:
+            trex_topology = list()
+            self._mode = TrexMode.ASTF if osi_layer == "L7" else TrexMode.STL
+
+            for link in range(1, parallel_links*2, 2):
+                tg_if1_adj_addr = topology[f"TG_pf{link+1}_mac"][0]
+                tg_if2_adj_addr = topology[f"TG_pf{link}_mac"][0]
+                if osi_layer in ("L3", "L7") and "DUT1" in topology.keys():
+                    ifl = BuiltIn().get_variable_value("${int}")
+                    last = topology["duts_count"]
+                    tg_if1_adj_addr = Topology().get_interface_mac(
+                        topology["DUT1"],
+                        BuiltIn().get_variable_value(
+                            f"${{DUT1_{ifl}{link}}}[0]"
+                        )
                     )
-
-                if Constants.TREX_TX_DESCRIPTORS_COUNT != 0:
-                    exec_cmd_no_error(
-                        self._node,
-                        f"sh -c 'cat << EOF >> /etc/trex_cfg.yaml\n"
-                        f"  tx_desc: {Constants.TREX_TX_DESCRIPTORS_COUNT}\n"
-                        f"EOF'",
-                        sudo=True, message=u"T-Rex tx_desc modification!"
+                    tg_if2_adj_addr = Topology().get_interface_mac(
+                        topology[f"DUT{last}"],
+                        BuiltIn().get_variable_value(
+                            f"${{DUT{last}_{ifl}{link+1}}}[0]"
+                        )
                     )
-            else:
-                raise ValueError(u"Unknown OSI layer!")
 
+                trex_topology.append(
+                    dict(
+                        interface=topology[f"TG_pf{link}"][0],
+                        dst_mac=tg_if1_adj_addr
+                    )
+                )
+                trex_topology.append(
+                    dict(
+                        interface=topology[f"TG_pf{link+1}"][0],
+                        dst_mac=tg_if2_adj_addr
+                    )
+                )
+                if1_pci = topology[f"TG_pf{link}_pci"][0]
+                if2_pci = topology[f"TG_pf{link+1}_pci"][0]
+                if min(if1_pci, if2_pci) != if1_pci:
+                    self._ifaces_reordered = True
+                    trex_topology.reverse()  # NOTE(review): flips whole list
+
+            TrexConfig.add_startup_configuration(
+                self._node, trex_topology
+            )
             TrafficGenerator.startup_trex(
                 self._node, osi_layer, subtype=subtype
             )
@@ -392,18 +364,27 @@ class TrafficGenerator(AbstractMeasurer):
                     tg_node, cmd, sudo=True, message=u"Kill TRex failed!"
                 )
 
-                # Configure TRex.
-                ports = ''
+                # Prepare interfaces for TRex.
+                tg_port_drv = Constants.TREX_PORT_DRIVER
+                mlx_driver = u""
                 for port in tg_node[u"interfaces"].values():
-                    if u'Mellanox' not in port.get(u'model'):
-                        ports += f" {port.get(u'pci_address')}"
-
-                cmd = f"sh -c \"cd {Constants.TREX_INSTALL_DIR}/scripts/ && " \
-                    f"./dpdk_nic_bind.py -u {ports} || true\""
-                exec_cmd_no_error(
-                    tg_node, cmd, sudo=True,
-                    message=u"Unbind PCI ports from driver failed!"
-                )
+                    if u"Mellanox" in port.get(u"model"):
+                        mlx_driver = port.get(u"driver")
+                        pci_addr = port.get(u'pci_address')
+                        cur_driver = DS.get_pci_dev_driver(tg_node, pci_addr)
+                        if cur_driver == mlx_driver:
+                            pass
+                        elif not cur_driver:
+                            DS.pci_driver_bind(tg_node, pci_addr, mlx_driver)
+                        else:
+                            DS.pci_driver_unbind(tg_node, pci_addr)
+                            DS.pci_driver_bind(tg_node, pci_addr, mlx_driver)
+                    else:
+                        pci_addr = port.get(u'pci_address')
+                        cur_driver = DS.get_pci_dev_driver(tg_node, pci_addr)
+                        if cur_driver:
+                            DS.pci_driver_unbind(tg_node, pci_addr)
+                        DS.pci_driver_bind(tg_node, pci_addr, tg_port_drv)
 
                 # Start TRex.
                 cd_cmd = f"cd '{Constants.TREX_INSTALL_DIR}/scripts/'"
@@ -517,11 +498,11 @@ class TrafficGenerator(AbstractMeasurer):
         command_line = OptionString().add(u"python3")
         dirname = f"{Constants.REMOTE_FW_DIR}/GPL/tools/trex"
         command_line.add(f"'{dirname}/trex_stl_stop.py'")
-        command_line.change_prefix(u"--")
-        for index, value in enumerate(self._xstats):
+        command_line.add("--xstat")
+        for value in self._xstats:
             if value is not None:
-                value = value.replace(u"'", u"\"")
-                command_line.add_equals(f"xstat{index}", f"'{value}'")
+                value = value.replace("'", "\"")
+                command_line.add(f"'{value}'")
         stdout, _ = exec_cmd_no_error(
             node, command_line,
             message=u"T-Rex STL runtime error!"
@@ -629,8 +610,6 @@ class TrafficGenerator(AbstractMeasurer):
         if not isinstance(duration, (float, int)):
             duration = float(duration)
 
-        # TODO: Refactor the code so duration is computed only once,
-        # and both the initial and the computed durations are logged.
         computed_duration, _ = self._compute_duration(duration, multiplier)
 
         command_line = OptionString().add(u"python3")
@@ -643,6 +622,9 @@ class TrafficGenerator(AbstractMeasurer):
         )
         command_line.add_with_value(u"duration", f"{computed_duration!r}")
         command_line.add_with_value(u"frame_size", self.frame_size)
+        command_line.add_with_value(
+            u"n_data_frames", Constants.ASTF_N_DATA_FRAMES
+        )
         command_line.add_with_value(u"multiplier", multiplier)
         command_line.add_with_value(u"port_0", p_0)
         command_line.add_with_value(u"port_1", p_1)
@@ -671,7 +653,7 @@ class TrafficGenerator(AbstractMeasurer):
             self._sent = None
             self._loss = None
             self._latency = None
-            xstats = [None, None]
+            xstats = []
             self._l7_data = dict()
             self._l7_data[u"client"] = dict()
             self._l7_data[u"client"][u"active_flows"] = None
@@ -704,10 +686,8 @@ class TrafficGenerator(AbstractMeasurer):
             index = 0
             for line in stdout.splitlines():
                 if f"Xstats snapshot {index}: " in line:
-                    xstats[index] = line[19:]
+                    xstats.append(line[19:])
                     index += 1
-                if index == 2:
-                    break
             self._xstats = tuple(xstats)
         else:
             self._target_duration = duration
@@ -739,8 +719,6 @@ class TrafficGenerator(AbstractMeasurer):
         if not isinstance(duration, (float, int)):
             duration = float(duration)
 
-        # TODO: Refactor the code so duration is computed only once,
-        # and both the initial and the computed durations are logged.
         duration, _ = self._compute_duration(duration=duration, multiplier=rate)
 
         command_line = OptionString().add(u"python3")
@@ -764,7 +742,6 @@ class TrafficGenerator(AbstractMeasurer):
         command_line.add_if(u"force", Constants.TREX_SEND_FORCE)
         command_line.add_with_value(u"delay", Constants.PERF_TRIAL_STL_DELAY)
 
-        # TODO: This is ugly. Handle parsing better.
         self._start_time = time.monotonic()
         self._rate = float(rate[:-3]) if u"pps" in rate else float(rate)
         stdout, _ = exec_cmd_no_error(
@@ -781,14 +758,12 @@ class TrafficGenerator(AbstractMeasurer):
             self._loss = None
             self._latency = None
 
-            xstats = [None, None]
+            xstats = []
             index = 0
             for line in stdout.splitlines():
                 if f"Xstats snapshot {index}: " in line:
-                    xstats[index] = line[19:]
+                    xstats.append(line[19:])
                     index += 1
-                if index == 2:
-                    break
             self._xstats = tuple(xstats)
         else:
             self._target_duration = duration
@@ -811,7 +786,7 @@ class TrafficGenerator(AbstractMeasurer):
             use_latency=False,
             ramp_up_rate=None,
             ramp_up_duration=None,
-            state_timeout=300.0,
+            state_timeout=240.0,
             ramp_up_only=False,
         ):
         """Send traffic from all configured interfaces on TG.
@@ -936,7 +911,6 @@ class TrafficGenerator(AbstractMeasurer):
                 )
             elif u"trex-stl" in self.traffic_profile:
                 unit_rate_str = str(rate) + u"pps"
-                # TODO: Suport transaction_scale et al?
                 self.trex_stl_start_remote_exec(
                     duration, unit_rate_str, async_call
                 )
@@ -983,7 +957,6 @@ class TrafficGenerator(AbstractMeasurer):
         complete = False
         if self.ramp_up_rate:
             # Figure out whether we need to insert a ramp-up trial.
-            # TODO: Give up on async_call=True?
             if ramp_up_only or self.ramp_up_start is None:
                 # We never ramped up yet (at least not in this test case).
                 ramp_up_needed = True
@@ -1055,8 +1028,6 @@ class TrafficGenerator(AbstractMeasurer):
     def fail_if_no_traffic_forwarded(self):
         """Fail if no traffic forwarded.
 
-        TODO: Check number of passed transactions instead.
-
         :returns: nothing
         :raises Exception: If no traffic forwarded.
         """
@@ -1215,9 +1186,7 @@ class TrafficGenerator(AbstractMeasurer):
         The target_tr field of ReceiveRateMeasurement is in
         transactions per second. Transmit count and loss count units
         depend on the transaction type. Usually they are in transactions
-        per second, or aggregate packets per second.
-
-        TODO: Fail on running or already reported measurement.
+        per second, or aggregated packets per second.
 
         :returns: Structure containing the result of the measurement.
         :rtype: ReceiveRateMeasurement
@@ -1361,8 +1330,6 @@ class TrafficGenerator(AbstractMeasurer):
         if self.sleep_till_duration:
             sleeptime = time_stop - time.monotonic()
             if sleeptime > 0.0:
-                # TODO: Sometimes we have time to do additional trials here,
-                # adapt PLRsearch to accept all the results.
                 time.sleep(sleeptime)
         return result
 
@@ -1382,7 +1349,7 @@ class TrafficGenerator(AbstractMeasurer):
             use_latency=False,
             ramp_up_rate=None,
             ramp_up_duration=None,
-            state_timeout=300.0,
+            state_timeout=240.0,
         ):
         """Store values accessed by measure().
 
@@ -1403,7 +1370,6 @@ class TrafficGenerator(AbstractMeasurer):
         :param transaction_type: An identifier specifying which counters
             and formulas to use when computing attempted and failed
             transactions. Default: "packet".
-            TODO: Does this also specify parsing for the measured duration?
         :param duration_limit: Zero or maximum limit for computed (or given)
             duration.
         :param negative_loss: If false, negative loss is reported as zero loss.
@@ -1451,7 +1417,7 @@ class OptimizedSearch:
     """Class to be imported as Robot Library, containing search keywords.
 
     Aside of setting up measurer and forwarding arguments,
-    the main business is to translate min/max rate from unidir to aggregate.
+    the main business is to translate min/max rate from unidir to aggregated.
     """
 
     @staticmethod
@@ -1465,7 +1431,7 @@ class OptimizedSearch:
             final_trial_duration=30.0,
             initial_trial_duration=1.0,
             number_of_intermediate_phases=2,
-            timeout=720.0,
+            timeout=1200.0,
             ppta=1,
             resetter=None,
             traffic_directions=2,
@@ -1475,7 +1441,7 @@ class OptimizedSearch:
             use_latency=False,
             ramp_up_rate=None,
             ramp_up_duration=None,
-            state_timeout=300.0,
+            state_timeout=240.0,
             expansion_coefficient=4.0,
     ):
         """Setup initialized TG, perform optimized search, return intervals.
@@ -1552,8 +1518,6 @@ class OptimizedSearch:
             u"resources.libraries.python.TrafficGenerator"
         )
         # Overrides for fixed transaction amount.
-        # TODO: Move to robot code? We have two call sites, so this saves space,
-        #       even though this is surprising for log readers.
         if transaction_scale:
             initial_trial_duration = 1.0
             final_trial_duration = 1.0
@@ -1616,7 +1580,7 @@ class OptimizedSearch:
             use_latency=False,
             ramp_up_rate=None,
             ramp_up_duration=None,
-            state_timeout=300.0,
+            state_timeout=240.0,
     ):
         """Setup initialized TG, perform soak search, return avg and stdev.
 
@@ -1674,18 +1638,14 @@ class OptimizedSearch:
         :type ramp_up_rate: float
         :type ramp_up_duration: float
         :type state_timeout: float
-        :returns: Average and stdev of estimated aggregate rate giving PLR.
+        :returns: Average and stdev of estimated aggregated rate giving PLR.
         :rtype: 2-tuple of float
         """
         tg_instance = BuiltIn().get_library_instance(
             u"resources.libraries.python.TrafficGenerator"
         )
         # Overrides for fixed transaction amount.
-        # TODO: Move to robot code? We have a single call site
-        #       but MLRsearch has two and we want the two to be used similarly.
         if transaction_scale:
-            # TODO: What is a good value for max scale?
-            # TODO: Scale the timeout with transaction scale.
             timeout = 7200.0
         tg_instance.set_rate_provider_defaults(
             frame_size=frame_size,