UTI: Normalize trending data

[csit.git] / resources / libraries / python / TrafficGenerator.py
diff --git a/resources/libraries/python/TrafficGenerator.py b/resources/libraries/python/TrafficGenerator.py

index 30be3b9..2a28896 100644 (file)
--- a/resources/libraries/python/TrafficGenerator.py
+++ b/resources/libraries/python/TrafficGenerator.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2022 Cisco and/or its affiliates.
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at:
@@ -13,6 +13,7 @@
  
  """Performance testing traffic generator library."""
  
+import math
  import time
  
  from robot.api import logger
@@ -251,7 +252,38 @@ class TrafficGenerator(AbstractMeasurer):
              f"{self._node[u'subtype']} not running in {expected_mode} mode!"
          )
  
-    # TODO: pylint says disable=too-many-locals.
+    @staticmethod
+    def get_tg_type(tg_node):
+        """Return the traffic generator subtype of the node as a string.
+
+        :param tg_node: Node from topology file.
+        :type tg_node: dict
+        :returns: String form of the value check_subtype gives for the node.
+        :rtype: str
+        :raises RuntimeError: Presumably via check_subtype - TODO confirm.
+        """
+        return str(check_subtype(tg_node))
+
+    @staticmethod
+    def get_tg_version(tg_node):
+        """Return the installed traffic generator version.
+
+        :param tg_node: Node from topology file.
+        :type tg_node: dict
+        :returns: Stripped TRex VERSION file content, or "none" if not TRex.
+        :rtype: str
+        :raises RuntimeError: If command returns nonzero return code.
+        """
+        subtype = check_subtype(tg_node)
+        if subtype == NodeSubTypeTG.TREX:
+            command = f"cat {Constants.TREX_INSTALL_DIR}/VERSION"
+            message = u"Get T-Rex version failed!"
+            stdout, _ = exec_cmd_no_error(tg_node, command, message=message)
+            return stdout.strip()
+        else:
+            return "none"
+
+    # TODO: pylint disable=too-many-locals.
      def initialize_traffic_generator(
              self, tg_node, tg_if1, tg_if2, tg_if1_adj_node, tg_if1_adj_if,
              tg_if2_adj_node, tg_if2_adj_if, osi_layer, tg_if1_dst_mac=None,
@@ -343,6 +375,24 @@ class TrafficGenerator(AbstractMeasurer):
                      f"EOF'",
                      sudo=True, message=u"T-Rex config generation!"
                  )
+
+                if Constants.TREX_RX_DESCRIPTORS_COUNT != 0:
+                    exec_cmd_no_error(
+                        self._node,
+                        f"sh -c 'cat << EOF >> /etc/trex_cfg.yaml\n"
+                        f"  rx_desc: {Constants.TREX_RX_DESCRIPTORS_COUNT}\n"
+                        f"EOF'",
+                        sudo=True, message=u"T-Rex rx_desc modification!"
+                    )
+
+                if Constants.TREX_TX_DESCRIPTORS_COUNT != 0:
+                    exec_cmd_no_error(
+                        self._node,
+                        f"sh -c 'cat << EOF >> /etc/trex_cfg.yaml\n"
+                        f"  tx_desc: {Constants.TREX_TX_DESCRIPTORS_COUNT}\n"
+                        f"EOF'",
+                        sudo=True, message=u"T-Rex tx_desc modification!"
+                    )
              else:
                  raise ValueError(u"Unknown OSI layer!")
  
@@ -373,18 +423,37 @@ class TrafficGenerator(AbstractMeasurer):
                      tg_node, cmd, sudo=True, message=u"Kill TRex failed!"
                  )
  
-                # Configure TRex.
-                ports = ''
+                # Prepare interfaces for TRex.
+                mlx_ports = u""
+                mlx_driver = u""
+                itl_ports = u""
                  for port in tg_node[u"interfaces"].values():
-                    if u'Mellanox' not in port.get(u'model'):
-                        ports += f" {port.get(u'pci_address')}"
-
-                cmd = f"sh -c \"cd {Constants.TREX_INSTALL_DIR}/scripts/ && " \
-                    f"./dpdk_nic_bind.py -u {ports} || true\""
-                exec_cmd_no_error(
-                    tg_node, cmd, sudo=True,
-                    message=u"Unbind PCI ports from driver failed!"
-                )
+                    if u"Mellanox" in port.get(u"model"):
+                        mlx_ports += f" {port.get(u'pci_address')}"
+                        mlx_driver = port.get(u"driver")
+                    if u"Intel" in port.get(u"model"):
+                        itl_ports += f" {port.get(u'pci_address')}"
+
+                if itl_ports:
+                    cmd = (
+                        f"sh -c \"cd {Constants.TREX_INSTALL_DIR}/scripts/ && ",
+                        f"./dpdk_nic_bind.py -u {itl_ports} || ",
+                        f"true\""
+                    )
+                    exec_cmd_no_error(
+                        tg_node, cmd, sudo=True,
+                        message=u"Unbind PCI ports from driver failed!"
+                    )
+                if mlx_ports:
+                    cmd = (
+                        f"sh -c \"cd {Constants.TREX_INSTALL_DIR}/scripts/ && ",
+                        f"./dpdk_nic_bind.py -b {mlx_driver} {mlx_ports} || ",
+                        f"true\""
+                    )
+                    exec_cmd_no_error(
+                        tg_node, cmd, sudo=True,
+                        message=u"Bind PCI ports from driver failed!"
+                    )
  
                  # Start TRex.
                  cd_cmd = f"cd '{Constants.TREX_INSTALL_DIR}/scripts/'"
@@ -559,7 +628,6 @@ class TrafficGenerator(AbstractMeasurer):
              # so we can compare with what telemetry suggests
              # the real duration was.
              logger.debug(f"Expected duration {computed_duration}")
-            computed_duration += 0.1115
          if not self.duration_limit:
              return computed_duration, True
          limited_duration = min(computed_duration, self.duration_limit)
@@ -625,6 +693,9 @@ class TrafficGenerator(AbstractMeasurer):
          )
          command_line.add_with_value(u"duration", f"{computed_duration!r}")
          command_line.add_with_value(u"frame_size", self.frame_size)
+        command_line.add_with_value(
+            u"n_data_frames", Constants.ASTF_N_DATA_FRAMES
+        )
          command_line.add_with_value(u"multiplier", multiplier)
          command_line.add_with_value(u"port_0", p_0)
          command_line.add_with_value(u"port_1", p_1)
@@ -634,6 +705,9 @@ class TrafficGenerator(AbstractMeasurer):
          command_line.add_if(u"async_start", async_call)
          command_line.add_if(u"latency", self.use_latency)
          command_line.add_if(u"force", Constants.TREX_SEND_FORCE)
+        command_line.add_with_value(
+            u"delay", Constants.PERF_TRIAL_ASTF_DELAY
+        )
  
          self._start_time = time.monotonic()
          self._rate = multiplier
@@ -741,6 +815,7 @@ class TrafficGenerator(AbstractMeasurer):
          command_line.add_if(u"async_start", async_call)
          command_line.add_if(u"latency", self.use_latency)
          command_line.add_if(u"force", Constants.TREX_SEND_FORCE)
+        command_line.add_with_value(u"delay", Constants.PERF_TRIAL_STL_DELAY)
  
          # TODO: This is ugly. Handle parsing better.
          self._start_time = time.monotonic()
@@ -789,7 +864,7 @@ class TrafficGenerator(AbstractMeasurer):
              use_latency=False,
              ramp_up_rate=None,
              ramp_up_duration=None,
-            state_timeout=300.0,
+            state_timeout=240.0,
              ramp_up_only=False,
          ):
          """Send traffic from all configured interfaces on TG.
@@ -962,8 +1037,8 @@ class TrafficGenerator(AbstractMeasurer):
          if self.ramp_up_rate:
              # Figure out whether we need to insert a ramp-up trial.
              # TODO: Give up on async_call=True?
-            if self.ramp_up_start is None:
-                # We never ramped up yet.
+            if ramp_up_only or self.ramp_up_start is None:
+                # We never ramped up yet (at least not in this test case).
                  ramp_up_needed = True
              else:
                  # We ramped up before, but maybe it was too long ago.
@@ -1040,7 +1115,7 @@ class TrafficGenerator(AbstractMeasurer):
          """
          if self._received is None:
              raise RuntimeError(u"The traffic generation has not been issued")
-        if self._received == u"0":
+        if self._received == 0:
              raise RuntimeError(u"No traffic forwarded")
  
      def partial_traffic_loss_accepted(
@@ -1193,7 +1268,7 @@ class TrafficGenerator(AbstractMeasurer):
          The target_tr field of ReceiveRateMeasurement is in
          transactions per second. Transmit count and loss count units
          depend on the transaction type. Usually they are in transactions
-        per second, or aggregate packets per second.
+        per second, or aggregated packets per second.
  
          TODO: Fail on running or already reported measurement.
  
@@ -1226,16 +1301,27 @@ class TrafficGenerator(AbstractMeasurer):
          if not target_duration:
              target_duration = approximated_duration
          transmit_rate = self._rate
+        unsent = 0
          if self.transaction_type == u"packet":
              partial_attempt_count = self._sent
-            expected_attempt_count = self._sent
-            fail_count = self._loss
+            packet_rate = transmit_rate * self.ppta
+            # We have a float. TRex way of rounding it is not obvious.
+            # The biggest source of mismatch is Inter Stream Gap.
+            # So the code tolerates 10 usec of missing packets.
+            expected_attempt_count = (target_duration - 1e-5) * packet_rate
+            expected_attempt_count = math.ceil(expected_attempt_count)
+            # TRex can send more.
+            expected_attempt_count = max(expected_attempt_count, self._sent)
+            unsent = expected_attempt_count - self._sent
+            pass_count = self._received
+            fail_count = expected_attempt_count - pass_count
          elif self.transaction_type == u"udp_cps":
              if not self.transaction_scale:
                  raise RuntimeError(u"Add support for no-limit udp_cps.")
              partial_attempt_count = self._l7_data[u"client"][u"sent"]
              # We do not care whether TG is slow, it should have attempted all.
              expected_attempt_count = self.transaction_scale
+            unsent = expected_attempt_count - partial_attempt_count
              pass_count = self._l7_data[u"client"][u"received"]
              fail_count = expected_attempt_count - pass_count
          elif self.transaction_type == u"tcp_cps":
@@ -1245,6 +1331,7 @@ class TrafficGenerator(AbstractMeasurer):
              partial_attempt_count = ctca
              # We do not care whether TG is slow, it should have attempted all.
              expected_attempt_count = self.transaction_scale
+            unsent = expected_attempt_count - partial_attempt_count
              # From TCP point of view, server/connects counts full connections,
              # but we are testing NAT session so client/connects counts that
              # (half connections from TCP point of view).
@@ -1255,7 +1342,8 @@ class TrafficGenerator(AbstractMeasurer):
                  raise RuntimeError(u"Add support for no-limit udp_pps.")
              partial_attempt_count = self._sent
              expected_attempt_count = self.transaction_scale * self.ppta
-            fail_count = self._loss + (expected_attempt_count - self._sent)
+            unsent = expected_attempt_count - self._sent
+            fail_count = self._loss + unsent
          elif self.transaction_type == u"tcp_pps":
              if not self.transaction_scale:
                  raise RuntimeError(u"Add support for no-limit tcp_pps.")
@@ -1268,9 +1356,13 @@ class TrafficGenerator(AbstractMeasurer):
              # A simple workaround is to add absolute difference.
              # Probability of retransmissions exactly cancelling
              # packets unsent due to duration stretching is quite low.
-            fail_count = self._loss + abs(expected_attempt_count - self._sent)
+            unsent = abs(expected_attempt_count - self._sent)
+            fail_count = self._loss + unsent
          else:
              raise RuntimeError(f"Unknown parsing {self.transaction_type!r}")
+        if unsent and isinstance(self._approximated_duration, float):
+            # Do not report unsent for "manual".
+            logger.debug(f"Unsent packets/transactions: {unsent}")
          if fail_count < 0 and not self.negative_loss:
              fail_count = 0
          measurement = ReceiveRateMeasurement(
@@ -1343,7 +1435,7 @@ class TrafficGenerator(AbstractMeasurer):
              use_latency=False,
              ramp_up_rate=None,
              ramp_up_duration=None,
-            state_timeout=300.0,
+            state_timeout=240.0,
          ):
          """Store values accessed by measure().
  
@@ -1412,7 +1504,7 @@ class OptimizedSearch:
      """Class to be imported as Robot Library, containing search keywords.
  
      Aside of setting up measurer and forwarding arguments,
-    the main business is to translate min/max rate from unidir to aggregate.
+    the main business is to translate min/max rate from unidir to aggregated.
      """
  
      @staticmethod
@@ -1426,8 +1518,7 @@ class OptimizedSearch:
              final_trial_duration=30.0,
              initial_trial_duration=1.0,
              number_of_intermediate_phases=2,
-            timeout=720.0,
-            doublings=1,
+            timeout=1200.0,
              ppta=1,
              resetter=None,
              traffic_directions=2,
@@ -1437,22 +1528,23 @@ class OptimizedSearch:
              use_latency=False,
              ramp_up_rate=None,
              ramp_up_duration=None,
-            state_timeout=300.0,
+            state_timeout=240.0,
+            expansion_coefficient=4.0,
      ):
          """Setup initialized TG, perform optimized search, return intervals.
  
-        If transaction_scale is nonzero, all non-init trial durations
-        are set to 2.0 (as they do not affect the real trial duration)
+        If transaction_scale is nonzero, all init and non-init trial durations
+        are set to 1.0 (as they do not affect the real trial duration)
          and zero intermediate phases are used.
-        The initial phase still uses 1.0 seconds, to force remeasurement.
-        That makes initial phase act as a warmup.
+        This way no re-measurement happens.
+        Warmup has to be handled via resetter or ramp-up mechanisms.
  
          :param frame_size: Frame size identifier or value [B].
          :param traffic_profile: Module name as a traffic profile identifier.
              See GPL/traffic_profiles/trex for implemented modules.
          :param minimum_transmit_rate: Minimal load in transactions per second.
          :param maximum_transmit_rate: Maximal load in transactions per second.
-        :param packet_loss_ratio: Fraction of packets lost, for PDR [1].
+        :param packet_loss_ratio: Ratio of packets lost, for PDR [1].
          :param final_relative_width: Final lower bound transmit rate
              cannot be more distant than this multiple of upper bound [1].
          :param final_trial_duration: Trial duration for the final phase [s].
@@ -1462,9 +1554,6 @@ class OptimizedSearch:
              to perform before the final phase [1].
          :param timeout: The search will fail itself when not finished
              before this overall time [s].
-        :param doublings: How many doublings to do in external search step.
-            Default 1 is suitable for fairly stable tests,
-            less stable tests might get better overal duration with 2 or more.
          :param ppta: Packets per transaction, aggregated over directions.
              Needed for udp_pps which does not have a good transaction counter,
              so we need to compute expected number of packets.
@@ -1483,6 +1572,7 @@ class OptimizedSearch:
          :param ramp_up_rate: Rate to use in ramp-up trials [pps].
          :param ramp_up_duration: Duration of ramp-up trials [s].
          :param state_timeout: Time of life of DUT state [s].
+        :param expansion_coefficient: In external search multiply width by this.
          :type frame_size: str or int
          :type traffic_profile: str
          :type minimum_transmit_rate: float
@@ -1493,7 +1583,6 @@ class OptimizedSearch:
          :type initial_trial_duration: float
          :type number_of_intermediate_phases: int
          :type timeout: float
-        :type doublings: int
          :type ppta: int
          :type resetter: Optional[Callable[[], None]]
          :type traffic_directions: int
@@ -1504,9 +1593,10 @@ class OptimizedSearch:
          :type ramp_up_rate: float
          :type ramp_up_duration: float
          :type state_timeout: float
+        :type expansion_coefficient: float
          :returns: Structure containing narrowed down NDR and PDR intervals
              and their measurements.
-        :rtype: NdrPdrResult
+        :rtype: List[ReceiveRateInterval]
          :raises RuntimeError: If total duration is larger than timeout.
          """
          # we need instance of TrafficGenerator instantiated by Robot Framework
@@ -1519,7 +1609,7 @@ class OptimizedSearch:
          #       even though this is surprising for log readers.
          if transaction_scale:
              initial_trial_duration = 1.0
-            final_trial_duration = 2.0
+            final_trial_duration = 1.0
              number_of_intermediate_phases = 0
              timeout += transaction_scale * 3e-4
          tg_instance.set_rate_provider_defaults(
@@ -1544,14 +1634,20 @@ class OptimizedSearch:
              number_of_intermediate_phases=number_of_intermediate_phases,
              initial_trial_duration=initial_trial_duration,
              timeout=timeout,
-            doublings=doublings,
+            debug=logger.debug,
+            expansion_coefficient=expansion_coefficient,
          )
-        result = algorithm.narrow_down_ndr_and_pdr(
+        if packet_loss_ratio:
+            packet_loss_ratios = [0.0, packet_loss_ratio]
+        else:
+            # Happens in reconf tests.
+            packet_loss_ratios = [packet_loss_ratio]
+        results = algorithm.narrow_down_intervals(
              min_rate=minimum_transmit_rate,
              max_rate=maximum_transmit_rate,
-            packet_loss_ratio=packet_loss_ratio,
+            packet_loss_ratios=packet_loss_ratios,
          )
-        return result
+        return results
  
      @staticmethod
      def perform_soak_search(
@@ -1573,7 +1669,7 @@ class OptimizedSearch:
              use_latency=False,
              ramp_up_rate=None,
              ramp_up_duration=None,
-            state_timeout=300.0,
+            state_timeout=240.0,
      ):
          """Setup initialized TG, perform soak search, return avg and stdev.
  
@@ -1582,7 +1678,7 @@ class OptimizedSearch:
              See GPL/traffic_profiles/trex for implemented modules.
          :param minimum_transmit_rate: Minimal load in transactions per second.
          :param maximum_transmit_rate: Maximal load in transactions per second.
-        :param plr_target: Fraction of packets lost to achieve [1].
+        :param plr_target: Ratio of packets lost to achieve [1].
          :param tdpt: Trial duration per trial.
              The algorithm linearly increases trial duration with trial number,
              this is the increment between successive trials, in seconds.
@@ -1631,7 +1727,7 @@ class OptimizedSearch:
          :type ramp_up_rate: float
          :type ramp_up_duration: float
          :type state_timeout: float
-        :returns: Average and stdev of estimated aggregate rate giving PLR.
+        :returns: Average and stdev of estimated aggregated rate giving PLR.
          :rtype: 2-tuple of float
          """
          tg_instance = BuiltIn().get_library_instance(