Revert "fix(jobspec): Delete ipsec nfv density tests"
diff --git a/resources/libraries/python/ContainerUtils.py b/resources/libraries/python/ContainerUtils.py
index 6e413c6..fc32248 100644
--- a/resources/libraries/python/ContainerUtils.py
+++ b/resources/libraries/python/ContainerUtils.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -23,9 +23,11 @@ from robot.libraries.BuiltIn import BuiltIn
 
 from resources.libraries.python.Constants import Constants
 from resources.libraries.python.CpuUtils import CpuUtils
+from resources.libraries.python.PapiExecutor import PapiSocketExecutor
 from resources.libraries.python.ssh import SSH
 from resources.libraries.python.topology import Topology, SocketType
 from resources.libraries.python.VppConfigGenerator import VppConfigGenerator
+from resources.libraries.python.VPPUtil import VPPUtil
 
 
 __all__ = [
@@ -141,23 +143,52 @@ class ContainerManager:
             self.engine.container = self.containers[container]
             self.engine.execute(command)
 
-    def start_vpp_in_all_containers(self):
+    def start_vpp_in_all_containers(self, verify=True):
         """Start VPP in all containers."""
         for container in self.containers:
             self.engine.container = self.containers[container]
-            self.engine.start_vpp()
+            # For multiple containers, delayed verify is faster.
+            self.engine.start_vpp(verify=False)
+        if verify:
+            self.verify_vpp_in_all_containers()
 
-    def restart_vpp_in_all_containers(self):
+    def _disconnect_papi_to_all_containers(self):
+        """Disconnect any open PAPI connections to VPPs in containers.
+
+        The current PAPI implementation caches open connections,
+        so explicit disconnect is needed before VPP becomes inaccessible.
+
+        Currently this is a protected method, as restart, stop and destroy
+        are the only dangerous methods, and all are handled by ContainerManager.
+        """
+        for container_object in self.containers.values():
+            PapiSocketExecutor.disconnect_by_node_and_socket(
+                container_object.node,
+                container_object.api_socket,
+            )
+
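The ordering matters more than it looks. Below is a sketch of the hazard the explicit disconnect avoids; only the ContainerManager methods are from this change, the driver sequence is hypothetical:

```python
# Hypothetical driver sequence; manager is a ContainerManager instance.
manager.start_vpp_in_all_containers()    # verify step caches PAPI connections
manager.restart_vpp_in_all_containers()  # disconnects cached PAPI, then restarts
# Without the explicit disconnect, the next PAPI call would reuse a cached
# connection to an api.sock owned by the old, now dead, VPP process.
```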
+    def restart_vpp_in_all_containers(self, verify=True):
         """Restart VPP in all containers."""
+        self._disconnect_papi_to_all_containers()
         for container in self.containers:
             self.engine.container = self.containers[container]
-            self.engine.restart_vpp()
+            # For multiple containers, delayed verify is faster.
+            self.engine.restart_vpp(verify=False)
+        if verify:
+            self.verify_vpp_in_all_containers()
 
     def verify_vpp_in_all_containers(self):
         """Verify that VPP is installed and running in all containers."""
+        # For multiple containers, separate per-step loops are faster.
         for container in self.containers:
             self.engine.container = self.containers[container]
-            self.engine.verify_vpp()
+            self.engine.verify_vppctl()
+        for container in self.containers:
+            self.engine.container = self.containers[container]
+            self.engine.adjust_privileges()
+        for container in self.containers:
+            self.engine.container = self.containers[container]
+            self.engine.verify_vpp_papi()
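The phased passes let the per-container startup delays overlap: while the first container is polled over vppctl, the remaining VPPs keep booting, so roughly only the slowest startup is waited for. A generic sketch of the same phasing (names hypothetical):

```python
def verify_all(engines):
    # Phase 1: the slow part; all containers keep booting while one is polled.
    for engine in engines:
        engine.verify_vppctl()
    # Phase 2: cheap permission fixup, valid only after VPP created /run/vpp.
    for engine in engines:
        engine.adjust_privileges()
    # Phase 3: PAPI connects quickly once every VPP is already up.
    for engine in engines:
        engine.verify_vpp_papi()
```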
 
     def configure_vpp_in_all_containers(self, chain_topology, **kwargs):
         """Configure VPP in all containers.
@@ -174,8 +205,8 @@ class ContainerManager:
         dut_cnt = len(
             Counter(
                 [
-                    self.containers[container].node[u"host"]
-                    for container in self.containers
+                    f"{container.node['host']}{container.node['port']}"
+                    for container in self.containers.values()
                 ]
             )
         )
@@ -225,6 +256,11 @@ class ContainerManager:
                 self._configure_vpp_chain_ipsec(
                     mid1=mid1, mid2=mid2, sid1=sid1, sid2=sid2,
                     guest_dir=guest_dir, nf_instance=idx, **kwargs)
+            elif chain_topology == u"chain_dma":
+                self._configure_vpp_chain_dma(
+                    mid1=mid1, mid2=mid2, sid1=sid1, sid2=sid2,
+                    guest_dir=guest_dir, **kwargs
+                )
             else:
                 raise RuntimeError(
                     f"Container topology {chain_topology} not implemented"
@@ -247,6 +283,25 @@ class ContainerManager:
             f"{self.engine.container.name}-{kwargs[u'sid2']}"
         )
 
+    def _configure_vpp_chain_dma(self, **kwargs):
+        """Configure VPP in chain topology with l2xc (dma).
+
+        :param kwargs: Named parameters.
+        :type kwargs: dict
+        """
+        dma_wqs = kwargs[f"dma_wqs"]
+        self.engine.create_vpp_startup_config_dma(dma_wqs)
+
+        self.engine.create_vpp_exec_config(
+            u"memif_create_chain_dma.exec",
+            mid1=kwargs[u"mid1"], mid2=kwargs[u"mid2"],
+            sid1=kwargs[u"sid1"], sid2=kwargs[u"sid2"],
+            socket1=f"{kwargs[u'guest_dir']}/memif-"
+            f"{self.engine.container.name}-{kwargs[u'sid1']}",
+            socket2=f"{kwargs[u'guest_dir']}/memif-"
+            f"{self.engine.container.name}-{kwargs[u'sid2']}"
+        )
+
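A hedged usage sketch: `chain_topology` and `dma_wqs` are the keyword names used here, while the manager setup and the work-queue values are placeholder assumptions:

```python
manager = ContainerManager(engine=u"Docker")  # container setup elided
manager.configure_vpp_in_all_containers(
    chain_topology=u"chain_dma",
    dma_wqs=[u"wq0.0", u"wq0.1"],  # example Intel DSA work queues (assumed)
)
```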
     def _configure_vpp_cross_horiz(self, **kwargs):
         """Configure VPP in cross horizontal topology (single memif).
 
@@ -297,10 +352,10 @@ class ContainerManager:
         """
         self.engine.create_vpp_startup_config()
 
-        vif1_mac = kwargs[u"tg_if1_mac"] \
+        vif1_mac = kwargs[u"tg_pf1_mac"] \
             if (kwargs[u"mid1"] - 1) % kwargs[u"nodes"] + 1 == 1 \
             else f"52:54:00:00:{(kwargs[u'mid1'] - 1):02X}:02"
-        vif2_mac = kwargs[u"tg_if2_mac"] \
+        vif2_mac = kwargs[u"tg_pf2_mac"] \
             if (kwargs[u"mid2"] - 1) % kwargs[u"nodes"] + 1 == kwargs[u"nodes"]\
             else f"52:54:00:00:{(kwargs['mid2'] + 1):02X}:01"
         self.engine.create_vpp_exec_config(
@@ -332,11 +387,11 @@ class ContainerManager:
                 self.engine.container.node, kwargs[u"dut1_if2"])
             if_black_name = Topology.get_interface_name(
                 self.engine.container.node, kwargs[u"dut1_if1"])
-            tg_if_ip4 = kwargs[u"tg_if2_ip4"]
-            tg_if_mac = kwargs[u"tg_if2_mac"]
+            tg_pf_ip4 = kwargs[u"tg_pf2_ip4"]
+            tg_pf_mac = kwargs[u"tg_pf2_mac"]
         else:
-            tg_if_ip4 = kwargs[u"tg_if1_ip4"]
-            tg_if_mac = kwargs[u"tg_if1_mac"]
+            tg_pf_ip4 = kwargs[u"tg_pf1_ip4"]
+            tg_pf_mac = kwargs[u"tg_pf1_mac"]
             if1_pci = Topology.get_interface_pci_addr(
                 self.engine.container.node, kwargs[u"dut2_if1"])
             if2_pci = Topology.get_interface_pci_addr(
@@ -368,7 +423,7 @@ class ContainerManager:
                 f"create interface memif id {i} socket-id 2 master\n"
                 f"set interface state memif2/{i} up\n"
                 f"set interface l2 bridge memif2/{i} 2\n"
-                f"set ip arp memif2/{i} {tg_if_ip4} {tg_if_mac} "
+                f"set ip neighbor memif2/{i} {tg_pf_ip4} {tg_pf_mac} "
                 f"static\n\n"
             )
 
@@ -401,8 +456,8 @@ class ContainerManager:
             tnl_local_ip = f"{local_ip_base}.{nf_instance + 100}"
             tnl_remote_ip = f"{local_ip_base}.{nf_instance}"
             remote_ip_base = kwargs[u"dut1_if1_ip4"].rsplit(u".", 1)[0]
-            tg_if_ip4 = kwargs[u"tg_if1_ip4"]
-            tg_if_mac = kwargs[u"tg_if1_mac"]
+            tg_pf_ip4 = kwargs[u"tg_pf1_ip4"]
+            tg_pf_mac = kwargs[u"tg_pf1_mac"]
             raddr_ip4 = kwargs[u"laddr_ip4"]
             l_mac1 = 17
             l_mac2 = 18
@@ -411,8 +466,8 @@ class ContainerManager:
             tnl_local_ip = f"{local_ip_base}.{nf_instance}"
             tnl_remote_ip = f"{local_ip_base}.{nf_instance + 100}"
             remote_ip_base = kwargs[u"dut2_if2_ip4"].rsplit(u".", 1)[0]
-            tg_if_ip4 = kwargs[u"tg_if2_ip4"]
-            tg_if_mac = kwargs[u"tg_if2_mac"]
+            tg_pf_ip4 = kwargs[u"tg_pf2_ip4"]
+            tg_pf_mac = kwargs[u"tg_pf2_mac"]
             raddr_ip4 = kwargs[u"raddr_ip4"]
             l_mac1 = 1
             l_mac2 = 2
@@ -428,8 +483,8 @@ class ContainerManager:
             sid2=u"2",
             mac1=f"02:02:00:00:{l_mac1:02X}:{(nf_instance - 1):02X}",
             mac2=f"02:02:00:00:{l_mac2:02X}:{(nf_instance - 1):02X}",
-            tg_if2_ip4=tg_if_ip4,
-            tg_if2_mac=tg_if_mac,
+            tg_pf2_ip4=tg_pf_ip4,
+            tg_pf2_mac=tg_pf_mac,
             raddr_ip4=raddr_ip4,
             tnl_local_ip=tnl_local_ip,
             tnl_remote_ip=tnl_remote_ip,
@@ -455,10 +510,10 @@ class ContainerManager:
         role2 = u"master" if node == kwargs[u"nodes"] else u"slave"
         kwargs[u"mid2"] = kwargs[u"mid2"] \
             if node == kwargs[u"nodes"] else kwargs[u"mid2"] + 1
-        vif1_mac = kwargs[u"tg_if1_mac"] \
+        vif1_mac = kwargs[u"tg_pf1_mac"] \
             if (kwargs[u"mid1"] - 1) % kwargs[u"nodes"] + 1 == 1 \
             else f"52:54:00:00:{(kwargs[u'mid1'] - 1):02X}:02"
-        vif2_mac = kwargs[u"tg_if2_mac"] \
+        vif2_mac = kwargs[u"tg_pf2_mac"] \
             if (kwargs[u"mid2"] - 1) % kwargs[u"nodes"] + 1 == kwargs[u"nodes"]\
             else f"52:54:00:00:{(kwargs[u'mid2'] + 1):02X}:01"
         socket1 = f"{kwargs[u'guest_dir']}/memif-{self.engine.container.name}-"\
@@ -481,12 +536,16 @@ class ContainerManager:
 
     def stop_all_containers(self):
         """Stop all containers."""
+        # TODO: Rework if containers can be affected outside ContainerManager.
+        self._disconnect_papi_to_all_containers()
         for container in self.containers:
             self.engine.container = self.containers[container]
             self.engine.stop()
 
     def destroy_all_containers(self):
         """Destroy all containers."""
+        # TODO: Rework if containers can be affected outside ContainerManager.
+        self._disconnect_papi_to_all_containers()
         for container in self.containers:
             self.engine.container = self.containers[container]
             self.engine.destroy()
@@ -543,54 +602,105 @@ class ContainerEngine:
         """System info."""
         raise NotImplementedError
 
-    def start_vpp(self):
+    def start_vpp(self, verify=True):
         """Start VPP inside a container."""
         self.execute(
-            u"setsid /usr/bin/vpp -c /etc/vpp/startup.conf "
-            u">/tmp/vppd.log 2>&1 < /dev/null &")
+            u"/usr/bin/vpp -c /etc/vpp/startup.conf")
 
         topo_instance = BuiltIn().get_library_instance(
             u"resources.libraries.python.topology.Topology"
         )
+        topo_instance.add_new_socket(
+            self.container.node,
+            SocketType.CLI,
+            self.container.name,
+            self.container.cli_socket,
+        )
         topo_instance.add_new_socket(
             self.container.node,
             SocketType.PAPI,
             self.container.name,
-            f"/tmp/vpp_sockets/{self.container.name}/api.sock"
+            self.container.api_socket,
         )
         topo_instance.add_new_socket(
             self.container.node,
             SocketType.STATS,
             self.container.name,
-            f"/tmp/vpp_sockets/{self.container.name}/stats.sock"
+            self.container.stats_socket,
         )
+        if verify:
+            self.verify_vpp()
 
-    def restart_vpp(self):
+    def restart_vpp(self, verify=True):
         """Restart VPP service inside a container."""
         self.execute(u"pkill vpp")
-        self.start_vpp()
-        self.execute(u"cat /tmp/vppd.log")
+        self.start_vpp(verify=verify)
+
+    def verify_vpp(self):
+        """Verify VPP is running and ready."""
+        self.verify_vppctl()
+        self.adjust_privileges()
+        self.verify_vpp_papi()
 
     # TODO Rewrite to use the VPPUtil.py functionality and remove this.
-    def verify_vpp(self, retries=120, retry_wait=1):
+    def verify_vppctl(self, retries=120, retry_wait=1):
         """Verify that VPP is installed and running inside container.
 
+        This function waits a while so VPP can start.
+        PCI interfaces are listed for debug purposes.
+        When the check passes, the VPP API socket is created on the remote
+        side, but its directory may not have the correct access rights yet.
+
         :param retries: Check for VPP for this number of times. Default: 120
         :param retry_wait: Wait for this number of seconds between retries.
         """
-        cmd = (u"vppctl show pci 2>&1 | "
-               u"fgrep -v 'Connection refused' | "
-               u"fgrep -v 'No such file or directory'")
+        for _ in range(retries + 1):
+            try:
+                # Execute puts the command into single quotes,
+                # so inner arguments are enclosed in double quotes here.
+                self.execute(
+                    u'/usr/bin/vppctl show pci 2>&1 | '
+                    u'fgrep -v "Connection refused" | '
+                    u'fgrep -v "No such file or directory"'
+                )
+                break
+            except (RuntimeError, AssertionError):
+                sleep(retry_wait)
+        else:
+            self.execute(u"cat /tmp/vppd.log")
+            raise RuntimeError(
+                f"VPP did not come up in container: {self.container.name}"
+            )
+
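The retry loop leans on Python's for/else: the else branch runs only when the loop finishes without hitting break, i.e. when every attempt failed. A self-contained illustration of the idiom:

```python
import time

def wait_until(check, retries=120, retry_wait=1):
    """Call check() until it stops raising, or give up."""
    for _ in range(retries + 1):
        try:
            check()
            break                  # success; the else clause is skipped
        except (RuntimeError, AssertionError):
            time.sleep(retry_wait)
    else:                          # runs only if the loop never hit break
        raise RuntimeError(u"check did not succeed in time")
```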
+    def adjust_privileges(self):
+        """Adjust privileges to control VPP without sudo."""
+        self.execute("chmod -R o+rwx /run/vpp")
+
+    def verify_vpp_papi(self, retries=120, retry_wait=1):
+        """Verify that VPP is available for PAPI.
 
+        This also opens and caches a PAPI connection for quick reuse.
+        The connection is disconnected when ContainerManager decides to do so.
+
+        :param retries: Check for VPP for this number of times. Default: 120
+        :param retry_wait: Wait for this number of seconds between retries.
+        """
+        # Wait for success.
         for _ in range(retries + 1):
             try:
-                self.execute(cmd)
+                VPPUtil.vpp_show_version(
+                    node=self.container.node,
+                    remote_vpp_socket=self.container.api_socket,
+                    log=False,
+                )
                 break
-            except RuntimeError:
+            except (RuntimeError, AssertionError):
                 sleep(retry_wait)
         else:
-            msg = f"VPP did not come up in container: {self.container.name}"
-            raise RuntimeError(msg)
+            self.execute(u"cat /tmp/vppd.log")
+            raise RuntimeError(
+                f"VPP PAPI fails in container: {self.container.name}"
+            )
 
     def create_base_vpp_startup_config(self, cpuset_cpus=None):
         """Create base startup configuration of VPP on container.
@@ -607,10 +717,8 @@ class ContainerEngine:
         vpp_config = VppConfigGenerator()
         vpp_config.set_node(self.container.node)
         vpp_config.add_unix_cli_listen()
-        vpp_config.add_unix_nodaemon()
         vpp_config.add_unix_exec(u"/tmp/running.exec")
         vpp_config.add_socksvr(socket=Constants.SOCKSVR_PATH)
-        vpp_config.add_statseg_per_node_counters(value=u"on")
         if cpuset_cpus:
             # We will pop the first core from the list to be a main core
             vpp_config.add_cpu_main_core(str(cpuset_cpus.pop(0)))
@@ -620,9 +728,13 @@ class ContainerEngine:
         vpp_config.add_buffers_per_numa(215040)
         vpp_config.add_plugin(u"disable", u"default")
         vpp_config.add_plugin(u"enable", u"memif_plugin.so")
-        vpp_config.add_heapsize(u"4G")
-        vpp_config.add_ip_heap_size(u"4G")
-        vpp_config.add_statseg_size(u"4G")
+        vpp_config.add_plugin(u"enable", u"perfmon_plugin.so")
+        vpp_config.add_main_heap_size(u"2G")
+        vpp_config.add_main_heap_page_size(self.container.page_size)
+        vpp_config.add_default_hugepage_size(self.container.page_size)
+        vpp_config.add_statseg_size(u"2G")
+        vpp_config.add_statseg_page_size(self.container.page_size)
+        vpp_config.add_statseg_per_node_counters(u"on")
 
         return vpp_config
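For orientation, the resulting /etc/vpp/startup.conf should contain stanzas roughly like the following; the socket paths and the 2M page size are assumptions (they come from Constants.SOCKSVR_PATH and container.page_size):

```
unix {
  cli-listen /run/vpp/cli.sock
  exec /tmp/running.exec
}
socksvr {
  socket-name /run/vpp/api.sock
}
memory {
  main-heap-size 2G
  main-heap-page-size 2M
  default-hugepage-size 2M
}
statseg {
  size 2G
  page-size 2M
  per-node-counters on
}
buffers {
  buffers-per-numa 215040
}
plugins {
  plugin default { disable }
  plugin memif_plugin.so { enable }
  plugin perfmon_plugin.so { enable }
}
```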
 
@@ -654,6 +766,7 @@ class ContainerEngine:
         vpp_config.add_dpdk_no_tx_checksum_offload()
         vpp_config.add_dpdk_dev_default_rxq(rxq)
         vpp_config.add_plugin(u"enable", u"dpdk_plugin.so")
+        vpp_config.add_plugin(u"enable", u"perfmon_plugin.so")
 
         # Apply configuration
         self.execute(u"mkdir -p /etc/vpp/")
@@ -668,9 +781,26 @@ class ContainerEngine:
         :type cpuset_cpus: list
         """
         vpp_config = self.create_base_vpp_startup_config(cpuset_cpus)
-        vpp_config.add_plugin(u"enable", u"crypto_ia32_plugin.so")
+        vpp_config.add_plugin(u"enable", u"crypto_native_plugin.so")
         vpp_config.add_plugin(u"enable", u"crypto_ipsecmb_plugin.so")
         vpp_config.add_plugin(u"enable", u"crypto_openssl_plugin.so")
+        vpp_config.add_plugin(u"enable", u"perfmon_plugin.so")
+
+        # Apply configuration
+        self.execute(u"mkdir -p /etc/vpp/")
+        self.execute(
+            f'echo "{vpp_config.get_config_str()}" | tee /etc/vpp/startup.conf'
+        )
+
+    def create_vpp_startup_config_dma(self, dma_devices):
+        """Create startup configuration of VPP DMA.
+
+        :param dma_devices: DMA devices list.
+        :type dma_devices: list
+        """
+        vpp_config = self.create_base_vpp_startup_config()
+        vpp_config.add_plugin(u"enable", u"dma_intel_plugin.so")
+        vpp_config.add_dma_dev(dma_devices)
 
         # Apply configuration
         self.execute(u"mkdir -p /etc/vpp/")
@@ -689,7 +819,7 @@ class ContainerEngine:
         running = u"/tmp/running.exec"
         template = f"{Constants.RESOURCES_TPL_CONTAINER}/{template_file}"
 
-        with open(template, "r") as src_file:
+        with open(template, u"rt") as src_file:
             src = Template(src_file.read())
             self.execute(f'echo "{src.safe_substitute(**kwargs)}" > {running}')
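string.Template.safe_substitute (stdlib) fills $-placeholders and, unlike substitute, leaves any unresolved ones intact instead of raising, which is why partially filled templates still render:

```python
from string import Template

# Illustrative template text, not the actual .exec template content.
src = Template(u"create memif socket id 1 filename $socket1\n")
print(src.safe_substitute(socket1=u"/tmp/memif-DUT1_CNF1-1"))
# Unknown placeholders such as $socket2 would pass through unchanged.
```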
 
@@ -715,18 +845,6 @@ class ContainerEngine:
         :type name: str
         :raises RuntimeError: If applying cgroup settings via cgset failed.
         """
-        ret, _, _ = self.container.ssh.exec_command_sudo(
-            u"cgset -r cpuset.cpu_exclusive=0 /"
-        )
-        if int(ret) != 0:
-            raise RuntimeError(u"Failed to apply cgroup settings.")
-
-        ret, _, _ = self.container.ssh.exec_command_sudo(
-            u"cgset -r cpuset.mem_exclusive=0 /"
-        )
-        if int(ret) != 0:
-            raise RuntimeError(u"Failed to apply cgroup settings.")
-
         ret, _, _ = self.container.ssh.exec_command_sudo(
             f"cgcreate -g cpuset:/{name}"
         )
@@ -734,13 +852,13 @@ class ContainerEngine:
             raise RuntimeError(u"Failed to copy cgroup settings from root.")
 
         ret, _, _ = self.container.ssh.exec_command_sudo(
-            f"cgset -r cpuset.cpu_exclusive=0 /{name}"
+            f"cgset -r cpuset.cpus=0 /{name}"
         )
         if int(ret) != 0:
             raise RuntimeError(u"Failed to apply cgroup settings.")
 
         ret, _, _ = self.container.ssh.exec_command_sudo(
-            f"cgset -r cpuset.mem_exclusive=0 /{name}"
+            f"cgset -r cpuset.mems=0 /{name}"
         )
         if int(ret) != 0:
             raise RuntimeError(u"Failed to apply cgroup settings.")
@@ -771,7 +889,7 @@ class LXC(ContainerEngine):
             else u"amd64"
 
         image = self.container.image if self.container.image \
-            else f"-d ubuntu -r bionic -a {target_arch}"
+            else f"-d ubuntu -r jammy -a {target_arch}"
 
         cmd = f"lxc-create -t download --name {self.container.name} " \
             f"-- {image} --no-validate"
@@ -993,13 +1111,13 @@ class Docker(ContainerEngine):
                 else Constants.DOCKER_SUT_IMAGE_UBUNTU
             setattr(self.container, u"image", img)
 
-        cmd = f"docker pull {self.container.image}"
-
-        ret, _, _ = self.container.ssh.exec_command_sudo(cmd, timeout=1800)
-        if int(ret) != 0:
-            raise RuntimeError(
-                f"Failed to create container {self.container.name}."
-            )
+        if "/" in self.container.image:
+            cmd = f"docker pull {self.container.image}"
+            ret, _, _ = self.container.ssh.exec_command_sudo(cmd, timeout=1800)
+            if int(ret) != 0:
+                raise RuntimeError(
+                    f"Failed to create container {self.container.name}."
+                )
 
         if self.container.cpuset_cpus:
             self._configure_cgroup(u"docker")
@@ -1036,8 +1154,8 @@ class Docker(ContainerEngine):
             if self.container.mnt else u""
 
         cmd = f"docker run --privileged --detach --interactive --tty --rm " \
-            f"--cgroup-parent docker {cpuset_cpus} {cpuset_mems} {publish} " \
-            f"{env} {volume} --name {self.container.name} " \
+            f"--cgroup-parent docker.slice {cpuset_cpus} {cpuset_mems} " \
+            f"{publish} {env} {volume} --name {self.container.name} " \
             f"{self.container.image} {command}"
 
         ret, _, _ = self.container.ssh.exec_command_sudo(cmd)
@@ -1180,8 +1298,19 @@ class Container:
         except KeyError:
             # Creating new attribute
             if attr == u"node":
+                # Create and cache a connected SSH instance.
                 self.__dict__[u"ssh"] = SSH()
                 self.__dict__[u"ssh"].connect(value)
+            elif attr == u"name":
+                # Socket paths are derived from the name once, so they carry
+                # no mutable state; this just saves horizontal space in callers.
+                # TODO: Rename the dir so other apps can add sockets easily.
+                # E.g. f"/tmp/app_sockets/{value}/vpp_api.sock"
+                path = f"/tmp/vpp_sockets/{value}"
+                self.__dict__[u"socket_dir"] = path
+                self.__dict__[u"api_socket"] = f"{path}/api.sock"
+                self.__dict__[u"cli_socket"] = f"{path}/cli.sock"
+                self.__dict__[u"stats_socket"] = f"{path}/stats.sock"
             self.__dict__[attr] = value
         else:
             # Updating attribute base of type
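A short usage sketch of the side effect above (direct instantiation assumed):

```python
c = Container()
c.name = u"DUT1_CNF1"
print(c.api_socket)    # /tmp/vpp_sockets/DUT1_CNF1/api.sock
print(c.stats_socket)  # /tmp/vpp_sockets/DUT1_CNF1/stats.sock
```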