PAPI: Cache connected client instances
[csit.git] / resources / libraries / python / ContainerUtils.py
index 59d98aa..3d70684 100644 (file)
@@ -23,9 +23,11 @@ from robot.libraries.BuiltIn import BuiltIn
 
 from resources.libraries.python.Constants import Constants
 from resources.libraries.python.CpuUtils import CpuUtils
+from resources.libraries.python.PapiExecutor import PapiSocketExecutor
 from resources.libraries.python.ssh import SSH
 from resources.libraries.python.topology import Topology, SocketType
 from resources.libraries.python.VppConfigGenerator import VppConfigGenerator
+from resources.libraries.python.VPPUtil import VPPUtil
 
 
 __all__ = [
@@ -141,23 +143,52 @@ class ContainerManager:
             self.engine.container = self.containers[container]
             self.engine.execute(command)
 
-    def start_vpp_in_all_containers(self):
+    def start_vpp_in_all_containers(self, verify=True):
         """Start VPP in all containers."""
         for container in self.containers:
             self.engine.container = self.containers[container]
-            self.engine.start_vpp()
+            # For multiple containers, delayed verify is faster.
+            self.engine.start_vpp(verify=False)
+        if verify:
+            self.verify_vpp_in_all_containers()
 
-    def restart_vpp_in_all_containers(self):
+    def _disconnect_papi_to_all_containers(self):
+        """Disconnect any open PAPI connections to VPPs in containers.
+
+        The current PAPI implementation caches open connections,
+        so explicit disconnect is needed before VPP becomes inaccessible.
+
+        Currently this is a protected method, as restart, stop and destroy
+        are the only dangerous methods, and all are handled by ContainerManager.
+        """
+        for container_object in self.containers.values():
+            PapiSocketExecutor.disconnect_by_node_and_socket(
+                container_object.node,
+                container_object.api_socket,
+            )
+
+    def restart_vpp_in_all_containers(self, verify=True):
         """Restart VPP in all containers."""
+        self._disconnect_papi_to_all_containers()
         for container in self.containers:
             self.engine.container = self.containers[container]
-            self.engine.restart_vpp()
+            # For multiple containers, delayed verify is faster.
+            self.engine.restart_vpp(verify=False)
+        if verify:
+            self.verify_vpp_in_all_containers()
 
     def verify_vpp_in_all_containers(self):
         """Verify that VPP is installed and running in all containers."""
+        # For multiple containers, multiple fors are faster.
+        for container in self.containers:
+            self.engine.container = self.containers[container]
+            self.engine.verify_vppctl()
         for container in self.containers:
             self.engine.container = self.containers[container]
-            self.engine.verify_vpp()
+            self.engine.adjust_privileges()
+        for container in self.containers:
+            self.engine.container = self.containers[container]
+            self.engine.verify_vpp_papi()
 
     def configure_vpp_in_all_containers(self, chain_topology, **kwargs):
         """Configure VPP in all containers.
@@ -481,12 +512,16 @@ class ContainerManager:
 
     def stop_all_containers(self):
         """Stop all containers."""
+        # TODO: Rework if containers can be affected outside ContainerManager.
+        self._disconnect_papi_to_all_containers()
         for container in self.containers:
             self.engine.container = self.containers[container]
             self.engine.stop()
 
     def destroy_all_containers(self):
         """Destroy all containers."""
+        # TODO: Rework if containers can be affected outside ContainerManager.
+        self._disconnect_papi_to_all_containers()
         for container in self.containers:
             self.engine.container = self.containers[container]
             self.engine.destroy()
@@ -543,7 +578,7 @@ class ContainerEngine:
         """System info."""
         raise NotImplementedError
 
-    def start_vpp(self):
+    def start_vpp(self, verify=True):
         """Start VPP inside a container."""
         self.execute(
             u"setsid /usr/bin/vpp -c /etc/vpp/startup.conf "
@@ -556,38 +591,51 @@ class ContainerEngine:
             self.container.node,
             SocketType.PAPI,
             self.container.name,
-            f"/tmp/vpp_sockets/{self.container.name}/api.sock"
+            self.container.api_socket,
         )
         topo_instance.add_new_socket(
             self.container.node,
             SocketType.STATS,
             self.container.name,
-            f"/tmp/vpp_sockets/{self.container.name}/stats.sock"
+            self.container.stats_socket,
         )
-        self.verify_vpp()
-        self.adjust_privileges()
+        if verify:
+            self.verify_vpp()
 
-    def restart_vpp(self):
+    def restart_vpp(self, verify=True):
         """Restart VPP service inside a container."""
         self.execute(u"pkill vpp")
-        self.start_vpp()
+        self.start_vpp(verify=verify)
+
+    def verify_vpp(self):
+        """Verify VPP is running and ready."""
+        self.verify_vppctl()
+        self.adjust_privileges()
+        self.verify_vpp_papi()
 
     # TODO Rewrite to use the VPPUtil.py functionality and remove this.
-    def verify_vpp(self, retries=120, retry_wait=1):
+    def verify_vppctl(self, retries=120, retry_wait=1):
         """Verify that VPP is installed and running inside container.
 
+        This function waits a while so VPP can start.
+        PCI interfaces are listed for debug purposes.
+        When the check passes, VPP API socket is created on remote side,
+        but perhaps its directory does not have the correct access rights yet.
+
         :param retries: Check for VPP for this number of times Default: 120
         :param retry_wait: Wait for this number of seconds between retries.
         """
         for _ in range(retries + 1):
             try:
+                # Execute puts the command into single quotes,
+                # so inner arguments are enclosed in qouble quotes here.
                 self.execute(
-                    u"vppctl show pci 2>&1 | "
-                    u"fgrep -v 'Connection refused' | "
-                    u"fgrep -v 'No such file or directory'"
+                    u'vppctl show pci 2>&1 | '
+                    u'fgrep -v "Connection refused" | '
+                    u'fgrep -v "No such file or directory"'
                 )
                 break
-            except RuntimeError:
+            except (RuntimeError, AssertionError):
                 sleep(retry_wait)
         else:
             self.execute(u"cat /tmp/vppd.log")
@@ -599,6 +647,32 @@ class ContainerEngine:
         """Adjust privileges to control VPP without sudo."""
         self.execute("chmod -R o+rwx /run/vpp")
 
+    def verify_vpp_papi(self, retries=120, retry_wait=1):
+        """Verify that VPP is available for PAPI.
+
+        This also opens and caches PAPI connection for quick reuse.
+        The connection is disconnected when ContainerManager decides to do so.
+
+        :param retries: Check for VPP for this number of times Default: 120
+        :param retry_wait: Wait for this number of seconds between retries.
+        """
+        # Wait for success.
+        for _ in range(retries + 1):
+            try:
+                VPPUtil.vpp_show_version(
+                    node=self.container.node,
+                    remote_vpp_socket=self.container.api_socket,
+                    log=False,
+                )
+                break
+            except (RuntimeError, AssertionError):
+                sleep(retry_wait)
+        else:
+            self.execute(u"cat /tmp/vppd.log")
+            raise RuntimeError(
+                f"VPP PAPI fails in container: {self.container.name}"
+            )
+
     def create_base_vpp_startup_config(self, cpuset_cpus=None):
         """Create base startup configuration of VPP on container.
 
@@ -1188,8 +1262,18 @@ class Container:
         except KeyError:
             # Creating new attribute
             if attr == u"node":
+                # Create and cache a connected SSH instance.
                 self.__dict__[u"ssh"] = SSH()
                 self.__dict__[u"ssh"].connect(value)
+            elif attr == u"name":
+                # Socket paths to not have mutable state,
+                # this just saves some horizontal space in callers.
+                # TODO: Rename the dir so other apps can add sockets easily.
+                # E.g. f"/tmp/app_sockets/{value}/vpp_api.sock"
+                path = f"/tmp/vpp_sockets/{value}"
+                self.__dict__[u"socket_dir"] = path
+                self.__dict__[u"api_socket"] = f"{path}/api.sock"
+                self.__dict__[u"stats_socket"] = f"{path}/stats.sock"
             self.__dict__[attr] = value
         else:
             # Updating attribute base of type