feat(VPPUtil): Wait up to 10s to confirm VPP kill 04/42404/1
author Vratko Polak <[email protected]>
Fri, 14 Feb 2025 09:45:23 +0000 (10:45 +0100)
committer Tibor Frank <[email protected]>
Thu, 6 Mar 2025 12:59:46 +0000 (12:59 +0000)
In rare cases, the fixed 1-second sleep in the previous code was too short
for Linux to receive all VPP interfaces back after the kill.

+ Reduce the SSH library timeout values used in the edited block.
+ Fail the keyword only after attempting all cleanup steps.
- Do not change other CSIT kills, yet.

Change-Id: I356f2c6fba0ef7b8e4d8b66bf6a5e43aceb72ed2
Signed-off-by: Vratko Polak <[email protected]>
(cherry picked from commit 064967bfdc939324292bde368f5e0a8831c24374)

resources/libraries/python/VPPUtil.py

index 29138da..7faa4a8 100644 (file)
@@ -21,7 +21,9 @@ from resources.libraries.python.PapiExecutor import PapiSocketExecutor
 from resources.libraries.python.model.ExportResult import (
     export_dut_type_and_version
 )
-from resources.libraries.python.ssh import exec_cmd_no_error, exec_cmd
+from resources.libraries.python.ssh import (
+    exec_cmd_no_error, exec_cmd, SSHTimeout
+)
 from resources.libraries.python.topology import Topology, SocketType, NodeType
 
 
@@ -78,20 +80,29 @@ class VPPUtil:
         :param node_key: Topology node key.
         :type node: dict
         :type node_key: str
+        :raises RuntimeError: If VPP is not killed within 10 seconds.
         """
         PapiSocketExecutor.disconnect_all_sockets_by_node(node)
-        command = "pkill -9 vpp; sleep 1"
-        exec_cmd(node, command, timeout=180, sudo=True)
-        command = (
-            "/bin/rm -f /dev/shm/db /dev/shm/global_vm /dev/shm/vpe-api"
-        )
-        exec_cmd(node, command, timeout=180, sudo=True)
-
+        command = "pkill -9 vpp"
+        exec_cmd(node, command, timeout=1, sudo=True)
+        # Different testbeds need different time to confirm the kill is done.
+        unsure = False
+        command = "while pgrep vpp; do sleep 0.2; done"
+        try:
+            # Sudo is not needed, and would need bash -c to handle semicolons.
+            unsure, _, _ = exec_cmd(node, command, timeout=10)
+        except SSHTimeout:
+            unsure = True
+        # Continue cleanup even if VPP may still be running after 10 seconds.
+        command = "/bin/rm -f /dev/shm/db /dev/shm/global_vm /dev/shm/vpe-api"
+        exec_cmd(node, command, timeout=1, sudo=True)
         if node_key:
             if Topology.get_node_sockets(node, socket_type=SocketType.PAPI):
                 Topology.del_node_socket_id(node, SocketType.PAPI, node_key)
             if Topology.get_node_sockets(node, socket_type=SocketType.STATS):
                 Topology.del_node_socket_id(node, SocketType.STATS, node_key)
+        if unsure:
+            raise RuntimeError(f"VPP kill not confirmed!")
 
     @staticmethod
     def stop_vpp_service_on_all_duts(nodes):