+ # Do we have the connected instance in the cache?
+ vpp_instance = self.get_connected_client(check_connected=False)
+ if vpp_instance is not None:
+ return self
+ # No luck, create and connect a new instance.
+ time_enter = time.time()
+ node = self._node
+ # Parsing takes longer than connecting, prepare instance before tunnel.
+ vpp_instance = self.ensure_vpp_instance()
+ # Store into cache as soon as possible.
+ # If connection fails, it is better to attempt disconnect anyway.
+ self.set_connected_client(vpp_instance)
+ # Set additional attributes.
+ vpp_instance.csit_temp_dir = tempfile.TemporaryDirectory(dir=u"/tmp")
+ temp_path = vpp_instance.csit_temp_dir.name
+ api_socket = temp_path + u"/vpp-api.sock"
+ vpp_instance.csit_local_vpp_socket = api_socket
+ ssh_socket = temp_path + u"/ssh.sock"
+ vpp_instance.csit_control_socket = ssh_socket
+ # Cleanup possibilities.
+ ret_code, _ = run([u"ls", ssh_socket], check=False)
+ if ret_code != 2:
+ # This branch never seems to be hit in CI,
+ # but may be useful when testing manually.
+ run(
+ [u"ssh", u"-S", ssh_socket, u"-O", u"exit", u"0.0.0.0"],
+ check=False, log=True
+ )
+ # TODO: Is any sleep necessary? How to prove if not?
+ run([u"sleep", u"0.1"])
+ run([u"rm", u"-vrf", ssh_socket])
+ # Even if ssh can perhaps reuse this file,
+ # we need to remove it for readiness detection to work correctly.
+ run([u"rm", u"-rvf", api_socket])
+ # We use sleep command. The ssh command will exit in 30 second,
+ # unless a local socket connection is established,
+ # in which case the ssh command will exit only when
+ # the ssh connection is closed again (via control socket).
+ # The log level is to suppress "Warning: Permanently added" messages.
+ ssh_cmd = [
+ u"ssh", u"-S", ssh_socket, u"-M", u"-L",
+ api_socket + u":" + self._remote_vpp_socket,
+ u"-p", str(node[u"port"]),
+ u"-o", u"LogLevel=ERROR",
+ u"-o", u"UserKnownHostsFile=/dev/null",
+ u"-o", u"StrictHostKeyChecking=no",
+ u"-o", u"ExitOnForwardFailure=yes",
+ node[u"username"] + u"@" + node[u"host"],
+ u"sleep", u"30"
+ ]
+ priv_key = node.get(u"priv_key")
+ if priv_key:
+ # This is tricky. We need a file to pass the value to ssh command.
+ # And we need ssh command, because paramiko does not support sockets
+ # (neither ssh_socket, nor _remote_vpp_socket).
+ key_file = tempfile.NamedTemporaryFile()
+ key_file.write(priv_key)
+ # Make sure the content is written, but do not close yet.
+ key_file.flush()
+ ssh_cmd[1:1] = [u"-i", key_file.name]
+ password = node.get(u"password")
+ if password:
+ # Prepend sshpass command to set password.
+ ssh_cmd[:0] = [u"sshpass", u"-p", password]
+ time_stop = time.time() + 10.0
+ # subprocess.Popen seems to be the best way to run commands
+ # on background. Other ways (shell=True with "&" and ssh with -f)
+ # seem to be too dependent on shell behavior.
+ # In particular, -f does NOT return values for run().
+ subprocess.Popen(ssh_cmd)
+ # Check socket presence on local side.
+ while time.time() < time_stop:
+ # It can take a moment for ssh to create the socket file.
+ ret_code, _ = run(
+ [u"ls", u"-l", api_socket], check=False
+ )
+ if not ret_code:
+ break
+ time.sleep(0.1)
+ else:
+ raise RuntimeError(u"Local side socket has not appeared.")
+ if priv_key:
+ # Socket up means the key has been read. Delete file by closing it.
+ key_file.close()
+ # Everything is ready, set the local socket address and connect.
+ vpp_instance.transport.server_address = api_socket
+ # It seems we can get read error even if every preceding check passed.
+ # Single retry seems to help.
+ for _ in range(2):
+ try:
+ vpp_instance.connect_sync(u"csit_socket")
+ except (IOError, struct.error) as err:
+ logger.warn(f"Got initial connect error {err!r}")
+ vpp_instance.disconnect()
+ else:
+ break
+ else:
+ raise RuntimeError(u"Failed to connect to VPP over a socket.")
+ logger.trace(
+ f"Establishing socket connection took {time.time()-time_enter}s"
+ )
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ """No-op, the client instance remains in cache in connected state."""
+
+ @classmethod
+ def disconnect_by_key(cls, key):
+ """Disconnect a connected client instance, noop it not connected.
+
+ Also remove the local sockets by deleting the temporary directory.
+ Put disconnected client instances to the reuse list.
+ The added attributes are not cleaned up,
+ as their values will get overwritten on next connect.
+
+ This method is useful for disconnect_all type of work.
+
+ :param key: Tuple identifying the node (and socket).
+ :type key: tuple of str
+ """
+ client_instance = cls.conn_cache.get(key, None)
+ if client_instance is None:
+ return
+ logger.debug(f"Disconnecting by key: {key}")
+ client_instance.disconnect()
+ run([
+ u"ssh", u"-S", client_instance.csit_control_socket, u"-O",
+ u"exit", u"0.0.0.0"
+ ], check=False)
+ # Temp dir has autoclean, but deleting explicitly
+ # as an error can happen.
+ try:
+ client_instance.csit_temp_dir.cleanup()
+ except FileNotFoundError:
+ # There is a race condition with ssh removing its ssh.sock file.
+ # Single retry should be enough to ensure the complete removal.
+ shutil.rmtree(client_instance.csit_temp_dir.name)
+ # Finally, put disconnected clients to reuse list.
+ cls.reusable_vpp_client_list.append(client_instance)
+ # Invalidate cache last. Repeated errors are better than silent leaks.
+ del cls.conn_cache[key]
+
+ @classmethod
+ def disconnect_by_node_and_socket(
+ cls, node, remote_socket=Constants.SOCKSVR_PATH
+ ):
+ """Disconnect a connected client instance, noop it not connected.
+
+ Also remove the local sockets by deleting the temporary directory.
+ Put disconnected client instances to the reuse list.
+ The added attributes are not cleaned up,
+ as their values will get overwritten on next connect.
+
+ Call this method just before killing/restarting remote VPP instance.
+ """
+ key = cls.key_for_node_and_socket(node, remote_socket)
+ return cls.disconnect_by_key(key)
+
+ @classmethod
+ def disconnect_all_sockets_by_node(cls, node):
+ """Disconnect all socket connected client instance.
+
+ Noop if not connected.
+
+ Also remove the local sockets by deleting the temporary directory.
+ Put disconnected client instances to the reuse list.
+ The added attributes are not cleaned up,
+ as their values will get overwritten on next connect.
+
+ Call this method just before killing/restarting remote VPP instance.
+ """
+ sockets = Topology.get_node_sockets(node, socket_type=SocketType.PAPI)
+ if sockets:
+ for socket in sockets.values():
+ # TODO: Remove sockets from topology.
+ PapiSocketExecutor.disconnect_by_node_and_socket(node, socket)
+ # Always attempt to disconnect the default socket.
+ return cls.disconnect_by_node_and_socket(node)
+
+ @staticmethod
+ def disconnect_all_papi_connections():
+ """Disconnect all connected client instances, tear down the SSH tunnels.
+
+ Also remove the local sockets by deleting the temporary directory.
+ Put disconnected client instances to the reuse list.
+ The added attributes are not cleaned up,
+ as their values will get overwritten on next connect.
+
+ This should be a class method,
+ but we prefer to call static methods from Robot.
+
+ Call this method just before killing/restarting all VPP instances.