CSIT-1142 2-node topology - keywords
[csit.git] / resources / libraries / python / ssh.py
index 87fc02d..4bed173 100644 (file)
@@ -1,4 +1,4 @@
-# Copyright (c) 2016 Cisco and/or its affiliates.
+# Copyright (c) 2018 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -50,7 +50,7 @@ class SSH(object):
 
         :param node: Node in topology.
         :type node: dict
-        :return: IP address and port for the specified node.
+        :returns: IP address and port for the specified node.
         :rtype: int
         """
 
@@ -60,14 +60,27 @@ class SSH(object):
         """Connect to node prior to running exec_command or scp.
 
         If there already is a connection to the node, this method reuses it.
+
+        :param node: Node in topology.
+        :param attempts: Number of reconnect attempts.
+        :type node: dict
+        :type attempts: int
+        :raises IOError: If the connection to the host cannot be established.
         """
-        try:
-            self._node = node
-            node_hash = self._node_hash(node)
-            if node_hash in SSH.__existing_connections:
-                self._ssh = SSH.__existing_connections[node_hash]
-                logger.debug('reusing ssh: {0}'.format(self._ssh))
+        self._node = node
+        node_hash = self._node_hash(node)
+        if node_hash in SSH.__existing_connections:
+            self._ssh = SSH.__existing_connections[node_hash]
+            if self._ssh.get_transport().is_active():
+                logger.debug('Reusing SSH: {ssh}'.format(ssh=self._ssh))
             else:
+                if attempts > 0:
+                    self._reconnect(attempts-1)
+                else:
+                    raise IOError('Cannot connect to {host}'.
+                                  format(host=node['host']))
+        else:
+            try:
                 start = time()
                 pkey = None
                 if 'priv_key' in node:
@@ -84,19 +97,14 @@ class SSH(object):
                 self._ssh.get_transport().set_keepalive(10)
 
                 SSH.__existing_connections[node_hash] = self._ssh
-
-                logger.trace('connect took {} seconds'.format(time() - start))
-                logger.debug('new ssh: {0}'.format(self._ssh))
-
-            logger.debug('Connect peer: {0}'.
-                         format(self._ssh.get_transport().getpeername()))
-            logger.debug('Connections: {0}'.
-                         format(str(SSH.__existing_connections)))
-        except:
-            if attempts > 0:
-                self._reconnect(attempts-1)
-            else:
-                raise
+                logger.debug('New SSH to {peer} took {total} seconds: {ssh}'.
+                             format(
+                                 peer=self._ssh.get_transport().getpeername(),
+                                 total=(time() - start),
+                                 ssh=self._ssh))
+            except SSHException:
+                raise IOError('Cannot connect to {host}'.
+                              format(host=node['host']))
 
     def disconnect(self, node):
         """Close SSH connection to the node.
@@ -106,19 +114,22 @@ class SSH(object):
         """
         node_hash = self._node_hash(node)
         if node_hash in SSH.__existing_connections:
-            logger.debug('Disconnecting peer: {}, {}'.
-                         format(node['host'], node['port']))
+            logger.debug('Disconnecting peer: {host}, {port}'.
+                         format(host=node['host'], port=node['port']))
             ssh = SSH.__existing_connections.pop(node_hash)
             ssh.close()
 
     def _reconnect(self, attempts=0):
-        """Close the SSH connection and open it again."""
+        """Close the SSH connection and open it again.
 
+        :param attempts: Number of reconnect attempts.
+        :type attempts: int
+        """
         node = self._node
         self.disconnect(node)
         self.connect(node, attempts)
-        logger.debug('Reconnecting peer done: {}'.
-                     format(self._ssh.get_transport().getpeername()))
+        logger.debug('Reconnecting peer done: {host}, {port}'.
+                     format(host=node['host'], port=node['port']))
 
     def exec_command(self, cmd, timeout=10):
         """Execute SSH command on a new channel on the connected Node.
@@ -132,21 +143,25 @@ class SSH(object):
         :rtype: tuple(int, str, str)
         :raise SSHTimeout: If command is not finished in timeout time.
         """
-        start = time()
         stdout = StringIO.StringIO()
         stderr = StringIO.StringIO()
         try:
             chan = self._ssh.get_transport().open_session(timeout=5)
+            peer = self._ssh.get_transport().getpeername()
         except AttributeError:
             self._reconnect()
             chan = self._ssh.get_transport().open_session(timeout=5)
+            peer = self._ssh.get_transport().getpeername()
         except SSHException:
             self._reconnect()
             chan = self._ssh.get_transport().open_session(timeout=5)
+            peer = self._ssh.get_transport().getpeername()
         chan.settimeout(timeout)
-        logger.trace('exec_command on {0}: {1}'
-                     .format(self._ssh.get_transport().getpeername(), cmd))
 
+        logger.trace('exec_command on {peer} with timeout {timeout}: {cmd}'
+                     .format(peer=peer, timeout=timeout, cmd=cmd))
+
+        start = time()
         chan.exec_command(cmd)
         while not chan.exit_status_ready() and timeout is not None:
             if chan.recv_ready():
@@ -157,10 +172,11 @@ class SSH(object):
 
             if time() - start > timeout:
                 raise SSHTimeout(
-                    'Timeout exception during execution of command: {0}\n'
-                    'Current contents of stdout buffer: {1}\n'
-                    'Current contents of stderr buffer: {2}\n'
-                    .format(cmd, stdout.getvalue(), stderr.getvalue())
+                    'Timeout exception during execution of command: {cmd}\n'
+                    'Current contents of stdout buffer: {stdout}\n'
+                    'Current contents of stderr buffer: {stderr}\n'
+                    .format(cmd=cmd, stdout=stdout.getvalue(),
+                            stderr=stderr.getvalue())
                 )
 
             sleep(0.1)
@@ -173,14 +189,12 @@ class SSH(object):
             stderr.write(chan.recv_stderr(self.__MAX_RECV_BUF))
 
         end = time()
-        logger.trace('exec_command on {0} took {1} seconds'.format(
-            self._ssh.get_transport().getpeername(), end-start))
-
-        logger.trace('chan_recv/_stderr took {} seconds'.format(time()-end))
+        logger.trace('exec_command on {peer} took {total} seconds'.
+                     format(peer=peer, total=end-start))
 
-        logger.trace('return RC {}'.format(return_code))
-        logger.trace('return STDOUT {}'.format(stdout.getvalue()))
-        logger.trace('return STDERR {}'.format(stderr.getvalue()))
+        logger.trace('return RC {rc}'.format(rc=return_code))
+        logger.trace('return STDOUT {stdout}'.format(stdout=stdout.getvalue()))
+        logger.trace('return STDERR {stderr}'.format(stderr=stderr.getvalue()))
         return return_code, stdout.getvalue(), stderr.getvalue()
 
     def exec_command_sudo(self, cmd, cmd_input=None, timeout=30):
@@ -189,7 +203,7 @@ class SSH(object):
         :param cmd: Command to be executed.
         :param cmd_input: Input redirected to the command.
         :param timeout: Timeout.
-        :return: return_code, stdout, stderr
+        :returns: return_code, stdout, stderr
 
         :Example:
 
@@ -221,7 +235,7 @@ class SSH(object):
         :type lxc_params: str
         :type sudo: bool
         :type timeout: int
-        :return: return_code, stdout, stderr
+        :returns: return_code, stdout, stderr
         """
         command = "lxc-attach {p} --name {n} -- /bin/sh -c '{c}'"\
             .format(p=lxc_params, n=lxc_name, c=lxc_cmd)
@@ -234,7 +248,7 @@ class SSH(object):
         """Open interactive terminal on a new channel on the connected Node.
 
         :param time_out: Timeout in seconds.
-        :return: SSH channel with opened terminal.
+        :returns: SSH channel with opened terminal.
 
         .. warning:: Interruptingcow is used here, and it uses
            signal(SIGALRM) to let the operating system interrupt program
@@ -250,7 +264,7 @@ class SSH(object):
         chan.set_combine_stderr(True)
 
         buf = ''
-        while not buf.endswith((":~$ ", "~]$ ")):
+        while not buf.endswith((":~$ ", "~]$ ", "~]# ")):
             try:
                 chunk = chan.recv(self.__MAX_RECV_BUF)
                 if not chunk:
@@ -273,7 +287,7 @@ class SSH(object):
         :param cmd: Command to be executed.
         :param prompt: Command prompt, sequence of characters used to
         indicate readiness to accept commands.
-        :return: Command output.
+        :returns: Command output.
 
         .. warning:: Interruptingcow is used here, and it uses
            signal(SIGALRM) to let the operating system interrupt program
@@ -311,7 +325,7 @@ class SSH(object):
         """
         chan.close()
 
-    def scp(self, local_path, remote_path, get=False, timeout=10):
+    def scp(self, local_path, remote_path, get=False, timeout=30):
         """Copy files from local_path to remote_path or vice versa.
 
         connect() method has to be called first!