FIX: Force kill QEMU in case of failed initialization
[csit.git] / resources / libraries / python / QemuUtils.py
index 8ee9725..428599b 100644 (file)
@@ -18,7 +18,7 @@ import json
 
 from robot.api import logger
 
-from resources.libraries.python.ssh import SSH
+from resources.libraries.python.ssh import SSH, SSHTimeout
 from resources.libraries.python.constants import Constants
 from resources.libraries.python.topology import NodeType
 
@@ -26,13 +26,16 @@ from resources.libraries.python.topology import NodeType
 class QemuUtils(object):
     """QEMU utilities."""
 
-    __QEMU_BIN = '/usr/bin/qemu-system-x86_64'
-    # QEMU Machine Protocol socket
-    __QMP_SOCK = '/tmp/qmp.sock'
-    # QEMU Guest Agent socket
-    __QGA_SOCK = '/tmp/qga.sock'
-
-    def __init__(self):
+    def __init__(self, qemu_id=1):
+        self._qemu_id = qemu_id
+        # Path to QEMU binary
+        self._qemu_bin = '/usr/bin/qemu-system-x86_64'
+        # QEMU Machine Protocol socket
+        self._qmp_sock = '/tmp/qmp{0}.sock'.format(self._qemu_id)
+        # QEMU Guest Agent socket
+        self._qga_sock = '/tmp/qga{0}.sock'.format(self._qemu_id)
+        # QEMU PID file
+        self._pid_file = '/tmp/qemu{0}.pid'.format(self._qemu_id)
         self._qemu_opt = {}
         # Default 1 CPU.
         self._qemu_opt['smp'] = '-smp 1,sockets=1,cores=1,threads=1'
@@ -40,10 +43,11 @@ class QemuUtils(object):
         # management interface.
         self._qemu_opt['options'] = '-cpu host -daemonize -enable-kvm ' \
             '-machine pc,accel=kvm,usb=off,mem-merge=off ' \
-            '-net nic,macaddr=52:54:00:00:02:01 -balloon none'
-        self._qemu_opt['ssh_fwd_port'] = 10022
+            '-net nic,macaddr=52:54:00:00:{0:02x}:ff -balloon none'\
+            .format(self._qemu_id)
+        self._qemu_opt['ssh_fwd_port'] = 10021 + qemu_id
         # Default serial console port
-        self._qemu_opt['serial_port'] = 4556
+        self._qemu_opt['serial_port'] = 4555 + qemu_id
         # Default 512MB virtual RAM
         self._qemu_opt['mem_size'] = 512
         # Default huge page mount point, required for Vhost-user interfaces.
@@ -55,7 +59,7 @@ class QemuUtils(object):
         # VM node info dict
         self._vm_info = {
             'type': NodeType.VM,
-            'port': 10022,
+            'port': self._qemu_opt['ssh_fwd_port'],
             'username': 'cisco',
             'password': 'cisco',
             'interfaces': {},
@@ -65,10 +69,18 @@ class QemuUtils(object):
         self._vhost_id = 0
         self._ssh = None
         self._node = None
-        self._socks = [self.__QMP_SOCK, self.__QGA_SOCK]
+        self._socks = [self._qmp_sock, self._qga_sock]
+
+    def qemu_set_bin(self, path):
+        """Set binary path for QEMU.
+
+        :param path: Absolute path in filesystem.
+        :type path: str
+        """
+        self._qemu_bin = path
 
     def qemu_set_smp(self, cpus, cores, threads, sockets):
-        """Set SMP option for QEMU
+        """Set SMP option for QEMU.
 
         :param cpus: Number of CPUs.
         :param cores: Number of CPU cores on one socket.
@@ -182,7 +194,7 @@ class QemuUtils(object):
         :param socket: Path of the unix socket.
         :param server: If True the socket shall be a listening socket.
         :param mac: Vhost-user interface MAC address (optional, otherwise is
-            used autogenerated MAC 52:54:00:00:04:xx).
+            used auto-generated MAC 52:54:00:00:xx:yy).
         :type socket: str
         :type server: bool
         :type mac: str
@@ -195,13 +207,15 @@ class QemuUtils(object):
             chardev += ',server'
         self._qemu_opt['options'] += chardev
         # Create Vhost-user network backend.
-        netdev = ' -netdev vhost-user,id=vhost{0},chardev=char{0},'\
-            'queues={1}'.format(self._vhost_id, self._qemu_opt['queues'])
+        netdev = (' -netdev vhost-user,id=vhost{0},chardev=char{0},queues={1}'
+                  .format(self._vhost_id, self._qemu_opt['queues']))
         self._qemu_opt['options'] += netdev
-        # If MAC is not specified use autogenerated 52:54:00:00:04:<vhost_id>
-        # e.g. vhost1 MAC is 52:54:00:00:04:01
+        # If MAC is not specified use auto-generated MAC address based on
+        # template 52:54:00:00:<qemu_id>:<vhost_id>, e.g. vhost1 MAC of QEMU
+        #  with ID 1 is 52:54:00:00:01:01
         if mac is None:
-            mac = '52:54:00:00:04:{0:02x}'.format(self._vhost_id)
+            mac = '52:54:00:00:{0:02x}:{1:02x}'.\
+                format(self._qemu_id, self._vhost_id)
         extend_options = 'mq=on,csum=off,gso=off,guest_tso4=off,'\
             'guest_tso6=off,guest_ecn=off,mrg_rxbuf=off'
         # Create Virtio network device.
@@ -227,9 +241,10 @@ class QemuUtils(object):
             response will contain the "error" keyword instead of "return".
         """
         # To enter command mode, the qmp_capabilities command must be issued.
-        qmp_cmd = 'echo "{ \\"execute\\": \\"qmp_capabilities\\" }' + \
-            '{ \\"execute\\": \\"' + cmd + '\\" }" | sudo -S nc -U ' + \
-            self.__QMP_SOCK
+        qmp_cmd = 'echo "{ \\"execute\\": \\"qmp_capabilities\\" }' \
+                  '{ \\"execute\\": \\"' + cmd + \
+                  '\\" }" | sudo -S socat - UNIX-CONNECT:' + self._qmp_sock
+
         (ret_code, stdout, stderr) = self._ssh.exec_command(qmp_cmd)
         if int(ret_code) != 0:
             logger.debug('QMP execute failed {0}'.format(stderr))
@@ -246,8 +261,9 @@ class QemuUtils(object):
     def _qemu_qga_flush(self):
         """Flush the QGA parser state
         """
-        qga_cmd = 'printf "\xFF" | sudo -S nc ' \
-            '-q 1 -U ' + self.__QGA_SOCK
+        qga_cmd = '(printf "\xFF"; sleep 1) | sudo -S socat - UNIX-CONNECT:' + \
+                  self._qga_sock
+        #TODO: probably need something else
         (ret_code, stdout, stderr) = self._ssh.exec_command(qga_cmd)
         if int(ret_code) != 0:
             logger.debug('QGA execute failed {0}'.format(stderr))
@@ -267,8 +283,10 @@ class QemuUtils(object):
         :param cmd: QGA command to execute.
         :type cmd: str
         """
-        qga_cmd = 'echo "{ \\"execute\\": \\"' + cmd + '\\" }" | sudo -S nc ' \
-            '-q 1 -U ' + self.__QGA_SOCK
+        qga_cmd = '(echo "{ \\"execute\\": \\"' + \
+                  cmd + \
+                  '\\" }"; sleep 1) | sudo -S socat - UNIX-CONNECT:' + \
+                  self._qga_sock
         (ret_code, stdout, stderr) = self._ssh.exec_command(qga_cmd)
         if int(ret_code) != 0:
             logger.debug('QGA execute failed {0}'.format(stderr))
@@ -282,7 +300,8 @@ class QemuUtils(object):
     def _wait_until_vm_boot(self, timeout=60):
         """Wait until QEMU VM is booted.
 
-        Ping QEMU guest agent each 5s until VM booted or timeout.
+        First try to flush qga until there is output.
+        Then ping QEMU guest agent each 5s until VM booted or timeout.
 
         :param timeout: Waiting timeout in seconds (optional, default 60s).
         :type timeout: int
@@ -292,7 +311,21 @@ class QemuUtils(object):
             if time() - start > timeout:
                 raise RuntimeError('timeout, VM {0} not booted on {1}'.format(
                     self._qemu_opt['disk_image'], self._node['host']))
-            self._qemu_qga_flush()
+            out = None
+            try:
+                out = self._qemu_qga_flush()
+            except ValueError:
+                logger.trace('QGA qga flush unexpected output {}'.format(out))
+            # Empty output - VM not booted yet
+            if not out:
+                sleep(5)
+            else:
+                break
+        while True:
+            if time() - start > timeout:
+                raise RuntimeError('timeout, VM {0} not booted on {1}'.format(
+                    self._qemu_opt['disk_image'], self._node['host']))
+            out = None
             try:
                 out = self._qemu_qga_exec('guest-ping')
             except ValueError:
@@ -490,39 +523,44 @@ class QemuUtils(object):
             'share=on -m {0} -numa node,memdev=mem'.format(
                 self._qemu_opt.get('mem_size'), self._qemu_opt.get('huge_mnt'))
 
-        # By default check only if hugepages are availbale.
+        # By default check only if hugepages are available.
         # If 'huge_allocate' is set to true try to allocate as well.
         self._huge_page_check(allocate=self._qemu_opt.get('huge_allocate'))
 
+        # Disk option
+        drive = '-drive file={0},format=raw,cache=none,if=virtio'.format(
+            self._qemu_opt.get('disk_image'))
         # Setup QMP via unix socket
-        qmp = '-qmp unix:{0},server,nowait'.format(self.__QMP_SOCK)
+        qmp = '-qmp unix:{0},server,nowait'.format(self._qmp_sock)
         # Setup serial console
         serial = '-chardev socket,host=127.0.0.1,port={0},id=gnc0,server,' \
             'nowait -device isa-serial,chardev=gnc0'.format(
                 self._qemu_opt.get('serial_port'))
         # Setup QGA via chardev (unix socket) and isa-serial channel
-        qga = '-chardev socket,path=/tmp/qga.sock,server,nowait,id=qga0 ' \
-            '-device isa-serial,chardev=qga0'
+        qga = '-chardev socket,path={0},server,nowait,id=qga0 ' \
+            '-device isa-serial,chardev=qga0'.format(self._qga_sock)
         # Graphic setup
         graphic = '-monitor none -display none -vga none'
+        # PID file
+        pid = '-pidfile {}'.format(self._pid_file)
+
         # Run QEMU
-        cmd = '{0} {1} {2} {3} {4} -hda {5} {6} {7} {8} {9}'.format(
-            self.__QEMU_BIN, self._qemu_opt.get('smp'), mem, ssh_fwd,
+        cmd = '{0} {1} {2} {3} {4} {5} {6} {7} {8} {9} {10}'.format(
+            self._qemu_bin, self._qemu_opt.get('smp'), mem, ssh_fwd,
             self._qemu_opt.get('options'),
-            self._qemu_opt.get('disk_image'), qmp, serial, qga, graphic)
-        (ret_code, _, stderr) = self._ssh.exec_command_sudo(cmd, timeout=300)
-        if int(ret_code) != 0:
-            logger.debug('QEMU start failed {0}'.format(stderr))
-            raise RuntimeError('QEMU start failed on {0}'.format(
-                self._node['host']))
-        logger.trace('QEMU running')
-        # Wait until VM boot
+            drive, qmp, serial, qga, graphic, pid)
         try:
+            (ret_code, _, _) = self._ssh.exec_command_sudo(cmd, timeout=300)
+            if int(ret_code) != 0:
+                raise RuntimeError('QEMU start failed on {0}'.format(
+                    self._node['host']))
+            # Wait until VM boot
             self._wait_until_vm_boot()
-        except RuntimeError:
-            self.qemu_kill()
+        except (RuntimeError, SSHTimeout):
+            self.qemu_kill_all()
             self.qemu_clear_socks()
             raise
+        logger.trace('QEMU started successfully.')
         # Update interface names in VM node dict
         self._update_vm_interfaces()
         # Return VM node dict
@@ -557,9 +595,23 @@ class QemuUtils(object):
 
     def qemu_kill(self):
         """Kill qemu process."""
-        # TODO: add PID storage so that we can kill specific PID
         # Note: in QEMU start phase there are 3 QEMU processes because we
         # daemonize QEMU
+        self._ssh.exec_command_sudo('chmod +r {}'.format(self._pid_file))
+        self._ssh.exec_command_sudo('kill -SIGKILL $(cat {})'
+                                    .format(self._pid_file))
+        # Delete PID file
+        cmd = 'rm -f {}'.format(self._pid_file)
+        self._ssh.exec_command_sudo(cmd)
+
+    def qemu_kill_all(self, node=None):
+        """Kill all qemu processes on DUT node if specified.
+
+        :param node: Node to kill all QEMU processes on.
+        :type node: dict
+        """
+        if node:
+            self.qemu_set_node(node)
         self._ssh.exec_command_sudo('pkill -SIGKILL qemu')
 
     def qemu_clear_socks(self):
@@ -607,19 +659,31 @@ class QemuUtils(object):
                 'error: {1}'.format(self._node['host'], json.dumps(err)))
 
     @staticmethod
-    def build_qemu(node):
+    def build_qemu(node, force_install=False, apply_patch=False):
         """Build QEMU from sources.
 
         :param node: Node to build QEMU on.
+        :param force_install: If True, then remove previous build.
+        :param apply_patch: If True, then apply patches from qemu_patches dir.
         :type node: dict
+        :type force_install: bool
+        :type apply_patch: bool
+        :raises: RuntimeError if building QEMU failed.
         """
         ssh = SSH()
         ssh.connect(node)
 
+        directory = ' --directory={0}'.format(Constants.QEMU_INSTALL_DIR)
+        version = ' --version={0}'.format(Constants.QEMU_INSTALL_VERSION)
+        force = ' --force' if force_install else ''
+        patch = ' --patch' if apply_patch else ''
+
         (ret_code, stdout, stderr) = \
-            ssh.exec_command('sudo -Sn bash {0}/{1}/qemu_build.sh'.format(
-                Constants.REMOTE_FW_DIR, Constants.RESOURCES_LIB_SH), 1000)
-        logger.trace(stdout)
+            ssh.exec_command(
+                "sudo -E sh -c '{0}/{1}/qemu_build.sh{2}{3}{4}{5}'"\
+                .format(Constants.REMOTE_FW_DIR, Constants.RESOURCES_LIB_SH,
+                        version, directory, force, patch), 1000)
+
         if int(ret_code) != 0:
-            logger.debug('QEMU build failed {0}'.format(stderr))
+            logger.debug('QEMU build failed {0}'.format(stdout + stderr))
             raise RuntimeError('QEMU build failed on {0}'.format(node['host']))