FIX: Force kill QEMU in case of failed initialization
[csit.git] / resources / libraries / python / QemuUtils.py
index 675f074..428599b 100644 (file)
@@ -26,14 +26,16 @@ from resources.libraries.python.topology import NodeType
 class QemuUtils(object):
     """QEMU utilities."""
 
-    __QEMU_BIN = '/usr/bin/qemu-system-x86_64'
-
     def __init__(self, qemu_id=1):
         self._qemu_id = qemu_id
+        # Path to QEMU binary
+        self._qemu_bin = '/usr/bin/qemu-system-x86_64'
         # QEMU Machine Protocol socket
         self._qmp_sock = '/tmp/qmp{0}.sock'.format(self._qemu_id)
         # QEMU Guest Agent socket
         self._qga_sock = '/tmp/qga{0}.sock'.format(self._qemu_id)
+        # QEMU PID file
+        self._pid_file = '/tmp/qemu{0}.pid'.format(self._qemu_id)
         self._qemu_opt = {}
         # Default 1 CPU.
         self._qemu_opt['smp'] = '-smp 1,sockets=1,cores=1,threads=1'
@@ -41,11 +43,11 @@ class QemuUtils(object):
         # management interface.
         self._qemu_opt['options'] = '-cpu host -daemonize -enable-kvm ' \
             '-machine pc,accel=kvm,usb=off,mem-merge=off ' \
-            '-net nic,macaddr=52:54:00:00:00:{0:02x} -balloon none'\
+            '-net nic,macaddr=52:54:00:00:{0:02x}:ff -balloon none'\
             .format(self._qemu_id)
-        self._qemu_opt['ssh_fwd_port'] = 10022
+        self._qemu_opt['ssh_fwd_port'] = 10021 + qemu_id
         # Default serial console port
-        self._qemu_opt['serial_port'] = 4556
+        self._qemu_opt['serial_port'] = 4555 + qemu_id
         # Default 512MB virtual RAM
         self._qemu_opt['mem_size'] = 512
         # Default huge page mount point, required for Vhost-user interfaces.
@@ -69,8 +71,16 @@ class QemuUtils(object):
         self._node = None
         self._socks = [self._qmp_sock, self._qga_sock]
 
+    def qemu_set_bin(self, path):
+        """Set binary path for QEMU.
+
+        :param path: Absolute path in filesystem.
+        :type path: str
+        """
+        self._qemu_bin = path
+
     def qemu_set_smp(self, cpus, cores, threads, sockets):
-        """Set SMP option for QEMU
+        """Set SMP option for QEMU.
 
         :param cpus: Number of CPUs.
         :param cores: Number of CPU cores on one socket.
@@ -197,8 +207,8 @@ class QemuUtils(object):
             chardev += ',server'
         self._qemu_opt['options'] += chardev
         # Create Vhost-user network backend.
-        netdev = ' -netdev vhost-user,id=vhost{0},chardev=char{0},'\
-            'queues={1}'.format(self._vhost_id, self._qemu_opt['queues'])
+        netdev = (' -netdev vhost-user,id=vhost{0},chardev=char{0},queues={1}'
+                  .format(self._vhost_id, self._qemu_opt['queues']))
         self._qemu_opt['options'] += netdev
         # If MAC is not specified use auto-generated MAC address based on
         # template 52:54:00:00:<qemu_id>:<vhost_id>, e.g. vhost1 MAC of QEMU
@@ -290,7 +300,8 @@ class QemuUtils(object):
     def _wait_until_vm_boot(self, timeout=60):
         """Wait until QEMU VM is booted.
 
-        Ping QEMU guest agent each 5s until VM booted or timeout.
+        First flush the QEMU guest agent (QGA) until it returns output.
+        Then ping the guest agent every 5s until VM booted or timeout.
 
         :param timeout: Waiting timeout in seconds (optional, default 60s).
         :type timeout: int
@@ -302,7 +313,20 @@ class QemuUtils(object):
                     self._qemu_opt['disk_image'], self._node['host']))
             out = None
             try:
-                self._qemu_qga_flush()
+                out = self._qemu_qga_flush()
+            except ValueError:
+                logger.trace('QGA qga flush unexpected output {}'.format(out))
+            # Empty output - VM not booted yet
+            if not out:
+                sleep(5)
+            else:
+                break
+        while True:
+            if time() - start > timeout:
+                raise RuntimeError('timeout, VM {0} not booted on {1}'.format(
+                    self._qemu_opt['disk_image'], self._node['host']))
+            out = None
+            try:
                 out = self._qemu_qga_exec('guest-ping')
             except ValueError:
                 logger.trace('QGA guest-ping unexpected output {}'.format(out))
@@ -517,25 +541,26 @@ class QemuUtils(object):
             '-device isa-serial,chardev=qga0'.format(self._qga_sock)
         # Graphic setup
         graphic = '-monitor none -display none -vga none'
+        # PID file
+        pid = '-pidfile {}'.format(self._pid_file)
 
         # Run QEMU
-        cmd = '{0} {1} {2} {3} {4} {5} {6} {7} {8} {9}'.format(
-            self.__QEMU_BIN, self._qemu_opt.get('smp'), mem, ssh_fwd,
+        cmd = '{0} {1} {2} {3} {4} {5} {6} {7} {8} {9} {10}'.format(
+            self._qemu_bin, self._qemu_opt.get('smp'), mem, ssh_fwd,
             self._qemu_opt.get('options'),
-            drive, qmp, serial, qga, graphic)
-        (ret_code, _, stderr) = self._ssh.exec_command_sudo(cmd, timeout=300)
-        if int(ret_code) != 0:
-            logger.debug('QEMU start failed {0}'.format(stderr))
-            raise RuntimeError('QEMU start failed on {0}'.format(
-                self._node['host']))
-        logger.trace('QEMU running')
-        # Wait until VM boot
+            drive, qmp, serial, qga, graphic, pid)
         try:
+            (ret_code, _, _) = self._ssh.exec_command_sudo(cmd, timeout=300)
+            if int(ret_code) != 0:
+                raise RuntimeError('QEMU start failed on {0}'.format(
+                    self._node['host']))
+            # Wait until VM boot
             self._wait_until_vm_boot()
         except (RuntimeError, SSHTimeout):
-            self.qemu_kill()
+            self.qemu_kill_all()
             self.qemu_clear_socks()
             raise
+        logger.trace('QEMU started successfully.')
         # Update interface names in VM node dict
         self._update_vm_interfaces()
         # Return VM node dict
@@ -570,9 +595,23 @@ class QemuUtils(object):
 
     def qemu_kill(self):
         """Kill qemu process."""
-        # TODO: add PID storage so that we can kill specific PID
         # Note: in QEMU start phase there are 3 QEMU processes because we
         # daemonize QEMU
+        self._ssh.exec_command_sudo('chmod +r {}'.format(self._pid_file))
+        self._ssh.exec_command_sudo('kill -SIGKILL $(cat {})'
+                                    .format(self._pid_file))
+        # Delete PID file
+        cmd = 'rm -f {}'.format(self._pid_file)
+        self._ssh.exec_command_sudo(cmd)
+
+    def qemu_kill_all(self, node=None):
+        """Kill all QEMU processes on the current node, or on node if given.
+
+        :param node: Node to kill all QEMU processes on.
+        :type node: dict
+        """
+        if node:
+            self.qemu_set_node(node)
         self._ssh.exec_command_sudo('pkill -SIGKILL qemu')
 
     def qemu_clear_socks(self):
@@ -620,19 +659,31 @@ class QemuUtils(object):
                 'error: {1}'.format(self._node['host'], json.dumps(err)))
 
     @staticmethod
-    def build_qemu(node):
+    def build_qemu(node, force_install=False, apply_patch=False):
         """Build QEMU from sources.
 
         :param node: Node to build QEMU on.
+        :param force_install: If True, then remove previous build.
+        :param apply_patch: If True, then apply patches from qemu_patches dir.
         :type node: dict
+        :type force_install: bool
+        :type apply_patch: bool
+        :raises RuntimeError: If building QEMU failed.
         """
         ssh = SSH()
         ssh.connect(node)
 
+        directory = ' --directory={0}'.format(Constants.QEMU_INSTALL_DIR)
+        version = ' --version={0}'.format(Constants.QEMU_INSTALL_VERSION)
+        force = ' --force' if force_install else ''
+        patch = ' --patch' if apply_patch else ''
+
         (ret_code, stdout, stderr) = \
-            ssh.exec_command('sudo -Sn bash {0}/{1}/qemu_build.sh'.format(
-                Constants.REMOTE_FW_DIR, Constants.RESOURCES_LIB_SH), 1000)
-        logger.trace(stdout)
+            ssh.exec_command(
+                "sudo -E sh -c '{0}/{1}/qemu_build.sh{2}{3}{4}{5}'"\
+                .format(Constants.REMOTE_FW_DIR, Constants.RESOURCES_LIB_SH,
+                        version, directory, force, patch), 1000)
+
         if int(ret_code) != 0:
-            logger.debug('QEMU build failed {0}'.format(stderr))
+            logger.debug('QEMU build failed {0}'.format(stdout + stderr))
             raise RuntimeError('QEMU build failed on {0}'.format(node['host']))