QemuUtil lib change to work with ubuntu and centos
[csit.git] / resources / libraries / python / QemuUtils.py
index aa149da..acc00de 100644 (file)
 
 from time import time, sleep
 import json
-import re
 
 from robot.api import logger
 
-from resources.libraries.python.ssh import SSH
+from resources.libraries.python.ssh import SSH, SSHTimeout
 from resources.libraries.python.constants import Constants
 from resources.libraries.python.topology import NodeType
 
@@ -27,7 +26,7 @@ from resources.libraries.python.topology import NodeType
 class QemuUtils(object):
     """QEMU utilities."""
 
-    __QEMU_BIN = '/opt/qemu/bin/qemu-system-x86_64'
+    __QEMU_BIN = '/usr/bin/qemu-system-x86_64'
     # QEMU Machine Protocol socket
     __QMP_SOCK = '/tmp/qmp.sock'
     # QEMU Guest Agent socket
@@ -40,7 +39,7 @@ class QemuUtils(object):
         # Daemonize the QEMU process after initialization. Default one
         # management interface.
         self._qemu_opt['options'] = '-cpu host -daemonize -enable-kvm ' \
-            '-machine pc-1.0,accel=kvm,usb=off,mem-merge=off ' \
+            '-machine pc,accel=kvm,usb=off,mem-merge=off ' \
             '-net nic,macaddr=52:54:00:00:02:01 -balloon none'
         self._qemu_opt['ssh_fwd_port'] = 10022
         # Default serial console port
@@ -61,6 +60,8 @@ class QemuUtils(object):
             'password': 'cisco',
             'interfaces': {},
         }
+        # Virtio queue count
+        self._qemu_opt['queues'] = 1
         self._vhost_id = 0
         self._ssh = None
         self._node = None
@@ -141,14 +142,29 @@ class QemuUtils(object):
             raise ValueError('Host CPU count must match Qemu Thread count')
 
         for qemu_cpu, host_cpu in zip(qemu_cpus, host_cpus):
-            cmd = 'taskset -p {0} {1}'.format(hex(1 << int(host_cpu)),
-                                              qemu_cpu['thread_id'])
+            cmd = 'taskset -pc {0} {1}'.format(host_cpu, qemu_cpu['thread_id'])
             (ret_code, _, stderr) = self._ssh.exec_command_sudo(cmd)
             if int(ret_code) != 0:
                 logger.debug('Set affinity failed {0}'.format(stderr))
                 raise RuntimeError('Set affinity failed on {0}'.format(
                     self._node['host']))
 
+    def qemu_set_scheduler_policy(self):
+        """Set scheduler policy to SCHED_RR with priority 1 for all Qemu CPU
+        processes.
+
+       :raises RuntimeError: Set scheduler policy failed.
+        """
+        qemu_cpus = self._qemu_qmp_exec('query-cpus')['return']
+
+        for qemu_cpu in qemu_cpus:
+            cmd = 'chrt -r -p 1 {0}'.format(qemu_cpu['thread_id'])
+            (ret_code, _, stderr) = self._ssh.exec_command_sudo(cmd)
+            if int(ret_code) != 0:
+                logger.debug('Set SCHED_RR failed {0}'.format(stderr))
+                raise RuntimeError('Set SCHED_RR failed on {0}'.format(
+                    self._node['host']))
+
     def qemu_set_node(self, node):
         """Set node to run QEMU on.
 
@@ -179,15 +195,15 @@ class QemuUtils(object):
             chardev += ',server'
         self._qemu_opt['options'] += chardev
         # Create Vhost-user network backend.
-        netdev = ' -netdev vhost-user,id=vhost{0},chardev=char{0}'.format(
-            self._vhost_id)
+        netdev = ' -netdev vhost-user,id=vhost{0},chardev=char{0},'\
+            'queues={1}'.format(self._vhost_id, self._qemu_opt['queues'])
         self._qemu_opt['options'] += netdev
         # If MAC is not specified use autogenerated 52:54:00:00:04:<vhost_id>
         # e.g. vhost1 MAC is 52:54:00:00:04:01
         if mac is None:
             mac = '52:54:00:00:04:{0:02x}'.format(self._vhost_id)
-        extend_options = 'csum=off,gso=off,guest_tso4=off,guest_tso6=off,'\
-            'guest_ecn=off,mrg_rxbuf=off'
+        extend_options = 'mq=on,csum=off,gso=off,guest_tso4=off,'\
+            'guest_tso6=off,guest_ecn=off,mrg_rxbuf=off'
         # Create Virtio network device.
         device = ' -device virtio-net-pci,netdev=vhost{0},mac={1},{2}'.format(
             self._vhost_id, mac, extend_options)
@@ -211,9 +227,10 @@ class QemuUtils(object):
             response will contain the "error" keyword instead of "return".
         """
         # To enter command mode, the qmp_capabilities command must be issued.
-        qmp_cmd = 'echo "{ \\"execute\\": \\"qmp_capabilities\\" }' + \
-            '{ \\"execute\\": \\"' + cmd + '\\" }" | sudo -S nc -U ' + \
-            self.__QMP_SOCK
+        qmp_cmd = 'echo "{ \\"execute\\": \\"qmp_capabilities\\" }' \
+                  '{ \\"execute\\": \\"' + cmd + \
+                  '\\" }" | sudo -S socat - UNIX-CONNECT:' + self.__QMP_SOCK
+
         (ret_code, stdout, stderr) = self._ssh.exec_command(qmp_cmd)
         if int(ret_code) != 0:
             logger.debug('QMP execute failed {0}'.format(stderr))
@@ -230,13 +247,15 @@ class QemuUtils(object):
     def _qemu_qga_flush(self):
         """Flush the QGA parser state
         """
-        qga_cmd = 'printf "\xFF" | sudo -S nc ' \
-            '-q 1 -U ' + self.__QGA_SOCK
+        qga_cmd = '(printf "\xFF"; sleep 1) | sudo -S socat - UNIX-CONNECT:' + \
+                  self.__QGA_SOCK
+        #TODO: probably need something else
         (ret_code, stdout, stderr) = self._ssh.exec_command(qga_cmd)
         if int(ret_code) != 0:
             logger.debug('QGA execute failed {0}'.format(stderr))
             raise RuntimeError('QGA execute "{0}" '
-                               'failed on {1}'.format(cmd, self._node['host']))
+                               'failed on {1}'.format(qga_cmd,
+                                                      self._node['host']))
         logger.trace(stdout)
         if not stdout:
             return {}
@@ -250,8 +269,10 @@ class QemuUtils(object):
         :param cmd: QGA command to execute.
         :type cmd: str
         """
-        qga_cmd = 'echo "{ \\"execute\\": \\"' + cmd + '\\" }" | sudo -S nc ' \
-            '-q 1 -U ' + self.__QGA_SOCK
+        qga_cmd = '(echo "{ \\"execute\\": \\"' + \
+                  cmd + \
+                  '\\" }"; sleep 1) | sudo -S socat - UNIX-CONNECT:' + \
+                  self.__QGA_SOCK
         (ret_code, stdout, stderr) = self._ssh.exec_command(qga_cmd)
         if int(ret_code) != 0:
             logger.debug('QGA execute failed {0}'.format(stderr))
@@ -262,21 +283,24 @@ class QemuUtils(object):
             return {}
         return json.loads(stdout.split('\n', 1)[0])
 
-    def _wait_until_vm_boot(self, timeout=300):
+    def _wait_until_vm_boot(self, timeout=60):
         """Wait until QEMU VM is booted.
 
         Ping QEMU guest agent each 5s until VM booted or timeout.
 
-        :param timeout: Waiting timeout in seconds (optional, default 300s).
+        :param timeout: Waiting timeout in seconds (optional, default 60s).
         :type timeout: int
         """
         start = time()
-        while 1:
+        while True:
             if time() - start > timeout:
                 raise RuntimeError('timeout, VM {0} not booted on {1}'.format(
                     self._qemu_opt['disk_image'], self._node['host']))
-            self._qemu_qga_flush()
-            out = self._qemu_qga_exec('guest-ping')
+            try:
+                self._qemu_qga_flush()
+                out = self._qemu_qga_exec('guest-ping')
+            except ValueError:
+                logger.trace('QGA guest-ping unexpected output {}'.format(out))
             # Empty output - VM not booted yet
             if not out:
                 sleep(5)
@@ -287,8 +311,10 @@ class QemuUtils(object):
             elif out.get('error') is not None:
                 sleep(5)
             else:
-                raise RuntimeError('QGA guest-ping unexpected output {}'.format(
-                    out))
+                # If there is an unexpected output from QGA guest-info, try
+                # again until timeout.
+                logger.trace('QGA guest-ping unexpected output {}'.format(out))
+
         logger.trace('VM {0} booted on {1}'.format(self._qemu_opt['disk_image'],
                                                    self._node['host']))
 
@@ -321,17 +347,12 @@ class QemuUtils(object):
         """Huge page check."""
         huge_mnt = self._qemu_opt.get('huge_mnt')
         mem_size = self._qemu_opt.get('mem_size')
-        # Check size of free huge pages
-        (_, output, _) = self._ssh.exec_command('grep Huge /proc/meminfo')
-        regex = re.compile(r'HugePages_Free:\s+(\d+)')
-        match = regex.search(output)
-        huge_free = int(match.group(1))
-        regex = re.compile(r'HugePages_Total:\s+(\d+)')
-        match = regex.search(output)
-        huge_total = int(match.group(1))
-        regex = re.compile(r'Hugepagesize:\s+(\d+)')
-        match = regex.search(output)
-        huge_size = int(match.group(1))
+
+        # Get huge pages information
+        huge_size = self._get_huge_page_size()
+        huge_free = self._get_huge_page_free(huge_size)
+        huge_total = self._get_huge_page_total(huge_size)
+
         # Check if memory reqested by qemu is available on host
         if (mem_size * 1024) > (huge_free * huge_size):
             # If we want to allocate hugepage dynamically
@@ -383,6 +404,80 @@ class QemuUtils(object):
                 raise RuntimeError('Mount huge pages failed on {0}'.format(
                     self._node['host']))
 
+    def _get_huge_page_size(self):
+        """Get default size of huge pages in system.
+
+        :returns: Default size of free huge pages in system.
+        :rtype: int
+        :raises: RuntimeError if reading failed for three times.
+        """
+        # TODO: remove to dedicated library
+        cmd_huge_size = "grep Hugepagesize /proc/meminfo | awk '{ print $2 }'"
+        for _ in range(3):
+            (ret, out, _) = self._ssh.exec_command_sudo(cmd_huge_size)
+            if ret == 0:
+                try:
+                    huge_size = int(out)
+                except ValueError:
+                    logger.trace('Reading huge page size information failed')
+                else:
+                    break
+        else:
+            raise RuntimeError('Getting huge page size information failed.')
+        return huge_size
+
+    def _get_huge_page_free(self, huge_size):
+        """Get total number of huge pages in system.
+
+        :param huge_size: Size of hugepages.
+        :type huge_size: int
+        :returns: Number of free huge pages in system.
+        :rtype: int
+        :raises: RuntimeError if reading failed for three times.
+        """
+        # TODO: add numa aware option
+        # TODO: remove to dedicated library
+        cmd_huge_free = 'cat /sys/kernel/mm/hugepages/hugepages-{0}kB/'\
+            'free_hugepages'.format(huge_size)
+        for _ in range(3):
+            (ret, out, _) = self._ssh.exec_command_sudo(cmd_huge_free)
+            if ret == 0:
+                try:
+                    huge_free = int(out)
+                except ValueError:
+                    logger.trace('Reading free huge pages information failed')
+                else:
+                    break
+        else:
+            raise RuntimeError('Getting free huge pages information failed.')
+        return huge_free
+
+    def _get_huge_page_total(self, huge_size):
+        """Get total number of huge pages in system.
+
+        :param huge_size: Size of hugepages.
+        :type huge_size: int
+        :returns: Total number of huge pages in system.
+        :rtype: int
+        :raises: RuntimeError if reading failed for three times.
+        """
+        # TODO: add numa aware option
+        # TODO: remove to dedicated library
+        cmd_huge_total = 'cat /sys/kernel/mm/hugepages/hugepages-{0}kB/'\
+            'nr_hugepages'.format(huge_size)
+        for _ in range(3):
+            (ret, out, _) = self._ssh.exec_command_sudo(cmd_huge_total)
+            if ret == 0:
+                try:
+                    huge_total = int(out)
+                except ValueError:
+                    logger.trace('Reading total huge pages information failed')
+                else:
+                    break
+        else:
+            raise RuntimeError('Getting total huge pages information failed.')
+        return huge_total
+
     def qemu_start(self):
         """Start QEMU and wait until VM boot.
 
@@ -403,6 +498,9 @@ class QemuUtils(object):
         # If 'huge_allocate' is set to true try to allocate as well.
         self._huge_page_check(allocate=self._qemu_opt.get('huge_allocate'))
 
+        # Disk option
+        drive = '-drive file={},format=raw,cache=none,if=virtio'.format(
+            self._qemu_opt.get('disk_image'))
         # Setup QMP via unix socket
         qmp = '-qmp unix:{0},server,nowait'.format(self.__QMP_SOCK)
         # Setup serial console
@@ -414,11 +512,12 @@ class QemuUtils(object):
             '-device isa-serial,chardev=qga0'
         # Graphic setup
         graphic = '-monitor none -display none -vga none'
+
         # Run QEMU
-        cmd = '{0} {1} {2} {3} {4} -hda {5} {6} {7} {8} {9}'.format(
+        cmd = '{0} {1} {2} {3} {4} {5} {6} {7} {8} {9}'.format(
             self.__QEMU_BIN, self._qemu_opt.get('smp'), mem, ssh_fwd,
             self._qemu_opt.get('options'),
-            self._qemu_opt.get('disk_image'), qmp, serial, qga, graphic)
+            drive, qmp, serial, qga, graphic)
         (ret_code, _, stderr) = self._ssh.exec_command_sudo(cmd, timeout=300)
         if int(ret_code) != 0:
             logger.debug('QEMU start failed {0}'.format(stderr))
@@ -426,7 +525,12 @@ class QemuUtils(object):
                 self._node['host']))
         logger.trace('QEMU running')
         # Wait until VM boot
-        self._wait_until_vm_boot()
+        try:
+            self._wait_until_vm_boot()
+        except (RuntimeError, SSHTimeout):
+            self.qemu_kill()
+            self.qemu_clear_socks()
+            raise
         # Update interface names in VM node dict
         self._update_vm_interfaces()
         # Return VM node dict