HC Test: increase timeout for archiving HC log file
[csit.git] / resources / libraries / python / honeycomb / HoneycombSetup.py
index 4c438ff..b5e964d 100644 (file)
@@ -13,6 +13,9 @@
 
 """Implementation of keywords for Honeycomb setup."""
 
+from json import loads
+from time import time, sleep
+
 from ipaddress import IPv6Address, AddressValueError
 
 from robot.api import logger
@@ -134,106 +137,104 @@ class HoneycombSetup(object):
                     node['host']))
 
     @staticmethod
-    def check_honeycomb_startup_state(*nodes):
-        """Check state of Honeycomb service during startup on specified nodes.
-
-        Reads html path from template file oper_vpp_version.url.
-
-        Honeycomb nodes reply with connection refused or the following status
-        codes depending on startup progress: codes 200, 401, 403, 404, 500, 503
+    def check_honeycomb_startup_state(node, timeout=360, retries=20,
+                                      interval=15):
+        """Repeatedly check the status of Honeycomb startup until it is fully
+        started or until timeout or max retries is reached.
 
-        :param nodes: List of DUT nodes starting Honeycomb.
-        :type nodes: list
-        :return: True if all GETs returned code 200(OK).
-        :rtype bool
-        """
-        path = HcUtil.read_path_from_url_file("oper_vpp_version")
-        expected_status_codes = (HTTPCodes.UNAUTHORIZED,
-                                 HTTPCodes.FORBIDDEN,
-                                 HTTPCodes.NOT_FOUND,
-                                 HTTPCodes.SERVICE_UNAVAILABLE,
-                                 HTTPCodes.INTERNAL_SERVER_ERROR)
+        :param node: Honeycomb node.
+        :param timeout: Timeout value in seconds.
+        :param retries: Max number of retries.
+        :param interval: Interval between checks, in seconds.
+        :type node: dict
+        :type timeout: int
+        :type retries: int
+        :type interval: int
+        :raises HoneycombError: If the Honeycomb process ID cannot be found,
+        or if timeout or number of retries is exceeded."""
 
-        for node in nodes:
-            if node['type'] == NodeType.DUT:
-                HoneycombSetup.print_ports(node)
-                try:
-                    status_code, _ = HTTPRequest.get(node, path,
-                                                     enable_logging=False)
-                except HTTPRequestError:
-                    ssh = SSH()
-                    ssh.connect(node)
-                    ret_code, _, _ = ssh.exec_command_sudo(
-                        "tail -n 100 /var/log/syslog")
-                    if ret_code != 0:
-                        # It's probably Centos
-                        ssh.exec_command_sudo("tail -n 100 /var/log/messages")
-                    raise
-                if status_code == HTTPCodes.OK:
-                    logger.info("Honeycomb on node {0} is up and running".
-                                format(node['host']))
-                elif status_code in expected_status_codes:
-                    if status_code == HTTPCodes.UNAUTHORIZED:
-                        logger.info('Unauthorized. If this triggers keyword '
-                                    'timeout, verify Honeycomb username and '
-                                    'password.')
-                    raise HoneycombError('Honeycomb on node {0} running but '
-                                         'not yet ready.'.format(node['host']),
-                                         enable_logging=False)
-                else:
-                    raise HoneycombError('Unexpected return code: {0}.'.
-                                         format(status_code))
-
-                status_code, _ = HcUtil.get_honeycomb_data(
-                    node, "config_vpp_interfaces")
-                if status_code != HTTPCodes.OK:
-                    raise HoneycombError('Honeycomb on node {0} running but '
-                                         'not yet ready.'.format(node['host']),
-                                         enable_logging=False)
-        return True
+        ssh = SSH()
+        ssh.connect(node)
+        ret_code, pid, _ = ssh.exec_command("pgrep honeycomb")
+        if ret_code != 0:
+            raise HoneycombError("No process named 'honeycomb' found.")
+
+        pid = int(pid)
+        count = 0
+        start = time()
+        while time() - start < timeout and count < retries:
+            count += 1
+            ret_code, _, _ = ssh.exec_command(
+                " | ".join([
+                    "sudo tail -n 1000 /var/log/syslog",
+                    "grep {pid}".format(pid=pid),
+                    "grep 'Honeycomb started successfully!'"])
+            )
+            if ret_code != 0:
+                logger.debug(
+                    "Attempt #{count} failed on log check.".format(
+                        count=count))
+                sleep(interval)
+                continue
+            status_code_version, _ = HcUtil.get_honeycomb_data(
+                node, "oper_vpp_version")
+            status_code_if_cfg, _ = HcUtil.get_honeycomb_data(
+                node, "config_vpp_interfaces")
+            status_code_if_oper, _ = HcUtil.get_honeycomb_data(
+                node, "oper_vpp_interfaces")
+            if status_code_version == HTTPCodes.OK\
+                    and status_code_if_cfg == HTTPCodes.OK\
+                    and status_code_if_oper == HTTPCodes.OK:
+                logger.info("Check successful, Honeycomb is up and running.")
+                break
+            else:
+                logger.debug(
+                    "Attempt #{count} failed on Restconf check. Status codes:\n"
+                    "Version: {version}\n"
+                    "Interface config: {if_cfg}\n"
+                    "Interface operational: {if_oper}".format(
+                        count=count,
+                        version=status_code_version,
+                        if_cfg=status_code_if_cfg,
+                        if_oper=status_code_if_oper))
+                sleep(interval)
+                continue
+        else:  # Loop ended without break: startup was not confirmed.
+            _, vpp_status, _ = ssh.exec_command("service vpp status")
+            _, hc_log, _ = ssh.exec_command(
+                " | ".join([
+                    "sudo tail -n 1000 /var/log/syslog",
+                    "grep {pid}".format(pid=pid)]))
+            raise HoneycombError(
+                "Timeout or max retries exceeded. Status of VPP:\n"
+                "{vpp_status}\n"
+                "Syslog entries filtered by Honeycomb's pid:\n"
+                "{hc_log}".format(vpp_status=vpp_status, hc_log=hc_log))
     @staticmethod
-    def check_honeycomb_shutdown_state(*nodes):
-        """Check state of Honeycomb service during shutdown on specified nodes.
+    def check_honeycomb_shutdown_state(node):
+        """Check state of Honeycomb service during shutdown on specified node.
 
-        Honeycomb nodes reply with connection refused or the following status
-        codes depending on shutdown progress: codes 200, 404.
+        Runs "pgrep honeycomb" on the node over SSH; return code 0 means a
+        matching process still exists, so HoneycombError is raised.
 
-        :param nodes: List of DUT nodes stopping Honeycomb.
-        :type nodes: list
-        :return: True if all GETs fail to connect.
+        :param node: DUT node stopping Honeycomb.
+        :type node: dict
+        :return: True if no matching process was found.
         :rtype bool
         """
-        cmd = "ps -ef | grep -v grep | grep honeycomb"
-        for node in nodes:
-            if node['type'] == NodeType.DUT:
-                try:
-                    status_code, _ = HTTPRequest.get(node, '/index.html',
-                                                     enable_logging=False)
-                    if status_code == HTTPCodes.OK:
-                        raise HoneycombError('Honeycomb on node {0} is still '
-                                             'running.'.format(node['host']),
-                                             enable_logging=False)
-                    elif status_code == HTTPCodes.NOT_FOUND:
-                        raise HoneycombError('Honeycomb on node {0} is shutting'
-                                             ' down.'.format(node['host']),
-                                             enable_logging=False)
-                    else:
-                        raise HoneycombError('Unexpected return code: {0}.'.
-                                             format(status_code))
-                except HTTPRequestError:
-                    logger.debug('Connection refused, checking the process '
-                                 'state ...')
-                    ssh = SSH()
-                    ssh.connect(node)
-                    (ret_code, _, _) = ssh.exec_command_sudo(cmd)
-                    if ret_code == 0:
-                        raise HoneycombError('Honeycomb on node {0} is still '
-                                             'running.'.format(node['host']),
-                                             enable_logging=False)
-                    else:
-                        logger.info("Honeycomb on node {0} has stopped".
-                                    format(node['host']))
+        cmd = "pgrep honeycomb"
+
+        ssh = SSH()
+        ssh.connect(node)
+        (ret_code, _, _) = ssh.exec_command_sudo(cmd)
+        if ret_code == 0:
+            raise HoneycombError('Honeycomb on node {0} is still '
+                                 'running.'.format(node['host']),
+                                 enable_logging=False)
+        else:
+            logger.info("Honeycomb on node {0} has stopped".
+                        format(node['host']))
         return True
 
     @staticmethod
@@ -255,7 +256,7 @@ class HoneycombSetup(object):
             replace = '\\"restconf-binding-address\\": \\"0.0.0.0\\",'
 
         argument = '"/{0}/c\\ {1}"'.format(find, replace)
-        path = "{0}/config/honeycomb.json".format(Const.REMOTE_HC_DIR)
+        path = "{0}/config/restconf.json".format(Const.REMOTE_HC_DIR)
         command = "sed -i {0} {1}".format(argument, path)
 
         ssh = SSH()
@@ -461,10 +462,11 @@ class HoneycombSetup(object):
         ssh = SSH()
         ssh.connect(node)
 
-        cmd = "cp -r {src}/*karaf_{odl_name}* {dst}".format(
-            src=src_path, odl_name=odl_name, dst=dst_path)
+        cmd = "sudo rm -rf {dst}/*karaf_{odl_name} && " \
+              "cp -r {src}/*karaf_{odl_name}* {dst}".format(
+                  src=src_path, odl_name=odl_name, dst=dst_path)
 
-        ret_code, _, _ = ssh.exec_command(cmd, timeout=60)
+        ret_code, _, _ = ssh.exec_command_sudo(cmd, timeout=180)
         if int(ret_code) != 0:
             raise HoneycombError(
                 "Failed to copy ODL client on node {0}".format(node["host"]))
@@ -512,11 +514,13 @@ class HoneycombSetup(object):
         ssh.connect(node)
 
         cmd = "{path}/*karaf*/bin/client -u karaf feature:install " \
-              "odl-restconf-all odl-netconf-connector-all".format(path=path)
+              "odl-restconf-all " \
+              "odl-netconf-connector-all " \
+              "odl-netconf-topology".format(path=path)
         for feature in features:
             cmd += " {0}".format(feature)
 
-        ret_code, _, _ = ssh.exec_command_sudo(cmd, timeout=120)
+        ret_code, _, _ = ssh.exec_command_sudo(cmd, timeout=250)
 
         if int(ret_code) != 0:
             raise HoneycombError("Feature install did not succeed.")
@@ -573,8 +577,7 @@ class HoneycombSetup(object):
             "odl_client/odl_netconf_connector")
 
         try:
-            status_code, _ = HTTPRequest.get(node, path, timeout=10,
-                                             enable_logging=False)
+            HTTPRequest.get(node, path, timeout=10, enable_logging=False)
             raise HoneycombError("ODL client is still running.")
         except HTTPRequestError:
             logger.debug("Connection refused, checking process state....")
@@ -599,14 +602,21 @@ class HoneycombSetup(object):
             "odl_client/odl_netconf_connector")
 
         url_file = "{0}/{1}".format(Const.RESOURCES_TPL_HC,
-                                    "odl_client/mount_honeycomb.xml")
+                                    "odl_client/mount_honeycomb.json")
 
         with open(url_file) as template:
             data = template.read()
 
+        data = loads(data)
+
         status_code, _ = HTTPRequest.post(
-            node, path, headers={"Content-Type": "application/xml"},
-            payload=data, timeout=10, enable_logging=False)
+            node,
+            path,
+            headers={"Content-Type": "application/json",
+                     "Accept": "text/plain"},
+            json=data,
+            timeout=10,
+            enable_logging=False)
 
         if status_code == HTTPCodes.OK:
             logger.info("ODL mount point configured successfully.")
@@ -660,6 +670,7 @@ class HoneycombSetup(object):
         if int(ret_code) != 0:
             logger.debug("VPP service refused to shut down.")
 
+
 class HoneycombStartupConfig(object):
     """Generator for Honeycomb startup configuration.
     """
@@ -682,7 +693,7 @@ class HoneycombStartupConfig(object):
         done
         """
 
-        self.java_call = "{scheduler} {affinity} java {jit_mode} {params}"
+        self.java_call = "{scheduler} {affinity} java{jit_mode}{params}"
 
         self.scheduler = ""
         self.core_affinity = ""
@@ -714,8 +725,8 @@ class HoneycombStartupConfig(object):
         self.ssh.connect(node)
         cmd = "echo '{config}' > /tmp/honeycomb " \
               "&& chmod +x /tmp/honeycomb " \
-              "&& sudo mv -f /tmp/honeycomb /opt/honeycomb".format(
-                config=self.config)
+              "&& sudo mv -f /tmp/honeycomb /opt/honeycomb".\
+            format(config=self.config)
         self.ssh.exec_command(cmd)
 
     def set_cpu_scheduler(self, scheduler="FIFO"):
@@ -752,9 +763,9 @@ class HoneycombStartupConfig(object):
         :type jit_mode: str
         """
 
-        modes = {"client": "-client",  # Default
-                 "server": "-server",  # Higher performance but longer warmup
-                 "classic": "-classic"  # Disables JIT compiler
+        modes = {"client": " -client",  # Default
+                 "server": " -server",  # Higher performance but longer warmup
+                 "classic": " -classic"  # Disables JIT compiler
                 }
 
         self.jit_mode = modes[jit_mode]
@@ -791,3 +802,10 @@ class HoneycombStartupConfig(object):
         architectures."""
 
         self.params += " -XX:+UseNUMA -XX:+UseParallelGC"
+
+    def set_ssh_security_provider(self):
+        """Append the JVM option that disables BouncyCastle for SSHD."""
+        # Workaround for the issue described in:
+        # https://wiki.fd.io/view/Honeycomb/Releases/1609/Honeycomb_and_ODL
+        option = " -Dorg.apache.sshd.registerBouncyCastle=false"
+        self.params += option