Update vpp-agent version
[csit.git] / resources / libraries / python / KubernetesUtils.py
index 5faa056..89c5bd2 100644 (file)
@@ -13,8 +13,7 @@
 
 """Library to control Kubernetes kubectl."""
 
-import time
-import yaml
+from time import sleep
 
 from resources.libraries.python.constants import Constants
 from resources.libraries.python.topology import NodeType
@@ -24,6 +23,8 @@ from resources.libraries.python.VppConfigGenerator import VppConfigGenerator
 
 __all__ = ["KubernetesUtils"]
 
+# Maximum number of retries to check if PODs are running or deleted.
+MAX_RETRY = 48
 
 class KubernetesUtils(object):
     """Kubernetes utilities class."""
@@ -43,13 +44,17 @@ class KubernetesUtils(object):
         ssh = SSH()
         ssh.connect(node)
 
-        cmd = '{dir}/{lib}/k8s_setup.sh '.format(dir=Constants.REMOTE_FW_DIR,
-                                                 lib=Constants.RESOURCES_LIB_SH)
-        (ret_code, _, _) = ssh.exec_command(cmd, timeout=120)
+        cmd = '{dir}/{lib}/k8s_setup.sh deploy_calico'\
+            .format(dir=Constants.REMOTE_FW_DIR,
+                    lib=Constants.RESOURCES_LIB_SH)
+        (ret_code, _, _) = ssh.exec_command(cmd, timeout=240)
         if int(ret_code) != 0:
             raise RuntimeError('Failed to setup Kubernetes on {node}.'
                                .format(node=node['host']))
 
+        KubernetesUtils.wait_for_kubernetes_pods_on_node(node,
+                                                         nspace='kube-system')
+
     @staticmethod
     def setup_kubernetes_on_all_duts(nodes):
         """Set up Kubernetes on all DUTs.
@@ -61,6 +66,36 @@ class KubernetesUtils(object):
             if node['type'] == NodeType.DUT:
                 KubernetesUtils.setup_kubernetes_on_node(node)
 
+    @staticmethod
+    def destroy_kubernetes_on_node(node):
+        """Destroy Kubernetes on node.
+
+        :param node: DUT node.
+        :type node: dict
+        :raises RuntimeError: If destroying Kubernetes failed.
+        """
+        ssh = SSH()
+        ssh.connect(node)
+
+        cmd = '{dir}/{lib}/k8s_setup.sh destroy'\
+            .format(dir=Constants.REMOTE_FW_DIR,
+                    lib=Constants.RESOURCES_LIB_SH)
+        (ret_code, _, _) = ssh.exec_command(cmd, timeout=120)
+        if int(ret_code) != 0:
+            raise RuntimeError('Failed to destroy Kubernetes on {node}.'
+                               .format(node=node['host']))
+
+    @staticmethod
+    def destroy_kubernetes_on_all_duts(nodes):
+        """Destroy Kubernetes on all DUTs.
+
+        :param nodes: Topology nodes.
+        :type nodes: dict
+        """
+        for node in nodes.values():
+            if node['type'] == NodeType.DUT:
+                KubernetesUtils.destroy_kubernetes_on_node(node)
+
     @staticmethod
     def apply_kubernetes_resource_on_node(node, yaml_file, **kwargs):
         """Apply Kubernetes resource on node.
@@ -76,18 +111,15 @@ class KubernetesUtils(object):
         ssh = SSH()
         ssh.connect(node)
 
-        stream = file('{tpl}/{yaml}'.format(tpl=Constants.RESOURCES_TPL_K8S,
-                                            yaml=yaml_file), 'r')
-
-        for data in yaml.load_all(stream):
+        fqn_file = '{tpl}/{yaml}'.format(tpl=Constants.RESOURCES_TPL_K8S,
+                                         yaml=yaml_file)
+        with open(fqn_file, 'r') as src_file:
+            stream = src_file.read()
             data = reduce(lambda a, kv: a.replace(*kv), kwargs.iteritems(),
-                          yaml.dump(data, default_flow_style=False))
-            # Workaround to avoid using RAW string anotated with | in YAML as
-            # library + bash is misinterpreting spaces.
-            data = data.replace('.conf:\n', '.conf: |\n')
+                          stream)
             cmd = 'cat <<EOF | kubectl apply -f - \n{data}\nEOF'.format(
                 data=data)
-            (ret_code, _, _) = ssh.exec_command_sudo(cmd, timeout=120)
+            (ret_code, _, _) = ssh.exec_command_sudo(cmd)
             if int(ret_code) != 0:
                 raise RuntimeError('Failed to apply Kubernetes template {yaml} '
                                    'on {node}.'.format(yaml=yaml_file,
@@ -111,202 +143,274 @@ class KubernetesUtils(object):
                                                                   **kwargs)
 
     @staticmethod
-    def create_kubernetes_cm_from_file_on_node(node, name, key, src_file):
+    def create_kubernetes_cm_from_file_on_node(node, nspace, name, **kwargs):
         """Create Kubernetes ConfigMap from file on node.
 
         :param node: DUT node.
+        :param nspace: Kubernetes namespace.
         :param name: ConfigMap name.
-        :param key: Key (destination file).
-        :param src_file: Source file.
+        :param kwargs: Named parameters.
         :type node: dict
+        :type nspace: str
         :type name: str
-        :type key: str
-        :type src_file: str
+        :param kwargs: dict
         :raises RuntimeError: If creating Kubernetes ConfigMap failed.
         """
         ssh = SSH()
         ssh.connect(node)
 
-        cmd = 'kubectl create -n csit configmap {name} --from-file={key}='\
-            '{src_file}'.format(name=name, key=key, src_file=src_file)
-        (ret_code, _, _) = ssh.exec_command_sudo(cmd, timeout=120)
+        nspace = '-n {nspace}'.format(nspace=nspace) if nspace else ''
+
+        from_file = '{0}'.format(' '.join('--from-file={0}={1} '\
+            .format(key, kwargs[key]) for key in kwargs))
+
+        cmd = 'kubectl create {nspace} configmap {name} {from_file}'\
+            .format(nspace=nspace, name=name, from_file=from_file)
+        (ret_code, _, _) = ssh.exec_command_sudo(cmd)
         if int(ret_code) != 0:
-            raise RuntimeError('Failed to create Kubernetes ConfigMap {name} '
-                               'on {node}.'.format(name=name,
-                                                   node=node['host']))
+            raise RuntimeError('Failed to create Kubernetes ConfigMap '
+                               'on {node}.'.format(node=node['host']))
 
     @staticmethod
-    def create_kubernetes_cm_from_file_on_all_duts(nodes, name, key, src_file):
+    def create_kubernetes_cm_from_file_on_all_duts(nodes, nspace, name,
+                                                   **kwargs):
         """Create Kubernetes ConfigMap from file on all DUTs.
 
         :param nodes: Topology nodes.
+        :param nspace: Kubernetes namespace.
         :param name: ConfigMap name.
-        :param key: Key (destination file).
-        :param src_file: Source file.
+        :param kwargs: Named parameters.
         :type nodes: dict
+        :type nspace: str
         :type name: str
-        :type key: str
-        :type src_file: str
+        :param kwargs: dict
         """
         for node in nodes.values():
             if node['type'] == NodeType.DUT:
                 KubernetesUtils.create_kubernetes_cm_from_file_on_node(node,
+                                                                       nspace,
                                                                        name,
-                                                                       key,
-                                                                       src_file)
+                                                                       **kwargs)
 
     @staticmethod
-    def delete_kubernetes_resource_on_node(node, rtype='po,cm', name=None):
+    def delete_kubernetes_resource_on_node(node, nspace, name=None,
+                                           rtype='po,cm,deploy,rs,rc,svc'):
         """Delete Kubernetes resource on node.
 
         :param node: DUT node.
+        :param nspace: Kubernetes namespace.
         :param rtype: Kubernetes resource type.
-        :param name: Name of resource.
+        :param name: Name of resource (Default: all).
         :type node: dict
+        :type nspace: str
         :type rtype: str
         :type name: str
-        :raises RuntimeError: If deleting Kubernetes resource failed.
+        :raises RuntimeError: If retrieving or deleting Kubernetes resource
+        failed.
         """
         ssh = SSH()
         ssh.connect(node)
 
         name = '{name}'.format(name=name) if name else '--all'
+        nspace = '-n {nspace}'.format(nspace=nspace) if nspace else ''
 
-        cmd = 'kubectl delete -n csit {rtype} {name}'\
-            .format(rtype=rtype, name=name)
-        (ret_code, _, _) = ssh.exec_command_sudo(cmd, timeout=120)
+        cmd = 'kubectl delete {nspace} {rtype} {name}'\
+            .format(nspace=nspace, rtype=rtype, name=name)
+        (ret_code, _, _) = ssh.exec_command_sudo(cmd)
         if int(ret_code) != 0:
-            raise RuntimeError('Failed to delete Kubernetes resources in CSIT '
-                               'namespace on {node}.'.format(node=node['host']))
+            raise RuntimeError('Failed to delete Kubernetes resources '
+                               'on {node}.'.format(node=node['host']))
 
-        cmd = 'kubectl get -n csit pods --no-headers'
-        for _ in range(24):
-            (ret_code, stdout, _) = ssh.exec_command_sudo(cmd, timeout=120)
-            if int(ret_code) == 0:
-                ready = True
+        cmd = 'kubectl get {nspace} pods -a --no-headers'\
+            .format(nspace=nspace)
+        for _ in range(MAX_RETRY):
+            (ret_code, stdout, stderr) = ssh.exec_command_sudo(cmd)
+            if int(ret_code) != 0:
+                raise RuntimeError('Failed to retrieve Kubernetes resources on '
+                                   '{node}.'.format(node=node['host']))
+            if name == '--all':
+                ready = False
+                for line in stderr.splitlines():
+                    if 'No resources found.' in line:
+                        ready = True
+                if ready:
+                    break
+            else:
+                ready = False
                 for line in stdout.splitlines():
-                    if 'No resources found.' not in line:
+                    try:
+                        state = line.split()[1].split('/')
+                        ready = True if 'Running' in line and\
+                            state == state[::-1] else False
+                        if not ready:
+                            break
+                    except (ValueError, IndexError):
                         ready = False
                 if ready:
                     break
-            time.sleep(5)
+            sleep(5)
         else:
-            raise RuntimeError('Failed to delete Kubernetes resources in CSIT '
-                               'namespace on {node}.'.format(node=node['host']))
+            raise RuntimeError('Failed to delete Kubernetes resources on '
+                               '{node}.'.format(node=node['host']))
 
     @staticmethod
-    def delete_kubernetes_resource_on_all_duts(nodes, rtype='po,cm', name=None):
+    def delete_kubernetes_resource_on_all_duts(nodes, nspace, name=None,
+                                               rtype='po,cm,deploy,rs,rc,svc'):
         """Delete all Kubernetes resource on all DUTs.
 
         :param nodes: Topology nodes.
+        :param nspace: Kubernetes namespace.
         :param rtype: Kubernetes resource type.
         :param name: Name of resource.
         :type nodes: dict
+        :type nspace: str
         :type rtype: str
         :type name: str
         """
         for node in nodes.values():
             if node['type'] == NodeType.DUT:
-                KubernetesUtils.delete_kubernetes_resource_on_node(node, rtype,
-                                                                   name)
+                KubernetesUtils.delete_kubernetes_resource_on_node(node, nspace,
+                                                                   name, rtype)
 
     @staticmethod
-    def describe_kubernetes_resource_on_node(node, rtype='po,cm'):
-        """Describe Kubernetes resource on node.
+    def describe_kubernetes_resource_on_node(node, nspace):
+        """Describe all Kubernetes PODs in namespace on node.
 
         :param node: DUT node.
-        :param rtype: Kubernetes resource type.
+        :param nspace: Kubernetes namespace.
         :type node: dict
-        :type rtype: str
-        :raises RuntimeError: If describing Kubernetes resource failed.
+        :type nspace: str
         """
         ssh = SSH()
         ssh.connect(node)
 
-        cmd = 'kubectl describe -n csit {rtype}'.format(rtype=rtype)
-        (ret_code, _, _) = ssh.exec_command_sudo(cmd, timeout=120)
-        if int(ret_code) != 0:
-            raise RuntimeError('Failed to describe Kubernetes resource on '
-                               '{node}.'.format(node=node['host']))
+        nspace = '-n {nspace}'.format(nspace=nspace) if nspace else ''
+
+        cmd = 'kubectl describe {nspace} all'.format(nspace=nspace)
+        ssh.exec_command_sudo(cmd)
 
     @staticmethod
-    def describe_kubernetes_resource_on_all_duts(nodes, rtype='po,cm'):
-        """Describe Kubernetes resource on all DUTs.
+    def describe_kubernetes_resource_on_all_duts(nodes, nspace):
+        """Describe all Kubernetes PODs in namespace on all DUTs.
 
         :param nodes: Topology nodes.
-        :param rtype: Kubernetes resource type.
+        :param nspace: Kubernetes namespace.
         :type nodes: dict
-        :type rtype: str
+        :type nspace: str
         """
         for node in nodes.values():
             if node['type'] == NodeType.DUT:
                 KubernetesUtils.describe_kubernetes_resource_on_node(node,
-                                                                     rtype)
+                                                                     nspace)
 
     @staticmethod
-    def reset_kubernetes_on_node(node):
-        """Reset Kubernetes on node.
+    def get_kubernetes_logs_on_node(node, nspace):
+        """Get Kubernetes logs from all PODs in namespace on node.
 
         :param node: DUT node.
+        :param nspace: Kubernetes namespace.
         :type node: dict
-        :raises RuntimeError: If resetting Kubernetes failed.
+        :type nspace: str
         """
         ssh = SSH()
         ssh.connect(node)
 
-        cmd = 'kubeadm reset && rm -rf $HOME/.kube'
-        (ret_code, _, _) = ssh.exec_command_sudo(cmd, timeout=120)
-        if int(ret_code) != 0:
-            raise RuntimeError('Failed to reset Kubernetes on {node}.'
-                               .format(node=node['host']))
+        nspace = '-n {nspace}'.format(nspace=nspace) if nspace else ''
+
+        cmd = "for p in $(kubectl get pods {nspace} -o jsonpath="\
+            "'{{.items[*].metadata.name}}'); do echo $p; kubectl logs "\
+            "{nspace} $p; done".format(nspace=nspace)
+        ssh.exec_command(cmd)
 
     @staticmethod
-    def reset_kubernetes_on_all_duts(nodes):
-        """Reset Kubernetes on all DUTs.
+    def get_kubernetes_logs_on_all_duts(nodes, nspace):
+        """Get Kubernetes logs from all PODs in namespace on all DUTs.
 
         :param nodes: Topology nodes.
+        :param nspace: Kubernetes namespace.
         :type nodes: dict
+        :type nspace: str
         """
         for node in nodes.values():
             if node['type'] == NodeType.DUT:
-                KubernetesUtils.reset_kubernetes_on_node(node)
+                KubernetesUtils.get_kubernetes_logs_on_node(node, nspace)
 
     @staticmethod
-    def wait_for_kubernetes_pods_on_node(node):
-        """Wait for Kubernetes PODs to become in 'Running' state on node.
+    def wait_for_kubernetes_pods_on_node(node, nspace):
+        """Wait for Kubernetes PODs to become ready on node.
 
         :param node: DUT node.
+        :param nspace: Kubernetes namespace.
         :type node: dict
-        :raises RuntimeError: If Kubernetes PODs are not ready.
+        :type nspace: str
+        :raises RuntimeError: If Kubernetes PODs are not in Running state.
         """
         ssh = SSH()
         ssh.connect(node)
 
-        cmd = 'kubectl get -n csit pods --no-headers'
-        for _ in range(48):
-            (ret_code, stdout, _) = ssh.exec_command_sudo(cmd, timeout=120)
+        nspace = '-n {nspace}'.format(nspace=nspace) if nspace \
+            else '--all-namespaces'
+
+        cmd = 'kubectl get {nspace} pods -a --no-headers' \
+            .format(nspace=nspace)
+        for _ in range(MAX_RETRY):
+            (ret_code, stdout, _) = ssh.exec_command_sudo(cmd)
             if int(ret_code) == 0:
-                ready = True
+                ready = False
                 for line in stdout.splitlines():
-                    if 'Running' not in line:
+                    try:
+                        state = line.split()[1].split('/')
+                        ready = True if 'Running' in line and \
+                            state == state[::-1] else False
+                        if not ready:
+                            break
+                    except (ValueError, IndexError):
                         ready = False
                 if ready:
                     break
-            time.sleep(5)
+            sleep(5)
         else:
-            raise RuntimeError('Kubernetes PODs are not ready on {node}.'
+            raise RuntimeError('Kubernetes PODs are not running on {node}.'
                                .format(node=node['host']))
 
     @staticmethod
-    def wait_for_kubernetes_pods_on_all_duts(nodes):
-        """Wait for Kubernetes PODs to become in Running state on all DUTs.
+    def wait_for_kubernetes_pods_on_all_duts(nodes, nspace):
+        """Wait for Kubernetes to become ready on all DUTs.
+
+        :param nodes: Topology nodes.
+        :param nspace: Kubernetes namespace.
+        :type nodes: dict
+        :type nspace: str
+        """
+        for node in nodes.values():
+            if node['type'] == NodeType.DUT:
+                KubernetesUtils.wait_for_kubernetes_pods_on_node(node, nspace)
+
+    @staticmethod
+    def set_kubernetes_pods_affinity_on_node(node):
+        """Set affinity for all Kubernetes PODs except VPP on node.
+
+        :param node: DUT node.
+        :type node: dict
+        """
+        ssh = SSH()
+        ssh.connect(node)
+
+        cmd = '{dir}/{lib}/k8s_setup.sh affinity_non_vpp'\
+            .format(dir=Constants.REMOTE_FW_DIR,
+                    lib=Constants.RESOURCES_LIB_SH)
+        ssh.exec_command(cmd)
+
+    @staticmethod
+    def set_kubernetes_pods_affinity_on_all_duts(nodes):
+        """Set affinity for all Kubernetes PODs except VPP on all DUTs.
 
         :param nodes: Topology nodes.
         :type nodes: dict
         """
         for node in nodes.values():
             if node['type'] == NodeType.DUT:
-                KubernetesUtils.wait_for_kubernetes_pods_on_node(node)
+                KubernetesUtils.set_kubernetes_pods_affinity_on_node(node)
 
     @staticmethod
     def create_kubernetes_vswitch_startup_config(**kwargs):
@@ -350,20 +454,27 @@ class KubernetesUtils(object):
         :param kwargs: Key-value pairs used to create configuration.
         :param kwargs: dict
         """
+        skip_cnt = kwargs['cpu_skip'] + (kwargs['i'] - 1) * \
+            (kwargs['cpu_cnt'] - 1)
         cpuset_cpus = \
             CpuUtils.cpu_slice_of_list_per_node(node=kwargs['node'],
                                                 cpu_node=kwargs['cpu_node'],
-                                                skip_cnt=kwargs['cpu_skip'],
-                                                cpu_cnt=kwargs['cpu_cnt'],
+                                                skip_cnt=skip_cnt,
+                                                cpu_cnt=kwargs['cpu_cnt']-1,
+                                                smt_used=kwargs['smt_used'])
+        cpuset_main = \
+            CpuUtils.cpu_slice_of_list_per_node(node=kwargs['node'],
+                                                cpu_node=kwargs['cpu_node'],
+                                                skip_cnt=1,
+                                                cpu_cnt=1,
                                                 smt_used=kwargs['smt_used'])
-
         # Create config instance
         vpp_config = VppConfigGenerator()
         vpp_config.set_node(kwargs['node'])
         vpp_config.add_unix_cli_listen(value='0.0.0.0:5002')
         vpp_config.add_unix_nodaemon()
         # We will pop first core from list to be main core
-        vpp_config.add_cpu_main_core(str(cpuset_cpus.pop(0)))
+        vpp_config.add_cpu_main_core(str(cpuset_main.pop(0)))
         # if this is not only core in list, the rest will be used as workers.
         if cpuset_cpus:
             corelist_workers = ','.join(str(cpu) for cpu in cpuset_cpus)