X-Git-Url: https://gerrit.fd.io/r/gitweb?p=csit.git;a=blobdiff_plain;f=resources%2Flibraries%2Fpython%2FDUTSetup.py;h=16acfba7adbc7de8bc14bea4a4ddf4267e907438;hp=5e07ee0e7a01685f58530ab2f31c6f1c21ff92f7;hb=6b86c6fa1315f5c12c55bdd289b4e2af7d710c39;hpb=ca163ffc171954c6b23fc8a715b2b7ca4c47cccf diff --git a/resources/libraries/python/DUTSetup.py b/resources/libraries/python/DUTSetup.py index 5e07ee0e7a..16acfba7ad 100644 --- a/resources/libraries/python/DUTSetup.py +++ b/resources/libraries/python/DUTSetup.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. +# Copyright (c) 2021 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -13,42 +13,40 @@ """DUT setup library.""" +from time import sleep from robot.api import logger +from resources.libraries.python.Constants import Constants +from resources.libraries.python.ssh import SSH, exec_cmd, exec_cmd_no_error from resources.libraries.python.topology import NodeType, Topology -from resources.libraries.python.ssh import SSH -from resources.libraries.python.constants import Constants -from resources.libraries.python.VatExecutor import VatExecutor -from resources.libraries.python.VPPUtil import VPPUtil -class DUTSetup(object): +class DUTSetup: """Contains methods for setting up DUTs.""" @staticmethod def get_service_logs(node, service): - """Get specific service unit logs by journalctl from node. + """Get specific service unit logs from node. :param node: Node in the topology. :param service: Service unit name. :type node: dict :type service: str """ - ssh = SSH() - ssh.connect(node) - ret_code, _, _ = \ - ssh.exec_command_sudo('journalctl --no-pager --unit={name} ' - '--since="$(echo `systemctl show -p ' - 'ActiveEnterTimestamp {name}` | ' - 'awk \'{{print $2 $3}}\')"'. - format(name=service)) - if int(ret_code) != 0: - raise RuntimeError('DUT {host} failed to get logs from unit {name}'. - format(host=node['host'], name=service)) + command = u"cat /tmp/*supervisor*.log"\ + if DUTSetup.running_in_container(node) \ + else f"journalctl --no-pager _SYSTEMD_INVOCATION_ID=$(systemctl " \ + f"show -p InvocationID --value {service})" + + message = f"Node {node[u'host']} failed to get logs from unit {service}" + + exec_cmd_no_error( + node, command, timeout=30, sudo=True, message=message + ) @staticmethod def get_service_logs_on_all_duts(nodes, service): - """Get specific service unit logs by journalctl from all DUTs. + """Get specific service unit logs from all DUTs. :param nodes: Nodes in the topology. :param service: Service unit name. @@ -56,170 +54,195 @@ class DUTSetup(object): :type service: str """ for node in nodes.values(): - if node['type'] == NodeType.DUT: + if node[u"type"] == NodeType.DUT: DUTSetup.get_service_logs(node, service) @staticmethod - def start_service(node, service): - """Start up the named service on node. + def restart_service(node, service): + """Restart the named service on node. :param node: Node in the topology. :param service: Service unit name. :type node: dict :type service: str """ - ssh = SSH() - ssh.connect(node) - # We are doing restart. With this we do not care if service - # was running or not. - ret_code, _, _ = \ - ssh.exec_command_sudo('service {name} restart'. - format(name=service), timeout=120) - if int(ret_code) != 0: - raise RuntimeError('DUT {host} failed to start service {name}'. - format(host=node['host'], name=service)) + command = f"supervisorctl restart {service}" \ + if DUTSetup.running_in_container(node) \ + else f"service {service} restart" + message = f"Node {node[u'host']} failed to restart service {service}" + + exec_cmd_no_error( + node, command, timeout=180, sudo=True, message=message + ) DUTSetup.get_service_logs(node, service) @staticmethod - def start_vpp_service_on_all_duts(nodes): - """Start up the VPP service on all nodes. + def restart_service_on_all_duts(nodes, service): + """Restart the named service on all DUTs. :param nodes: Nodes in the topology. + :param service: Service unit name. :type nodes: dict + :type service: str """ for node in nodes.values(): - if node['type'] == NodeType.DUT: - DUTSetup.start_service(node, Constants.VPP_UNIT) + if node[u"type"] == NodeType.DUT: + DUTSetup.restart_service(node, service) @staticmethod - def vpp_show_version_verbose(node): - """Run "show version verbose" CLI command. + def start_service(node, service): + """Start up the named service on node. - :param node: Node to run command on. + :param node: Node in the topology. + :param service: Service unit name. :type node: dict + :type service: str """ - vat = VatExecutor() - vat.execute_script("show_version_verbose.vat", node, json_out=False) + # TODO: change command to start once all parent function updated. + command = f"supervisorctl restart {service}" \ + if DUTSetup.running_in_container(node) \ + else f"service {service} restart" + message = f"Node {node[u'host']} failed to start service {service}" - try: - vat.script_should_have_passed() - except AssertionError: - raise RuntimeError('Failed to get VPP version on host: {name}'. - format(name=node['host'])) + exec_cmd_no_error( + node, command, timeout=180, sudo=True, message=message + ) + + DUTSetup.get_service_logs(node, service) @staticmethod - def show_vpp_version_on_all_duts(nodes): - """Show VPP version verbose on all DUTs. + def start_service_on_all_duts(nodes, service): + """Start up the named service on all DUTs. - :param nodes: VPP nodes + :param nodes: Nodes in the topology. + :param service: Service unit name. :type nodes: dict + :type service: str """ for node in nodes.values(): - if node['type'] == NodeType.DUT: - DUTSetup.vpp_show_version_verbose(node) + if node[u"type"] == NodeType.DUT: + DUTSetup.start_service(node, service) @staticmethod - def vpp_show_interfaces(node): - """Run "show interface" CLI command. + def stop_service(node, service): + """Stop the named service on node. - :param node: Node to run command on. + :param node: Node in the topology. + :param service: Service unit name. :type node: dict + :type service: str """ - vat = VatExecutor() - vat.execute_script("show_interface.vat", node, json_out=False) + DUTSetup.get_service_logs(node, service) - try: - vat.script_should_have_passed() - except AssertionError: - raise RuntimeError('Failed to get VPP interfaces on host: {name}'. - format(name=node['host'])) + command = f"supervisorctl stop {service}" \ + if DUTSetup.running_in_container(node) \ + else f"service {service} stop" + message = f"Node {node[u'host']} failed to stop service {service}" + + exec_cmd_no_error( + node, command, timeout=180, sudo=True, message=message + ) @staticmethod - def vpp_api_trace_save(node): - """Run "api trace save" CLI command. + def stop_service_on_all_duts(nodes, service): + """Stop the named service on all DUTs. - :param node: Node to run command on. - :type node: dict + :param nodes: Nodes in the topology. + :param service: Service unit name. + :type nodes: dict + :type service: str """ - vat = VatExecutor() - vat.execute_script("api_trace_save.vat", node, json_out=False) + for node in nodes.values(): + if node[u"type"] == NodeType.DUT: + DUTSetup.stop_service(node, service) @staticmethod - def vpp_api_trace_dump(node): - """Run "api trace custom-dump" CLI command. + def kill_program(node, program, namespace=None): + """Kill program on the specified topology node. - :param node: Node to run command on. + :param node: Topology node. + :param program: Program name. + :param namespace: Namespace program is running in. :type node: dict + :type program: str + :type namespace: str """ - vat = VatExecutor() - vat.execute_script("api_trace_dump.vat", node, json_out=False) - - @staticmethod - def setup_all_duts(nodes): - """Prepare all DUTs in given topology for test execution.""" - for node in nodes.values(): - if node['type'] == NodeType.DUT: - DUTSetup.setup_dut(node) + host = node[u"host"] + cmd_timeout = 5 + if namespace in (None, u"default"): + shell_cmd = u"sh -c" + else: + shell_cmd = f"ip netns exec {namespace} sh -c" + + pgrep_cmd = f"{shell_cmd} \'pgrep -c {program}\'" + _, stdout, _ = exec_cmd(node, pgrep_cmd, timeout=cmd_timeout, + sudo=True) + if int(stdout) == 0: + logger.trace(f"{program} is not running on {host}") + return + exec_cmd(node, f"{shell_cmd} \'pkill {program}\'", + timeout=cmd_timeout, sudo=True) + for attempt in range(5): + _, stdout, _ = exec_cmd(node, pgrep_cmd, timeout=cmd_timeout, + sudo=True) + if int(stdout) == 0: + logger.trace(f"Attempt {attempt}: {program} is dead on {host}") + return + sleep(1) + logger.trace(f"SIGKILLing {program} on {host}") + exec_cmd(node, f"{shell_cmd} \'pkill -9 {program}\'", + timeout=cmd_timeout, sudo=True) @staticmethod - def setup_dut(node): - """Run script over SSH to setup the DUT node. + def verify_program_installed(node, program): + """Verify that program is installed on the specified topology node. - :param node: DUT node to set up. + :param node: Topology node. + :param program: Program name. :type node: dict - - :raises Exception: If the DUT setup fails. + :type program: str """ - ssh = SSH() - ssh.connect(node) - - ret_code, _, _ = \ - ssh.exec_command('sudo -Sn bash {0}/{1}/dut_setup.sh'. - format(Constants.REMOTE_FW_DIR, - Constants.RESOURCES_LIB_SH), timeout=120) - if int(ret_code) != 0: - raise RuntimeError('DUT test setup script failed at node {name}'. - format(name=node['host'])) + cmd = f"command -v {program}" + exec_cmd_no_error(node, cmd, message=f"{program} is not installed") @staticmethod - def get_vpp_pid(node): - """Get PID of running VPP process. + def get_pid(node, process): + """Get PID of running process. :param node: DUT node. + :param process: process name. :type node: dict + :type process: str :returns: PID :rtype: int :raises RuntimeError: If it is not possible to get the PID. """ - ssh = SSH() ssh.connect(node) + retval = None for i in range(3): - logger.trace('Try {}: Get VPP PID'.format(i)) - ret_code, stdout, stderr = ssh.exec_command('pidof vpp') - - if int(ret_code) != 0: - raise RuntimeError('Not possible to get PID of VPP process ' - 'on node: {0}\n {1}'. - format(node['host'], stdout + stderr)) - - if len(stdout.splitlines()) == 1: - return int(stdout) - elif len(stdout.splitlines()) == 0: - logger.debug("No VPP PID found on node {0}". - format(node['host'])) + logger.trace(f"Try {i}: Get {process} PID") + ret_code, stdout, stderr = ssh.exec_command(f"pidof {process}") + + if int(ret_code): + raise RuntimeError( + f"Not possible to get PID of {process} process on node: " + f"{node[u'host']}\n {stdout + stderr}" + ) + + pid_list = stdout.split() + if len(pid_list) == 1: + return [int(stdout)] + if not pid_list: + logger.debug(f"No {process} PID found on node {node[u'host']}") continue - else: - logger.debug("More then one VPP PID found on node {0}". - format(node['host'])) - ret_list = list() - for line in stdout.splitlines(): - ret_list.append(int(line)) - return ret_list + logger.debug(f"More than one {process} PID found " \ + f"on node {node[u'host']}") + retval = [int(pid) for pid in pid_list] - return None + return retval @staticmethod def get_vpp_pids(nodes): @@ -230,107 +253,176 @@ class DUTSetup(object): :returns: PIDs :rtype: dict """ - pids = dict() for node in nodes.values(): - if node['type'] == NodeType.DUT: - pids[node['host']] = DUTSetup.get_vpp_pid(node) + if node[u"type"] == NodeType.DUT: + pids[node[u"host"]] = DUTSetup.get_pid(node, u"vpp") return pids @staticmethod - def vpp_show_crypto_device_mapping(node): - """Run "show crypto device mapping" CLI command. - - :param node: Node to run command on. - :type node: dict - """ - vat = VatExecutor() - vat.execute_script("show_crypto_device_mapping.vat", node, - json_out=False) - - @staticmethod - def crypto_device_verify(node, force_init=False, numvfs=32): + def crypto_device_verify(node, crypto_type, numvfs, force_init=False): """Verify if Crypto QAT device virtual functions are initialized on all DUTs. If parameter force initialization is set to True, then try to - initialize or disable QAT. + initialize or remove VFs on QAT. :param node: DUT node. - :param force_init: If True then try to initialize to specific value. + :crypto_type: Crypto device type - HW_DH895xcc or HW_C3xxx. :param numvfs: Number of VFs to initialize, 0 - disable the VFs. + :param force_init: If True then try to initialize to specific value. :type node: dict - :type force_init: bool + :type crypto_type: string :type numvfs: int + :type force_init: bool :returns: nothing - :raises RuntimeError: If QAT is not initialized or failed to initialize. + :raises RuntimeError: If QAT VFs are not created and force init is set + to False. """ + pci_addr = Topology.get_cryptodev(node) + sriov_numvfs = DUTSetup.get_sriov_numvfs(node, pci_addr) - ssh = SSH() - ssh.connect(node) - - cryptodev = Topology.get_cryptodev(node) - cmd = 'cat /sys/bus/pci/devices/{0}/sriov_numvfs'.\ - format(cryptodev.replace(':', r'\:')) + if sriov_numvfs != numvfs: + if force_init: + # QAT is not initialized and we want to initialize with numvfs + DUTSetup.crypto_device_init(node, crypto_type, numvfs) + else: + raise RuntimeError( + f"QAT device failed to create VFs on {node[u'host']}" + ) - # Try to read number of VFs from PCI address of QAT device - for _ in range(3): - ret_code, stdout, _ = ssh.exec_command(cmd) - if int(ret_code) == 0: - try: - sriov_numvfs = int(stdout) - except ValueError: - logger.trace('Reading sriov_numvfs info failed on {0}'. - format(node['host'])) - else: - if sriov_numvfs != numvfs: - if force_init: - # QAT is not initialized and we want to initialize - # with numvfs - DUTSetup.crypto_device_init(node, numvfs) - else: - raise RuntimeError('QAT device {0} is not ' - 'initialized to {1} on host {2}' - .format(cryptodev, numvfs, - node['host'])) - break - - @staticmethod - def crypto_device_init(node, numvfs): + @staticmethod + def crypto_device_init(node, crypto_type, numvfs): """Init Crypto QAT device virtual functions on DUT. :param node: DUT node. + :crypto_type: Crypto device type - HW_DH895xcc or HW_C3xxx. :param numvfs: Number of VFs to initialize, 0 - disable the VFs. :type node: dict + :type crypto_type: string :type numvfs: int :returns: nothing :raises RuntimeError: If failed to stop VPP or QAT failed to initialize. """ - cryptodev = Topology.get_cryptodev(node) + if crypto_type == u"HW_DH895xcc": + kernel_mod = u"qat_dh895xcc" + kernel_drv = u"dh895xcc" + elif crypto_type == u"HW_C3xxx": + kernel_mod = u"qat_c3xxx" + kernel_drv = u"c3xxx" + else: + raise RuntimeError( + f"Unsupported crypto device type on {node[u'host']}" + ) - # QAT device must be re-bound to kernel driver before initialization - driver = 'dh895xcc' - kernel_module = 'qat_dh895xcc' - current_driver = DUTSetup.get_pci_dev_driver( - node, cryptodev.replace(':', r'\:')) + pci_addr = Topology.get_cryptodev(node) + + # QAT device must be re-bound to kernel driver before initialization. + DUTSetup.verify_kernel_module(node, kernel_mod, force_load=True) - DUTSetup.kernel_module_verify(node, kernel_module, force_load=True) + # Stop VPP to prevent deadlock. + DUTSetup.stop_service(node, Constants.VPP_UNIT) - VPPUtil.stop_vpp_service(node) + current_driver = DUTSetup.get_pci_dev_driver( + node, pci_addr.replace(u":", r"\:") + ) if current_driver is not None: - DUTSetup.pci_driver_unbind(node, cryptodev) - DUTSetup.pci_driver_bind(node, cryptodev, driver) + DUTSetup.pci_driver_unbind(node, pci_addr) - ssh = SSH() - ssh.connect(node) + # Bind to kernel driver. + DUTSetup.pci_driver_bind(node, pci_addr, kernel_drv) - # Initialize QAT VFs + # Initialize QAT VFs. if numvfs > 0: - cmd = 'echo "{0}" | tee /sys/bus/pci/devices/{1}/sriov_numvfs'.\ - format(numvfs, cryptodev.replace(':', r'\:'), timeout=180) - ret_code, _, _ = ssh.exec_command_sudo("sh -c '{0}'".format(cmd)) + DUTSetup.set_sriov_numvfs(node, pci_addr, numvfs) + + @staticmethod + def get_virtfn_pci_addr(node, pf_pci_addr, vf_id): + """Get PCI address of Virtual Function. + + :param node: DUT node. + :param pf_pci_addr: Physical Function PCI address. + :param vf_id: Virtual Function number. + :type node: dict + :type pf_pci_addr: str + :type vf_id: int + :returns: Virtual Function PCI address. + :rtype: str + :raises RuntimeError: If failed to get Virtual Function PCI address. + """ + command = f"sh -c \"basename $(readlink " \ + f"/sys/bus/pci/devices/{pf_pci_addr}/virtfn{vf_id})\"" + message = u"Failed to get virtual function PCI address." - if int(ret_code) != 0: - raise RuntimeError('Failed to initialize {0} VFs on QAT device ' - ' on host {1}'.format(numvfs, node['host'])) + stdout, _ = exec_cmd_no_error( + node, command, timeout=30, sudo=True, message=message + ) + + return stdout.strip() + + @staticmethod + def get_sriov_numvfs(node, pf_pci_addr): + """Get number of SR-IOV VFs. + + :param node: DUT node. + :param pf_pci_addr: Physical Function PCI device address. + :type node: dict + :type pf_pci_addr: str + :returns: Number of VFs. + :rtype: int + :raises RuntimeError: If PCI device is not SR-IOV capable. + """ + pci = pf_pci_addr.replace(u":", r"\:") + command = f"cat /sys/bus/pci/devices/{pci}/sriov_numvfs" + message = f"PCI device {pf_pci_addr} is not a SR-IOV device." + + for _ in range(3): + stdout, _ = exec_cmd_no_error( + node, command, timeout=30, sudo=True, message=message + ) + try: + sriov_numvfs = int(stdout) + except ValueError: + logger.trace( + f"Reading sriov_numvfs info failed on {node[u'host']}" + ) + else: + return sriov_numvfs + + @staticmethod + def set_sriov_numvfs(node, pf_pci_addr, numvfs=0): + """Init or reset SR-IOV virtual functions by setting its number on PCI + device on DUT. Setting to zero removes all VFs. + + :param node: DUT node. + :param pf_pci_addr: Physical Function PCI device address. + :param numvfs: Number of VFs to initialize, 0 - removes the VFs. + :type node: dict + :type pf_pci_addr: str + :type numvfs: int + :raises RuntimeError: Failed to create VFs on PCI. + """ + cmd = f"test -f /sys/bus/pci/devices/{pf_pci_addr}/sriov_numvfs" + sriov_unsupported, _, _ = exec_cmd(node, cmd) + # if sriov_numvfs doesn't exist, then sriov_unsupported != 0 + if int(sriov_unsupported): + if numvfs == 0: + # sriov is not supported and we want 0 VFs + # no need to do anything + return + + raise RuntimeError( + f"Can't configure {numvfs} VFs on {pf_pci_addr} device " + f"on {node[u'host']} since it doesn't support SR-IOV." + ) + + pci = pf_pci_addr.replace(u":", r"\:") + command = f"sh -c \"echo {numvfs} | " \ + f"tee /sys/bus/pci/devices/{pci}/sriov_numvfs\"" + message = f"Failed to create {numvfs} VFs on {pf_pci_addr} device " \ + f"on {node[u'host']}" + + exec_cmd_no_error( + node, command, timeout=120, sudo=True, message=message + ) @staticmethod def pci_driver_unbind(node, pci_addr): @@ -340,20 +432,28 @@ class DUTSetup(object): :param pci_addr: PCI device address. :type node: dict :type pci_addr: str - :returns: nothing :raises RuntimeError: If PCI device unbind failed. """ + pci = pci_addr.replace(u":", r"\:") + command = f"sh -c \"echo {pci_addr} | " \ + f"tee /sys/bus/pci/devices/{pci}/driver/unbind\"" + message = f"Failed to unbind PCI device {pci_addr} on {node[u'host']}" - ssh = SSH() - ssh.connect(node) + exec_cmd_no_error( + node, command, timeout=120, sudo=True, message=message + ) - ret_code, _, _ = ssh.exec_command_sudo( - "sh -c 'echo {0} | tee /sys/bus/pci/devices/{1}/driver/unbind'" - .format(pci_addr, pci_addr.replace(':', r'\:')), timeout=180) + @staticmethod + def pci_driver_unbind_list(node, *pci_addrs): + """Unbind PCI devices from current driver on node. - if int(ret_code) != 0: - raise RuntimeError('Failed to unbind PCI device {0} from driver on ' - 'host {1}'.format(pci_addr, node['host'])) + :param node: DUT node. + :param pci_addrs: PCI device addresses. + :type node: dict + :type pci_addrs: list + """ + for pci_addr in pci_addrs: + DUTSetup.pci_driver_unbind(node, pci_addr) @staticmethod def pci_driver_bind(node, pci_addr, driver): @@ -365,254 +465,376 @@ class DUTSetup(object): :type node: dict :type pci_addr: str :type driver: str - :returns: nothing :raises RuntimeError: If PCI device bind failed. """ + message = f"Failed to bind PCI device {pci_addr} to {driver} " \ + f"on host {node[u'host']}" + pci = pci_addr.replace(u":", r"\:") + command = f"sh -c \"echo {driver} | " \ + f"tee /sys/bus/pci/devices/{pci}/driver_override\"" - ssh = SSH() - ssh.connect(node) + exec_cmd_no_error( + node, command, timeout=120, sudo=True, message=message + ) + + command = f"sh -c \"echo {pci_addr} | " \ + f"tee /sys/bus/pci/drivers/{driver}/bind\"" + + exec_cmd_no_error( + node, command, timeout=120, sudo=True, message=message + ) + + command = f"sh -c \"echo | " \ + f"tee /sys/bus/pci/devices/{pci}/driver_override\"" + + exec_cmd_no_error( + node, command, timeout=120, sudo=True, message=message + ) + + @staticmethod + def pci_vf_driver_unbind(node, pf_pci_addr, vf_id): + """Unbind Virtual Function from driver on node. + + :param node: DUT node. + :param pf_pci_addr: PCI device address. + :param vf_id: Virtual Function ID. + :type node: dict + :type pf_pci_addr: str + :type vf_id: int + :raises RuntimeError: If Virtual Function unbind failed. + """ + vf_pci_addr = DUTSetup.get_virtfn_pci_addr(node, pf_pci_addr, vf_id) + pf_pci = pf_pci_addr.replace(u":", r"\:") + vf_path = f"/sys/bus/pci/devices/{pf_pci}/virtfn{vf_id}" - ret_code, _, _ = ssh.exec_command_sudo( - "sh -c 'echo {0} | tee /sys/bus/pci/drivers/{1}/bind'".format( - pci_addr, driver), timeout=180) + command = f"sh -c \"echo {vf_pci_addr} | tee {vf_path}/driver/unbind\"" + message = f"Failed to unbind VF {vf_pci_addr} on {node[u'host']}" - if int(ret_code) != 0: - raise RuntimeError('Failed to bind PCI device {0} to {1} driver on ' - 'host {2}'.format(pci_addr, driver, - node['host'])) + exec_cmd_no_error( + node, command, timeout=120, sudo=True, message=message + ) + + @staticmethod + def pci_vf_driver_bind(node, pf_pci_addr, vf_id, driver): + """Bind Virtual Function to driver on node. + + :param node: DUT node. + :param pf_pci_addr: PCI device address. + :param vf_id: Virtual Function ID. + :param driver: Driver to bind. + :type node: dict + :type pf_pci_addr: str + :type vf_id: int + :type driver: str + :raises RuntimeError: If PCI device bind failed. + """ + vf_pci_addr = DUTSetup.get_virtfn_pci_addr(node, pf_pci_addr, vf_id) + pf_pci = pf_pci_addr.replace(u":", r'\:') + vf_path = f"/sys/bus/pci/devices/{pf_pci}/virtfn{vf_id}" + + message = f"Failed to bind VF {vf_pci_addr} to {driver} " \ + f"on {node[u'host']}" + command = f"sh -c \"echo {driver} | tee {vf_path}/driver_override\"" + + exec_cmd_no_error( + node, command, timeout=120, sudo=True, message=message + ) + + command = f"sh -c \"echo {vf_pci_addr} | " \ + f"tee /sys/bus/pci/drivers/{driver}/bind\"" + + exec_cmd_no_error( + node, command, timeout=120, sudo=True, message=message + ) + + command = f"sh -c \"echo | tee {vf_path}/driver_override\"" + + exec_cmd_no_error( + node, command, timeout=120, sudo=True, message=message + ) @staticmethod def get_pci_dev_driver(node, pci_addr): """Get current PCI device driver on node. - .. note:: - # lspci -vmmks 0000:00:05.0 - Slot: 00:05.0 - Class: Ethernet controller - Vendor: Red Hat, Inc - Device: Virtio network device - SVendor: Red Hat, Inc - SDevice: Device 0001 - PhySlot: 5 - Driver: virtio-pci - :param node: DUT node. :param pci_addr: PCI device address. :type node: dict :type pci_addr: str :returns: Driver or None - :raises RuntimeError: If PCI rescan or lspci command execution failed. :raises RuntimeError: If it is not possible to get the interface driver information from the node. """ - ssh = SSH() - ssh.connect(node) + driver_path = f"/sys/bus/pci/devices/{pci_addr}/driver" + cmd = f"test -d {driver_path}" + ret_code, ret_val, _ = exec_cmd(node, cmd) + if int(ret_code): + # the directory doesn't exist which means the device is not bound + # to any driver + return None + cmd = f"basename $(readlink -f {driver_path})" + ret_val, _ = exec_cmd_no_error(node, cmd) + return ret_val.strip() - for i in range(3): - logger.trace('Try number {0}: Get PCI device driver'.format(i)) - cmd = 'lspci -vmmks {0}'.format(pci_addr) - ret_code, stdout, _ = ssh.exec_command(cmd) - if int(ret_code) != 0: - raise RuntimeError("'{0}' failed on '{1}'" - .format(cmd, node['host'])) - - for line in stdout.splitlines(): - if len(line) == 0: - continue - name = None - value = None - try: - name, value = line.split("\t", 1) - except ValueError: - if name == "Driver:": - return None - if name == 'Driver:': - return value - - if i < 2: - logger.trace('Driver for PCI device {} not found, executing ' - 'pci rescan and retrying'.format(pci_addr)) - cmd = 'sh -c "echo 1 > /sys/bus/pci/rescan"' - ret_code, _, _ = ssh.exec_command_sudo(cmd) - if int(ret_code) != 0: - raise RuntimeError("'{0}' failed on '{1}'" - .format(cmd, node['host'])) - - return None - - @staticmethod - def kernel_module_verify(node, module, force_load=False): - """Verify if kernel module is loaded on all DUTs. If parameter force + @staticmethod + def verify_kernel_module(node, module, force_load=False): + """Verify if kernel module is loaded on node. If parameter force load is set to True, then try to load the modules. - :param node: DUT node. + :param node: Node. :param module: Module to verify. :param force_load: If True then try to load module. :type node: dict :type module: str :type force_load: bool - :returns: nothing :raises RuntimeError: If module is not loaded or failed to load. """ + command = f"grep -w {module} /proc/modules" + message = f"Kernel module {module} is not loaded " \ + f"on host {node[u'host']}" - ssh = SSH() - ssh.connect(node) - - cmd = 'grep -w {0} /proc/modules'.format(module) - ret_code, _, _ = ssh.exec_command(cmd) - - if int(ret_code) != 0: + try: + exec_cmd_no_error( + node, command, timeout=30, sudo=False, message=message + ) + except RuntimeError: if force_load: # Module is not loaded and we want to load it - DUTSetup.kernel_module_load(node, module) + DUTSetup.load_kernel_module(node, module) else: - raise RuntimeError('Kernel module {0} is not loaded on host ' - '{1}'.format(module, node['host'])) + raise @staticmethod - def kernel_module_load(node, module): - """Load kernel module on node. + def verify_kernel_module_on_all_duts(nodes, module, force_load=False): + """Verify if kernel module is loaded on all DUTs. If parameter force + load is set to True, then try to load the modules. - :param node: DUT node. - :param module: Module to load. - :type node: dict + :param nodes: DUT nodes. + :param module: Module to verify. + :param force_load: If True then try to load module. + :type nodes: dict :type module: str - :returns: nothing - :raises RuntimeError: If loading failed. + :type force_load: bool """ - - ssh = SSH() - ssh.connect(node) - - ret_code, _, _ = ssh.exec_command_sudo("modprobe {0}".format(module)) - - if int(ret_code) != 0: - raise RuntimeError('Failed to load {0} kernel module on host {1}'. - format(module, node['host'])) + for node in nodes.values(): + if node[u"type"] == NodeType.DUT: + DUTSetup.verify_kernel_module(node, module, force_load) @staticmethod - def vpp_enable_traces_on_all_duts(nodes): - """Enable vpp packet traces on all DUTs in the given topology. + def verify_uio_driver_on_all_duts(nodes): + """Verify if uio driver kernel module is loaded on all DUTs. If module + is not present it will try to load it. - :param nodes: Nodes in the topology. + :param nodes: DUT nodes. :type nodes: dict """ for node in nodes.values(): - if node['type'] == NodeType.DUT: - DUTSetup.vpp_enable_traces_on_dut(node) + if node[u"type"] == NodeType.DUT: + uio_driver = Topology.get_uio_driver(node) + DUTSetup.verify_kernel_module(node, uio_driver, force_load=True) @staticmethod - def vpp_enable_traces_on_dut(node): - """Enable vpp packet traces on the DUT node. + def load_kernel_module(node, module): + """Load kernel module on node. - :param node: DUT node to set up. + :param node: DUT node. + :param module: Module to load. :type node: dict + :type module: str + :returns: nothing + :raises RuntimeError: If loading failed. """ + command = f"modprobe {module}" + message = f"Failed to load {module} on host {node[u'host']}" - vat = VatExecutor() - vat.execute_script("enable_dpdk_traces.vat", node, json_out=False) - vat.execute_script("enable_vhost_user_traces.vat", node, json_out=False) - vat.execute_script("enable_memif_traces.vat", node, json_out=False) + exec_cmd_no_error(node, command, timeout=30, sudo=True, message=message) @staticmethod - def install_vpp_on_all_duts(nodes, vpp_pkg_dir, vpp_rpm_pkgs, vpp_deb_pkgs): - """Install VPP on all DUT nodes. + def install_vpp_on_all_duts(nodes, vpp_pkg_dir): + """Install VPP on all DUT nodes. Start the VPP service in case of + systemd is not available or does not support autostart. :param nodes: Nodes in the topology. :param vpp_pkg_dir: Path to directory where VPP packages are stored. - :param vpp_rpm_pkgs: List of VPP rpm packages to be installed. - :param vpp_deb_pkgs: List of VPP deb packages to be installed. :type nodes: dict :type vpp_pkg_dir: str - :type vpp_rpm_pkgs: list - :type vpp_deb_pkgs: list :raises RuntimeError: If failed to remove or install VPP. """ - - logger.debug("Installing VPP") - for node in nodes.values(): - if node['type'] == NodeType.DUT: - logger.debug("Installing VPP on node {0}".format(node['host'])) - - ssh = SSH() - ssh.connect(node) - - cmd = "[[ -f /etc/redhat-release ]]" - return_code, _, _ = ssh.exec_command(cmd) - if int(return_code) == 0: - # workaroud - uninstall existing vpp installation until - # start-testcase script is updated on all virl servers - rpm_pkgs_remove = "vpp*" - cmd_u = 'yum -y remove "{0}"'.format(rpm_pkgs_remove) - r_rcode, _, r_err = ssh.exec_command_sudo(cmd_u, timeout=90) - if int(r_rcode) != 0: - raise RuntimeError('Failed to remove previous VPP' - 'installation on host {0}:\n{1}' - .format(node['host'], r_err)) - - rpm_pkgs = "*.rpm ".join(str(vpp_pkg_dir + pkg) - for pkg in vpp_rpm_pkgs) + "*.rpm" - cmd_i = "rpm -ivh {0}".format(rpm_pkgs) - ret_code, _, err = ssh.exec_command_sudo(cmd_i, timeout=90) - if int(ret_code) != 0: - raise RuntimeError('Failed to install VPP on host {0}:' - '\n{1}'.format(node['host'], err)) - else: - ssh.exec_command_sudo("rpm -qai vpp*") - logger.info("VPP installed on node {0}". - format(node['host'])) + message = f"Failed to install VPP on host {node[u'host']}!" + if node[u"type"] == NodeType.DUT: + command = u"ln -s /dev/null /etc/sysctl.d/80-vpp.conf || true" + exec_cmd_no_error(node, command, sudo=True) + + command = u". /etc/lsb-release; echo \"${DISTRIB_ID}\"" + stdout, _ = exec_cmd_no_error(node, command) + + if stdout.strip() == u"Ubuntu": + exec_cmd_no_error( + node, u"apt-get purge -y '*vpp*' || true", + timeout=120, sudo=True + ) + # workaround to avoid installation of vpp-api-python + exec_cmd_no_error( + node, f"rm -f {vpp_pkg_dir}vpp-api-python.deb", + timeout=120, sudo=True + ) + exec_cmd_no_error( + node, f"dpkg -i --force-all {vpp_pkg_dir}*.deb", + timeout=120, sudo=True, message=message + ) + exec_cmd_no_error(node, u"dpkg -l | grep vpp", sudo=True) + if DUTSetup.running_in_container(node): + DUTSetup.restart_service(node, Constants.VPP_UNIT) else: - # workaroud - uninstall existing vpp installation until - # start-testcase script is updated on all virl servers - deb_pkgs_remove = "vpp*" - cmd_u = 'apt-get purge -y "{0}"'.format(deb_pkgs_remove) - r_rcode, _, r_err = ssh.exec_command_sudo(cmd_u, timeout=90) - if int(r_rcode) != 0: - raise RuntimeError('Failed to remove previous VPP' - 'installation on host {0}:\n{1}' - .format(node['host'], r_err)) - deb_pkgs = "*.deb ".join(str(vpp_pkg_dir + pkg) - for pkg in vpp_deb_pkgs) + "*.deb" - cmd_i = "dpkg -i --force-all {0}".format(deb_pkgs) - ret_code, _, err = ssh.exec_command_sudo(cmd_i, timeout=90) - if int(ret_code) != 0: - raise RuntimeError('Failed to install VPP on host {0}:' - '\n{1}'.format(node['host'], err)) - else: - ssh.exec_command_sudo("dpkg -l | grep vpp") - logger.info("VPP installed on node {0}". - format(node['host'])) - - ssh.disconnect(node) - - @staticmethod - def verify_vpp_on_all_duts(nodes): - """Verify that VPP is installed on all DUT nodes. + exec_cmd_no_error( + node, u"yum -y remove '*vpp*' || true", + timeout=120, sudo=True + ) + # workaround to avoid installation of vpp-api-python + exec_cmd_no_error( + node, f"rm -f {vpp_pkg_dir}vpp-api-python.rpm", + timeout=120, sudo=True + ) + exec_cmd_no_error( + node, f"rpm -ivh {vpp_pkg_dir}*.rpm", + timeout=120, sudo=True, message=message + ) + exec_cmd_no_error(node, u"rpm -qai '*vpp*'", sudo=True) + DUTSetup.restart_service(node, Constants.VPP_UNIT) - :param nodes: Nodes in the topology. - :type nodes: dict + @staticmethod + def running_in_container(node): + """This method tests if topology node is running inside container. + + :param node: Topology node. + :type node: dict + :returns: True if running in docker container, false if not or failed + to detect. + :rtype: bool """ + command = u"fgrep docker /proc/1/cgroup" + message = u"Failed to get cgroup settings." + try: + exec_cmd_no_error( + node, command, timeout=30, sudo=False, message=message + ) + except RuntimeError: + return False + return True - logger.debug("Verify VPP on all DUTs") + @staticmethod + def get_docker_mergeddir(node, uuid): + """Get Docker overlay for MergedDir diff. - DUTSetup.start_vpp_service_on_all_duts(nodes) + :param node: DUT node. + :param uuid: Docker UUID. + :type node: dict + :type uuid: str + :returns: Docker container MergedDir. + :rtype: str + :raises RuntimeError: If getting output failed. + """ + command = f"docker inspect " \ + f"--format='{{{{.GraphDriver.Data.MergedDir}}}}' {uuid}" + message = f"Failed to get directory of {uuid} on host {node[u'host']}" - for node in nodes.values(): - if node['type'] == NodeType.DUT: - DUTSetup.verify_vpp_on_dut(node) + stdout, _ = exec_cmd_no_error(node, command, sudo=True, message=message) + return stdout.strip() @staticmethod - def verify_vpp_on_dut(node): - """Verify that VPP is installed on DUT node. + def get_hugepages_info(node, hugesize=None): + """Get number of huge pages in system. - :param node: DUT node. + :param node: Node in the topology. + :param hugesize: Size of hugepages. Default system huge size if None. :type node: dict - :raises RuntimeError: If failed to restart VPP, get VPP version - or get VPP interfaces. + :type hugesize: int + :returns: Number of huge pages in system. + :rtype: dict + :raises RuntimeError: If reading failed. """ + if not hugesize: + hugesize = "$(grep Hugepagesize /proc/meminfo | awk '{ print $2 }')" + command = f"cat /sys/kernel/mm/hugepages/hugepages-{hugesize}kB/*" + stdout, _ = exec_cmd_no_error(node, command) + try: + line = stdout.splitlines() + return { + "free_hugepages": int(line[0]), + "nr_hugepages": int(line[1]), + "nr_hugepages_mempolicy": int(line[2]), + "nr_overcommit_hugepages": int(line[3]), + "resv_hugepages": int(line[4]), + "surplus_hugepages": int(line[5]) + } + except ValueError: + logger.trace(u"Reading huge pages information failed!") - logger.debug("Verify VPP on node {0}".format(node['host'])) + @staticmethod + def check_huge_page( + node, huge_mnt, mem_size, hugesize=2048, allocate=False): + """Check if there is enough HugePages in system. If allocate is set to + true, try to allocate more HugePages. - DUTSetup.vpp_show_version_verbose(node) - DUTSetup.vpp_show_interfaces(node) + :param node: Node in the topology. + :param huge_mnt: HugePage mount point. + :param mem_size: Reqeusted memory in MB. + :param hugesize: HugePage size in KB. + :param allocate: Whether to allocate more memory if not enough. + :type node: dict + :type huge_mnt: str + :type mem_size: int + :type hugesize: int + :type allocate: bool + :raises RuntimeError: Mounting hugetlbfs failed or not enough HugePages + or increasing map count failed. + """ + # Get huge pages information. + hugepages = DUTSetup.get_hugepages_info(node, hugesize=hugesize) + + # Check if hugepages requested are available on node. + if hugepages[u"nr_overcommit_hugepages"]: + # If overcommit is used, we need to know how many additional pages + # we can allocate + huge_available = hugepages[u"nr_overcommit_hugepages"] - \ + hugepages[u"surplus_hugepages"] + else: + # Fallbacking to free_hugepages which were used before to detect. + huge_available = hugepages[u"free_hugepages"] + + if ((mem_size * 1024) // hugesize) > huge_available: + # If we want to allocate hugepage dynamically. + if allocate: + huge_needed = ((mem_size * 1024) // hugesize) - huge_available + huge_to_allocate = huge_needed + hugepages[u"nr_hugepages"] + max_map_count = huge_to_allocate * 4 + # Check if huge pages mount point exist. + try: + exec_cmd_no_error(node, u"fgrep 'hugetlbfs' /proc/mounts") + except RuntimeError: + exec_cmd_no_error(node, f"mkdir -p {huge_mnt}", sudo=True) + exec_cmd_no_error( + node, + f"mount -t hugetlbfs -o pagesize={hugesize}k none " + f"{huge_mnt}", + sudo=True) + # Increase maximum number of memory map areas for process. + exec_cmd_no_error( + node, + f"echo \"{max_map_count}\" | " + f"sudo tee /proc/sys/vm/max_map_count", + message=f"Increase map count failed on {node[u'host']}!" + ) + # Increase hugepage count. + exec_cmd_no_error( + node, + f"echo \"{huge_to_allocate}\" | " + f"sudo tee /proc/sys/vm/nr_hugepages", + message=f"Mount huge pages failed on {node[u'host']}!" + ) + # If we do not want to allocate dynamically end with error. + else: + raise RuntimeError( + f"Not enough availablehuge pages: {huge_available}!" + )