-# Copyright (c) 2019 Cisco and/or its affiliates.
+# Copyright (c) 2021 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
"""DUT setup library."""
+from time import sleep
from robot.api import logger
from resources.libraries.python.Constants import Constants
-from resources.libraries.python.ssh import SSH, exec_cmd_no_error
+from resources.libraries.python.ssh import SSH, exec_cmd, exec_cmd_no_error
from resources.libraries.python.topology import NodeType, Topology
:type node: dict
:type service: str
"""
- command = u"echo $(< /tmp/*supervisor*.log)"\
+ command = u"cat /tmp/*supervisor*.log"\
if DUTSetup.running_in_container(node) \
- else f"journalctl --no-pager --unit={service} " \
- f"--since=\"$(echo `systemctl show -p ActiveEnterTimestamp " \
- f"{service}` | awk \'{{print $2 $3}}\')\""
+ else f"journalctl --no-pager _SYSTEMD_INVOCATION_ID=$(systemctl " \
+ f"show -p InvocationID --value {service})"
+
message = f"Node {node[u'host']} failed to get logs from unit {service}"
exec_cmd_no_error(
:type node: dict
:type service: str
"""
+ DUTSetup.get_service_logs(node, service)
+
command = f"supervisorctl stop {service}" \
if DUTSetup.running_in_container(node) \
else f"service {service} stop"
node, command, timeout=180, sudo=True, message=message
)
- DUTSetup.get_service_logs(node, service)
-
@staticmethod
def stop_service_on_all_duts(nodes, service):
"""Stop the named service on all DUTs.
DUTSetup.stop_service(node, service)
@staticmethod
- def get_vpp_pid(node):
- """Get PID of running VPP process.
+ def kill_program(node, program, namespace=None):
+ """Kill program on the specified topology node.
+
+ :param node: Topology node.
+ :param program: Program name.
+ :param namespace: Namespace program is running in.
+ :type node: dict
+ :type program: str
+ :type namespace: str
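+
+ Example (program and namespace names are illustrative)::
+
+ DUTSetup.kill_program(node, u"testpmd", namespace=u"red")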
+ """
+ host = node[u"host"]
+ cmd_timeout = 5
+ if namespace in (None, u"default"):
+ shell_cmd = u"sh -c"
+ else:
+ shell_cmd = f"ip netns exec {namespace} sh -c"
+
+ pgrep_cmd = f"{shell_cmd} \'pgrep -c {program}\'"
+ _, stdout, _ = exec_cmd(node, pgrep_cmd, timeout=cmd_timeout,
+ sudo=True)
+ if int(stdout) == 0:
+ logger.trace(f"{program} is not running on {host}")
+ return
+ exec_cmd(node, f"{shell_cmd} \'pkill {program}\'",
+ timeout=cmd_timeout, sudo=True)
+ for attempt in range(5):
+ _, stdout, _ = exec_cmd(node, pgrep_cmd, timeout=cmd_timeout,
+ sudo=True)
+ if int(stdout) == 0:
+ logger.trace(f"Attempt {attempt}: {program} is dead on {host}")
+ return
+ sleep(1)
+ logger.trace(f"SIGKILLing {program} on {host}")
+ exec_cmd(node, f"{shell_cmd} \'pkill -9 {program}\'",
+ timeout=cmd_timeout, sudo=True)
+
+ @staticmethod
+ def verify_program_installed(node, program):
+ """Verify that program is installed on the specified topology node.
+
+ :param node: Topology node.
+ :param program: Program name.
+ :type node: dict
+ :type program: str
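+
+ Example (program name is illustrative)::
+
+ DUTSetup.verify_program_installed(node, u"docker")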
+ """
+ cmd = f"command -v {program}"
+ exec_cmd_no_error(node, cmd, message=f"{program} is not installed")
+
+ @staticmethod
+ def get_pid(node, process):
+ """Get PID of running process.
:param node: DUT node.
+ :param process: Process name.
:type node: dict
+ :type process: str
- :returns: PID
- :rtype: int
+ :returns: PID list.
+ :rtype: list of int
:raises RuntimeError: If it is not possible to get the PID.
retval = None
for i in range(3):
- logger.trace(f"Try {i}: Get VPP PID")
- ret_code, stdout, stderr = ssh.exec_command(u"pidof vpp")
+ logger.trace(f"Try {i}: Get {process} PID")
+ ret_code, stdout, stderr = ssh.exec_command(f"pidof {process}")
if int(ret_code):
raise RuntimeError(
- f"Not possible to get PID of VPP process on node: "
+ f"Not possible to get PID of {process} process on node: "
f"{node[u'host']}\n {stdout + stderr}"
)
pid_list = stdout.split()
if len(pid_list) == 1:
- retval = int(stdout)
- elif not pid_list:
- logger.debug(f"No VPP PID found on node {node[u'host']}")
+ return [int(stdout)]
+ if not pid_list:
+ logger.debug(f"No {process} PID found on node {node[u'host']}")
continue
- else:
- logger.debug(
- f"More then one VPP PID found on node {node[u'host']}"
- )
- retval = [int(pid) for pid in pid_list]
+ logger.debug(
+ f"More than one {process} PID found on node {node[u'host']}"
+ )
+ return [int(pid) for pid in pid_list]
return retval
pids = dict()
for node in nodes.values():
if node[u"type"] == NodeType.DUT:
- pids[node[u"host"]] = DUTSetup.get_vpp_pid(node)
+ pids[node[u"host"]] = DUTSetup.get_pid(node, u"vpp")
return pids
@staticmethod
:type pf_pci_addr: str
:type vf_id: int
:returns: Virtual Function PCI address.
- :rtype: int
+ :rtype: str
:raises RuntimeError: If failed to get Virtual Function PCI address.
"""
command = f"sh -c \"basename $(readlink " \
:type numvfs: int
:raises RuntimeError: Failed to create VFs on PCI.
"""
+ cmd = f"test -f /sys/bus/pci/devices/{pf_pci_addr}/sriov_numvfs"
+ sriov_unsupported, _, _ = exec_cmd(node, cmd)
+ # if sriov_numvfs doesn't exist, then sriov_unsupported != 0
+ if int(sriov_unsupported):
+ if numvfs == 0:
+ # sriov is not supported and we want 0 VFs
+ # no need to do anything
+ return
+ else:
+ raise RuntimeError(
+ f"Can't configure {numvfs} VFs on {pf_pci_addr} device "
+ f"on {node[u'host']} since it doesn't support SR-IOV."
+ )
+
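+ # Writing N to sriov_numvfs makes the PF driver create N VFs;
+ # writing 0 removes them (standard sysfs SR-IOV interface).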
pci = pf_pci_addr.replace(u":", r"\:")
command = f"sh -c \"echo {numvfs} | " \
f"tee /sys/bus/pci/devices/{pci}/sriov_numvfs\""
node, command, timeout=120, sudo=True, message=message
)
+ @staticmethod
+ def pci_driver_unbind_list(node, *pci_addrs):
+ """Unbind PCI devices from current driver on node.
+
+ :param node: DUT node.
+ :param pci_addrs: PCI device addresses.
+ :type node: dict
+ :type pci_addrs: list
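+
+ Example (addresses are illustrative)::
+
+ DUTSetup.pci_driver_unbind_list(
+ node, u"0000:01:00.0", u"0000:01:00.1")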
+ """
+ for pci_addr in pci_addrs:
+ DUTSetup.pci_driver_unbind(node, pci_addr)
+
@staticmethod
def pci_driver_bind(node, pci_addr, driver):
"""Bind PCI device to driver on node.
def get_pci_dev_driver(node, pci_addr):
"""Get current PCI device driver on node.
- .. note::
- # lspci -vmmks 0000:00:05.0
- Slot: 00:05.0
- Class: Ethernet controller
- Vendor: Red Hat, Inc
- Device: Virtio network device
- SVendor: Red Hat, Inc
- SDevice: Device 0001
- PhySlot: 5
- Driver: virtio-pci
-
:param node: DUT node.
:param pci_addr: PCI device address.
:type node: dict
:type pci_addr: str
:returns: Driver or None
+ :rtype: str or None
- :raises RuntimeError: If PCI rescan or lspci command execution failed.
:raises RuntimeError: If it is not possible to get the interface driver
information from the node.
"""
- ssh = SSH()
- ssh.connect(node)
-
- for i in range(3):
- logger.trace(f"Try number {i}: Get PCI device driver")
-
- cmd = f"lspci -vmmks {pci_addr}"
- ret_code, stdout, _ = ssh.exec_command(cmd)
- if int(ret_code):
- raise RuntimeError(f"'{cmd}' failed on '{node[u'host']}'")
-
- for line in stdout.splitlines():
- if not line:
- continue
- name = None
- value = None
- try:
- name, value = line.split(u"\t", 1)
- except ValueError:
- if name == u"Driver:":
- return None
- if name == u"Driver:":
- return value
-
- if i < 2:
- logger.trace(
- f"Driver for PCI device {pci_addr} not found, "
- f"executing pci rescan and retrying"
- )
- cmd = u"sh -c \"echo 1 > /sys/bus/pci/rescan\""
- ret_code, _, _ = ssh.exec_command_sudo(cmd)
- if int(ret_code) != 0:
- raise RuntimeError(f"'{cmd}' failed on '{node[u'host']}'")
-
- return None
+ driver_path = f"/sys/bus/pci/devices/{pci_addr}/driver"
+ cmd = f"test -d {driver_path}"
+ ret_code, ret_val, _ = exec_cmd(node, cmd)
+ if int(ret_code):
+ # The directory does not exist, which means the device is not
+ # bound to any driver.
+ return None
+ else:
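+ # The 'driver' entry is a symlink into /sys/bus/pci/drivers/<name>,
+ # e.g. a device bound to vfio-pci resolves to ".../drivers/vfio-pci".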
+ cmd = f"basename $(readlink -f {driver_path})"
+ ret_val, _ = exec_cmd_no_error(node, cmd)
+ return ret_val.strip()
@staticmethod
def verify_kernel_module(node, module, force_load=False):
node, f"rpm -ivh {vpp_pkg_dir}*.rpm",
timeout=120, sudo=True, message=message
)
- exec_cmd_no_error(node, u"rpm -qai *vpp*", sudo=True)
+ exec_cmd_no_error(node, u"rpm -qai '*vpp*'", sudo=True)
DUTSetup.restart_service(node, Constants.VPP_UNIT)
@staticmethod
return stdout.strip()
@staticmethod
- def get_huge_page_size(node):
- """Get default size of huge pages in system.
-
- :param node: Node in the topology.
- :type node: dict
- :returns: Default size of free huge pages in system.
- :rtype: int
- :raises RuntimeError: If reading failed for three times.
- """
- ssh = SSH()
- ssh.connect(node)
-
- for _ in range(3):
- ret_code, stdout, _ = ssh.exec_command_sudo(
- u"grep Hugepagesize /proc/meminfo | awk '{ print $2 }'"
- )
- if ret_code == 0:
- try:
- huge_size = int(stdout)
- except ValueError:
- logger.trace(u"Reading huge page size information failed")
- else:
- break
- else:
- raise RuntimeError(u"Getting huge page size information failed.")
- return huge_size
-
- @staticmethod
- def get_huge_page_free(node, huge_size):
- """Get number of free huge pages in system.
-
- :param node: Node in the topology.
- :param huge_size: Size of hugepages.
- :type node: dict
- :type huge_size: int
- :returns: Number of free huge pages in system.
- :rtype: int
- :raises RuntimeError: If reading failed for three times.
- """
- # TODO: add numa aware option
- ssh = SSH()
- ssh.connect(node)
-
- for _ in range(3):
- ret_code, stdout, _ = ssh.exec_command_sudo(
- f"cat /sys/kernel/mm/hugepages/hugepages-{huge_size}kB/"
- f"free_hugepages"
- )
- if ret_code == 0:
- try:
- huge_free = int(stdout)
- except ValueError:
- logger.trace(u"Reading free huge pages information failed")
- else:
- break
- else:
- raise RuntimeError(u"Getting free huge pages information failed.")
- return huge_free
-
- @staticmethod
- def get_huge_page_total(node, huge_size):
- """Get total number of huge pages in system.
+ def get_hugepages_info(node, hugesize=None):
+ """Get number of huge pages in system.
:param node: Node in the topology.
- :param huge_size: Size of hugepages.
+ :param hugesize: Hugepage size in kB. If None, the system default
+ size is used.
:type node: dict
- :type huge_size: int
- :returns: Total number of huge pages in system.
- :rtype: int
- :raises RuntimeError: If reading failed for three times.
+ :type hugesize: int
+ :returns: Huge page counters (free, total, mempolicy, overcommit,
+ reserved, surplus) read from sysfs.
+ :rtype: dict
+ :raises RuntimeError: If reading failed.
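+
+ Example of the returned dict (values are illustrative)::
+
+ {
+ "free_hugepages": 1024,
+ "nr_hugepages": 1024,
+ "nr_hugepages_mempolicy": 1024,
+ "nr_overcommit_hugepages": 0,
+ "resv_hugepages": 0,
+ "surplus_hugepages": 0
+ }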
"""
- # TODO: add numa aware option
- ssh = SSH()
- ssh.connect(node)
-
- for _ in range(3):
- ret_code, stdout, _ = ssh.exec_command_sudo(
- f"cat /sys/kernel/mm/hugepages/hugepages-{huge_size}kB/"
- f"nr_hugepages"
- )
- if ret_code == 0:
- try:
- huge_total = int(stdout)
- except ValueError:
- logger.trace(u"Reading total huge pages information failed")
- else:
- break
- else:
- raise RuntimeError(u"Getting total huge pages information failed.")
- return huge_total
+ if not hugesize:
+ hugesize = "$(grep Hugepagesize /proc/meminfo | awk '{ print $2 }')"
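+ # The shell glob expands alphabetically, so the counters arrive in
+ # the order: free_hugepages, nr_hugepages, nr_hugepages_mempolicy,
+ # nr_overcommit_hugepages, resv_hugepages, surplus_hugepages
+ # (assumes the directory contains exactly these files).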
+ command = f"cat /sys/kernel/mm/hugepages/hugepages-{hugesize}kB/*"
+ stdout, _ = exec_cmd_no_error(node, command)
+ try:
+ lines = stdout.splitlines()
+ return {
+ "free_hugepages": int(lines[0]),
+ "nr_hugepages": int(lines[1]),
+ "nr_hugepages_mempolicy": int(lines[2]),
+ "nr_overcommit_hugepages": int(lines[3]),
+ "resv_hugepages": int(lines[4]),
+ "surplus_hugepages": int(lines[5])
+ }
+ except (ValueError, IndexError) as err:
+ raise RuntimeError(
+ u"Reading huge pages information failed!"
+ ) from err
@staticmethod
- def check_huge_page(node, huge_mnt, mem_size, allocate=False):
+ def check_huge_page(
+ node, huge_mnt, mem_size, hugesize=2048, allocate=False):
"""Check if there is enough HugePages in system. If allocate is set to
true, try to allocate more HugePages.
:param node: Node in the topology.
:param huge_mnt: HugePage mount point.
- :param mem_size: Requested memory in MB.
+ :param mem_size: Requested memory in MB.
+ :param hugesize: HugePage size in KB.
:param allocate: Whether to allocate more memory if not enough.
:type node: dict
:type huge_mnt: str
- :type mem_size: str
+ :type mem_size: int
+ :type hugesize: int
:type allocate: bool
:raises RuntimeError: Mounting hugetlbfs failed or not enough HugePages
or increasing map count failed.
"""
- # TODO: split function into smaller parts.
- ssh = SSH()
- ssh.connect(node)
-
- # Get huge pages information
- huge_size = DUTSetup.get_huge_page_size(node)
- huge_free = DUTSetup.get_huge_page_free(node, huge_size)
- huge_total = DUTSetup.get_huge_page_total(node, huge_size)
+ # Get huge pages information.
+ hugepages = DUTSetup.get_hugepages_info(node, hugesize=hugesize)
+
+ # Check if hugepages requested are available on node.
+ if hugepages[u"nr_overcommit_hugepages"]:
+ # If overcommit is used, the number of additionally allocatable
+ # pages is the overcommit limit minus the surplus pages in use.
+ huge_available = hugepages[u"nr_overcommit_hugepages"] - \
+ hugepages[u"surplus_hugepages"]
+ else:
+ # Fall back to free_hugepages, which was used for this check
+ # before overcommit support was added.
+ huge_available = hugepages[u"free_hugepages"]
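+
+ # Needed pages = (mem_size [MB] * 1024) // hugesize [kB];
+ # e.g. 1024 MB with 2048 kB hugepages requires 512 pages.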
- # Check if memory requested is available on
- mem_size = int(mem_size)
- if (mem_size * 1024) > (huge_free * huge_size):
- # If we want to allocate hugepage dynamically
+ if ((mem_size * 1024) // hugesize) > huge_available:
+ # If we want to allocate hugepage dynamically.
if allocate:
- mem_needed = (mem_size * 1024) - (huge_free * huge_size)
- huge_to_allocate = ((mem_needed / huge_size) * 2) + huge_total
- max_map_count = huge_to_allocate*4
- # Increase maximum number of memory map areas a process may have
- ret_code, _, _ = ssh.exec_command_sudo(
+ huge_needed = ((mem_size * 1024) // hugesize) - huge_available
+ huge_to_allocate = huge_needed + hugepages[u"nr_hugepages"]
+ max_map_count = huge_to_allocate * 4
+ # Check if any hugetlbfs is mounted; if not, create and mount
+ # huge_mnt.
+ try:
+ exec_cmd_no_error(node, u"fgrep 'hugetlbfs' /proc/mounts")
+ except RuntimeError:
+ exec_cmd_no_error(node, f"mkdir -p {huge_mnt}", sudo=True)
+ exec_cmd_no_error(
+ node,
+ f"mount -t hugetlbfs -o pagesize={hugesize}k none "
+ f"{huge_mnt}",
+ sudo=True)
+ # Increase maximum number of memory map areas for process.
+ exec_cmd_no_error(
+ node,
f"echo \"{max_map_count}\" | "
- f"sudo tee /proc/sys/vm/max_map_count"
+ f"sudo tee /proc/sys/vm/max_map_count",
+ message=f"Increase map count failed on {node[u'host']}!"
)
- if int(ret_code) != 0:
- raise RuntimeError(
- f"Increase map count failed on {node[u'host']}"
- )
- # Increase hugepage count
- ret_code, _, _ = ssh.exec_command_sudo(
+ # Increase hugepage count.
+ exec_cmd_no_error(
+ node,
f"echo \"{huge_to_allocate}\" | "
- f"sudo tee /proc/sys/vm/nr_hugepages"
+ f"sudo tee /proc/sys/vm/nr_hugepages",
+ message=f"Mount huge pages failed on {node[u'host']}!"
)
- if int(ret_code) != 0:
- raise RuntimeError(
- f"Mount huge pages failed on {node[u'host']}"
- )
- # If we do not want to allocate dynamically end with error
+ # If dynamic allocation is not allowed, end with an error.
else:
raise RuntimeError(
- f"Not enough free huge pages: {huge_free}, "
- f"{huge_free * huge_size} MB"
- )
- # Check if huge pages mount point exist
- has_huge_mnt = False
- ret_code, stdout, _ = ssh.exec_command(u"cat /proc/mounts")
- if int(ret_code) == 0:
- for line in stdout.splitlines():
- # Try to find something like:
- # none /mnt/huge hugetlbfs rw,realtime,pagesize=2048k 0 0
- mount = line.split()
- if mount[2] == u"hugetlbfs" and mount[1] == huge_mnt:
- has_huge_mnt = True
- break
- # If huge page mount point not exist create one
- if not has_huge_mnt:
- ret_code, _, _ = ssh.exec_command_sudo(f"mkdir -p {huge_mnt}")
- if int(ret_code) != 0:
- raise RuntimeError(
- f"Create mount dir failed on {node[u'host']}"
- )
- ret_code, _, _ = ssh.exec_command_sudo(
- f"mount -t hugetlbfs -o pagesize=2048k none {huge_mnt}"
- )
- if int(ret_code) != 0:
- raise RuntimeError(
- f"Mount huge pages failed on {node[u'host']}"
+ f"Not enough availablehuge pages: {huge_available}!"
)