1 # Copyright (c) 2022 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
14 """DUT setup library."""
16 from time import sleep
17 from robot.api import logger
19 from resources.libraries.python.Constants import Constants
20 from resources.libraries.python.ssh import SSH, exec_cmd, exec_cmd_no_error
21 from resources.libraries.python.topology import NodeType, Topology
25 """Contains methods for setting up DUTs."""
def get_service_logs(node, service):
    """Run the command that prints the logs of a service unit on a node.

    Inside a container the supervisor log files under /tmp are dumped;
    otherwise journalctl is queried for the current invocation of the unit.

    :param node: Node in the topology.
    :param service: Service unit name.
    :type node: dict
    :type service: str
    """
    if DUTSetup.running_in_container(node):
        command = u"cat /tmp/*supervisor*.log"
    else:
        # Limit the journal output to the current invocation of the unit.
        command = (
            f"journalctl --no-pager _SYSTEMD_INVOCATION_ID=$(systemctl "
            f"show -p InvocationID --value {service})"
        )

    message = f"Node {node[u'host']} failed to get logs from unit {service}"

    exec_cmd_no_error(
        node, command, timeout=30, sudo=True, message=message
    )
def get_service_logs_on_all_duts(nodes, service):
    """Run the service log retrieval on every DUT node of the topology.

    :param nodes: Nodes in the topology.
    :param service: Service unit name.
    :type nodes: dict
    :type service: str
    """
    duts = (
        candidate for candidate in nodes.values()
        if candidate[u"type"] == NodeType.DUT
    )
    for dut in duts:
        DUTSetup.get_service_logs(dut, service)
def restart_service(node, service):
    """Restart a service on a node and then retrieve its logs.

    supervisorctl is used inside a container, the service command
    otherwise.

    :param node: Node in the topology.
    :param service: Service unit name.
    :type node: dict
    :type service: str
    """
    if DUTSetup.running_in_container(node):
        command = f"supervisorctl restart {service}"
    else:
        command = f"service {service} restart"
    message = f"Node {node[u'host']} failed to restart service {service}"

    exec_cmd_no_error(
        node, command, timeout=180, sudo=True, message=message
    )

    # Logs are collected after the restart command has finished.
    DUTSetup.get_service_logs(node, service)
def restart_service_on_all_duts(nodes, service):
    """Restart the given service on every DUT node of the topology.

    :param nodes: Nodes in the topology.
    :param service: Service unit name.
    :type nodes: dict
    :type service: str
    """
    for candidate in nodes.values():
        if candidate[u"type"] != NodeType.DUT:
            continue
        DUTSetup.restart_service(candidate, service)
def start_service(node, service):
    """Bring a service up on a node and then retrieve its logs.

    The command currently issued is a restart, not a plain start.

    :param node: Node in the topology.
    :param service: Service unit name.
    :type node: dict
    :type service: str
    """
    # TODO: change command to start once all parent function updated.
    if DUTSetup.running_in_container(node):
        command = f"supervisorctl restart {service}"
    else:
        command = f"service {service} restart"
    message = f"Node {node[u'host']} failed to start service {service}"

    exec_cmd_no_error(
        node, command, timeout=180, sudo=True, message=message
    )

    DUTSetup.get_service_logs(node, service)
def start_service_on_all_duts(nodes, service):
    """Bring the given service up on every DUT node of the topology.

    :param nodes: Nodes in the topology.
    :param service: Service unit name.
    :type nodes: dict
    :type service: str
    """
    duts = (
        candidate for candidate in nodes.values()
        if candidate[u"type"] == NodeType.DUT
    )
    for dut in duts:
        DUTSetup.start_service(dut, service)
def stop_service(node, service):
    """Retrieve the logs of a service on a node and then stop it.

    supervisorctl is used inside a container, the service command
    otherwise.

    :param node: Node in the topology.
    :param service: Service unit name.
    :type node: dict
    :type service: str
    """
    # Logs are collected first, while the unit is still in its current
    # invocation.
    DUTSetup.get_service_logs(node, service)

    if DUTSetup.running_in_container(node):
        command = f"supervisorctl stop {service}"
    else:
        command = f"service {service} stop"
    message = f"Node {node[u'host']} failed to stop service {service}"

    exec_cmd_no_error(
        node, command, timeout=180, sudo=True, message=message
    )
def stop_service_on_all_duts(nodes, service):
    """Stop the given service on every DUT node of the topology.

    :param nodes: Nodes in the topology.
    :param service: Service unit name.
    :type nodes: dict
    :type service: str
    """
    for candidate in nodes.values():
        if candidate[u"type"] != NodeType.DUT:
            continue
        DUTSetup.stop_service(candidate, service)
def kill_program(node, program, namespace=None):
    """Terminate a program on a topology node, escalating to SIGKILL.

    pkill is sent first; pgrep is then polled up to five times and, if
    instances are still reported, pkill -9 is sent.

    :param node: Topology node.
    :param program: Program name.
    :param namespace: Namespace program is running in.
    :type node: dict
    :type program: str
    :type namespace: str
    """
    host = node[u"host"]
    # NOTE(review): the timeout value, the default shell prefix and the
    # pause between polls are to be confirmed against the upstream file.
    cmd_timeout = 5
    if namespace in (None, u"default"):
        shell_cmd = u"sh -c"
    else:
        shell_cmd = f"ip netns exec {namespace} sh -c"

    pgrep_cmd = f"{shell_cmd} \'pgrep -c {program}\'"

    def instances_left():
        """Return the process count printed by pgrep -c."""
        _, stdout, _ = exec_cmd(
            node, pgrep_cmd, timeout=cmd_timeout, sudo=True
        )
        return int(stdout)

    if instances_left() == 0:
        logger.trace(f"{program} is not running on {host}")
        return

    exec_cmd(
        node, f"{shell_cmd} \'pkill {program}\'",
        timeout=cmd_timeout, sudo=True
    )
    for attempt in range(5):
        if instances_left() == 0:
            logger.trace(f"Attempt {attempt}: {program} is dead on {host}")
            return
        sleep(1)

    logger.trace(f"SIGKILLing {program} on {host}")
    exec_cmd(
        node, f"{shell_cmd} \'pkill -9 {program}\'",
        timeout=cmd_timeout, sudo=True
    )
def verify_program_installed(node, program):
    """Check with "command -v" that a program is available on a node.

    :param node: Topology node.
    :param program: Program name.
    :type node: dict
    :type program: str
    """
    exec_cmd_no_error(
        node, f"command -v {program}",
        message=f"{program} is not installed"
    )
def get_pid(node, process):
    """Look up the PID(s) of a running process with pidof.

    The lookup is tried several times. A single PID is returned at once;
    when several PIDs are seen, the list from the last try is returned.

    :param node: DUT node.
    :param process: process name.
    :type node: dict
    :type process: str
    :returns: List of PIDs, or None if no PID was found in any try.
    :rtype: list
    :raises RuntimeError: If it is not possible to get the PID.
    """
    ssh = SSH()
    ssh.connect(node)

    found = None
    # NOTE(review): number of tries to be confirmed against upstream.
    for i in range(3):
        logger.trace(f"Try {i}: Get {process} PID")
        ret_code, stdout, stderr = ssh.exec_command(f"pidof {process}")

        if int(ret_code) != 0:
            raise RuntimeError(
                f"Not possible to get PID of {process} process on node: "
                f"{node[u'host']}\n {stdout + stderr}"
            )

        pid_list = stdout.split()
        if len(pid_list) == 1:
            return [int(stdout)]
        if not pid_list:
            logger.debug(f"No {process} PID found on node {node[u'host']}")
            continue
        logger.debug(
            f"More than one {process} PID found "
            f"on node {node[u'host']}"
        )
        found = [int(pid) for pid in pid_list]

    return found
def get_vpp_pids(nodes):
    """Collect the PIDs of the vpp process from every DUT node.

    :param nodes: DUT nodes.
    :type nodes: dict
    :returns: Mapping of node host to the result of the PID lookup.
    :rtype: dict
    """
    pids = {}
    for candidate in nodes.values():
        if candidate[u"type"] != NodeType.DUT:
            continue
        pids[candidate[u"host"]] = DUTSetup.get_pid(candidate, u"vpp")
    return pids
def crypto_device_verify(node, crypto_type, numvfs, force_init=False):
    """Check that the QAT device on a DUT has the expected number of VFs.

    When the current number differs, the device is (re)initialized if
    force_init is set; otherwise an error is raised.

    :param node: DUT node.
    :param crypto_type: Crypto device type - HW_DH895xcc, HW_C3xxx or
        HW_C4xxx.
    :param numvfs: Number of VFs to initialize, 0 - disable the VFs.
    :param force_init: If True then try to initialize to specific value.
    :type node: dict
    :type crypto_type: string
    :type numvfs: int
    :type force_init: bool
    :returns: nothing
    :raises RuntimeError: If QAT VFs are not created and force init is set
        to False.
    """
    pci_addr = Topology.get_cryptodev(node)
    current_numvfs = DUTSetup.get_sriov_numvfs(node, pci_addr)

    if current_numvfs == numvfs:
        return

    if not force_init:
        raise RuntimeError(
            f"QAT device failed to create VFs on {node[u'host']}"
        )

    # QAT is not initialized and we want to initialize with numvfs
    DUTSetup.crypto_device_init(node, crypto_type, numvfs)
def crypto_device_init(node, crypto_type, numvfs):
    """Bind the QAT device to its kernel driver and create its VFs.

    :param node: DUT node.
    :param crypto_type: Crypto device type - HW_DH895xcc, HW_C3xxx or
        HW_C4xxx.
    :param numvfs: Number of VFs to initialize, 0 - disable the VFs.
    :type node: dict
    :type crypto_type: string
    :type numvfs: int
    :returns: nothing
    :raises RuntimeError: If failed to stop VPP or QAT failed to initialize.
    """
    # (device type, kernel module, kernel driver)
    supported = (
        (u"HW_DH895xcc", u"qat_dh895xcc", u"dh895xcc"),
        (u"HW_C3xxx", u"qat_c3xxx", u"c3xxx"),
        (u"HW_C4xxx", u"qat_c4xxx", u"c4xxx"),
    )
    for hw_type, module_name, driver_name in supported:
        if crypto_type == hw_type:
            kernel_mod, kernel_drv = module_name, driver_name
            break
    else:
        raise RuntimeError(
            f"Unsupported crypto device type on {node[u'host']}"
        )

    pci_addr = Topology.get_cryptodev(node)

    # QAT device must be re-bound to kernel driver before initialization.
    DUTSetup.verify_kernel_module(node, kernel_mod, force_load=True)

    # Stop VPP to prevent deadlock.
    DUTSetup.stop_service(node, Constants.VPP_UNIT)

    escaped_addr = pci_addr.replace(u":", r"\:")
    if DUTSetup.get_pci_dev_driver(node, escaped_addr) is not None:
        DUTSetup.pci_driver_unbind(node, pci_addr)

    # Bind to kernel driver.
    DUTSetup.pci_driver_bind(node, pci_addr, kernel_drv)

    # Initialize QAT VFs.
    if numvfs > 0:
        DUTSetup.set_sriov_numvfs(node, pci_addr, numvfs)
def get_virtfn_pci_addr(node, pf_pci_addr, vf_id):
    """Resolve the PCI address of a Virtual Function from sysfs.

    The address is the basename of the target of the virtfn<vf_id> link
    of the Physical Function.

    :param node: DUT node.
    :param pf_pci_addr: Physical Function PCI address.
    :param vf_id: Virtual Function number.
    :type node: dict
    :type pf_pci_addr: str
    :type vf_id: int
    :returns: Virtual Function PCI address.
    :rtype: str
    :raises RuntimeError: If failed to get Virtual Function PCI address.
    """
    virtfn_link = f"/sys/bus/pci/devices/{pf_pci_addr}/virtfn{vf_id}"
    command = f"sh -c \"basename $(readlink {virtfn_link})\""

    stdout, _ = exec_cmd_no_error(
        node, command, timeout=30, sudo=True,
        message=u"Failed to get virtual function PCI address."
    )

    return stdout.strip()
def get_sriov_numvfs(node, pf_pci_addr):
    """Read the current number of SR-IOV VFs of a PCI device from sysfs.

    The value is re-read when the output cannot be parsed as an integer.

    :param node: DUT node.
    :param pf_pci_addr: Physical Function PCI device address.
    :type node: dict
    :type pf_pci_addr: str
    :returns: Number of VFs; None if no read produced an integer.
    :rtype: int
    :raises RuntimeError: If PCI device is not SR-IOV capable.
    """
    escaped_addr = pf_pci_addr.replace(u":", r"\:")
    command = f"cat /sys/bus/pci/devices/{escaped_addr}/sriov_numvfs"
    message = f"PCI device {pf_pci_addr} is not a SR-IOV device."

    # NOTE(review): number of tries to be confirmed against upstream.
    for _attempt in range(3):
        stdout, _ = exec_cmd_no_error(
            node, command, timeout=30, sudo=True, message=message
        )
        try:
            return int(stdout)
        except ValueError:
            logger.trace(
                f"Reading sriov_numvfs info failed on {node[u'host']}"
            )
    return None
def set_sriov_numvfs(node, pf_pci_addr, numvfs=0):
    """Write the requested number of SR-IOV VFs for a PCI device on DUT.

    Writing zero removes all VFs. A device without the sriov_numvfs file
    is accepted only when zero VFs are requested.

    :param node: DUT node.
    :param pf_pci_addr: Physical Function PCI device address.
    :param numvfs: Number of VFs to initialize, 0 - removes the VFs.
    :type node: dict
    :type pf_pci_addr: str
    :type numvfs: int
    :raises RuntimeError: Failed to create VFs on PCI.
    """
    probe = f"test -f /sys/bus/pci/devices/{pf_pci_addr}/sriov_numvfs"
    probe_ret_code, _, _ = exec_cmd(node, probe)
    # A non-zero return code means the sriov_numvfs file does not exist.
    if int(probe_ret_code) != 0:
        if numvfs != 0:
            raise RuntimeError(
                f"Can't configure {numvfs} VFs on {pf_pci_addr} device "
                f"on {node[u'host']} since it doesn't support SR-IOV."
            )
        # sriov is not supported and we want 0 VFs
        # no need to do anything
        return

    escaped_addr = pf_pci_addr.replace(u":", r"\:")
    command = (
        f"sh -c \"echo {numvfs} | "
        f"tee /sys/bus/pci/devices/{escaped_addr}/sriov_numvfs\""
    )
    message = (
        f"Failed to create {numvfs} VFs on {pf_pci_addr} device "
        f"on {node[u'host']}"
    )

    exec_cmd_no_error(
        node, command, timeout=120, sudo=True, message=message
    )
def pci_driver_unbind(node, pci_addr):
    """Detach a PCI device from the driver it is currently bound to.

    :param node: DUT node.
    :param pci_addr: PCI device address.
    :type node: dict
    :type pci_addr: str
    :raises RuntimeError: If PCI device unbind failed.
    """
    escaped_addr = pci_addr.replace(u":", r"\:")
    unbind_file = f"/sys/bus/pci/devices/{escaped_addr}/driver/unbind"
    command = f"sh -c \"echo {pci_addr} | tee {unbind_file}\""
    message = f"Failed to unbind PCI device {pci_addr} on {node[u'host']}"

    exec_cmd_no_error(
        node, command, timeout=120, sudo=True, message=message
    )
def unbind_pci_devices_from_other_driver(node, driver, *pci_addrs):
    """Unbind PCI devices that are not bound to the given driver.

    :param node: DUT node.
    :param driver: Driver to not unbind from. If None or empty string,
        will attempt to unbind from the current driver.
    :param pci_addrs: PCI device addresses.
    :type node: dict
    :type driver: str
    :type pci_addrs: list
    """
    for address in pci_addrs:
        # The current driver is only looked up when a driver to keep was
        # given.
        if driver and DUTSetup.get_pci_dev_driver(node, address) == driver:
            continue
        DUTSetup.pci_driver_unbind(node, address)
def pci_driver_bind(node, pci_addr, driver):
    """Attach a PCI device to a driver through sysfs.

    Three writes are issued in order: set driver_override of the device,
    bind the device to the driver, and reset driver_override.

    :param node: DUT node.
    :param pci_addr: PCI device address.
    :param driver: Driver to bind.
    :type node: dict
    :type pci_addr: str
    :type driver: str
    :raises RuntimeError: If PCI device bind failed.
    """
    message = (
        f"Failed to bind PCI device {pci_addr} to {driver} "
        f"on host {node[u'host']}"
    )
    escaped_addr = pci_addr.replace(u":", r"\:")
    override_file = f"/sys/bus/pci/devices/{escaped_addr}/driver_override"

    steps = (
        f"sh -c \"echo {driver} | tee {override_file}\"",
        f"sh -c \"echo {pci_addr} | tee /sys/bus/pci/drivers/{driver}/bind\"",
        f"sh -c \"echo | tee {override_file}\"",
    )
    for command in steps:
        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )
def pci_vf_driver_unbind(node, pf_pci_addr, vf_id):
    """Detach a Virtual Function from the driver it is bound to.

    :param node: DUT node.
    :param pf_pci_addr: PCI device address.
    :param vf_id: Virtual Function ID.
    :type node: dict
    :type pf_pci_addr: str
    :type vf_id: int
    :raises RuntimeError: If Virtual Function unbind failed.
    """
    vf_pci_addr = DUTSetup.get_virtfn_pci_addr(node, pf_pci_addr, vf_id)
    escaped_pf = pf_pci_addr.replace(u":", r"\:")
    unbind_file = (
        f"/sys/bus/pci/devices/{escaped_pf}/virtfn{vf_id}/driver/unbind"
    )

    command = f"sh -c \"echo {vf_pci_addr} | tee {unbind_file}\""
    message = f"Failed to unbind VF {vf_pci_addr} on {node[u'host']}"

    exec_cmd_no_error(
        node, command, timeout=120, sudo=True, message=message
    )
def pci_vf_driver_bind(node, pf_pci_addr, vf_id, driver):
    """Attach a Virtual Function to a driver through sysfs.

    Three writes are issued in order: set driver_override of the VF, bind
    the VF to the driver, and reset driver_override.

    :param node: DUT node.
    :param pf_pci_addr: PCI device address.
    :param vf_id: Virtual Function ID.
    :param driver: Driver to bind.
    :type node: dict
    :type pf_pci_addr: str
    :type vf_id: int
    :type driver: str
    :raises RuntimeError: If PCI device bind failed.
    """
    vf_pci_addr = DUTSetup.get_virtfn_pci_addr(node, pf_pci_addr, vf_id)
    escaped_pf = pf_pci_addr.replace(u":", r'\:')
    override_file = (
        f"/sys/bus/pci/devices/{escaped_pf}/virtfn{vf_id}/driver_override"
    )

    message = (
        f"Failed to bind VF {vf_pci_addr} to {driver} "
        f"on {node[u'host']}"
    )

    steps = (
        f"sh -c \"echo {driver} | tee {override_file}\"",
        f"sh -c \"echo {vf_pci_addr} | "
        f"tee /sys/bus/pci/drivers/{driver}/bind\"",
        f"sh -c \"echo | tee {override_file}\"",
    )
    for command in steps:
        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )
def get_pci_dev_driver(node, pci_addr):
    """Report which driver a PCI device on a node is bound to.

    :param node: DUT node.
    :param pci_addr: PCI device address.
    :type node: dict
    :type pci_addr: str
    :returns: Driver or None
    :rtype: str
    :raises RuntimeError: If it is not possible to get the interface driver
        information from the node.
    """
    driver_path = f"/sys/bus/pci/devices/{pci_addr}/driver"

    ret_code, _, _ = exec_cmd(node, f"test -d {driver_path}")
    if int(ret_code) != 0:
        # the directory doesn't exist which means the device is not bound
        return None

    # The driver name is the last component of the resolved driver link.
    stdout, _ = exec_cmd_no_error(
        node, f"basename $(readlink -f {driver_path})"
    )
    return stdout.strip()
def verify_kernel_module(node, module, force_load=False):
    """Verify if kernel module is loaded on node. If parameter force
    load is set to True, then try to load the modules.

    The module name may be given with dashes or underscores (e.g.
    "vfio-pci" or "vfio_pci"); both spellings are looked up as the
    underscore form, which is how loaded modules are listed.

    :param node: Node.
    :param module: Module to verify.
    :param force_load: If True then try to load module.
    :type node: dict
    :type module: str
    :type force_load: bool
    :raises RuntimeError: If module is not loaded or failed to load.
    """
    # /proc/modules lists module names with underscores only, so a dashed
    # name would never match even when the module is loaded.
    listed_name = module.replace(u"-", u"_")
    command = f"grep -w {listed_name} /proc/modules"
    message = (
        f"Kernel module {module} is not loaded "
        f"on host {node[u'host']}"
    )

    try:
        exec_cmd_no_error(
            node, command, timeout=30, sudo=False, message=message
        )
    except RuntimeError:
        if not force_load:
            raise
        # Module is not loaded and we want to load it
        DUTSetup.load_kernel_module(node, module)
def verify_kernel_module_on_all_duts(nodes, module, force_load=False):
    """Run the kernel module verification on every DUT node.

    :param nodes: DUT nodes.
    :param module: Module to verify.
    :param force_load: If True then try to load module.
    :type nodes: dict
    :type module: str
    :type force_load: bool
    """
    duts = (
        candidate for candidate in nodes.values()
        if candidate[u"type"] == NodeType.DUT
    )
    for dut in duts:
        DUTSetup.verify_kernel_module(dut, module, force_load)
def verify_uio_driver_on_all_duts(nodes):
    """Make sure the uio driver kernel module is loaded on every DUT.

    The driver name is taken from the topology of each node; the module
    is loaded when the verification finds it missing.

    :param nodes: DUT nodes.
    :type nodes: dict
    """
    for candidate in nodes.values():
        if candidate[u"type"] != NodeType.DUT:
            continue
        DUTSetup.verify_kernel_module(
            candidate, Topology.get_uio_driver(candidate), force_load=True
        )
def load_kernel_module(node, module):
    """Load a kernel module on a node with modprobe.

    :param node: DUT node.
    :param module: Module to load.
    :type node: dict
    :type module: str
    :returns: nothing
    :raises RuntimeError: If loading failed.
    """
    exec_cmd_no_error(
        node, f"modprobe {module}", timeout=30, sudo=True,
        message=f"Failed to load {module} on host {node[u'host']}"
    )
def install_vpp_on_all_duts(nodes, vpp_pkg_dir):
    """Install VPP packages on every DUT node.

    Existing VPP packages are removed first. On Ubuntu the .deb packages
    are installed and the VPP service is restarted only inside a
    container; on other distributions the .rpm packages are installed and
    the VPP service is always restarted.

    :param nodes: Nodes in the topology.
    :param vpp_pkg_dir: Path to directory where VPP packages are stored.
    :type nodes: dict
    :type vpp_pkg_dir: str
    :raises RuntimeError: If failed to remove or install VPP.
    """
    for node in nodes.values():
        message = f"Failed to install VPP on host {node[u'host']}!"
        if node[u"type"] != NodeType.DUT:
            continue

        exec_cmd_no_error(
            node, u"ln -s /dev/null /etc/sysctl.d/80-vpp.conf || true",
            sudo=True
        )

        distro, _ = exec_cmd_no_error(
            node, u". /etc/lsb-release; echo \"${DISTRIB_ID}\""
        )

        if distro.strip() == u"Ubuntu":
            exec_cmd_no_error(
                node, u"apt-get purge -y '*vpp*' || true",
                timeout=120, sudo=True
            )
            # workaround to avoid installation of vpp-api-python
            exec_cmd_no_error(
                node, f"rm -f {vpp_pkg_dir}vpp-api-python.deb",
                timeout=120, sudo=True
            )
            exec_cmd_no_error(
                node, f"dpkg -i --force-all {vpp_pkg_dir}*.deb",
                timeout=120, sudo=True, message=message
            )
            exec_cmd_no_error(node, u"dpkg -l | grep vpp", sudo=True)
            if DUTSetup.running_in_container(node):
                DUTSetup.restart_service(node, Constants.VPP_UNIT)
        else:
            exec_cmd_no_error(
                node, u"yum -y remove '*vpp*' || true",
                timeout=120, sudo=True
            )
            # workaround to avoid installation of vpp-api-python
            exec_cmd_no_error(
                node, f"rm -f {vpp_pkg_dir}vpp-api-python.rpm",
                timeout=120, sudo=True
            )
            exec_cmd_no_error(
                node, f"rpm -ivh {vpp_pkg_dir}*.rpm",
                timeout=120, sudo=True, message=message
            )
            exec_cmd_no_error(node, u"rpm -qai '*vpp*'", sudo=True)
            DUTSetup.restart_service(node, Constants.VPP_UNIT)
def running_in_container(node):
    """Tell whether a topology node runs inside a docker container.

    The check greps /proc/1/cgroup for the string "docker".

    :param node: Topology node.
    :type node: dict
    :returns: True if running in docker container, false if not or failed
        to detect.
    :rtype: bool
    """
    try:
        exec_cmd_no_error(
            node, u"fgrep docker /proc/1/cgroup", timeout=30, sudo=False,
            message=u"Failed to get cgroup settings."
        )
    except RuntimeError:
        return False
    else:
        return True
def get_docker_mergeddir(node, uuid):
    """Query docker inspect for the MergedDir of a container.

    :param node: DUT node.
    :param uuid: Docker UUID.
    :type node: dict
    :type uuid: str
    :returns: Docker container MergedDir.
    :rtype: str
    :raises RuntimeError: If getting output failed.
    """
    # Plain (non f-) string, so the Go template braces need no doubling.
    inspect_prefix = u"docker inspect --format="
    go_template = u"'{{.GraphDriver.Data.MergedDir}}'"
    command = f"{inspect_prefix}{go_template} {uuid}"
    message = f"Failed to get directory of {uuid} on host {node[u'host']}"

    stdout, _ = exec_cmd_no_error(node, command, sudo=True, message=message)
    return stdout.strip()
def get_hugepages_info(node, hugesize=None):
    """Get number of huge pages in system.

    The counters are read by name from
    /sys/kernel/mm/hugepages/hugepages-<hugesize>kB/, so additional files
    in that directory do not influence the result.

    :param node: Node in the topology.
    :param hugesize: Size of hugepages. Default system huge size if None.
    :type node: dict
    :type hugesize: int
    :returns: Number of huge pages in system, keyed by counter name; None
        if the counters could not be parsed.
    :rtype: dict
    :raises RuntimeError: If reading failed.
    """
    if not hugesize:
        # Resolved by the remote shell to the default huge page size.
        hugesize = "$(grep Hugepagesize /proc/meminfo | awk '{ print $2 }')"

    counters = (
        "free_hugepages",
        "nr_hugepages",
        "nr_hugepages_mempolicy",
        "nr_overcommit_hugepages",
        "resv_hugepages",
        "surplus_hugepages",
    )
    # Name every file explicitly instead of relying on the order and the
    # content of a "*" glob over the directory.
    file_list = " ".join(counters)
    command = (
        f"cd /sys/kernel/mm/hugepages/hugepages-{hugesize}kB && "
        f"cat {file_list}"
    )
    stdout, _ = exec_cmd_no_error(node, command)

    try:
        values = [int(item) for item in stdout.split()]
    except ValueError:
        values = []

    if len(values) != len(counters):
        logger.trace(u"Reading huge pages information failed!")
        return None

    return dict(zip(counters, values))
784 node, huge_mnt, mem_size, hugesize=2048, allocate=False):
785 """Check if there is enough HugePages in system. If allocate is set to
786 true, try to allocate more HugePages.
788 :param node: Node in the topology.
789 :param huge_mnt: HugePage mount point.
:param mem_size: Requested memory in MB.
791 :param hugesize: HugePage size in KB.
792 :param allocate: Whether to allocate more memory if not enough.
798 :raises RuntimeError: Mounting hugetlbfs failed or not enough HugePages
799 or increasing map count failed.
801 # Get huge pages information.
802 hugepages = DUTSetup.get_hugepages_info(node, hugesize=hugesize)
804 # Check if hugepages requested are available on node.
805 if hugepages[u"nr_overcommit_hugepages"]:
806 # If overcommit is used, we need to know how many additional pages
808 huge_available = hugepages[u"nr_overcommit_hugepages"] - \
809 hugepages[u"surplus_hugepages"]
# Fall back to free_hugepages, which was used for detection before.
812 huge_available = hugepages[u"free_hugepages"]
814 if ((mem_size * 1024) // hugesize) > huge_available:
815 # If we want to allocate hugepage dynamically.
817 huge_needed = ((mem_size * 1024) // hugesize) - huge_available
818 huge_to_allocate = huge_needed + hugepages[u"nr_hugepages"]
819 max_map_count = huge_to_allocate * 4
820 # Check if huge pages mount point exist.
822 exec_cmd_no_error(node, u"fgrep 'hugetlbfs' /proc/mounts")
824 exec_cmd_no_error(node, f"mkdir -p {huge_mnt}", sudo=True)
827 f"mount -t hugetlbfs -o pagesize={hugesize}k none "
830 # Increase maximum number of memory map areas for process.
833 f"echo \"{max_map_count}\" | "
834 f"sudo tee /proc/sys/vm/max_map_count",
835 message=f"Increase map count failed on {node[u'host']}!"
837 # Increase hugepage count.
840 f"echo \"{huge_to_allocate}\" | "
841 f"sudo tee /proc/sys/vm/nr_hugepages",
842 message=f"Mount huge pages failed on {node[u'host']}!"
844 # If we do not want to allocate dynamically end with error.
847 f"Not enough availablehuge pages: {huge_available}!"