# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""DUT setup library."""

from time import sleep

from robot.api import logger

from resources.libraries.python.Constants import Constants
from resources.libraries.python.ssh import exec_cmd, exec_cmd_no_error
from resources.libraries.python.topology import NodeType, Topology
25 """Contains methods for setting up DUTs."""

    @staticmethod
    def get_service_logs(node, service):
        """Get specific service unit logs from node.

        :param node: Node in the topology.
        :param service: Service unit name.
        """
        if DUTSetup.running_in_container(node):
            command = u"cat /var/log/vpp/vpp.log"
        else:
            command = (
                f"journalctl --no-pager _SYSTEMD_INVOCATION_ID=$(systemctl "
                f"show -p InvocationID --value {service})"
            )
        message = f"Node {node[u'host']} failed to get logs from unit {service}"

        exec_cmd_no_error(
            node, command, timeout=30, sudo=True, message=message
        )

    @staticmethod
    def get_service_logs_on_all_duts(nodes, service):
        """Get specific service unit logs from all DUTs.

        :param nodes: Nodes in the topology.
        :param service: Service unit name.
        """
        for node in nodes.values():
            if node[u"type"] == NodeType.DUT:
                DUTSetup.get_service_logs(node, service)

    @staticmethod
    def restart_service(node, service):
        """Restart the named service on node.

        :param node: Node in the topology.
        :param service: Service unit name.
        """
        if DUTSetup.running_in_container(node):
            command = f"supervisorctl restart {service}"
        else:
            command = f"systemctl restart {service}"
        message = f"Node {node[u'host']} failed to restart service {service}"

        exec_cmd_no_error(
            node, command, timeout=180, sudo=True, message=message
        )

        DUTSetup.get_service_logs(node, service)

    @staticmethod
    def restart_service_on_all_duts(nodes, service):
        """Restart the named service on all DUTs.

        :param nodes: Nodes in the topology.
        :param service: Service unit name.
        """
        for node in nodes.values():
            if node[u"type"] == NodeType.DUT:
                DUTSetup.restart_service(node, service)

    @staticmethod
    def start_service(node, service):
        """Start up the named service on node.

        :param node: Node in the topology.
        :param service: Service unit name.
        """
        if DUTSetup.running_in_container(node):
            command = f"supervisorctl restart {service}"
        else:
            command = f"systemctl restart {service}"
        message = f"Node {node[u'host']} failed to start service {service}"

        exec_cmd_no_error(
            node, command, timeout=180, sudo=True, message=message
        )

        DUTSetup.get_service_logs(node, service)

    @staticmethod
    def start_service_on_all_duts(nodes, service):
        """Start up the named service on all DUTs.

        :param nodes: Nodes in the topology.
        :param service: Service unit name.
        """
        for node in nodes.values():
            if node[u"type"] == NodeType.DUT:
                DUTSetup.start_service(node, service)

    @staticmethod
    def stop_service(node, service):
        """Stop the named service on node.

        :param node: Node in the topology.
        :param service: Service unit name.
        """
        DUTSetup.get_service_logs(node, service)

        if DUTSetup.running_in_container(node):
            command = f"supervisorctl stop {service}"
        else:
            command = f"systemctl stop {service}"
        message = f"Node {node[u'host']} failed to stop service {service}"

        exec_cmd_no_error(
            node, command, timeout=180, sudo=True, message=message
        )

    @staticmethod
    def stop_service_on_all_duts(nodes, service):
        """Stop the named service on all DUTs.

        :param nodes: Nodes in the topology.
        :param service: Service unit name.
        """
        for node in nodes.values():
            if node[u"type"] == NodeType.DUT:
                DUTSetup.stop_service(node, service)

    @staticmethod
    def kill_program(node, program, namespace=None):
        """Kill program on the specified topology node.

        :param node: Topology node.
        :param program: Program name.
        :param namespace: Namespace program is running in.
        """
        host = node[u"host"]
        cmd_timeout = 5
        if namespace in (None, u"default"):
            shell_cmd = u"sh -c"
        else:
            shell_cmd = f"ip netns exec {namespace} sh -c"
        pgrep_cmd = f"{shell_cmd} \'pgrep -c {program}\'"
        _, stdout, _ = exec_cmd(node, pgrep_cmd, timeout=cmd_timeout,
                                sudo=True)
        if int(stdout) == 0:
            logger.trace(f"{program} is not running on {host}")
            return
        exec_cmd(node, f"{shell_cmd} \'pkill {program}\'",
                 timeout=cmd_timeout, sudo=True)
        for attempt in range(5):
            _, stdout, _ = exec_cmd(node, pgrep_cmd, timeout=cmd_timeout,
                                    sudo=True)
            if int(stdout) == 0:
                logger.trace(f"Attempt {attempt}: {program} is dead on {host}")
                return
            sleep(1)
        logger.trace(f"SIGKILLing {program} on {host}")
        exec_cmd(node, f"{shell_cmd} \'pkill -9 {program}\'",
                 timeout=cmd_timeout, sudo=True)

    @staticmethod
    def verify_program_installed(node, program):
        """Verify that program is installed on the specified topology node.

        :param node: Topology node.
        :param program: Program name.
        """
        cmd = f"command -v {program}"
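        # `command -v` exits non-zero when the program is not found in PATH;
        # exec_cmd_no_error then raises RuntimeError with the message below.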
        exec_cmd_no_error(node, cmd, message=f"{program} is not installed")

    @staticmethod
    def get_pid(node, process, retries=3):
        """Get PID of running process.

        :param node: DUT node.
        :param process: process name.
        :param retries: How many times to retry on failure.
        :returns: PIDs of the process.
        :rtype: list of int
        :raises RuntimeError: If it is not possible to get the PID.
        """
        cmd = f"pidof {process}"
        stdout, _ = exec_cmd_no_error(
            node, cmd, retries=retries,
            message=f"No {process} PID found on node {node[u'host']}")
        pid_list = stdout.split()
        return [int(pid) for pid in pid_list]

    @staticmethod
    def get_vpp_pids(nodes):
        """Get PID of running VPP process on all DUTs.

        :param nodes: DUT nodes.
        :returns: VPP PIDs keyed by node host.
        :rtype: dict
        """
        pids = dict()
        for node in nodes.values():
            if node[u"type"] == NodeType.DUT:
                pids[node[u"host"]] = DUTSetup.get_pid(node, u"vpp")
        return pids

    @staticmethod
    def crypto_device_verify(node, crypto_type, numvfs, force_init=False):
        """Verify if Crypto QAT device virtual functions are initialized on all
        DUTs. If parameter force initialization is set to True, then try to
        initialize or remove VFs on QAT.

        :param node: DUT node.
        :param crypto_type: Crypto device type - HW_DH895xcc, HW_C3xxx,
            HW_C4xxx or HW_4xxx.
        :param numvfs: Number of VFs to initialize, 0 - disable the VFs.
        :param force_init: If True then try to initialize to specific value.
        :type node: dict
        :type crypto_type: string
        :type numvfs: int
        :type force_init: bool
        :raises RuntimeError: If QAT VFs are not created and force init is set
            to False.
        """
        pci_addr = Topology.get_cryptodev(node)
        sriov_numvfs = DUTSetup.get_sriov_numvfs(node, pci_addr)

        if sriov_numvfs != numvfs:
            if force_init:
                # QAT is not initialized and we want to initialize with numvfs
                DUTSetup.crypto_device_init(node, crypto_type, numvfs)
            else:
                raise RuntimeError(
                    f"QAT device failed to create VFs on {node[u'host']}"
                )

    @staticmethod
    def crypto_device_init(node, crypto_type, numvfs):
        """Init Crypto QAT device virtual functions on DUT.

        :param node: DUT node.
        :param crypto_type: Crypto device type - HW_DH895xcc, HW_C3xxx,
            HW_C4xxx or HW_4xxx.
        :param numvfs: Number of VFs to initialize, 0 - disable the VFs.
        :type node: dict
        :type crypto_type: string
        :type numvfs: int
        :raises RuntimeError: If failed to stop VPP or QAT failed to initialize.
        """
        if crypto_type == u"HW_DH895xcc":
            kernel_mod = u"qat_dh895xcc"
            kernel_drv = u"dh895xcc"
        elif crypto_type == u"HW_C3xxx":
            kernel_mod = u"qat_c3xxx"
            kernel_drv = u"c3xxx"
        elif crypto_type == u"HW_C4xxx":
            kernel_mod = u"qat_c4xxx"
            kernel_drv = u"c4xxx"
        elif crypto_type == u"HW_4xxx":
            kernel_mod = u"qat_4xxx"
            kernel_drv = u"4xxx"
        else:
            raise RuntimeError(
                f"Unsupported crypto device type on {node[u'host']}"
            )

        pci_addr = Topology.get_cryptodev(node)

        # QAT device must be re-bound to kernel driver before initialization.
        DUTSetup.verify_kernel_module(node, kernel_mod, force_load=True)

        # Stop VPP to prevent deadlock.
        DUTSetup.stop_service(node, Constants.VPP_UNIT)

        current_driver = DUTSetup.get_pci_dev_driver(
            node, pci_addr.replace(u":", r"\:")
        )
        if current_driver is not None:
            DUTSetup.pci_driver_unbind(node, pci_addr)

        # Bind to kernel driver.
        DUTSetup.pci_driver_bind(node, pci_addr, kernel_drv)

        # Initialize QAT VFs.
        if numvfs > 0:
            DUTSetup.set_sriov_numvfs(node, pci_addr, numvfs)

    @staticmethod
    def get_virtfn_pci_addr(node, pf_pci_addr, vf_id):
        """Get PCI address of Virtual Function.

        :param node: DUT node.
        :param pf_pci_addr: Physical Function PCI address.
        :param vf_id: Virtual Function number.
        :type node: dict
        :type pf_pci_addr: str
        :type vf_id: int
        :returns: Virtual Function PCI address.
        :rtype: str
        :raises RuntimeError: If failed to get Virtual Function PCI address.
        """
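        # /sys/bus/pci/devices/<PF>/virtfnN is a symlink to the VF's own device
        # directory, so the basename of its readlink target is the VF PCI
        # address.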
        command = f"sh -c \"basename $(readlink " \
            f"/sys/bus/pci/devices/{pf_pci_addr}/virtfn{vf_id})\""
        message = u"Failed to get virtual function PCI address."

        stdout, _ = exec_cmd_no_error(
            node, command, timeout=30, sudo=True, message=message
        )

        return stdout.strip()

    @staticmethod
    def get_sriov_numvfs(node, pf_pci_addr):
        """Get number of SR-IOV VFs.

        :param node: DUT node.
        :param pf_pci_addr: Physical Function PCI device address.
        :type node: dict
        :type pf_pci_addr: str
        :returns: Number of VFs.
        :rtype: int
        :raises RuntimeError: If PCI device is not SR-IOV capable.
        """
        pci = pf_pci_addr.replace(u":", r"\:")
        command = f"cat /sys/bus/pci/devices/{pci}/sriov_numvfs"
        message = f"PCI device {pf_pci_addr} is not a SR-IOV device."
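
        # Retry a few times in case the sysfs read transiently returns output
        # that does not parse as an integer.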
        for _ in range(3):
            stdout, _ = exec_cmd_no_error(
                node, command, timeout=30, sudo=True, message=message
            )
            try:
                sriov_numvfs = int(stdout)
            except ValueError:
                logger.trace(
                    f"Reading sriov_numvfs info failed on {node[u'host']}"
                )
            else:
                return sriov_numvfs

    @staticmethod
    def set_sriov_numvfs(node, pf_pci_addr, numvfs=0):
        """Init or reset SR-IOV virtual functions by setting its number on PCI
        device on DUT. Setting to zero removes all VFs.

        :param node: DUT node.
        :param pf_pci_addr: Physical Function PCI device address.
        :param numvfs: Number of VFs to initialize, 0 - removes the VFs.
        :type node: dict
        :type pf_pci_addr: str
        :type numvfs: int
        :raises RuntimeError: Failed to create VFs on PCI.
        """
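        # The sriov_numvfs sysfs attribute exists only on SR-IOV capable
        # devices. Writing a VF count creates the VFs and writing 0 removes
        # them; the kernel requires a reset to 0 before it accepts a different
        # non-zero value.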
        cmd = f"test -f /sys/bus/pci/devices/{pf_pci_addr}/sriov_numvfs"
        sriov_unsupported, _, _ = exec_cmd(node, cmd)
        # if sriov_numvfs doesn't exist, then sriov_unsupported != 0
        if int(sriov_unsupported):
            if numvfs == 0:
                # sriov is not supported and we want 0 VFs
                # no need to do anything
                return

            raise RuntimeError(
                f"Can't configure {numvfs} VFs on {pf_pci_addr} device "
                f"on {node[u'host']} since it doesn't support SR-IOV."
            )

        pci = pf_pci_addr.replace(u":", r"\:")
        command = f"sh -c \"echo {numvfs} | " \
            f"tee /sys/bus/pci/devices/{pci}/sriov_numvfs\""
        message = f"Failed to create {numvfs} VFs on {pf_pci_addr} device " \
            f"on {node[u'host']}"

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

    @staticmethod
    def pci_driver_unbind(node, pci_addr):
        """Unbind PCI device from current driver on node.

        :param node: DUT node.
        :param pci_addr: PCI device address.
        :raises RuntimeError: If PCI device unbind failed.
        """
        pci = pci_addr.replace(u":", r"\:")
        command = f"sh -c \"echo {pci_addr} | " \
            f"tee /sys/bus/pci/devices/{pci}/driver/unbind\""
        message = f"Failed to unbind PCI device {pci_addr} on {node[u'host']}"

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

    @staticmethod
    def unbind_pci_devices_from_other_driver(node, driver, *pci_addrs):
        """Unbind PCI devices from driver other than input driver on node.

        :param node: DUT node.
        :param driver: Driver to not unbind from. If None or empty string,
            will attempt to unbind from the current driver.
        :param pci_addrs: PCI device addresses.
        :type node: dict
        :type driver: str
        :type pci_addrs: list
        """
        for pci_addr in pci_addrs:
            cur_driver = DUTSetup.get_pci_dev_driver(node, pci_addr)
            if not cur_driver:
                return
            if not driver or cur_driver != driver:
                DUTSetup.pci_driver_unbind(node, pci_addr)

    @staticmethod
    def pci_driver_bind(node, pci_addr, driver):
        """Bind PCI device to driver on node.

        :param node: DUT node.
        :param pci_addr: PCI device address.
        :param driver: Driver to bind.
        :raises RuntimeError: If PCI device bind failed.
        """
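        # Bind via the driver_override sysfs attribute: write the driver name
        # to driver_override, echo the device address into the driver's bind
        # file, then clear the override so later probes behave normally.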
        message = f"Failed to bind PCI device {pci_addr} to {driver} " \
            f"on host {node[u'host']}"
        pci = pci_addr.replace(u":", r"\:")
        command = f"sh -c \"echo {driver} | " \
            f"tee /sys/bus/pci/devices/{pci}/driver_override\""

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

        command = f"sh -c \"echo {pci_addr} | " \
            f"tee /sys/bus/pci/drivers/{driver}/bind\""

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

        command = f"sh -c \"echo | " \
            f"tee /sys/bus/pci/devices/{pci}/driver_override\""

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

    @staticmethod
    def pci_vf_driver_unbind(node, pf_pci_addr, vf_id):
        """Unbind Virtual Function from driver on node.

        :param node: DUT node.
        :param pf_pci_addr: PCI device address.
        :param vf_id: Virtual Function ID.
        :type node: dict
        :type pf_pci_addr: str
        :type vf_id: int
        :raises RuntimeError: If Virtual Function unbind failed.
        """
        vf_pci_addr = DUTSetup.get_virtfn_pci_addr(node, pf_pci_addr, vf_id)
        pf_pci = pf_pci_addr.replace(u":", r"\:")
        vf_path = f"/sys/bus/pci/devices/{pf_pci}/virtfn{vf_id}"

        command = f"sh -c \"echo {vf_pci_addr} | tee {vf_path}/driver/unbind\""
        message = f"Failed to unbind VF {vf_pci_addr} on {node[u'host']}"

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

    @staticmethod
    def pci_vf_driver_bind(node, pf_pci_addr, vf_id, driver):
        """Bind Virtual Function to driver on node.

        :param node: DUT node.
        :param pf_pci_addr: PCI device address.
        :param vf_id: Virtual Function ID.
        :param driver: Driver to bind.
        :type node: dict
        :type pf_pci_addr: str
        :type vf_id: int
        :type driver: str
        :raises RuntimeError: If PCI device bind failed.
        """
        vf_pci_addr = DUTSetup.get_virtfn_pci_addr(node, pf_pci_addr, vf_id)
        pf_pci = pf_pci_addr.replace(u":", r'\:')
        vf_path = f"/sys/bus/pci/devices/{pf_pci}/virtfn{vf_id}"

        message = f"Failed to bind VF {vf_pci_addr} to {driver} " \
            f"on {node[u'host']}"
        command = f"sh -c \"echo {driver} | tee {vf_path}/driver_override\""

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

        command = f"sh -c \"echo {vf_pci_addr} | " \
            f"tee /sys/bus/pci/drivers/{driver}/bind\""

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

        command = f"sh -c \"echo | tee {vf_path}/driver_override\""

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

    @staticmethod
    def get_pci_dev_driver(node, pci_addr):
        """Get current PCI device driver on node.

        :param node: DUT node.
        :param pci_addr: PCI device address.
        :returns: Driver or None.
        :rtype: str
        :raises RuntimeError: If it is not possible to get the interface driver
            information from the node.
        """
        driver_path = f"/sys/bus/pci/devices/{pci_addr}/driver"
        cmd = f"test -d {driver_path}"
        ret_code, ret_val, _ = exec_cmd(node, cmd)
        if int(ret_code):
            # the directory doesn't exist which means the device is not bound
            # to any driver
            return None
        cmd = f"basename $(readlink -f {driver_path})"
        ret_val, _ = exec_cmd_no_error(node, cmd)
        return ret_val.strip()

    @staticmethod
    def verify_kernel_module(node, module, force_load=False):
        """Verify if kernel module is loaded on node. If parameter force
        load is set to True, then try to load the modules.

        :param node: DUT node.
        :param module: Module to verify.
        :param force_load: If True then try to load module.
        :type node: dict
        :type module: str
        :type force_load: bool
        :raises RuntimeError: If module is not loaded or failed to load.
        """
        command = f"grep -w {module} /proc/modules"
        message = f"Kernel module {module} is not loaded " \
            f"on host {node[u'host']}"

        try:
            exec_cmd_no_error(
                node, command, timeout=30, sudo=False, message=message
            )
        except RuntimeError:
            if force_load:
                # Module is not loaded and we want to load it
                DUTSetup.load_kernel_module(node, module)
            else:
                raise

    @staticmethod
    def verify_kernel_module_on_all_duts(nodes, module, force_load=False):
        """Verify if kernel module is loaded on all DUTs. If parameter force
        load is set to True, then try to load the modules.

        :param nodes: DUT nodes.
        :param module: Module to verify.
        :param force_load: If True then try to load module.
        :type nodes: dict
        :type module: str
        :type force_load: bool
        """
        for node in nodes.values():
            if node[u"type"] == NodeType.DUT:
                DUTSetup.verify_kernel_module(node, module, force_load)

    @staticmethod
    def verify_uio_driver_on_all_duts(nodes):
        """Verify if uio driver kernel module is loaded on all DUTs. If module
        is not present it will try to load it.

        :param nodes: DUT nodes.
        """
        for node in nodes.values():
            if node[u"type"] == NodeType.DUT:
                uio_driver = Topology.get_uio_driver(node)
                DUTSetup.verify_kernel_module(node, uio_driver, force_load=True)

    @staticmethod
    def load_kernel_module(node, module):
        """Load kernel module on node.

        :param node: DUT node.
        :param module: Module to load.
        :raises RuntimeError: If loading failed.
        """
        command = f"modprobe {module}"
        message = f"Failed to load {module} on host {node[u'host']}"

        exec_cmd_no_error(node, command, timeout=30, sudo=True, message=message)

    @staticmethod
    def running_in_container(node):
        """This method tests if topology node is running inside container.

        :param node: Topology node.
        :returns: True if running in docker container, false if not or failed
            to detect.
        :rtype: bool
        """
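        # Docker creates /.dockerenv in the container's root filesystem, so a
        # successful read of that file indicates a container environment.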
        command = "cat /.dockerenv"
        try:
            exec_cmd_no_error(node, command, timeout=30)
        except RuntimeError:
            return False
        return True

    @staticmethod
    def get_docker_mergeddir(node, uuid=None):
        """Get Docker overlay for MergedDir diff.

        :param node: DUT node.
        :param uuid: Docker UUID.
        :type node: dict
        :type uuid: str
        :returns: Docker container MergedDir.
        :rtype: str
        :raises RuntimeError: If getting output failed.
        """
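        # Docker bind-mounts /etc/hostname from
        # /var/lib/docker/containers/<id>/hostname; field 4 of that mountinfo
        # entry holds the source path, from which the container id is taken
        # when no uuid is supplied.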
        if uuid is None:
            command = 'fgrep "hostname" /proc/self/mountinfo | cut -f 4 -d" "'
            message = "Failed to get UUID!"
            stdout, _ = exec_cmd_no_error(node, command, message=message)
            uuid = stdout.split(sep="/")[-2]

        command = (
            f"docker inspect "
            f"--format='{{{{.GraphDriver.Data.MergedDir}}}}' {uuid}"
        )
        message = f"Failed to get directory of {uuid} on host {node[u'host']}"

        stdout, _ = exec_cmd_no_error(node, command, sudo=True, message=message)
        return stdout.strip()

    @staticmethod
    def get_hugepages_info(node, hugesize=None):
        """Get number of huge pages in system.

        :param node: Node in the topology.
        :param hugesize: Size of hugepages. Default system huge size if None.
        :type node: dict
        :type hugesize: int
        :returns: Number of huge pages in system.
        :rtype: dict
        :raises RuntimeError: If reading failed.
        """
        if not hugesize:
            hugesize = "$(grep Hugepagesize /proc/meminfo | awk '{ print $2 }')"
        command = f"cat /sys/kernel/mm/hugepages/hugepages-{hugesize}kB/*"
        stdout, _ = exec_cmd_no_error(node, command)
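        # The shell wildcard expands alphabetically, so the values arrive in
        # the order: free_hugepages, nr_hugepages, nr_hugepages_mempolicy,
        # nr_overcommit_hugepages, resv_hugepages, surplus_hugepages.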
        try:
            line = stdout.splitlines()
            return {
                "free_hugepages": int(line[0]),
                "nr_hugepages": int(line[1]),
                "nr_hugepages_mempolicy": int(line[2]),
                "nr_overcommit_hugepages": int(line[3]),
                "resv_hugepages": int(line[4]),
                "surplus_hugepages": int(line[5])
            }
        except ValueError:
            logger.trace(u"Reading huge pages information failed!")

    @staticmethod
    def check_huge_page(
            node, huge_mnt, mem_size, hugesize=2048, allocate=False):
        """Check if there is enough HugePages in system. If allocate is set to
        true, try to allocate more HugePages.

        :param node: Node in the topology.
        :param huge_mnt: HugePage mount point.
        :param mem_size: Requested memory in MB.
        :param hugesize: HugePage size in KB.
        :param allocate: Whether to allocate more memory if not enough.
        :type node: dict
        :type huge_mnt: str
        :type mem_size: int
        :type hugesize: int
        :type allocate: bool
        :raises RuntimeError: Mounting hugetlbfs failed or not enough HugePages
            or increasing map count failed.
        """
        # Get huge pages information.
        hugepages = DUTSetup.get_hugepages_info(node, hugesize=hugesize)

        # Check if hugepages requested are available on node.
        if hugepages[u"nr_overcommit_hugepages"]:
            # If overcommit is used, we need to know how many additional pages
            # we can allocate
            huge_available = hugepages[u"nr_overcommit_hugepages"] - \
                hugepages[u"surplus_hugepages"]
        else:
            # Falling back to free_hugepages which was used before to detect.
            huge_available = hugepages[u"free_hugepages"]

        if ((mem_size * 1024) // hugesize) > huge_available:
            # If we want to allocate hugepage dynamically.
            if allocate:
                huge_needed = ((mem_size * 1024) // hugesize) - huge_available
                huge_to_allocate = huge_needed + hugepages[u"nr_hugepages"]
                max_map_count = huge_to_allocate * 4
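                # mem_size is in MB and hugesize in KB, hence the * 1024 in
                # the page-count conversion; max_map_count is sized with a
                # headroom of four map areas per huge page before being
                # written to /proc/sys/vm/max_map_count below.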
                # Check if huge pages mount point exists.
                try:
                    exec_cmd_no_error(node, u"fgrep 'hugetlbfs' /proc/mounts")
                except RuntimeError:
                    exec_cmd_no_error(node, f"mkdir -p {huge_mnt}", sudo=True)
                    exec_cmd_no_error(
                        node,
                        f"mount -t hugetlbfs -o pagesize={hugesize}k none "
                        f"{huge_mnt}",
                        sudo=True
                    )
                # Increase maximum number of memory map areas for process.
                exec_cmd_no_error(
                    node,
                    f"echo \"{max_map_count}\" | "
                    f"sudo tee /proc/sys/vm/max_map_count",
                    message=f"Increase map count failed on {node[u'host']}!"
                )
                # Increase hugepage count.
                exec_cmd_no_error(
                    node,
                    f"echo \"{huge_to_allocate}\" | "
                    f"sudo tee /proc/sys/vm/nr_hugepages",
                    message=f"Mount huge pages failed on {node[u'host']}!"
                )
            # If we do not want to allocate dynamically, end with error.
            else:
                raise RuntimeError(
                    f"Not enough available huge pages: {huge_available}!"
                )