# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 """DUT setup library."""
16 from time import sleep
17 from robot.api import logger
19 from resources.libraries.python.ssh import exec_cmd, exec_cmd_no_error
20 from resources.libraries.python.topology import NodeType, Topology
24 """Contains methods for setting up DUTs."""
27 def get_service_logs(node, service):
28 """Get specific service unit logs from node.
30 :param node: Node in the topology.
31 :param service: Service unit name.
35 if DUTSetup.running_in_container(node):
38 f"journalctl --no-pager _SYSTEMD_INVOCATION_ID=$(systemctl "
39 f"show -p InvocationID --value {service})"
41 message = f"Node {node[u'host']} failed to get logs from unit {service}"
44 node, command, timeout=30, sudo=True, message=message

    @staticmethod
    def get_service_logs_on_all_duts(nodes, service):
        """Get specific service unit logs from all DUTs.

        :param nodes: Nodes in the topology.
        :param service: Service unit name.
        :type nodes: dict
        :type service: str
        """
        for node in nodes.values():
            if node[u"type"] == NodeType.DUT:
                DUTSetup.get_service_logs(node, service)

    @staticmethod
    def restart_service(node, service):
        """Restart the named service on node.

        :param node: Node in the topology.
        :param service: Service unit name.
        :type node: dict
        :type service: str
        """
        if DUTSetup.running_in_container(node):
            command = f"supervisorctl restart {service}"
        else:
            command = f"systemctl restart {service}"
        message = f"Node {node[u'host']} failed to restart service {service}"

        exec_cmd_no_error(
            node, command, timeout=180, sudo=True, message=message
        )

        DUTSetup.get_service_logs(node, service)

    @staticmethod
    def restart_service_on_all_duts(nodes, service):
        """Restart the named service on all DUTs.

        :param nodes: Nodes in the topology.
        :param service: Service unit name.
        :type nodes: dict
        :type service: str
        """
        for node in nodes.values():
            if node[u"type"] == NodeType.DUT:
                DUTSetup.restart_service(node, service)
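
    # Illustrative usage only (not part of the original library): with "nodes"
    # being the parsed topology dict, the pair of keywords above could be
    # driven from Python like this, assuming u"vpp" is the unit name:
    #
    #   DUTSetup.restart_service(nodes[u"DUT1"], u"vpp")
    #   DUTSetup.restart_service_on_all_duts(nodes, u"vpp")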

    @staticmethod
    def start_service(node, service):
        """Start up the named service on node.

        :param node: Node in the topology.
        :param service: Service unit name.
        :type node: dict
        :type service: str
        """
        if DUTSetup.running_in_container(node):
            command = f"supervisorctl restart {service}"
        else:
            command = f"systemctl restart {service}"
        message = f"Node {node[u'host']} failed to start service {service}"

        exec_cmd_no_error(
            node, command, timeout=180, sudo=True, message=message
        )

        DUTSetup.get_service_logs(node, service)

    @staticmethod
    def start_service_on_all_duts(nodes, service):
        """Start up the named service on all DUTs.

        :param nodes: Nodes in the topology.
        :param service: Service unit name.
        :type nodes: dict
        :type service: str
        """
        for node in nodes.values():
            if node[u"type"] == NodeType.DUT:
                DUTSetup.start_service(node, service)

    @staticmethod
    def stop_service(node, service):
        """Stop the named service on node.

        :param node: Node in the topology.
        :param service: Service unit name.
        :type node: dict
        :type service: str
        """
        DUTSetup.get_service_logs(node, service)

        if DUTSetup.running_in_container(node):
            command = f"supervisorctl stop {service}"
        else:
            command = f"systemctl stop {service}"
        message = f"Node {node[u'host']} failed to stop service {service}"

        exec_cmd_no_error(
            node, command, timeout=180, sudo=True, message=message
        )

    @staticmethod
    def stop_service_on_all_duts(nodes, service):
        """Stop the named service on all DUTs.

        :param nodes: Nodes in the topology.
        :param service: Service unit name.
        :type nodes: dict
        :type service: str
        """
        for node in nodes.values():
            if node[u"type"] == NodeType.DUT:
                DUTSetup.stop_service(node, service)

    @staticmethod
    def kill_program(node, program, namespace=None):
        """Kill program on the specified topology node.

        :param node: Topology node.
        :param program: Program name.
        :param namespace: Namespace program is running in.
        :type node: dict
        :type program: str
        :type namespace: str
        """
        host = node[u"host"]
        cmd_timeout = 5
        if namespace in (None, u"default"):
            shell_cmd = u"sh -c"
        else:
            shell_cmd = f"ip netns exec {namespace} sh -c"

        pgrep_cmd = f"{shell_cmd} \'pgrep -c {program}\'"
        _, stdout, _ = exec_cmd(node, pgrep_cmd, timeout=cmd_timeout,
                                sudo=True)
        if int(stdout) == 0:
            logger.trace(f"{program} is not running on {host}")
            return
        exec_cmd(node, f"{shell_cmd} \'pkill {program}\'",
                 timeout=cmd_timeout, sudo=True)
        for attempt in range(5):
            _, stdout, _ = exec_cmd(node, pgrep_cmd, timeout=cmd_timeout,
                                    sudo=True)
            if int(stdout) == 0:
                logger.trace(f"Attempt {attempt}: {program} is dead on {host}")
                return
            sleep(1)
        logger.trace(f"SIGKILLing {program} on {host}")
        exec_cmd(node, f"{shell_cmd} \'pkill -9 {program}\'",
                 timeout=cmd_timeout, sudo=True)
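
    # Illustrative usage only (program and namespace names are made up):
    # terminating a stray "iperf3" left running in network namespace "ns1":
    #
    #   DUTSetup.kill_program(nodes[u"DUT1"], u"iperf3", namespace=u"ns1")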

    @staticmethod
    def verify_program_installed(node, program):
        """Verify that program is installed on the specified topology node.

        :param node: Topology node.
        :param program: Program name.
        :type node: dict
        :type program: str
        """
        cmd = f"command -v {program}"
        exec_cmd_no_error(node, cmd, message=f"{program} is not installed")

    @staticmethod
    def get_pid(node, process, retries=3):
        """Get PID of running process.

        :param node: DUT node.
        :param process: Process name.
        :param retries: How many times to retry on failure.
        :type node: dict
        :type process: str
        :type retries: int
        :returns: PIDs of the process.
        :rtype: list of int
        :raises RuntimeError: If it is not possible to get the PID.
        """
        cmd = f"pidof {process}"
        stdout, _ = exec_cmd_no_error(
            node, cmd, retries=retries,
            message=f"No {process} PID found on node {node[u'host']}")
        pid_list = stdout.split()
        return [int(pid) for pid in pid_list]

    @staticmethod
    def get_vpp_pids(nodes):
        """Get PID of running VPP process on all DUTs.

        :param nodes: DUT nodes.
        :type nodes: dict
        :returns: PIDs of VPP processes, keyed by DUT host.
        :rtype: dict
        """
        pids = dict()
        for node in nodes.values():
            if node[u"type"] == NodeType.DUT:
                pids[node[u"host"]] = DUTSetup.get_pid(node, u"vpp")
        return pids
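
    # Illustrative usage only: the returned mapping is keyed by DUT host,
    # e.g. something like {u"192.0.2.1": [1234]} (values are made up):
    #
    #   vpp_pids = DUTSetup.get_vpp_pids(nodes)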

    @staticmethod
    def get_virtfn_pci_addr(node, pf_pci_addr, vf_id):
        """Get PCI address of Virtual Function.

        :param node: DUT node.
        :param pf_pci_addr: Physical Function PCI address.
        :param vf_id: Virtual Function number.
        :type node: dict
        :type pf_pci_addr: str
        :type vf_id: int
        :returns: Virtual Function PCI address.
        :rtype: str
        :raises RuntimeError: If failed to get Virtual Function PCI address.
        """
        command = f"sh -c \"basename $(readlink " \
            f"/sys/bus/pci/devices/{pf_pci_addr}/virtfn{vf_id})\""
        message = u"Failed to get virtual function PCI address."

        stdout, _ = exec_cmd_no_error(
            node, command, timeout=30, sudo=True, message=message
        )

        return stdout.strip()

    @staticmethod
    def get_sriov_numvfs(node, pf_pci_addr):
        """Get number of SR-IOV VFs.

        :param node: DUT node.
        :param pf_pci_addr: Physical Function PCI device address.
        :type node: dict
        :type pf_pci_addr: str
        :returns: Number of VFs.
        :rtype: int
        :raises RuntimeError: If PCI device is not SR-IOV capable.
        """
        pci = pf_pci_addr.replace(u":", r"\:")
        command = f"cat /sys/bus/pci/devices/{pci}/sriov_numvfs"
        message = f"PCI device {pf_pci_addr} is not a SR-IOV device."

        for _ in range(3):
            stdout, _ = exec_cmd_no_error(
                node, command, timeout=30, sudo=True, message=message
            )
            try:
                sriov_numvfs = int(stdout)
            except ValueError:
                logger.trace(
                    f"Reading sriov_numvfs info failed on {node[u'host']}"
                )
            else:
                return sriov_numvfs

    @staticmethod
    def set_sriov_numvfs(node, pf_pci_addr, path="devices", numvfs=0):
        """Init or reset SR-IOV virtual functions by setting its number on PCI
        device on DUT. Setting to zero removes all VFs.

        :param node: DUT node.
        :param pf_pci_addr: Physical Function PCI device address.
        :param path: Either device or driver.
        :param numvfs: Number of VFs to initialize, 0 - removes the VFs.
        :type node: dict
        :type pf_pci_addr: str
        :type path: str
        :type numvfs: int
        :raises RuntimeError: Failed to create VFs on PCI.
        """
        cmd = f"test -f /sys/bus/pci/{path}/{pf_pci_addr}/sriov_numvfs"
        sriov_unsupported, _, _ = exec_cmd(node, cmd)
        # If sriov_numvfs doesn't exist, then sriov_unsupported != 0.
        if int(sriov_unsupported):
            if numvfs == 0:
                # SR-IOV is not supported and we want 0 VFs,
                # no need to do anything.
                return

            raise RuntimeError(
                f"Can't configure {numvfs} VFs on {pf_pci_addr} device "
                f"on {node[u'host']} since it doesn't support SR-IOV."
            )

        pci = pf_pci_addr.replace(u":", r"\:")
        command = f"sh -c \"echo {numvfs} | " \
            f"tee /sys/bus/pci/{path}/{pci}/sriov_numvfs\""
        message = f"Failed to create {numvfs} VFs on {pf_pci_addr} device " \
            f"on {node[u'host']}"

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )
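
    # Illustrative usage only (the PCI address is made up): creating two VFs
    # on a PF and later removing them again by writing zero:
    #
    #   DUTSetup.set_sriov_numvfs(nodes[u"DUT1"], u"0000:18:00.0", numvfs=2)
    #   DUTSetup.set_sriov_numvfs(nodes[u"DUT1"], u"0000:18:00.0", numvfs=0)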

    @staticmethod
    def pci_driver_unbind(node, pci_addr):
        """Unbind PCI device from current driver on node.

        :param node: DUT node.
        :param pci_addr: PCI device address.
        :type node: dict
        :type pci_addr: str
        :raises RuntimeError: If PCI device unbind failed.
        """
        pci = pci_addr.replace(u":", r"\:")
        command = f"sh -c \"echo {pci_addr} | " \
            f"tee /sys/bus/pci/devices/{pci}/driver/unbind\""
        message = f"Failed to unbind PCI device {pci_addr} on {node[u'host']}"

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

    @staticmethod
    def unbind_pci_devices_from_other_driver(node, driver, *pci_addrs):
        """Unbind PCI devices from driver other than input driver on node.

        :param node: DUT node.
        :param driver: Driver to not unbind from. If None or empty string,
            will attempt to unbind from the current driver.
        :param pci_addrs: PCI device addresses.
        :type node: dict
        :type driver: str
        :type pci_addrs: list
        """
        for pci_addr in pci_addrs:
            cur_driver = DUTSetup.get_pci_dev_driver(node, pci_addr)
            if not cur_driver:
                # Device is not bound to any driver, nothing to unbind.
                continue
            if not driver or cur_driver != driver:
                DUTSetup.pci_driver_unbind(node, pci_addr)

    @staticmethod
    def pci_driver_bind(node, pci_addr, driver):
        """Bind PCI device to driver on node.

        :param node: DUT node.
        :param pci_addr: PCI device address.
        :param driver: Driver to bind.
        :type node: dict
        :type pci_addr: str
        :type driver: str
        :raises RuntimeError: If PCI device bind failed.
        """
        message = f"Failed to bind PCI device {pci_addr} to {driver} " \
            f"on host {node[u'host']}"
        pci = pci_addr.replace(u":", r"\:")
        command = f"sh -c \"echo {driver} | " \
            f"tee /sys/bus/pci/devices/{pci}/driver_override\""

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

        command = f"sh -c \"echo {pci_addr} | " \
            f"tee /sys/bus/pci/drivers/{driver}/bind\""

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

        command = f"sh -c \"echo | " \
            f"tee /sys/bus/pci/devices/{pci}/driver_override\""

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )
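
    # Illustrative usage only (the PCI address is made up): detaching a NIC
    # from its kernel driver and attaching it to vfio-pci:
    #
    #   DUTSetup.pci_driver_unbind(nodes[u"DUT1"], u"0000:18:00.0")
    #   DUTSetup.pci_driver_bind(nodes[u"DUT1"], u"0000:18:00.0", u"vfio-pci")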

    @staticmethod
    def pci_vf_driver_unbind(node, pf_pci_addr, vf_id):
        """Unbind Virtual Function from driver on node.

        :param node: DUT node.
        :param pf_pci_addr: PCI device address.
        :param vf_id: Virtual Function ID.
        :type node: dict
        :type pf_pci_addr: str
        :type vf_id: int
        :raises RuntimeError: If Virtual Function unbind failed.
        """
        vf_pci_addr = DUTSetup.get_virtfn_pci_addr(node, pf_pci_addr, vf_id)
        pf_pci = pf_pci_addr.replace(u":", r"\:")
        vf_path = f"/sys/bus/pci/devices/{pf_pci}/virtfn{vf_id}"

        command = f"sh -c \"echo {vf_pci_addr} | tee {vf_path}/driver/unbind\""
        message = f"Failed to unbind VF {vf_pci_addr} on {node[u'host']}"

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

    @staticmethod
    def pci_vf_driver_bind(node, pf_pci_addr, vf_id, driver):
        """Bind Virtual Function to driver on node.

        :param node: DUT node.
        :param pf_pci_addr: PCI device address.
        :param vf_id: Virtual Function ID.
        :param driver: Driver to bind.
        :type node: dict
        :type pf_pci_addr: str
        :type vf_id: int
        :type driver: str
        :raises RuntimeError: If PCI device bind failed.
        """
        vf_pci_addr = DUTSetup.get_virtfn_pci_addr(node, pf_pci_addr, vf_id)
        pf_pci = pf_pci_addr.replace(u":", r"\:")
        vf_path = f"/sys/bus/pci/devices/{pf_pci}/virtfn{vf_id}"

        message = f"Failed to bind VF {vf_pci_addr} to {driver} " \
            f"on {node[u'host']}"
        command = f"sh -c \"echo {driver} | tee {vf_path}/driver_override\""

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

        command = f"sh -c \"echo {vf_pci_addr} | " \
            f"tee /sys/bus/pci/drivers/{driver}/bind\""

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

        command = f"sh -c \"echo | tee {vf_path}/driver_override\""

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

    @staticmethod
    def get_pci_dev_driver(node, pci_addr):
        """Get current PCI device driver on node.

        :param node: DUT node.
        :param pci_addr: PCI device address.
        :type node: dict
        :type pci_addr: str
        :returns: Driver or None.
        :rtype: str or None
        :raises RuntimeError: If it is not possible to get the interface driver
            information from the node.
        """
        driver_path = f"/sys/bus/pci/devices/{pci_addr}/driver"
        cmd = f"test -d {driver_path}"
        ret_code, _, _ = exec_cmd(node, cmd)
        if int(ret_code):
            # The directory doesn't exist, which means the device is not
            # bound to any driver.
            return None
        cmd = f"basename $(readlink -f {driver_path})"
        ret_val, _ = exec_cmd_no_error(node, cmd)
        return ret_val.strip()

    @staticmethod
    def verify_kernel_module(node, module, force_load=False):
        """Verify if kernel module is loaded on node. If parameter force
        load is set to True, then try to load the modules.

        :param node: Node.
        :param module: Module to verify.
        :param force_load: If True then try to load module.
        :type node: dict
        :type module: str
        :type force_load: bool
        :raises RuntimeError: If module is not loaded or failed to load.
        """
        command = f"grep -w {module} /proc/modules"
        message = f"Kernel module {module} is not loaded " \
            f"on host {node[u'host']}"

        try:
            exec_cmd_no_error(
                node, command, timeout=30, sudo=False, message=message
            )
        except RuntimeError:
            if force_load:
                # Module is not loaded and we want to load it.
                DUTSetup.load_kernel_module(node, module)
            else:
                raise

    @staticmethod
    def verify_kernel_module_on_all_duts(nodes, module, force_load=False):
        """Verify if kernel module is loaded on all DUTs. If parameter force
        load is set to True, then try to load the modules.

        :param nodes: DUT nodes.
        :param module: Module to verify.
        :param force_load: If True then try to load module.
        :type nodes: dict
        :type module: str
        :type force_load: bool
        """
        for node in nodes.values():
            if node[u"type"] == NodeType.DUT:
                DUTSetup.verify_kernel_module(node, module, force_load)

    @staticmethod
    def verify_uio_driver_on_all_duts(nodes):
        """Verify if uio driver kernel module is loaded on all DUTs. If module
        is not present it will try to load it.

        :param nodes: DUT nodes.
        :type nodes: dict
        """
        for node in nodes.values():
            if node[u"type"] == NodeType.DUT:
                uio_driver = Topology.get_uio_driver(node)
                DUTSetup.verify_kernel_module(node, uio_driver, force_load=True)

    @staticmethod
    def load_kernel_module(node, module):
        """Load kernel module on node.

        :param node: DUT node.
        :param module: Module to load.
        :type node: dict
        :type module: str
        :raises RuntimeError: If loading failed.
        """
        command = f"modprobe {module}"
        message = f"Failed to load {module} on host {node[u'host']}"

        exec_cmd_no_error(node, command, timeout=30, sudo=True, message=message)

    @staticmethod
    def running_in_container(node):
        """This method tests if topology node is running inside container.

        :param node: Topology node.
        :type node: dict
        :returns: True if running in docker container, False if not or if
            detection failed.
        :rtype: bool
        """
        command = "cat /.dockerenv"
        try:
            exec_cmd_no_error(node, command, timeout=30)
        except RuntimeError:
            return False
        return True

    @staticmethod
    def get_docker_mergeddir(node, uuid=None):
        """Get Docker overlay for MergedDir diff.

        :param node: DUT node.
        :param uuid: Docker UUID.
        :type node: dict
        :type uuid: str
        :returns: Docker container MergedDir.
        :rtype: str
        :raises RuntimeError: If getting output failed.
        """
        if not uuid:
            command = 'fgrep "hostname" /proc/self/mountinfo | cut -f 4 -d" "'
            message = "Failed to get UUID!"
            stdout, _ = exec_cmd_no_error(node, command, message=message)
            uuid = stdout.split(sep="/")[-2]
        command = (
            f"docker inspect "
            f"--format='{{{{.GraphDriver.Data.MergedDir}}}}' {uuid}"
        )
        message = f"Failed to get directory of {uuid} on host {node[u'host']}"

        stdout, _ = exec_cmd_no_error(node, command, sudo=True, message=message)
        return stdout.strip()

    @staticmethod
    def get_hugepages_info(node, hugesize=None):
        """Get huge page information from the system.

        :param node: Node in the topology.
        :param hugesize: Size of hugepages. Default system huge size if None.
        :type node: dict
        :type hugesize: int
        :returns: Huge page counters read from sysfs.
        :rtype: dict
        :raises RuntimeError: If reading failed.
        """
        if not hugesize:
            hugesize = "$(grep Hugepagesize /proc/meminfo | awk '{ print $2 }')"
        command = f"cat /sys/kernel/mm/hugepages/hugepages-{hugesize}kB/*"
        stdout, _ = exec_cmd_no_error(node, command)
        try:
            line = stdout.splitlines()
            return {
                "free_hugepages": int(line[0]),
                "nr_hugepages": int(line[1]),
                "nr_hugepages_mempolicy": int(line[2]),
                "nr_overcommit_hugepages": int(line[3]),
                "resv_hugepages": int(line[4]),
                "surplus_hugepages": int(line[5])
            }
        except ValueError:
            logger.trace(u"Reading huge pages information failed!")
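
    # Illustrative usage only (the numbers shown are made up): for the default
    # 2048 kB hugepage size the returned dict mirrors the sysfs counters,
    # e.g. {"free_hugepages": 1024, "nr_hugepages": 1024, ...}:
    #
    #   info = DUTSetup.get_hugepages_info(nodes[u"DUT1"], hugesize=2048)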

    @staticmethod
    def check_huge_page(
            node, huge_mnt, mem_size, hugesize=2048, allocate=False):
        """Check if there is enough HugePages in system. If allocate is set to
        true, try to allocate more HugePages.

        :param node: Node in the topology.
        :param huge_mnt: HugePage mount point.
        :param mem_size: Requested memory in MB.
        :param hugesize: HugePage size in KB.
        :param allocate: Whether to allocate more memory if not enough.
        :type node: dict
        :type huge_mnt: str
        :type mem_size: int
        :type hugesize: int
        :type allocate: bool
        :raises RuntimeError: Mounting hugetlbfs failed or not enough HugePages
            or increasing map count failed.
        """
        # Get huge pages information.
        hugepages = DUTSetup.get_hugepages_info(node, hugesize=hugesize)

        # Check if hugepages requested are available on node.
        if hugepages[u"nr_overcommit_hugepages"]:
            # If overcommit is used, we need to know how many additional pages
            # we can allocate.
            huge_available = hugepages[u"nr_overcommit_hugepages"] - \
                hugepages[u"surplus_hugepages"]
        else:
            # Falling back to free_hugepages, which was used before to detect.
            huge_available = hugepages[u"free_hugepages"]

        if ((mem_size * 1024) // hugesize) > huge_available:
            # If we want to allocate hugepages dynamically.
            if allocate:
                huge_needed = ((mem_size * 1024) // hugesize) - huge_available
                huge_to_allocate = huge_needed + hugepages[u"nr_hugepages"]
                max_map_count = huge_to_allocate * 4
                # Check if huge pages mount point exists.
                try:
                    exec_cmd_no_error(node, u"fgrep 'hugetlbfs' /proc/mounts")
                except RuntimeError:
                    exec_cmd_no_error(node, f"mkdir -p {huge_mnt}", sudo=True)
                    exec_cmd_no_error(
                        node,
                        f"mount -t hugetlbfs -o pagesize={hugesize}k none "
                        f"{huge_mnt}",
                        sudo=True
                    )
                # Increase maximum number of memory map areas for process.
                exec_cmd_no_error(
                    node,
                    f"echo \"{max_map_count}\" | "
                    f"sudo tee /proc/sys/vm/max_map_count",
                    message=f"Increase map count failed on {node[u'host']}!"
                )
                # Increase hugepage count.
                exec_cmd_no_error(
                    node,
                    f"echo \"{huge_to_allocate}\" | "
                    f"sudo tee /proc/sys/vm/nr_hugepages",
                    message=f"Allocating huge pages failed on {node[u'host']}!"
                )
            # If we do not want to allocate dynamically, end with an error.
            else:
                raise RuntimeError(
                    f"Not enough available huge pages: {huge_available}!"
                )