1 # Copyright (c) 2023 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
14 """DUT setup library."""
16 from time import sleep
17 from robot.api import logger
19 from resources.libraries.python.Constants import Constants
20 from resources.libraries.python.ssh import exec_cmd, exec_cmd_no_error
21 from resources.libraries.python.topology import NodeType, Topology
25 """Contains methods for setting up DUTs."""
def get_service_logs(node, service):
    """Read the journal of the current invocation of a service unit.

    No command is executed when the node is running inside a container.

    :param node: Node in the topology.
    :param service: Service unit name.
    :type node: dict
    :type service: str
    """
    if DUTSetup.running_in_container(node):
        return

    # Limit journalctl output to the invocation ID systemctl reports
    # for the unit.
    journal_cmd = (
        f"journalctl --no-pager _SYSTEMD_INVOCATION_ID=$(systemctl "
        f"show -p InvocationID --value {service})"
    )
    err_msg = f"Node {node['host']} failed to get logs from unit {service}"
    exec_cmd_no_error(
        node, journal_cmd, timeout=30, sudo=True, message=err_msg
    )
def get_service_logs_on_all_duts(nodes, service):
    """Read the logs of a service unit on every DUT node.

    :param nodes: Nodes in the topology.
    :param service: Service unit name.
    :type nodes: dict
    :type service: str
    """
    # Lazily filter so each DUT is handled as soon as it is found.
    duts = (item for item in nodes.values() if item["type"] == NodeType.DUT)
    for dut in duts:
        DUTSetup.get_service_logs(dut, service)
def restart_service(node, service):
    """Restart a service on the node, then read its logs.

    ``supervisorctl`` is used when the node runs inside a container,
    ``systemctl`` otherwise.

    :param node: Node in the topology.
    :param service: Service unit name.
    :type node: dict
    :type service: str
    """
    manager = (
        "supervisorctl" if DUTSetup.running_in_container(node)
        else "systemctl"
    )
    err_msg = f"Node {node['host']} failed to restart service {service}"
    exec_cmd_no_error(
        node,
        f"{manager} restart {service}",
        timeout=180,
        sudo=True,
        message=err_msg,
    )

    DUTSetup.get_service_logs(node, service)
def restart_service_on_all_duts(nodes, service):
    """Restart a service on every DUT node.

    :param nodes: Nodes in the topology.
    :param service: Service unit name.
    :type nodes: dict
    :type service: str
    """
    # Lazily filter so each DUT is handled as soon as it is found.
    duts = (item for item in nodes.values() if item["type"] == NodeType.DUT)
    for dut in duts:
        DUTSetup.restart_service(dut, service)
def start_service(node, service):
    """Bring a service up on the node, then read its logs.

    The command issued is ``restart`` (not ``start``), through
    ``supervisorctl`` when the node runs inside a container and through
    ``systemctl`` otherwise.

    :param node: Node in the topology.
    :param service: Service unit name.
    :type node: dict
    :type service: str
    """
    manager = (
        "supervisorctl" if DUTSetup.running_in_container(node)
        else "systemctl"
    )
    err_msg = f"Node {node['host']} failed to start service {service}"
    exec_cmd_no_error(
        node,
        f"{manager} restart {service}",
        timeout=180,
        sudo=True,
        message=err_msg,
    )

    DUTSetup.get_service_logs(node, service)
def start_service_on_all_duts(nodes, service):
    """Bring a service up on every DUT node.

    :param nodes: Nodes in the topology.
    :param service: Service unit name.
    :type nodes: dict
    :type service: str
    """
    # Lazily filter so each DUT is handled as soon as it is found.
    duts = (item for item in nodes.values() if item["type"] == NodeType.DUT)
    for dut in duts:
        DUTSetup.start_service(dut, service)
def stop_service(node, service):
    """Read the logs of a service on the node, then stop it.

    ``supervisorctl`` is used when the node runs inside a container,
    ``systemctl`` otherwise.

    :param node: Node in the topology.
    :param service: Service unit name.
    :type node: dict
    :type service: str
    """
    # Logs are read before the service goes down.
    DUTSetup.get_service_logs(node, service)

    manager = (
        "supervisorctl" if DUTSetup.running_in_container(node)
        else "systemctl"
    )
    err_msg = f"Node {node['host']} failed to stop service {service}"
    exec_cmd_no_error(
        node,
        f"{manager} stop {service}",
        timeout=180,
        sudo=True,
        message=err_msg,
    )
def stop_service_on_all_duts(nodes, service):
    """Stop a service on every DUT node.

    :param nodes: Nodes in the topology.
    :param service: Service unit name.
    :type nodes: dict
    :type service: str
    """
    # Lazily filter so each DUT is handled as soon as it is found.
    duts = (item for item in nodes.values() if item["type"] == NodeType.DUT)
    for dut in duts:
        DUTSetup.stop_service(dut, service)
165 def kill_program(node, program, namespace=None):
166 """Kill program on the specified topology node.
168 :param node: Topology node.
169 :param program: Program name.
170 :param namespace: Namespace program is running in.
177 if namespace in (None, u"default"):
180 shell_cmd = f"ip netns exec {namespace} sh -c"
182 pgrep_cmd = f"{shell_cmd} \'pgrep -c {program}\'"
183 _, stdout, _ = exec_cmd(node, pgrep_cmd, timeout=cmd_timeout,
186 logger.trace(f"{program} is not running on {host}")
188 exec_cmd(node, f"{shell_cmd} \'pkill {program}\'",
189 timeout=cmd_timeout, sudo=True)
190 for attempt in range(5):
191 _, stdout, _ = exec_cmd(node, pgrep_cmd, timeout=cmd_timeout,
194 logger.trace(f"Attempt {attempt}: {program} is dead on {host}")
197 logger.trace(f"SIGKILLing {program} on {host}")
198 exec_cmd(node, f"{shell_cmd} \'pkill -9 {program}\'",
199 timeout=cmd_timeout, sudo=True)
def verify_program_installed(node, program):
    """Check that a program can be resolved on the topology node.

    The check runs ``command -v`` for the program name.

    :param node: Topology node.
    :param program: Program name.
    :type node: dict
    :type program: str
    """
    exec_cmd_no_error(
        node,
        f"command -v {program}",
        message=f"{program} is not installed",
    )
def get_pid(node, process, retries=3):
    """Return the PIDs of a running process as reported by ``pidof``.

    :param node: DUT node.
    :param process: Process name.
    :param retries: How many times to retry on failure.
    :type node: dict
    :type process: str
    :type retries: int
    :returns: One integer per PID printed by ``pidof``.
    :rtype: list
    :raises RuntimeError: If it is not possible to get the PID.
    """
    pidof_output, _ = exec_cmd_no_error(
        node,
        f"pidof {process}",
        retries=retries,
        message=f"No {process} PID found on node {node['host']}",
    )
    # pidof prints whitespace-separated PIDs.
    return list(map(int, pidof_output.split()))
def get_vpp_pids(nodes):
    """Collect the PIDs of the running VPP process on every DUT.

    :param nodes: DUT nodes.
    :type nodes: dict
    :returns: PIDs of the ``vpp`` process, keyed by the node's host.
    :rtype: dict
    """
    return {
        dut["host"]: DUTSetup.get_pid(dut, "vpp")
        for dut in nodes.values()
        if dut["type"] == NodeType.DUT
    }
def get_virtfn_pci_addr(node, pf_pci_addr, vf_id):
    """Resolve the PCI address of a Virtual Function.

    The address is the basename of the target of the ``virtfn<vf_id>``
    symlink under the Physical Function's sysfs directory.

    :param node: DUT node.
    :param pf_pci_addr: Physical Function PCI address.
    :param vf_id: Virtual Function number.
    :type node: dict
    :type pf_pci_addr: str
    :returns: Virtual Function PCI address.
    :rtype: str
    :raises RuntimeError: If failed to get Virtual Function PCI address.
    """
    virtfn_link = f"/sys/bus/pci/devices/{pf_pci_addr}/virtfn{vf_id}"
    resolve_cmd = f'sh -c "basename $(readlink {virtfn_link})"'

    output, _ = exec_cmd_no_error(
        node,
        resolve_cmd,
        timeout=30,
        sudo=True,
        message="Failed to get virtual function PCI address.",
    )
    return output.strip()
274 def get_sriov_numvfs(node, pf_pci_addr):
275 """Get number of SR-IOV VFs.
277 :param node: DUT node.
278 :param pf_pci_addr: Physical Function PCI device address.
280 :type pf_pci_addr: str
281 :returns: Number of VFs.
283 :raises RuntimeError: If PCI device is not SR-IOV capable.
285 pci = pf_pci_addr.replace(u":", r"\:")
286 command = f"cat /sys/bus/pci/devices/{pci}/sriov_numvfs"
287 message = f"PCI device {pf_pci_addr} is not a SR-IOV device."
290 stdout, _ = exec_cmd_no_error(
291 node, command, timeout=30, sudo=True, message=message
294 sriov_numvfs = int(stdout)
297 f"Reading sriov_numvfs info failed on {node[u'host']}"
def set_sriov_numvfs(node, pf_pci_addr, path="devices", numvfs=0):
    """Set the number of SR-IOV Virtual Functions of a PCI device on DUT.

    Writing zero removes all VFs. When the ``sriov_numvfs`` file does not
    exist, a request for zero VFs is a no-op and any other request fails.

    :param node: DUT node.
    :param pf_pci_addr: Physical Function PCI device address.
    :param path: Either device or driver.
    :param numvfs: Number of VFs to initialize, 0 - removes the VFs.
    :type node: dict
    :type pf_pci_addr: str
    :raises RuntimeError: Failed to create VFs on PCI.
    """
    probe_cmd = f"test -f /sys/bus/pci/{path}/{pf_pci_addr}/sriov_numvfs"
    probe_status, _, _ = exec_cmd(node, probe_cmd)

    # A non-zero exit status means the sriov_numvfs file is missing.
    if int(probe_status) != 0:
        if numvfs != 0:
            raise RuntimeError(
                f"Can't configure {numvfs} VFs on {pf_pci_addr} device "
                f"on {node['host']} since it doesn't support SR-IOV."
            )
        # SR-IOV is unsupported and no VFs were requested: nothing to do.
        return

    escaped_addr = pf_pci_addr.replace(":", r"\:")
    write_cmd = (
        f'sh -c "echo {numvfs} | '
        f'tee /sys/bus/pci/{path}/{escaped_addr}/sriov_numvfs"'
    )
    err_msg = (
        f"Failed to create {numvfs} VFs on {pf_pci_addr} device "
        f"on {node['host']}"
    )
    exec_cmd_no_error(
        node, write_cmd, timeout=120, sudo=True, message=err_msg
    )
def pci_driver_unbind(node, pci_addr):
    """Detach a PCI device from its current driver on the node.

    The device address is written to the ``driver/unbind`` file of the
    device's sysfs directory.

    :param node: DUT node.
    :param pci_addr: PCI device address.
    :type node: dict
    :type pci_addr: str
    :raises RuntimeError: If PCI device unbind failed.
    """
    # Colons are backslash-escaped in the path only; the echoed address
    # is written unescaped.
    escaped_addr = pci_addr.replace(":", r"\:")
    unbind_cmd = (
        f'sh -c "echo {pci_addr} | '
        f'tee /sys/bus/pci/devices/{escaped_addr}/driver/unbind"'
    )
    err_msg = f"Failed to unbind PCI device {pci_addr} on {node['host']}"
    exec_cmd_no_error(
        node, unbind_cmd, timeout=120, sudo=True, message=err_msg
    )
361 def unbind_pci_devices_from_other_driver(node, driver, *pci_addrs):
362 """Unbind PCI devices from driver other than input driver on node.
364 :param node: DUT node.
365 :param driver: Driver to not unbind from. If None or empty string,
366 will attempt to unbind from the current driver.
367 :param pci_addrs: PCI device addresses.
370 :type pci_addrs: list
372 for pci_addr in pci_addrs:
373 cur_driver = DUTSetup.get_pci_dev_driver(node, pci_addr)
376 if not driver or cur_driver != driver:
377 DUTSetup.pci_driver_unbind(node, pci_addr)
def pci_driver_bind(node, pci_addr, driver):
    """Attach a PCI device to the given driver on the node.

    Three sysfs writes are performed in order: set ``driver_override`` to
    the driver, write the address to the driver's ``bind`` file, then
    write an empty line to ``driver_override``.

    :param node: DUT node.
    :param pci_addr: PCI device address.
    :param driver: Driver to bind.
    :type node: dict
    :type pci_addr: str
    :type driver: str
    :raises RuntimeError: If PCI device bind failed.
    """
    err_msg = (
        f"Failed to bind PCI device {pci_addr} to {driver} "
        f"on host {node['host']}"
    )
    escaped_addr = pci_addr.replace(":", r"\:")
    override_file = f"/sys/bus/pci/devices/{escaped_addr}/driver_override"

    steps = (
        f'sh -c "echo {driver} | tee {override_file}"',
        f'sh -c "echo {pci_addr} | '
        f'tee /sys/bus/pci/drivers/{driver}/bind"',
        f'sh -c "echo | tee {override_file}"',
    )
    # The same failure message is used for every step.
    for step_cmd in steps:
        exec_cmd_no_error(
            node, step_cmd, timeout=120, sudo=True, message=err_msg
        )
def pci_vf_driver_unbind(node, pf_pci_addr, vf_id):
    """Detach a Virtual Function from its driver on the node.

    The VF address is written to the ``driver/unbind`` file reached
    through the Physical Function's ``virtfn<vf_id>`` entry.

    :param node: DUT node.
    :param pf_pci_addr: PCI device address.
    :param vf_id: Virtual Function ID.
    :type node: dict
    :type pf_pci_addr: str
    :raises RuntimeError: If Virtual Function unbind failed.
    """
    vf_addr = DUTSetup.get_virtfn_pci_addr(node, pf_pci_addr, vf_id)
    escaped_pf = pf_pci_addr.replace(":", r"\:")
    virtfn_dir = f"/sys/bus/pci/devices/{escaped_pf}/virtfn{vf_id}"

    unbind_cmd = f'sh -c "echo {vf_addr} | tee {virtfn_dir}/driver/unbind"'
    err_msg = f"Failed to unbind VF {vf_addr} on {node['host']}"
    exec_cmd_no_error(
        node, unbind_cmd, timeout=120, sudo=True, message=err_msg
    )
def pci_vf_driver_bind(node, pf_pci_addr, vf_id, driver):
    """Attach a Virtual Function to the given driver on the node.

    Three sysfs writes are performed in order: set the VF's
    ``driver_override`` to the driver, write the VF address to the
    driver's ``bind`` file, then write an empty line to
    ``driver_override``.

    :param node: DUT node.
    :param pf_pci_addr: PCI device address.
    :param vf_id: Virtual Function ID.
    :param driver: Driver to bind.
    :type node: dict
    :type pf_pci_addr: str
    :type driver: str
    :raises RuntimeError: If PCI device bind failed.
    """
    vf_addr = DUTSetup.get_virtfn_pci_addr(node, pf_pci_addr, vf_id)
    escaped_pf = pf_pci_addr.replace(":", r"\:")
    override_file = (
        f"/sys/bus/pci/devices/{escaped_pf}/virtfn{vf_id}/driver_override"
    )
    err_msg = f"Failed to bind VF {vf_addr} to {driver} on {node['host']}"

    steps = (
        f'sh -c "echo {driver} | tee {override_file}"',
        f'sh -c "echo {vf_addr} | '
        f'tee /sys/bus/pci/drivers/{driver}/bind"',
        f'sh -c "echo | tee {override_file}"',
    )
    # The same failure message is used for every step.
    for step_cmd in steps:
        exec_cmd_no_error(
            node, step_cmd, timeout=120, sudo=True, message=err_msg
        )
def get_pci_dev_driver(node, pci_addr):
    """Report which driver a PCI device is currently bound to.

    :param node: DUT node.
    :param pci_addr: PCI device address.
    :type node: dict
    :type pci_addr: str
    :returns: Driver or None
    :raises RuntimeError: If it is not possible to get the interface driver
        information from the node.
    """
    driver_link = f"/sys/bus/pci/devices/{pci_addr}/driver"

    status, _, _ = exec_cmd(node, f"test -d {driver_link}")
    if int(status) != 0:
        # No driver directory, so the device is not bound to anything.
        return None

    # The driver name is the last component of the resolved symlink.
    resolved, _ = exec_cmd_no_error(
        node, f"basename $(readlink -f {driver_link})"
    )
    return resolved.strip()
def verify_kernel_module(node, module, force_load=False):
    """Check that a kernel module is listed in ``/proc/modules``.

    When the module is missing and ``force_load`` is True, an attempt is
    made to load it; otherwise the failure is propagated.

    :param node: DUT node.
    :param module: Module to verify.
    :param force_load: If True then try to load module.
    :type node: dict
    :type module: str
    :type force_load: bool
    :raises RuntimeError: If module is not loaded or failed to load.
    """
    lookup_cmd = f"grep -w {module} /proc/modules"
    err_msg = (
        f"Kernel module {module} is not loaded "
        f"on host {node['host']}"
    )

    try:
        exec_cmd_no_error(
            node, lookup_cmd, timeout=30, sudo=False, message=err_msg
        )
    except RuntimeError:
        if not force_load:
            raise
        # The module is missing and the caller asked for it to be loaded.
        DUTSetup.load_kernel_module(node, module)
def verify_kernel_module_on_all_duts(nodes, module, force_load=False):
    """Check on every DUT that a kernel module is loaded.

    With ``force_load`` set to True an attempt is made to load the
    module where it is missing.

    :param nodes: DUT nodes.
    :param module: Module to verify.
    :param force_load: If True then try to load module.
    :type nodes: dict
    :type module: str
    :type force_load: bool
    """
    # Lazily filter so each DUT is handled as soon as it is found.
    duts = (item for item in nodes.values() if item["type"] == NodeType.DUT)
    for dut in duts:
        DUTSetup.verify_kernel_module(dut, module, force_load)
def verify_uio_driver_on_all_duts(nodes):
    """Check on every DUT that the uio driver kernel module is loaded.

    The module name comes from ``Topology.get_uio_driver``; a missing
    module is loaded because verification runs with ``force_load=True``.

    :param nodes: DUT nodes.
    :type nodes: dict
    """
    # Lazily filter so each DUT is handled as soon as it is found.
    duts = (item for item in nodes.values() if item["type"] == NodeType.DUT)
    for dut in duts:
        DUTSetup.verify_kernel_module(
            dut, Topology.get_uio_driver(dut), force_load=True
        )
def load_kernel_module(node, module):
    """Load a kernel module on the node with ``modprobe``.

    :param node: DUT node.
    :param module: Module to load.
    :type node: dict
    :type module: str
    :raises RuntimeError: If loading failed.
    """
    exec_cmd_no_error(
        node,
        f"modprobe {module}",
        timeout=30,
        sudo=True,
        message=f"Failed to load {module} on host {node['host']}",
    )
def running_in_container(node):
    """Tell whether the topology node is running inside a container.

    Detection is based on whether ``/.dockerenv`` can be read.

    :param node: Topology node.
    :type node: dict
    :returns: True if running in docker container, false if not or failed
        to detect.
    :rtype: bool
    """
    try:
        exec_cmd_no_error(node, "cat /.dockerenv", timeout=30)
    except RuntimeError:
        in_container = False
    else:
        in_container = True
    return in_container
591 def get_docker_mergeddir(node, uuid=None):
592 """Get Docker overlay for MergedDir diff.
594 :param node: DUT node.
595 :param uuid: Docker UUID.
598 :returns: Docker container MergedDir.
600 :raises RuntimeError: If getting output failed.
603 command = 'fgrep "hostname" /proc/self/mountinfo | cut -f 4 -d" "'
604 message = "Failed to get UUID!"
605 stdout, _ = exec_cmd_no_error(node, command, message=message)
606 uuid = stdout.split(sep="/")[-2]
609 f"--format='{{{{.GraphDriver.Data.MergedDir}}}}' {uuid}"
611 message = f"Failed to get directory of {uuid} on host {node[u'host']}"
613 stdout, _ = exec_cmd_no_error(node, command, sudo=True, message=message)
614 return stdout.strip()
617 def get_hugepages_info(node, hugesize=None):
618 """Get number of huge pages in system.
620 :param node: Node in the topology.
621 :param hugesize: Size of hugepages. Default system huge size if None.
624 :returns: Number of huge pages in system.
626 :raises RuntimeError: If reading failed.
629 hugesize = "$(grep Hugepagesize /proc/meminfo | awk '{ print $2 }')"
630 command = f"cat /sys/kernel/mm/hugepages/hugepages-{hugesize}kB/*"
631 stdout, _ = exec_cmd_no_error(node, command)
633 line = stdout.splitlines()
635 "free_hugepages": int(line[0]),
636 "nr_hugepages": int(line[1]),
637 "nr_hugepages_mempolicy": int(line[2]),
638 "nr_overcommit_hugepages": int(line[3]),
639 "resv_hugepages": int(line[4]),
640 "surplus_hugepages": int(line[5])
643 logger.trace(u"Reading huge pages information failed!")
647 node, huge_mnt, mem_size, hugesize=2048, allocate=False):
648 """Check if there is enough HugePages in system. If allocate is set to
649 true, try to allocate more HugePages.
651 :param node: Node in the topology.
652 :param huge_mnt: HugePage mount point.
653 :param mem_size: Requested memory in MB.
654 :param hugesize: HugePage size in KB.
655 :param allocate: Whether to allocate more memory if not enough.
661 :raises RuntimeError: Mounting hugetlbfs failed or not enough HugePages
662 or increasing map count failed.
664 # Get huge pages information.
665 hugepages = DUTSetup.get_hugepages_info(node, hugesize=hugesize)
667 # Check if hugepages requested are available on node.
668 if hugepages[u"nr_overcommit_hugepages"]:
669 # If overcommit is used, we need to know how many additional pages
671 huge_available = hugepages[u"nr_overcommit_hugepages"] - \
672 hugepages[u"surplus_hugepages"]
674 # Fall back to free_hugepages, which was used for detection before.
675 huge_available = hugepages[u"free_hugepages"]
677 if ((mem_size * 1024) // hugesize) > huge_available:
678 # If we want to allocate hugepage dynamically.
680 huge_needed = ((mem_size * 1024) // hugesize) - huge_available
681 huge_to_allocate = huge_needed + hugepages[u"nr_hugepages"]
682 max_map_count = huge_to_allocate * 4
683 # Check if huge pages mount point exist.
685 exec_cmd_no_error(node, u"fgrep 'hugetlbfs' /proc/mounts")
687 exec_cmd_no_error(node, f"mkdir -p {huge_mnt}", sudo=True)
690 f"mount -t hugetlbfs -o pagesize={hugesize}k none "
693 # Increase maximum number of memory map areas for process.
696 f"echo \"{max_map_count}\" | "
697 f"sudo tee /proc/sys/vm/max_map_count",
698 message=f"Increase map count failed on {node[u'host']}!"
700 # Increase hugepage count.
703 f"echo \"{huge_to_allocate}\" | "
704 f"sudo tee /proc/sys/vm/nr_hugepages",
705 message=f"Mount huge pages failed on {node[u'host']}!"
707 # If we do not want to allocate dynamically end with error.
710 f"Not enough availablehuge pages: {huge_available}!"