1 # Copyright (c) 2019 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
14 """DUT setup library."""
16 from robot.api import logger
18 from resources.libraries.python.Constants import Constants
19 from resources.libraries.python.ssh import SSH, exec_cmd_no_error
20 from resources.libraries.python.topology import NodeType, Topology
24 """Contains methods for setting up DUTs."""
def get_service_logs(node, service):
    """Get specific service unit logs from node.

    When the node runs inside a container the supervisor log file under
    /tmp is echoed. Otherwise journalctl is asked for the unit's entries
    logged since the unit's ActiveEnterTimestamp.

    :param node: Node in the topology.
    :param service: Service unit name.
    :type node: dict
    :type service: str
    """
    if DUTSetup.running_in_container(node):
        command = u"echo $(< /tmp/*supervisor*.log)"
    else:
        # Shell fragment resolving to the unit's last activation time; awk
        # keeps the second and third fields of the systemctl output.
        since = (
            f"$(echo `systemctl show -p ActiveEnterTimestamp {service}` "
            f"| awk '{{print $2 $3}}')"
        )
        command = (
            f"journalctl --no-pager --unit={service} --since=\"{since}\""
        )
    message = f"Node {node[u'host']} failed to get logs from unit {service}"

    exec_cmd_no_error(node, command, timeout=30, sudo=True, message=message)
def get_service_logs_on_all_duts(nodes, service):
    """Get specific service unit logs from all DUTs.

    :param nodes: Nodes in the topology.
    :param service: Service unit name.
    :type nodes: dict
    :type service: str
    """
    for dut in nodes.values():
        # Only DUT nodes are of interest; every other node type is skipped.
        if dut[u"type"] != NodeType.DUT:
            continue
        DUTSetup.get_service_logs(dut, service)
def restart_service(node, service):
    """Restart the named service on node.

    supervisorctl is used when the node runs inside a container, the
    ``service`` command otherwise. The unit logs are fetched afterwards.

    :param node: Node in the topology.
    :param service: Service unit name.
    :type node: dict
    :type service: str
    """
    if DUTSetup.running_in_container(node):
        command = f"supervisorctl restart {service}"
    else:
        command = f"service {service} restart"
    message = f"Node {node[u'host']} failed to restart service {service}"

    exec_cmd_no_error(node, command, timeout=180, sudo=True, message=message)

    DUTSetup.get_service_logs(node, service)
def restart_service_on_all_duts(nodes, service):
    """Restart the named service on all DUTs.

    :param nodes: Nodes in the topology.
    :param service: Service unit name.
    :type nodes: dict
    :type service: str
    """
    duts = (item for item in nodes.values() if item[u"type"] == NodeType.DUT)
    for dut in duts:
        DUTSetup.restart_service(dut, service)
def start_service(node, service):
    """Start up the named service on node.

    The command issued is currently a restart (see the TODO below). The
    unit logs are fetched afterwards.

    :param node: Node in the topology.
    :param service: Service unit name.
    :type node: dict
    :type service: str
    """
    # TODO: change command to start once all parent function updated.
    if DUTSetup.running_in_container(node):
        command = f"supervisorctl restart {service}"
    else:
        command = f"service {service} restart"
    message = f"Node {node[u'host']} failed to start service {service}"

    exec_cmd_no_error(node, command, timeout=180, sudo=True, message=message)

    DUTSetup.get_service_logs(node, service)
def start_service_on_all_duts(nodes, service):
    """Start up the named service on all DUTs.

    :param nodes: Nodes in the topology.
    :param service: Service unit name.
    :type nodes: dict
    :type service: str
    """
    for dut in nodes.values():
        # Non-DUT nodes are left untouched.
        if dut[u"type"] != NodeType.DUT:
            continue
        DUTSetup.start_service(dut, service)
def stop_service(node, service):
    """Stop the named service on node.

    supervisorctl is used when the node runs inside a container, the
    ``service`` command otherwise. The unit logs are fetched afterwards.

    :param node: Node in the topology.
    :param service: Service unit name.
    :type node: dict
    :type service: str
    """
    if DUTSetup.running_in_container(node):
        command = f"supervisorctl stop {service}"
    else:
        command = f"service {service} stop"
    message = f"Node {node[u'host']} failed to stop service {service}"

    exec_cmd_no_error(node, command, timeout=180, sudo=True, message=message)

    DUTSetup.get_service_logs(node, service)
def stop_service_on_all_duts(nodes, service):
    """Stop the named service on all DUTs.

    :param nodes: Nodes in the topology.
    :param service: Service unit name.
    :type nodes: dict
    :type service: str
    """
    duts = (item for item in nodes.values() if item[u"type"] == NodeType.DUT)
    for dut in duts:
        DUTSetup.stop_service(dut, service)
def get_vpp_pid(node):
    """Get PID of running VPP process.

    ``pidof vpp`` is executed up to three times; polling stops as soon as
    at least one PID is reported.

    :param node: DUT node.
    :type node: dict
    :returns: The PID when exactly one VPP process is found, a list of
        PIDs when several are found, None when no PID was reported in any
        of the attempts.
    :rtype: int or list or None
    :raises RuntimeError: If it is not possible to get the PID.
    """
    ssh = SSH()
    ssh.connect(node)

    for i in range(3):
        logger.trace(f"Try {i}: Get VPP PID")
        ret_code, stdout, stderr = ssh.exec_command(u"pidof vpp")

        if int(ret_code):
            raise RuntimeError(
                f"Not possible to get PID of VPP process on node: "
                f"{node[u'host']}\n {stdout + stderr}"
            )

        pid_list = stdout.split()
        if not pid_list:
            logger.debug(f"No VPP PID found on node {node[u'host']}")
            continue
        if len(pid_list) == 1:
            return int(pid_list[0])

        logger.debug(f"More than one VPP PID found on node {node[u'host']}")
        return [int(pid) for pid in pid_list]

    return None
def get_vpp_pids(nodes):
    """Get PID of running VPP process on all DUTs.

    :param nodes: DUT nodes.
    :type nodes: dict
    :returns: Result of get_vpp_pid for every DUT, keyed by the node's
        ``host`` value.
    :rtype: dict
    """
    return {
        dut[u"host"]: DUTSetup.get_vpp_pid(dut)
        for dut in nodes.values()
        if dut[u"type"] == NodeType.DUT
    }
def crypto_device_verify(node, crypto_type, numvfs, force_init=False):
    """Verify if Crypto QAT device virtual functions are initialized on all
    DUTs. If parameter force initialization is set to True, then try to
    initialize or remove VFs on QAT.

    :param node: DUT node.
    :param crypto_type: Crypto device type - HW_DH895xcc or HW_C3xxx.
    :param numvfs: Number of VFs to initialize, 0 - disable the VFs.
    :param force_init: If True then try to initialize to specific value.
    :type node: dict
    :type crypto_type: string
    :type numvfs: int
    :type force_init: bool
    :returns: nothing
    :raises RuntimeError: If QAT VFs are not created and force init is set
        to False.
    """
    pci_addr = Topology.get_cryptodev(node)
    current_numvfs = DUTSetup.get_sriov_numvfs(node, pci_addr)

    # Nothing to do when the device already reports the requested count.
    if current_numvfs == numvfs:
        return

    if not force_init:
        raise RuntimeError(
            f"QAT device failed to create VFs on {node[u'host']}"
        )

    # QAT is not initialized and we want to initialize with numvfs
    DUTSetup.crypto_device_init(node, crypto_type, numvfs)
def crypto_device_init(node, crypto_type, numvfs):
    """Init Crypto QAT device virtual functions on DUT.

    The kernel module is verified (and loaded if needed), VPP is stopped,
    the device is unbound from its current driver if it has one, bound to
    the QAT kernel driver and finally the VFs are created.

    :param node: DUT node.
    :param crypto_type: Crypto device type - HW_DH895xcc or HW_C3xxx.
    :param numvfs: Number of VFs to initialize, 0 - disable the VFs.
    :type node: dict
    :type crypto_type: string
    :type numvfs: int
    :returns: nothing
    :raises RuntimeError: If failed to stop VPP or QAT failed to initialize.
    """
    # Each supported device type maps to a kernel module and driver name.
    if crypto_type == u"HW_DH895xcc":
        kernel_mod, kernel_drv = u"qat_dh895xcc", u"dh895xcc"
    elif crypto_type == u"HW_C3xxx":
        kernel_mod, kernel_drv = u"qat_c3xxx", u"c3xxx"
    else:
        raise RuntimeError(
            f"Unsupported crypto device type on {node[u'host']}"
        )

    pci_addr = Topology.get_cryptodev(node)

    # QAT device must be re-bound to kernel driver before initialization.
    DUTSetup.verify_kernel_module(node, kernel_mod, force_load=True)

    # Stop VPP to prevent deadlock.
    DUTSetup.stop_service(node, Constants.VPP_UNIT)

    escaped_addr = pci_addr.replace(u":", r"\:")
    if DUTSetup.get_pci_dev_driver(node, escaped_addr) is not None:
        DUTSetup.pci_driver_unbind(node, pci_addr)

    # Bind to kernel driver.
    DUTSetup.pci_driver_bind(node, pci_addr, kernel_drv)

    # Initialize QAT VFs.
    if numvfs > 0:
        DUTSetup.set_sriov_numvfs(node, pci_addr, numvfs)
def get_virtfn_pci_addr(node, pf_pci_addr, vf_id):
    """Get PCI address of Virtual Function.

    The address is the basename of the target of the PF's ``virtfn<id>``
    link under /sys/bus/pci/devices.

    :param node: DUT node.
    :param pf_pci_addr: Physical Function PCI address.
    :param vf_id: Virtual Function number.
    :type node: dict
    :type pf_pci_addr: str
    :type vf_id: int
    :returns: Virtual Function PCI address.
    :rtype: str
    :raises RuntimeError: If failed to get Virtual Function PCI address.
    """
    virtfn_link = f"/sys/bus/pci/devices/{pf_pci_addr}/virtfn{vf_id}"
    command = f"sh -c \"basename $(readlink {virtfn_link})\""
    message = u"Failed to get virtual function PCI address."

    output, _ = exec_cmd_no_error(
        node, command, timeout=30, sudo=True, message=message
    )

    return output.strip()
def get_sriov_numvfs(node, pf_pci_addr):
    """Get number of SR-IOV VFs.

    The sysfs ``sriov_numvfs`` file of the device is read up to three
    times until its content parses as an integer.

    :param node: DUT node.
    :param pf_pci_addr: Physical Function PCI device address.
    :type node: dict
    :type pf_pci_addr: str
    :returns: Number of VFs, or None if no attempt produced an integer.
    :rtype: int
    :raises RuntimeError: If PCI device is not SR-IOV capable.
    """
    # Colons are backslash-escaped for use in the sysfs path.
    pci = pf_pci_addr.replace(u":", r"\:")
    command = f"cat /sys/bus/pci/devices/{pci}/sriov_numvfs"
    message = f"PCI device {pf_pci_addr} is not a SR-IOV device."

    for _attempt in range(3):
        stdout, _ = exec_cmd_no_error(
            node, command, timeout=30, sudo=True, message=message
        )
        try:
            return int(stdout)
        except ValueError:
            logger.trace(
                f"Reading sriov_numvfs info failed on {node[u'host']}"
            )

    return None
def set_sriov_numvfs(node, pf_pci_addr, numvfs=0):
    """Init or reset SR-IOV virtual functions by setting its number on PCI
    device on DUT. Setting to zero removes all VFs.

    :param node: DUT node.
    :param pf_pci_addr: Physical Function PCI device address.
    :param numvfs: Number of VFs to initialize, 0 - removes the VFs.
    :type node: dict
    :type pf_pci_addr: str
    :type numvfs: int
    :raises RuntimeError: Failed to create VFs on PCI.
    """
    # Colons are backslash-escaped for use in the sysfs path.
    escaped = pf_pci_addr.replace(u":", r"\:")
    sysfs_file = f"/sys/bus/pci/devices/{escaped}/sriov_numvfs"
    command = f"sh -c \"echo {numvfs} | tee {sysfs_file}\""
    message = (
        f"Failed to create {numvfs} VFs on {pf_pci_addr} device "
        f"on {node[u'host']}"
    )

    exec_cmd_no_error(node, command, timeout=120, sudo=True, message=message)
def pci_driver_unbind(node, pci_addr):
    """Unbind PCI device from current driver on node.

    The device address is written to the ``driver/unbind`` file of the
    device under /sys/bus/pci/devices.

    :param node: DUT node.
    :param pci_addr: PCI device address.
    :type node: dict
    :type pci_addr: str
    :raises RuntimeError: If PCI device unbind failed.
    """
    # Colons are backslash-escaped for use in the sysfs path.
    escaped = pci_addr.replace(u":", r"\:")
    unbind_file = f"/sys/bus/pci/devices/{escaped}/driver/unbind"
    command = f"sh -c \"echo {pci_addr} | tee {unbind_file}\""
    message = f"Failed to unbind PCI device {pci_addr} on {node[u'host']}"

    exec_cmd_no_error(node, command, timeout=120, sudo=True, message=message)
def pci_driver_bind(node, pci_addr, driver):
    """Bind PCI device to driver on node.

    Three sysfs writes are issued in order: set ``driver_override`` to the
    requested driver, write the device address to the driver's ``bind``
    file, then write an empty line to ``driver_override``.

    :param node: DUT node.
    :param pci_addr: PCI device address.
    :param driver: Driver to bind.
    :type node: dict
    :type pci_addr: str
    :type driver: str
    :raises RuntimeError: If PCI device bind failed.
    """
    message = (
        f"Failed to bind PCI device {pci_addr} to {driver} "
        f"on host {node[u'host']}"
    )
    # Colons are backslash-escaped for use in the sysfs path.
    escaped = pci_addr.replace(u":", r"\:")
    override_file = f"/sys/bus/pci/devices/{escaped}/driver_override"

    steps = (
        f"sh -c \"echo {driver} | tee {override_file}\"",
        f"sh -c \"echo {pci_addr} | tee /sys/bus/pci/drivers/{driver}/bind\"",
        f"sh -c \"echo | tee {override_file}\"",
    )
    for command in steps:
        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )
def pci_vf_driver_unbind(node, pf_pci_addr, vf_id):
    """Unbind Virtual Function from driver on node.

    The VF address is resolved first, then written to the ``driver/unbind``
    file reached through the PF's ``virtfn<id>`` entry.

    :param node: DUT node.
    :param pf_pci_addr: PCI device address.
    :param vf_id: Virtual Function ID.
    :type node: dict
    :type pf_pci_addr: str
    :type vf_id: int
    :raises RuntimeError: If Virtual Function unbind failed.
    """
    vf_pci_addr = DUTSetup.get_virtfn_pci_addr(node, pf_pci_addr, vf_id)
    # Colons are backslash-escaped for use in the sysfs path.
    escaped_pf = pf_pci_addr.replace(u":", r"\:")
    unbind_file = (
        f"/sys/bus/pci/devices/{escaped_pf}/virtfn{vf_id}/driver/unbind"
    )

    command = f"sh -c \"echo {vf_pci_addr} | tee {unbind_file}\""
    message = f"Failed to unbind VF {vf_pci_addr} on {node[u'host']}"

    exec_cmd_no_error(node, command, timeout=120, sudo=True, message=message)
def pci_vf_driver_bind(node, pf_pci_addr, vf_id, driver):
    """Bind Virtual Function to driver on node.

    Three sysfs writes are issued in order: set the VF's ``driver_override``
    to the requested driver, write the VF address to the driver's ``bind``
    file, then write an empty line to ``driver_override``.

    :param node: DUT node.
    :param pf_pci_addr: PCI device address.
    :param vf_id: Virtual Function ID.
    :param driver: Driver to bind.
    :type node: dict
    :type pf_pci_addr: str
    :type vf_id: int
    :type driver: str
    :raises RuntimeError: If PCI device bind failed.
    """
    vf_pci_addr = DUTSetup.get_virtfn_pci_addr(node, pf_pci_addr, vf_id)
    # Colons are backslash-escaped for use in the sysfs path.
    escaped_pf = pf_pci_addr.replace(u":", r"\:")
    override_file = (
        f"/sys/bus/pci/devices/{escaped_pf}/virtfn{vf_id}/driver_override"
    )
    bind_file = f"/sys/bus/pci/drivers/{driver}/bind"

    message = (
        f"Failed to bind VF {vf_pci_addr} to {driver} "
        f"on {node[u'host']}"
    )

    steps = (
        f"sh -c \"echo {driver} | tee {override_file}\"",
        f"sh -c \"echo {vf_pci_addr} | tee {bind_file}\"",
        f"sh -c \"echo | tee {override_file}\"",
    )
    for command in steps:
        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )
def get_pci_dev_driver(node, pci_addr):
    """Get current PCI device driver on node.

    The output of ``lspci -vmmks <pci_addr>`` is scanned for a line whose
    tab-separated field name is ``Driver:``, e.g.::

        # lspci -vmmks 0000:00:05.0
        Class:  Ethernet controller
        Device: Virtio network device
        SVendor:        Red Hat, Inc

    When no such line is present a PCI bus rescan is triggered and the
    query is repeated, three attempts in total.

    :param node: DUT node.
    :param pci_addr: PCI device address.
    :type node: dict
    :type pci_addr: str
    :returns: Driver or None
    :rtype: str or None
    :raises RuntimeError: If PCI rescan or lspci command execution failed.
    :raises RuntimeError: If it is not possible to get the interface driver
        information from the node.
    """
    ssh = SSH()
    ssh.connect(node)

    attempts = 3
    for i in range(attempts):
        logger.trace(f"Try number {i}: Get PCI device driver")

        cmd = f"lspci -vmmks {pci_addr}"
        ret_code, stdout, _ = ssh.exec_command(cmd)
        if int(ret_code):
            raise RuntimeError(f"'{cmd}' failed on '{node[u'host']}'")

        for line in stdout.splitlines():
            # partition() never raises, so a "Driver:" line that carries
            # no tab-separated value is recognised and reported as None.
            name, separator, value = line.partition(u"\t")
            if name == u"Driver:":
                return value if separator else None

        # No rescan after the final attempt; nothing would read its result.
        if i < attempts - 1:
            logger.trace(
                f"Driver for PCI device {pci_addr} not found, "
                f"executing pci rescan and retrying"
            )
            cmd = u"sh -c \"echo 1 > /sys/bus/pci/rescan\""
            ret_code, _, _ = ssh.exec_command_sudo(cmd)
            if int(ret_code) != 0:
                raise RuntimeError(f"'{cmd}' failed on '{node[u'host']}'")

    return None
def verify_kernel_module(node, module, force_load=False):
    """Verify if kernel module is loaded on node. If parameter force
    load is set to True, then try to load the modules.

    The check greps /proc/modules for the module name as a whole word.

    :param node: Node.
    :param module: Module to verify.
    :param force_load: If True then try to load module.
    :type node: dict
    :type module: str
    :type force_load: bool
    :raises RuntimeError: If module is not loaded or failed to load.
    """
    command = f"grep -w {module} /proc/modules"
    message = (
        f"Kernel module {module} is not loaded "
        f"on host {node[u'host']}"
    )

    try:
        exec_cmd_no_error(
            node, command, timeout=30, sudo=False, message=message
        )
    except RuntimeError:
        if not force_load:
            raise
        # Module is not loaded and we want to load it
        DUTSetup.load_kernel_module(node, module)
def verify_kernel_module_on_all_duts(nodes, module, force_load=False):
    """Verify if kernel module is loaded on all DUTs. If parameter force
    load is set to True, then try to load the modules.

    :param nodes: DUT nodes.
    :param module: Module to verify.
    :param force_load: If True then try to load module.
    :type nodes: dict
    :type module: str
    :type force_load: bool
    """
    duts = (item for item in nodes.values() if item[u"type"] == NodeType.DUT)
    for dut in duts:
        DUTSetup.verify_kernel_module(dut, module, force_load)
def verify_uio_driver_on_all_duts(nodes):
    """Verify if uio driver kernel module is loaded on all DUTs. If module
    is not present it will try to load it.

    :param nodes: DUT nodes.
    :type nodes: dict
    """
    for dut in nodes.values():
        if dut[u"type"] != NodeType.DUT:
            continue
        # The driver name comes from the topology entry of each DUT.
        DUTSetup.verify_kernel_module(
            dut, Topology.get_uio_driver(dut), force_load=True
        )
def load_kernel_module(node, module):
    """Load kernel module on node.

    Runs ``modprobe <module>`` with sudo.

    :param node: DUT node.
    :param module: Module to load.
    :type node: dict
    :type module: str
    :returns: nothing
    :raises RuntimeError: If loading failed.
    """
    exec_cmd_no_error(
        node,
        f"modprobe {module}",
        timeout=30,
        sudo=True,
        message=f"Failed to load {module} on host {node[u'host']}",
    )
def install_vpp_on_all_duts(nodes, vpp_pkg_dir):
    """Install VPP on all DUT nodes. Start the VPP service in case of
    systemd is not available or does not support autostart.

    For every DUT the distribution is read from /etc/lsb-release. On Ubuntu
    the packages are handled with apt-get/dpkg and the VPP unit is restarted
    only when the node runs inside a container. On any other distribution
    yum/rpm are used and the VPP unit is always restarted.

    :param nodes: Nodes in the topology.
    :param vpp_pkg_dir: Path to directory where VPP packages are stored.
        It is concatenated directly with the package file names, so it is
        expected to end with a path separator.
    :type nodes: dict
    :type vpp_pkg_dir: str
    :raises RuntimeError: If failed to remove or install VPP.
    """
    for node in nodes.values():
        if node[u"type"] != NodeType.DUT:
            continue

        message = f"Failed to install VPP on host {node[u'host']}!"

        command = u"ln -s /dev/null /etc/sysctl.d/80-vpp.conf || true"
        exec_cmd_no_error(node, command, sudo=True)

        command = u". /etc/lsb-release; echo \"${DISTRIB_ID}\""
        stdout, _ = exec_cmd_no_error(node, command)

        if stdout.strip() == u"Ubuntu":
            exec_cmd_no_error(
                node, u"apt-get purge -y '*vpp*' || true",
                timeout=120, sudo=True
            )
            # workaround to avoid installation of vpp-api-python
            # (f-string is required, otherwise the literal text
            # "{vpp_pkg_dir}" is sent to the shell).
            exec_cmd_no_error(
                node, f"rm -f {vpp_pkg_dir}vpp-api-python.deb",
                timeout=120, sudo=True
            )
            exec_cmd_no_error(
                node, f"dpkg -i --force-all {vpp_pkg_dir}*.deb",
                timeout=120, sudo=True, message=message
            )
            exec_cmd_no_error(node, u"dpkg -l | grep vpp", sudo=True)
            if DUTSetup.running_in_container(node):
                DUTSetup.restart_service(node, Constants.VPP_UNIT)
        else:
            exec_cmd_no_error(
                node, u"yum -y remove '*vpp*' || true",
                timeout=120, sudo=True
            )
            # workaround to avoid installation of vpp-api-python
            # (f-string is required, see the Ubuntu branch).
            exec_cmd_no_error(
                node, f"rm -f {vpp_pkg_dir}vpp-api-python.rpm",
                timeout=120, sudo=True
            )
            exec_cmd_no_error(
                node, f"rpm -ivh {vpp_pkg_dir}*.rpm",
                timeout=120, sudo=True, message=message
            )
            exec_cmd_no_error(node, u"rpm -qai '*vpp*'", sudo=True)
            DUTSetup.restart_service(node, Constants.VPP_UNIT)
def running_in_container(node):
    """This method tests if topology node is running inside container.

    The test greps /proc/1/cgroup for the string ``docker``.

    :param node: Topology node.
    :type node: dict
    :returns: True if running in docker container, false if not or failed
        to detect.
    :rtype: bool
    """
    try:
        exec_cmd_no_error(
            node,
            u"fgrep docker /proc/1/cgroup",
            timeout=30,
            sudo=False,
            message=u"Failed to get cgroup settings.",
        )
    except RuntimeError:
        # A failing command is reported as "not in a container".
        return False
    else:
        return True
def get_docker_mergeddir(node, uuid):
    """Get Docker overlay for MergedDir diff.

    Runs ``docker inspect`` with a format template that selects
    ``.GraphDriver.Data.MergedDir`` of the given container.

    :param node: DUT node.
    :param uuid: Docker UUID.
    :type node: dict
    :type uuid: str
    :returns: Docker container MergedDir.
    :rtype: str
    :raises RuntimeError: If getting output failed.
    """
    # Plain (non-f) string, so the double braces reach docker unchanged.
    template = u"{{.GraphDriver.Data.MergedDir}}"
    command = f"docker inspect --format='{template}' {uuid}"
    message = f"Failed to get directory of {uuid} on host {node[u'host']}"

    output, _ = exec_cmd_no_error(node, command, sudo=True, message=message)
    return output.strip()
def get_huge_page_size(node):
    """Get default size of huge pages in system.

    The ``Hugepagesize`` value of /proc/meminfo is read, with up to three
    attempts.

    :param node: Node in the topology.
    :type node: dict
    :returns: Default size of free huge pages in system.
    :rtype: int
    :raises RuntimeError: If reading failed for three times.
    """
    ssh = SSH()
    ssh.connect(node)

    command = u"grep Hugepagesize /proc/meminfo | awk '{ print $2 }'"
    for _attempt in range(3):
        ret_code, stdout, _ = ssh.exec_command_sudo(command)
        if ret_code != 0:
            continue
        try:
            return int(stdout)
        except ValueError:
            logger.trace(u"Reading huge page size information failed")

    raise RuntimeError(u"Getting huge page size information failed.")
def get_huge_page_free(node, huge_size):
    """Get number of free huge pages in system.

    The per-size ``free_hugepages`` counter under
    /sys/kernel/mm/hugepages is read, with up to three attempts.

    :param node: Node in the topology.
    :param huge_size: Size of hugepages.
    :type node: dict
    :type huge_size: int
    :returns: Number of free huge pages in system.
    :rtype: int
    :raises RuntimeError: If reading failed for three times.
    """
    # TODO: add numa aware option
    ssh = SSH()
    ssh.connect(node)

    command = (
        f"cat /sys/kernel/mm/hugepages/hugepages-{huge_size}kB/"
        f"free_hugepages"
    )
    for _attempt in range(3):
        ret_code, stdout, _ = ssh.exec_command_sudo(command)
        if ret_code != 0:
            continue
        try:
            return int(stdout)
        except ValueError:
            logger.trace(u"Reading free huge pages information failed")

    raise RuntimeError(u"Getting free huge pages information failed.")
def get_huge_page_total(node, huge_size):
    """Get total number of huge pages in system.

    The per-size ``nr_hugepages`` counter under /sys/kernel/mm/hugepages
    is read, with up to three attempts.

    :param node: Node in the topology.
    :param huge_size: Size of hugepages.
    :type node: dict
    :type huge_size: int
    :returns: Total number of huge pages in system.
    :rtype: int
    :raises RuntimeError: If reading failed for three times.
    """
    # TODO: add numa aware option
    ssh = SSH()
    ssh.connect(node)

    command = (
        f"cat /sys/kernel/mm/hugepages/hugepages-{huge_size}kB/"
        f"nr_hugepages"
    )
    for _attempt in range(3):
        ret_code, stdout, _ = ssh.exec_command_sudo(command)
        if ret_code != 0:
            continue
        try:
            return int(stdout)
        except ValueError:
            logger.trace(u"Reading total huge pages information failed")

    raise RuntimeError(u"Getting total huge pages information failed.")
799 def check_huge_page(node, huge_mnt, mem_size, allocate=False):
800 """Check if there is enough HugePages in system. If allocate is set to
801 true, try to allocate more HugePages.
803 :param node: Node in the topology.
804 :param huge_mnt: HugePage mount point.
805 :param mem_size: Requested memory in MB.
806 :param allocate: Whether to allocate more memory if not enough.
811 :raises RuntimeError: Mounting hugetlbfs failed or not enough HugePages
812 or increasing map count failed.
814 # TODO: split function into smaller parts.
818 # Get huge pages information
819 huge_size = DUTSetup.get_huge_page_size(node)
820 huge_free = DUTSetup.get_huge_page_free(node, huge_size)
821 huge_total = DUTSetup.get_huge_page_total(node, huge_size)
823 # Check if memory requested is available on
824 mem_size = int(mem_size)
825 if (mem_size * 1024) > (huge_free * huge_size):
826 # If we want to allocate hugepage dynamically
828 mem_needed = (mem_size * 1024) - (huge_free * huge_size)
829 huge_to_allocate = ((mem_needed // huge_size) * 2) + huge_total
830 max_map_count = huge_to_allocate*4
831 # Increase maximum number of memory map areas a process may have
832 ret_code, _, _ = ssh.exec_command_sudo(
833 f"echo \"{max_map_count}\" | "
834 f"sudo tee /proc/sys/vm/max_map_count"
836 if int(ret_code) != 0:
838 f"Increase map count failed on {node[u'host']}"
840 # Increase hugepage count
841 ret_code, _, _ = ssh.exec_command_sudo(
842 f"echo \"{huge_to_allocate}\" | "
843 f"sudo tee /proc/sys/vm/nr_hugepages"
845 if int(ret_code) != 0:
847 f"Mount huge pages failed on {node[u'host']}"
849 # If we do not want to allocate dynamically end with error
852 f"Not enough free huge pages: {huge_free}, "
853 f"{huge_free * huge_size} MB"
855 # Check if huge pages mount point exist
857 ret_code, stdout, _ = ssh.exec_command(u"cat /proc/mounts")
858 if int(ret_code) == 0:
859 for line in stdout.splitlines():
860 # Try to find something like:
861 # none /mnt/huge hugetlbfs rw,realtime,pagesize=2048k 0 0
863 if mount[2] == u"hugetlbfs" and mount[1] == huge_mnt:
866 # If huge page mount point not exist create one
868 ret_code, _, _ = ssh.exec_command_sudo(f"mkdir -p {huge_mnt}")
869 if int(ret_code) != 0:
871 f"Create mount dir failed on {node[u'host']}"
873 ret_code, _, _ = ssh.exec_command_sudo(
874 f"mount -t hugetlbfs -o pagesize=2048k none {huge_mnt}"
876 if int(ret_code) != 0:
878 f"Mount huge pages failed on {node[u'host']}"