1 # Copyright (c) 2018 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
14 """DUT setup library."""
16 from robot.api import logger
18 from resources.libraries.python.Constants import Constants
19 from resources.libraries.python.ssh import SSH, exec_cmd_no_error
20 from resources.libraries.python.topology import NodeType, Topology
class DUTSetup(object):
    """Contains methods for setting up DUTs.

    Every public method is a static method that works on topology node
    dictionaries. Names starting with an underscore are internal helpers.
    """

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _escape_pci_addr(pci_addr):
        """Return PCI address with every colon prefixed by a backslash.

        :param pci_addr: PCI device address.
        :type pci_addr: str
        :returns: Address with escaped colons.
        :rtype: str
        """
        return pci_addr.replace(':', r'\:')

    @staticmethod
    def _parse_lspci_driver(lspci_output):
        """Find the driver record in output of "lspci -vmmks".

        Records are expected in the "Name:<TAB>value" form. Empty lines and
        lines without a tab separator are ignored.

        :param lspci_output: Standard output of the lspci command.
        :type lspci_output: str
        :returns: Value of the first "Driver:" record, None if there is none.
        :rtype: str or None
        """
        for line in lspci_output.splitlines():
            name, separator, value = line.partition('\t')
            if separator and name == 'Driver:':
                return value
        return None

    @staticmethod
    def _has_hugetlbfs_mount(mounts, huge_mnt):
        """Tell whether hugetlbfs is mounted on the given mount point.

        :param mounts: Content of /proc/mounts.
        :param huge_mnt: HugePage mount point.
        :type mounts: str
        :type huge_mnt: str
        :returns: True if a matching hugetlbfs record is present.
        :rtype: bool
        """
        for line in mounts.splitlines():
            # Try to find something like:
            # none /mnt/huge hugetlbfs rw,relatime,pagesize=2048k 0 0
            mount = line.split()
            # Short or empty lines cannot describe a mount, skip them.
            if len(mount) < 3:
                continue
            if mount[2] == 'hugetlbfs' and mount[1] == huge_mnt:
                return True
        return False

    @staticmethod
    def _read_huge_page_value(node, command, what):
        """Run a command over SSH and parse its output as an integer.

        The command is tried up to three times.

        :param node: Node in the topology.
        :param command: Command printing a single integer.
        :param what: Description of the value, used in log/error messages.
        :type node: dict
        :type command: str
        :type what: str
        :returns: Parsed value.
        :rtype: int
        :raises RuntimeError: If reading failed for three times.
        """
        ssh = SSH()
        ssh.connect(node)

        for _try in range(3):
            ret_code, stdout, _ = ssh.exec_command_sudo(command)
            if ret_code == 0:
                try:
                    return int(stdout)
                except ValueError:
                    logger.trace('Reading {what} information failed'.
                                 format(what=what))
        raise RuntimeError('Getting {what} information failed.'.
                           format(what=what))

    # ------------------------------------------------------------------
    # Services
    # ------------------------------------------------------------------

    @staticmethod
    def get_service_logs(node, service):
        """Get specific service unit logs from node.

        :param node: Node in the topology.
        :param service: Service unit name.
        :type node: dict
        :type service: str
        :raises RuntimeError: If the command reading the logs failed.
        """
        if DUTSetup.running_in_container(node):
            command = ('echo $(< /var/log/supervisord.log);'
                       'echo $(< /tmp/*supervisor*.log)')
        else:
            command = ('journalctl --no-pager --unit={name} '
                       '--since="$(echo `systemctl show -p '
                       'ActiveEnterTimestamp {name}` | '
                       'awk \'{{print $2 $3}}\')"'.
                       format(name=service))
        message = 'Node {host} failed to get logs from unit {name}'.\
            format(host=node['host'], name=service)

        exec_cmd_no_error(node, command, timeout=30, sudo=True,
                          message=message)

    @staticmethod
    def get_service_logs_on_all_duts(nodes, service):
        """Get specific service unit logs from all DUTs.

        :param nodes: Nodes in the topology.
        :param service: Service unit name.
        :type nodes: dict
        :type service: str
        """
        for node in nodes.values():
            if node['type'] == NodeType.DUT:
                DUTSetup.get_service_logs(node, service)

    @staticmethod
    def start_service(node, service):
        """Start up the named service on node.

        The service is restarted, then its logs are collected.

        :param node: Node in the topology.
        :param service: Service unit name.
        :type node: dict
        :type service: str
        :raises RuntimeError: If the restart command failed.
        """
        if DUTSetup.running_in_container(node):
            command = 'supervisorctl restart {name}'.format(name=service)
        else:
            command = 'service {name} restart'.format(name=service)
        message = 'Node {host} failed to start service {name}'.\
            format(host=node['host'], name=service)

        exec_cmd_no_error(node, command, timeout=30, sudo=True, message=message)

        DUTSetup.get_service_logs(node, service)

    @staticmethod
    def start_service_on_all_duts(nodes, service):
        """Start up the named service on all DUTs.

        :param nodes: Nodes in the topology.
        :param service: Service unit name.
        :type nodes: dict
        :type service: str
        """
        for node in nodes.values():
            if node['type'] == NodeType.DUT:
                DUTSetup.start_service(node, service)

    @staticmethod
    def stop_service(node, service):
        """Stop the named service on node.

        The service is stopped, then its logs are collected.

        :param node: Node in the topology.
        :param service: Service unit name.
        :type node: dict
        :type service: str
        :raises RuntimeError: If the stop command failed.
        """
        if DUTSetup.running_in_container(node):
            command = 'supervisorctl stop {name}'.format(name=service)
        else:
            command = 'service {name} stop'.format(name=service)
        message = 'Node {host} failed to stop service {name}'.\
            format(host=node['host'], name=service)

        exec_cmd_no_error(node, command, timeout=30, sudo=True, message=message)

        DUTSetup.get_service_logs(node, service)

    @staticmethod
    def stop_service_on_all_duts(nodes, service):
        """Stop the named service on all DUTs.

        :param nodes: Nodes in the topology.
        :param service: Service unit name.
        :type nodes: dict
        :type service: str
        """
        for node in nodes.values():
            if node['type'] == NodeType.DUT:
                DUTSetup.stop_service(node, service)

    # ------------------------------------------------------------------
    # DUT setup and VPP process
    # ------------------------------------------------------------------

    @staticmethod
    def setup_dut(node):
        """Run script over SSH to setup the DUT node.

        :param node: DUT node to set up.
        :type node: dict
        :raises Exception: If the DUT setup fails.
        """
        command = 'bash {0}/{1}/dut_setup.sh'.\
            format(Constants.REMOTE_FW_DIR, Constants.RESOURCES_LIB_SH)
        message = 'DUT test setup script failed at node {name}'.\
            format(name=node['host'])

        exec_cmd_no_error(node, command, timeout=120, sudo=True,
                          message=message)

    @staticmethod
    def setup_all_duts(nodes):
        """Run script over SSH to setup all DUT nodes.

        :param nodes: Topology nodes.
        :type nodes: dict
        """
        for node in nodes.values():
            if node['type'] == NodeType.DUT:
                DUTSetup.setup_dut(node)

    @staticmethod
    def get_vpp_pid(node):
        """Get PID of running VPP process.

        "pidof vpp" is tried up to three times; an attempt that succeeds but
        prints nothing is logged and retried.

        :param node: DUT node.
        :type node: dict
        :returns: PID if exactly one was found, list of PIDs if more were
            found, None if none was found in three attempts.
        :rtype: int or list or None
        :raises RuntimeError: If it is not possible to get the PID.
        """
        ssh = SSH()
        ssh.connect(node)

        for i in range(3):
            logger.trace('Try {}: Get VPP PID'.format(i))
            ret_code, stdout, stderr = ssh.exec_command('pidof vpp')

            if int(ret_code):
                raise RuntimeError('Not possible to get PID of VPP process '
                                   'on node: {0}\n {1}'.
                                   format(node['host'], stdout + stderr))

            pid_list = stdout.split()
            if len(pid_list) == 1:
                return int(stdout)
            if pid_list:
                logger.debug("More then one VPP PID found on node {0}".
                             format(node['host']))
                return [int(pid) for pid in pid_list]
            logger.debug("No VPP PID found on node {0}".
                         format(node['host']))

        return None

    @staticmethod
    def get_vpp_pids(nodes):
        """Get PID of running VPP process on all DUTs.

        :param nodes: DUT nodes.
        :type nodes: dict
        :returns: Results of get_vpp_pid keyed by node host.
        :rtype: dict
        """
        pids = dict()
        for node in nodes.values():
            if node['type'] == NodeType.DUT:
                pids[node['host']] = DUTSetup.get_vpp_pid(node)
        return pids

    # ------------------------------------------------------------------
    # Crypto device, SR-IOV and PCI drivers
    # ------------------------------------------------------------------

    @staticmethod
    def crypto_device_verify(node, force_init=False, numvfs=32):
        """Verify if Crypto QAT device virtual functions are initialized on all
        DUTs. If parameter force initialization is set to True, then try to
        initialize or remove VFs on QAT.

        :param node: DUT node.
        :param force_init: If True then try to initialize to specific value.
        :param numvfs: Number of VFs to initialize, 0 - disable the VFs.
        :type node: dict
        :type force_init: bool
        :type numvfs: int
        :raises RuntimeError: If QAT VFs are not created and force init is set
            to False.
        """
        pci_addr = Topology.get_cryptodev(node)
        sriov_numvfs = DUTSetup.get_sriov_numvfs(node, pci_addr)

        if sriov_numvfs != numvfs:
            if force_init:
                # QAT is not initialized and we want to initialize with numvfs
                DUTSetup.crypto_device_init(node, numvfs)
            else:
                raise RuntimeError('QAT device failed to create VFs on {host}'.
                                   format(host=node['host']))

    @staticmethod
    def crypto_device_init(node, numvfs):
        """Init Crypto QAT device virtual functions on DUT.

        :param node: DUT node.
        :param numvfs: Number of VFs to initialize, 0 - disable the VFs.
        :type node: dict
        :type numvfs: int
        :raises RuntimeError: If failed to stop VPP or QAT failed to initialize.
        """
        pci_addr = Topology.get_cryptodev(node)

        # QAT device must be re-bound to kernel driver before initialization.
        DUTSetup.verify_kernel_module(node, 'qat_dh895xcc', force_load=True)

        # Stop VPP to prevent deadlock.
        DUTSetup.stop_service(node, Constants.VPP_UNIT)

        current_driver = DUTSetup.get_pci_dev_driver(
            node, DUTSetup._escape_pci_addr(pci_addr))
        if current_driver is not None:
            DUTSetup.pci_driver_unbind(node, pci_addr)

        # Bind to kernel driver.
        DUTSetup.pci_driver_bind(node, pci_addr, 'dh895xcc')

        # Initialize QAT VFs.
        if numvfs > 0:
            DUTSetup.set_sriov_numvfs(node, pci_addr, numvfs)

    @staticmethod
    def get_virtfn_pci_addr(node, pf_pci_addr, vf_id):
        """Get PCI address of Virtual Function.

        :param node: DUT node.
        :param pf_pci_addr: Physical Function PCI address.
        :param vf_id: Virtual Function number.
        :type node: dict
        :type pf_pci_addr: str
        :type vf_id: int
        :returns: Virtual Function PCI address.
        :rtype: str
        :raises RuntimeError: If failed to get Virtual Function PCI address.
        """
        command = "sh -c "\
            "'basename $(readlink /sys/bus/pci/devices/{pci}/virtfn{vf_id})'".\
            format(pci=pf_pci_addr, vf_id=vf_id)
        message = 'Failed to get virtual function PCI address.'

        stdout, _ = exec_cmd_no_error(node, command, timeout=30, sudo=True,
                                      message=message)

        return stdout.strip()

    @staticmethod
    def get_sriov_numvfs(node, pf_pci_addr):
        """Get number of SR-IOV VFs.

        The sysfs attribute is read up to three times until its content can
        be parsed as an integer.

        :param node: DUT node.
        :param pf_pci_addr: Physical Function PCI device address.
        :type node: dict
        :type pf_pci_addr: str
        :returns: Number of VFs, None if the value could not be parsed in
            three attempts.
        :rtype: int or None
        :raises RuntimeError: If PCI device is not SR-IOV capable.
        """
        command = 'cat /sys/bus/pci/devices/{pci}/sriov_numvfs'.\
            format(pci=DUTSetup._escape_pci_addr(pf_pci_addr))
        message = 'PCI device {pci} is not a SR-IOV device.'.\
            format(pci=pf_pci_addr)

        for _try in range(3):
            stdout, _ = exec_cmd_no_error(node, command, timeout=30, sudo=True,
                                          message=message)
            try:
                return int(stdout)
            except ValueError:
                logger.trace('Reading sriov_numvfs info failed on {host}'.
                             format(host=node['host']))

        # Kept for backward compatibility: callers compare the result with
        # the requested number of VFs, so None reads as "not initialized".
        return None

    @staticmethod
    def set_sriov_numvfs(node, pf_pci_addr, numvfs=0):
        """Init or reset SR-IOV virtual functions by setting its number on PCI
        device on DUT. Setting to zero removes all VFs.

        :param node: DUT node.
        :param pf_pci_addr: Physical Function PCI device address.
        :param numvfs: Number of VFs to initialize, 0 - removes the VFs.
        :type node: dict
        :type pf_pci_addr: str
        :type numvfs: int
        :raises RuntimeError: Failed to create VFs on PCI.
        """
        command = "sh -c "\
            "'echo {num} | tee /sys/bus/pci/devices/{pci}/sriov_numvfs'".\
            format(num=numvfs, pci=DUTSetup._escape_pci_addr(pf_pci_addr))
        message = 'Failed to create {num} VFs on {pci} device on {host}'.\
            format(num=numvfs, pci=pf_pci_addr, host=node['host'])

        exec_cmd_no_error(node, command, timeout=120, sudo=True,
                          message=message)

    @staticmethod
    def pci_driver_unbind(node, pci_addr):
        """Unbind PCI device from current driver on node.

        :param node: DUT node.
        :param pci_addr: PCI device address.
        :type node: dict
        :type pci_addr: str
        :raises RuntimeError: If PCI device unbind failed.
        """
        command = "sh -c "\
            "'echo {pci} | tee /sys/bus/pci/devices/{pcie}/driver/unbind'".\
            format(pci=pci_addr, pcie=DUTSetup._escape_pci_addr(pci_addr))
        message = 'Failed to unbind PCI device {pci} on {host}'.\
            format(pci=pci_addr, host=node['host'])

        exec_cmd_no_error(node, command, timeout=120, sudo=True,
                          message=message)

    @staticmethod
    def pci_driver_bind(node, pci_addr, driver):
        """Bind PCI device to driver on node.

        Three steps are executed: set driver_override of the device, bind the
        device to the driver, clear driver_override again.

        :param node: DUT node.
        :param pci_addr: PCI device address.
        :param driver: Driver to bind.
        :type node: dict
        :type pci_addr: str
        :type driver: str
        :raises RuntimeError: If PCI device bind failed.
        """
        message = 'Failed to bind PCI device {pci} to {driver} on host {host}'.\
            format(pci=pci_addr, driver=driver, host=node['host'])

        command = "sh -c "\
            "'echo {driver} | tee /sys/bus/pci/devices/{pci}/driver_override'".\
            format(driver=driver, pci=DUTSetup._escape_pci_addr(pci_addr))

        exec_cmd_no_error(node, command, timeout=120, sudo=True,
                          message=message)

        command = "sh -c "\
            "'echo {pci} | tee /sys/bus/pci/drivers/{driver}/bind'".\
            format(pci=pci_addr, driver=driver)

        exec_cmd_no_error(node, command, timeout=120, sudo=True,
                          message=message)

        command = "sh -c "\
            "'echo | tee /sys/bus/pci/devices/{pci}/driver_override'".\
            format(pci=DUTSetup._escape_pci_addr(pci_addr))

        exec_cmd_no_error(node, command, timeout=120, sudo=True,
                          message=message)

    @staticmethod
    def pci_vf_driver_unbind(node, pf_pci_addr, vf_id):
        """Unbind Virtual Function from driver on node.

        :param node: DUT node.
        :param pf_pci_addr: PCI device address.
        :param vf_id: Virtual Function ID.
        :type node: dict
        :type pf_pci_addr: str
        :type vf_id: int
        :raises RuntimeError: If Virtual Function unbind failed.
        """
        vf_pci_addr = DUTSetup.get_virtfn_pci_addr(node, pf_pci_addr, vf_id)
        vf_path = "/sys/bus/pci/devices/{pf_pci_addr}/virtfn{vf_id}".\
            format(pf_pci_addr=DUTSetup._escape_pci_addr(pf_pci_addr),
                   vf_id=vf_id)

        command = "sh -c "\
            "'echo {vf_pci_addr} | tee {vf_path}/driver/unbind'".\
            format(vf_pci_addr=vf_pci_addr, vf_path=vf_path)

        message = 'Failed to unbind VF {vf_pci_addr} to on {host}'.\
            format(vf_pci_addr=vf_pci_addr, host=node['host'])

        exec_cmd_no_error(node, command, timeout=120, sudo=True,
                          message=message)

    @staticmethod
    def pci_vf_driver_bind(node, pf_pci_addr, vf_id, driver):
        """Bind Virtual Function to driver on node.

        Three steps are executed: set driver_override of the VF, bind the VF
        to the driver, clear driver_override again.

        :param node: DUT node.
        :param pf_pci_addr: PCI device address.
        :param vf_id: Virtual Function ID.
        :param driver: Driver to bind.
        :type node: dict
        :type pf_pci_addr: str
        :type vf_id: int
        :type driver: str
        :raises RuntimeError: If PCI device bind failed.
        """
        vf_pci_addr = DUTSetup.get_virtfn_pci_addr(node, pf_pci_addr, vf_id)
        vf_path = "/sys/bus/pci/devices/{pf_pci_addr}/virtfn{vf_id}".\
            format(pf_pci_addr=DUTSetup._escape_pci_addr(pf_pci_addr),
                   vf_id=vf_id)

        message = 'Failed to bind VF {vf_pci_addr} to {driver} on {host}'.\
            format(vf_pci_addr=vf_pci_addr, driver=driver, host=node['host'])

        command = "sh -c "\
            "'echo {driver} | tee {vf_path}/driver_override'".\
            format(driver=driver, vf_path=vf_path)

        exec_cmd_no_error(node, command, timeout=120, sudo=True,
                          message=message)

        command = "sh -c "\
            "'echo {vf_pci_addr} | tee /sys/bus/pci/drivers/{driver}/bind'".\
            format(vf_pci_addr=vf_pci_addr, driver=driver)

        exec_cmd_no_error(node, command, timeout=120, sudo=True,
                          message=message)

        command = "sh -c "\
            "'echo | tee {vf_path}/driver_override'".\
            format(vf_path=vf_path)

        exec_cmd_no_error(node, command, timeout=120, sudo=True,
                          message=message)

    @staticmethod
    def get_pci_dev_driver(node, pci_addr):
        """Get current PCI device driver on node.

        .. note::
            # lspci -vmmks 0000:00:05.0
            Slot:   00:05.0
            Class:  Ethernet controller
            Vendor: Red Hat, Inc
            Device: Virtio network device
            SVendor:        Red Hat, Inc
            SDevice:        Device 0001
            PhySlot:        5
            Driver: virtio-pci

        lspci is queried up to three times. When no driver record is found,
        a PCI rescan is triggered before the next attempt.

        :param node: DUT node.
        :param pci_addr: PCI device address.
        :type node: dict
        :type pci_addr: str
        :returns: Driver or None
        :rtype: str or None
        :raises RuntimeError: If PCI rescan or lspci command execution failed.
        """
        ssh = SSH()
        ssh.connect(node)

        for i in range(3):
            logger.trace('Try number {0}: Get PCI device driver'.format(i))

            cmd = 'lspci -vmmks {0}'.format(pci_addr)
            ret_code, stdout, _ = ssh.exec_command(cmd)
            if int(ret_code):
                raise RuntimeError("'{0}' failed on '{1}'"
                                   .format(cmd, node['host']))

            driver = DUTSetup._parse_lspci_driver(stdout)
            if driver is not None:
                return driver

            # No rescan after the last attempt, nothing would read its result.
            if i < 2:
                logger.trace('Driver for PCI device {} not found, executing '
                             'pci rescan and retrying'.format(pci_addr))
                cmd = 'sh -c "echo 1 > /sys/bus/pci/rescan"'
                ret_code, _, _ = ssh.exec_command_sudo(cmd)
                if int(ret_code) != 0:
                    raise RuntimeError("'{0}' failed on '{1}'"
                                       .format(cmd, node['host']))

        return None

    # ------------------------------------------------------------------
    # Kernel modules
    # ------------------------------------------------------------------

    @staticmethod
    def verify_kernel_module(node, module, force_load=False):
        """Verify if kernel module is loaded on node. If parameter force
        load is set to True, then try to load the modules.

        :param node: DUT node.
        :param module: Module to verify.
        :param force_load: If True then try to load module.
        :type node: dict
        :type module: str
        :type force_load: bool
        :raises RuntimeError: If module is not loaded or failed to load.
        """
        command = 'grep -w {module} /proc/modules'.format(module=module)
        message = 'Kernel module {module} is not loaded on host {host}'.\
            format(module=module, host=node['host'])

        try:
            exec_cmd_no_error(node, command, timeout=30, sudo=False,
                              message=message)
        except RuntimeError:
            if not force_load:
                raise
            # Module is not loaded and we want to load it
            DUTSetup.load_kernel_module(node, module)

    @staticmethod
    def verify_kernel_module_on_all_duts(nodes, module, force_load=False):
        """Verify if kernel module is loaded on all DUTs. If parameter force
        load is set to True, then try to load the modules.

        :param nodes: DUT nodes.
        :param module: Module to verify.
        :param force_load: If True then try to load module.
        :type nodes: dict
        :type module: str
        :type force_load: bool
        """
        for node in nodes.values():
            if node['type'] == NodeType.DUT:
                DUTSetup.verify_kernel_module(node, module, force_load)

    @staticmethod
    def verify_uio_driver_on_all_duts(nodes):
        """Verify if uio driver kernel module is loaded on all DUTs. If module
        is not present it will try to load it.

        :param nodes: DUT nodes.
        :type nodes: dict
        """
        for node in nodes.values():
            if node['type'] == NodeType.DUT:
                uio_driver = Topology.get_uio_driver(node)
                DUTSetup.verify_kernel_module(node, uio_driver, force_load=True)

    @staticmethod
    def load_kernel_module(node, module):
        """Load kernel module on node.

        :param node: DUT node.
        :param module: Module to load.
        :type node: dict
        :type module: str
        :raises RuntimeError: If loading failed.
        """
        command = 'modprobe {module}'.format(module=module)
        message = 'Failed to load {module} on host {host}'.\
            format(module=module, host=node['host'])

        exec_cmd_no_error(node, command, timeout=30, sudo=True, message=message)

    # ------------------------------------------------------------------
    # VPP installation and containers
    # ------------------------------------------------------------------

    @staticmethod
    def install_vpp_on_all_duts(nodes, vpp_pkg_dir):
        """Install VPP on all DUT nodes.

        Debian packages are used when /etc/lsb-release reports Ubuntu, RPM
        packages otherwise.

        :param nodes: Nodes in the topology.
        :param vpp_pkg_dir: Path to directory where VPP packages are stored.
        :type nodes: dict
        :type vpp_pkg_dir: str
        :raises RuntimeError: If failed to remove or install VPP.
        """
        for node in nodes.values():
            if node['type'] != NodeType.DUT:
                continue

            message = 'Failed to install VPP on host {host}!'.\
                format(host=node['host'])

            command = 'ln -s /dev/null /etc/sysctl.d/80-vpp.conf || true'
            exec_cmd_no_error(node, command, sudo=True)

            command = '. /etc/lsb-release; echo "${DISTRIB_ID}"'
            stdout, _ = exec_cmd_no_error(node, command)

            if stdout.strip() == 'Ubuntu':
                exec_cmd_no_error(node, 'apt-get purge -y "*vpp*" || true',
                                  timeout=120, sudo=True)
                exec_cmd_no_error(node, 'dpkg -i --force-all {dir}*.deb'.
                                  format(dir=vpp_pkg_dir), timeout=120,
                                  sudo=True, message=message)
                exec_cmd_no_error(node, 'dpkg -l | grep vpp', sudo=True)
            else:
                exec_cmd_no_error(node, 'yum -y remove "*vpp*" || true',
                                  timeout=120, sudo=True)
                exec_cmd_no_error(node, 'rpm -ivh {dir}*.rpm'.
                                  format(dir=vpp_pkg_dir), timeout=120,
                                  sudo=True, message=message)
                exec_cmd_no_error(node, 'rpm -qai *vpp*', sudo=True)

    @staticmethod
    def running_in_container(node):
        """This method tests if topology node is running inside container.

        :param node: Topology node.
        :type node: dict
        :returns: True if running in docker container, false if not or failed
            to detect.
        :rtype: bool
        """
        command = "fgrep docker /proc/1/cgroup"
        message = 'Failed to get cgroup settings.'
        try:
            exec_cmd_no_error(node, command, timeout=30, sudo=False,
                              message=message)
        except RuntimeError:
            return False
        return True

    @staticmethod
    def get_docker_mergeddir(node, uuid):
        """Get Docker overlay for MergedDir diff.

        :param node: DUT node.
        :param uuid: Docker UUID.
        :type node: dict
        :type uuid: str
        :returns: Docker container MergedDir.
        :rtype: str
        :raises RuntimeError: If getting output failed.
        """
        command = "docker inspect --format='"\
            "{{{{.GraphDriver.Data.MergedDir}}}}' {uuid}".format(uuid=uuid)
        message = 'Failed to get directory of {uuid} on host {host}'.\
            format(uuid=uuid, host=node['host'])

        stdout, _ = exec_cmd_no_error(node, command, sudo=True, message=message)
        return stdout.strip()

    # ------------------------------------------------------------------
    # Huge pages
    # ------------------------------------------------------------------

    @staticmethod
    def get_huge_page_size(node):
        """Get default size of huge pages in system.

        :param node: Node in the topology.
        :type node: dict
        :returns: Value of the Hugepagesize record in /proc/meminfo.
        :rtype: int
        :raises RuntimeError: If reading failed for three times.
        """
        return DUTSetup._read_huge_page_value(
            node, "grep Hugepagesize /proc/meminfo | awk '{ print $2 }'",
            'huge page size')

    @staticmethod
    def get_huge_page_free(node, huge_size):
        """Get number of free huge pages in system.

        :param node: Node in the topology.
        :param huge_size: Size of hugepages.
        :type node: dict
        :type huge_size: int
        :returns: Number of free huge pages in system.
        :rtype: int
        :raises RuntimeError: If reading failed for three times.
        """
        # TODO: add numa aware option
        return DUTSetup._read_huge_page_value(
            node,
            'cat /sys/kernel/mm/hugepages/hugepages-{0}kB/free_hugepages'.
            format(huge_size),
            'free huge pages')

    @staticmethod
    def get_huge_page_total(node, huge_size):
        """Get total number of huge pages in system.

        :param node: Node in the topology.
        :param huge_size: Size of hugepages.
        :type node: dict
        :type huge_size: int
        :returns: Total number of huge pages in system.
        :rtype: int
        :raises RuntimeError: If reading failed for three times.
        """
        # TODO: add numa aware option
        return DUTSetup._read_huge_page_value(
            node,
            'cat /sys/kernel/mm/hugepages/hugepages-{0}kB/nr_hugepages'.
            format(huge_size),
            'total huge pages')

    @staticmethod
    def check_huge_page(node, huge_mnt, mem_size, allocate=False):
        """Check if there is enough HugePages in system. If allocate is set to
        true, try to allocate more HugePages.

        :param node: Node in the topology.
        :param huge_mnt: HugePage mount point.
        :param mem_size: Requested memory in MB.
        :param allocate: Whether to allocate more memory if not enough.
        :type node: dict
        :type huge_mnt: str
        :type mem_size: int or str
        :type allocate: bool
        :raises RuntimeError: Mounting hugetlbfs failed or not enough HugePages
            or increasing map count failed.
        """
        # NOTE(review): callers may hand the size over as a string (e.g. from
        # test data) - confirm; int() accepts both and keeps the arithmetic
        # below numeric.
        mem_size = int(mem_size)

        ssh = SSH()
        ssh.connect(node)

        # Get huge pages information
        huge_size = DUTSetup.get_huge_page_size(node)
        huge_free = DUTSetup.get_huge_page_free(node, huge_size)
        huge_total = DUTSetup.get_huge_page_total(node, huge_size)

        # huge_size is in kB (it is used in the hugepages-<size>kB path), so
        # both sides of the comparison are in kB.
        mem_size_kb = mem_size * 1024
        huge_free_kb = huge_free * huge_size

        # Check if memory requested is available on host
        if mem_size_kb > huge_free_kb:
            # If we do not want to allocate dynamically end with error
            if not allocate:
                raise RuntimeError('Not enough free huge pages: {0}, {1} MB'.
                                   format(huge_free, huge_free_kb // 1024))

            # Floor division keeps the page count an integer on Python 3.
            mem_needed = mem_size_kb - huge_free_kb
            huge_to_allocate = ((mem_needed // huge_size) * 2) + huge_total
            max_map_count = huge_to_allocate * 4
            # Increase maximum number of memory map areas a process may have
            ret_code, _, _ = ssh.exec_command_sudo(
                'echo "{0}" | sudo tee /proc/sys/vm/max_map_count'.
                format(max_map_count))
            if int(ret_code) != 0:
                raise RuntimeError('Increase map count failed on {host}'.
                                   format(host=node['host']))
            # Increase hugepage count
            ret_code, _, _ = ssh.exec_command_sudo(
                'echo "{0}" | sudo tee /proc/sys/vm/nr_hugepages'.
                format(huge_to_allocate))
            if int(ret_code) != 0:
                raise RuntimeError('Increase huge pages count failed on '
                                   '{host}'.format(host=node['host']))

        # Check if huge pages mount point exist
        has_huge_mnt = False
        ret_code, stdout, _ = ssh.exec_command('cat /proc/mounts')
        if int(ret_code) == 0:
            has_huge_mnt = DUTSetup._has_hugetlbfs_mount(stdout, huge_mnt)

        # If huge page mount point not exist create one
        if not has_huge_mnt:
            ret_code, _, _ = ssh.exec_command_sudo(
                'mkdir -p {mnt}'.format(mnt=huge_mnt))
            if int(ret_code) != 0:
                raise RuntimeError('Create mount dir failed on {host}'.
                                   format(host=node['host']))
            ret_code, _, _ = ssh.exec_command_sudo(
                'mount -t hugetlbfs -o pagesize=2048k none {mnt}'.
                format(mnt=huge_mnt))
            if int(ret_code) != 0:
                raise RuntimeError('Mount huge pages failed on {host}'.
                                   format(host=node['host']))