X-Git-Url: https://gerrit.fd.io/r/gitweb?p=csit.git;a=blobdiff_plain;f=resources%2Flibraries%2Fpython%2FContainerUtils.py;h=3d70684695a388e829e5f6100df72ffa30e10713;hp=6e413c64f023b8e4295c834063cbd4e49aaaa5d3;hb=HEAD;hpb=dee46d5cedf38248eaf6a54dd273e93a592007b0 diff --git a/resources/libraries/python/ContainerUtils.py b/resources/libraries/python/ContainerUtils.py index 6e413c64f0..fc32248f6b 100644 --- a/resources/libraries/python/ContainerUtils.py +++ b/resources/libraries/python/ContainerUtils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 Cisco and/or its affiliates. +# Copyright (c) 2024 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -23,9 +23,11 @@ from robot.libraries.BuiltIn import BuiltIn from resources.libraries.python.Constants import Constants from resources.libraries.python.CpuUtils import CpuUtils +from resources.libraries.python.PapiExecutor import PapiSocketExecutor from resources.libraries.python.ssh import SSH from resources.libraries.python.topology import Topology, SocketType from resources.libraries.python.VppConfigGenerator import VppConfigGenerator +from resources.libraries.python.VPPUtil import VPPUtil __all__ = [ @@ -141,23 +143,52 @@ class ContainerManager: self.engine.container = self.containers[container] self.engine.execute(command) - def start_vpp_in_all_containers(self): + def start_vpp_in_all_containers(self, verify=True): """Start VPP in all containers.""" for container in self.containers: self.engine.container = self.containers[container] - self.engine.start_vpp() + # For multiple containers, delayed verify is faster. + self.engine.start_vpp(verify=False) + if verify: + self.verify_vpp_in_all_containers() - def restart_vpp_in_all_containers(self): + def _disconnect_papi_to_all_containers(self): + """Disconnect any open PAPI connections to VPPs in containers. 
+ + The current PAPI implementation caches open connections, + so explicit disconnect is needed before VPP becomes inaccessible. + + Currently this is a protected method, as restart, stop and destroy + are the only dangerous methods, and all are handled by ContainerManager. + """ + for container_object in self.containers.values(): + PapiSocketExecutor.disconnect_by_node_and_socket( + container_object.node, + container_object.api_socket, + ) + + def restart_vpp_in_all_containers(self, verify=True): """Restart VPP in all containers.""" + self._disconnect_papi_to_all_containers() for container in self.containers: self.engine.container = self.containers[container] - self.engine.restart_vpp() + # For multiple containers, delayed verify is faster. + self.engine.restart_vpp(verify=False) + if verify: + self.verify_vpp_in_all_containers() def verify_vpp_in_all_containers(self): """Verify that VPP is installed and running in all containers.""" + # For multiple containers, multiple fors are faster. for container in self.containers: self.engine.container = self.containers[container] - self.engine.verify_vpp() + self.engine.verify_vppctl() + for container in self.containers: + self.engine.container = self.containers[container] + self.engine.adjust_privileges() + for container in self.containers: + self.engine.container = self.containers[container] + self.engine.verify_vpp_papi() def configure_vpp_in_all_containers(self, chain_topology, **kwargs): """Configure VPP in all containers. 
@@ -174,8 +205,8 @@ class ContainerManager: dut_cnt = len( Counter( [ - self.containers[container].node[u"host"] - for container in self.containers + f"{container.node['host']}{container.node['port']}" + for container in self.containers.values() ] ) ) @@ -225,6 +256,11 @@ class ContainerManager: self._configure_vpp_chain_ipsec( mid1=mid1, mid2=mid2, sid1=sid1, sid2=sid2, guest_dir=guest_dir, nf_instance=idx, **kwargs) + elif chain_topology == u"chain_dma": + self._configure_vpp_chain_dma( + mid1=mid1, mid2=mid2, sid1=sid1, sid2=sid2, + guest_dir=guest_dir, **kwargs + ) else: raise RuntimeError( f"Container topology {chain_topology} not implemented" @@ -247,6 +283,25 @@ class ContainerManager: f"{self.engine.container.name}-{kwargs[u'sid2']}" ) + def _configure_vpp_chain_dma(self, **kwargs): + """Configure VPP in chain topology with l2xc (dma). + + :param kwargs: Named parameters. + :type kwargs: dict + """ + dma_wqs = kwargs[f"dma_wqs"] + self.engine.create_vpp_startup_config_dma(dma_wqs) + + self.engine.create_vpp_exec_config( + u"memif_create_chain_dma.exec", + mid1=kwargs[u"mid1"], mid2=kwargs[u"mid2"], + sid1=kwargs[u"sid1"], sid2=kwargs[u"sid2"], + socket1=f"{kwargs[u'guest_dir']}/memif-" + f"{self.engine.container.name}-{kwargs[u'sid1']}", + socket2=f"{kwargs[u'guest_dir']}/memif-" + f"{self.engine.container.name}-{kwargs[u'sid2']}" + ) + def _configure_vpp_cross_horiz(self, **kwargs): """Configure VPP in cross horizontal topology (single memif). 
@@ -297,10 +352,10 @@ class ContainerManager: """ self.engine.create_vpp_startup_config() - vif1_mac = kwargs[u"tg_if1_mac"] \ + vif1_mac = kwargs[u"tg_pf1_mac"] \ if (kwargs[u"mid1"] - 1) % kwargs[u"nodes"] + 1 == 1 \ else f"52:54:00:00:{(kwargs[u'mid1'] - 1):02X}:02" - vif2_mac = kwargs[u"tg_if2_mac"] \ + vif2_mac = kwargs[u"tg_pf2_mac"] \ if (kwargs[u"mid2"] - 1) % kwargs[u"nodes"] + 1 == kwargs[u"nodes"]\ else f"52:54:00:00:{(kwargs['mid2'] + 1):02X}:01" self.engine.create_vpp_exec_config( @@ -332,11 +387,11 @@ class ContainerManager: self.engine.container.node, kwargs[u"dut1_if2"]) if_black_name = Topology.get_interface_name( self.engine.container.node, kwargs[u"dut1_if1"]) - tg_if_ip4 = kwargs[u"tg_if2_ip4"] - tg_if_mac = kwargs[u"tg_if2_mac"] + tg_pf_ip4 = kwargs[u"tg_pf2_ip4"] + tg_pf_mac = kwargs[u"tg_pf2_mac"] else: - tg_if_ip4 = kwargs[u"tg_if1_ip4"] - tg_if_mac = kwargs[u"tg_if1_mac"] + tg_pf_ip4 = kwargs[u"tg_pf1_ip4"] + tg_pf_mac = kwargs[u"tg_pf1_mac"] if1_pci = Topology.get_interface_pci_addr( self.engine.container.node, kwargs[u"dut2_if1"]) if2_pci = Topology.get_interface_pci_addr( @@ -368,7 +423,7 @@ class ContainerManager: f"create interface memif id {i} socket-id 2 master\n" f"set interface state memif2/{i} up\n" f"set interface l2 bridge memif2/{i} 2\n" - f"set ip arp memif2/{i} {tg_if_ip4} {tg_if_mac} " + f"set ip neighbor memif2/{i} {tg_pf_ip4} {tg_pf_mac} " f"static\n\n" ) @@ -401,8 +456,8 @@ class ContainerManager: tnl_local_ip = f"{local_ip_base}.{nf_instance + 100}" tnl_remote_ip = f"{local_ip_base}.{nf_instance}" remote_ip_base = kwargs[u"dut1_if1_ip4"].rsplit(u".", 1)[0] - tg_if_ip4 = kwargs[u"tg_if1_ip4"] - tg_if_mac = kwargs[u"tg_if1_mac"] + tg_pf_ip4 = kwargs[u"tg_pf1_ip4"] + tg_pf_mac = kwargs[u"tg_pf1_mac"] raddr_ip4 = kwargs[u"laddr_ip4"] l_mac1 = 17 l_mac2 = 18 @@ -411,8 +466,8 @@ class ContainerManager: tnl_local_ip = f"{local_ip_base}.{nf_instance}" tnl_remote_ip = f"{local_ip_base}.{nf_instance + 100}" remote_ip_base = 
kwargs[u"dut2_if2_ip4"].rsplit(u".", 1)[0] - tg_if_ip4 = kwargs[u"tg_if2_ip4"] - tg_if_mac = kwargs[u"tg_if2_mac"] + tg_pf_ip4 = kwargs[u"tg_pf2_ip4"] + tg_pf_mac = kwargs[u"tg_pf2_mac"] raddr_ip4 = kwargs[u"raddr_ip4"] l_mac1 = 1 l_mac2 = 2 @@ -428,8 +483,8 @@ class ContainerManager: sid2=u"2", mac1=f"02:02:00:00:{l_mac1:02X}:{(nf_instance - 1):02X}", mac2=f"02:02:00:00:{l_mac2:02X}:{(nf_instance - 1):02X}", - tg_if2_ip4=tg_if_ip4, - tg_if2_mac=tg_if_mac, + tg_pf2_ip4=tg_pf_ip4, + tg_pf2_mac=tg_pf_mac, raddr_ip4=raddr_ip4, tnl_local_ip=tnl_local_ip, tnl_remote_ip=tnl_remote_ip, @@ -455,10 +510,10 @@ class ContainerManager: role2 = u"master" if node == kwargs[u"nodes"] else u"slave" kwargs[u"mid2"] = kwargs[u"mid2"] \ if node == kwargs[u"nodes"] else kwargs[u"mid2"] + 1 - vif1_mac = kwargs[u"tg_if1_mac"] \ + vif1_mac = kwargs[u"tg_pf1_mac"] \ if (kwargs[u"mid1"] - 1) % kwargs[u"nodes"] + 1 == 1 \ else f"52:54:00:00:{(kwargs[u'mid1'] - 1):02X}:02" - vif2_mac = kwargs[u"tg_if2_mac"] \ + vif2_mac = kwargs[u"tg_pf2_mac"] \ if (kwargs[u"mid2"] - 1) % kwargs[u"nodes"] + 1 == kwargs[u"nodes"]\ else f"52:54:00:00:{(kwargs[u'mid2'] + 1):02X}:01" socket1 = f"{kwargs[u'guest_dir']}/memif-{self.engine.container.name}-"\ @@ -481,12 +536,16 @@ class ContainerManager: def stop_all_containers(self): """Stop all containers.""" + # TODO: Rework if containers can be affected outside ContainerManager. + self._disconnect_papi_to_all_containers() for container in self.containers: self.engine.container = self.containers[container] self.engine.stop() def destroy_all_containers(self): """Destroy all containers.""" + # TODO: Rework if containers can be affected outside ContainerManager. 
+ self._disconnect_papi_to_all_containers() for container in self.containers: self.engine.container = self.containers[container] self.engine.destroy() @@ -543,54 +602,105 @@ class ContainerEngine: """System info.""" raise NotImplementedError - def start_vpp(self): + def start_vpp(self, verify=True): """Start VPP inside a container.""" self.execute( - u"setsid /usr/bin/vpp -c /etc/vpp/startup.conf " - u">/tmp/vppd.log 2>&1 < /dev/null &") + u"/usr/bin/vpp -c /etc/vpp/startup.conf") topo_instance = BuiltIn().get_library_instance( u"resources.libraries.python.topology.Topology" ) + topo_instance.add_new_socket( + self.container.node, + SocketType.CLI, + self.container.name, + self.container.cli_socket, + ) topo_instance.add_new_socket( self.container.node, SocketType.PAPI, self.container.name, - f"/tmp/vpp_sockets/{self.container.name}/api.sock" + self.container.api_socket, ) topo_instance.add_new_socket( self.container.node, SocketType.STATS, self.container.name, - f"/tmp/vpp_sockets/{self.container.name}/stats.sock" + self.container.stats_socket, ) + if verify: + self.verify_vpp() - def restart_vpp(self): + def restart_vpp(self, verify=True): """Restart VPP service inside a container.""" self.execute(u"pkill vpp") - self.start_vpp() - self.execute(u"cat /tmp/vppd.log") + self.start_vpp(verify=verify) + + def verify_vpp(self): + """Verify VPP is running and ready.""" + self.verify_vppctl() + self.adjust_privileges() + self.verify_vpp_papi() # TODO Rewrite to use the VPPUtil.py functionality and remove this. - def verify_vpp(self, retries=120, retry_wait=1): + def verify_vppctl(self, retries=120, retry_wait=1): """Verify that VPP is installed and running inside container. + This function waits a while so VPP can start. + PCI interfaces are listed for debug purposes. + When the check passes, VPP API socket is created on remote side, + but perhaps its directory does not have the correct access rights yet. 
+ :param retries: Check for VPP for this number of times Default: 120 :param retry_wait: Wait for this number of seconds between retries. """ - cmd = (u"vppctl show pci 2>&1 | " - u"fgrep -v 'Connection refused' | " - u"fgrep -v 'No such file or directory'") + for _ in range(retries + 1): + try: + # Execute puts the command into single quotes, + # so inner arguments are enclosed in double quotes here. + self.execute( + u'/usr/bin/vppctl show pci 2>&1 | ' + u'fgrep -v "Connection refused" | ' + u'fgrep -v "No such file or directory"' + ) + break + except (RuntimeError, AssertionError): + sleep(retry_wait) + else: + self.execute(u"cat /tmp/vppd.log") + raise RuntimeError( + f"VPP did not come up in container: {self.container.name}" + ) + + def adjust_privileges(self): + """Adjust privileges to control VPP without sudo.""" + self.execute("chmod -R o+rwx /run/vpp") + + def verify_vpp_papi(self, retries=120, retry_wait=1): + """Verify that VPP is available for PAPI. + This also opens and caches PAPI connection for quick reuse. + The connection is disconnected when ContainerManager decides to do so. + + :param retries: Check for VPP for this number of times Default: 120 + :param retry_wait: Wait for this number of seconds between retries. + """ + # Wait for success. for _ in range(retries + 1): try: - self.execute(cmd) + VPPUtil.vpp_show_version( + node=self.container.node, + remote_vpp_socket=self.container.api_socket, + log=False, + ) break - except RuntimeError: + except (RuntimeError, AssertionError): sleep(retry_wait) else: - msg = f"VPP did not come up in container: {self.container.name}" - raise RuntimeError(msg) + self.execute(u"cat /tmp/vppd.log") + raise RuntimeError( + f"VPP PAPI fails in container: {self.container.name}" + ) def create_base_vpp_startup_config(self, cpuset_cpus=None): """Create base startup configuration of VPP on container. 
@@ -607,10 +717,8 @@ class ContainerEngine: vpp_config = VppConfigGenerator() vpp_config.set_node(self.container.node) vpp_config.add_unix_cli_listen() - vpp_config.add_unix_nodaemon() vpp_config.add_unix_exec(u"/tmp/running.exec") vpp_config.add_socksvr(socket=Constants.SOCKSVR_PATH) - vpp_config.add_statseg_per_node_counters(value=u"on") if cpuset_cpus: # We will pop the first core from the list to be a main core vpp_config.add_cpu_main_core(str(cpuset_cpus.pop(0))) @@ -620,9 +728,13 @@ class ContainerEngine: vpp_config.add_buffers_per_numa(215040) vpp_config.add_plugin(u"disable", u"default") vpp_config.add_plugin(u"enable", u"memif_plugin.so") - vpp_config.add_heapsize(u"4G") - vpp_config.add_ip_heap_size(u"4G") - vpp_config.add_statseg_size(u"4G") + vpp_config.add_plugin(u"enable", u"perfmon_plugin.so") + vpp_config.add_main_heap_size(u"2G") + vpp_config.add_main_heap_page_size(self.container.page_size) + vpp_config.add_default_hugepage_size(self.container.page_size) + vpp_config.add_statseg_size(u"2G") + vpp_config.add_statseg_page_size(self.container.page_size) + vpp_config.add_statseg_per_node_counters(u"on") return vpp_config @@ -654,6 +766,7 @@ class ContainerEngine: vpp_config.add_dpdk_no_tx_checksum_offload() vpp_config.add_dpdk_dev_default_rxq(rxq) vpp_config.add_plugin(u"enable", u"dpdk_plugin.so") + vpp_config.add_plugin(u"enable", u"perfmon_plugin.so") # Apply configuration self.execute(u"mkdir -p /etc/vpp/") @@ -668,9 +781,26 @@ class ContainerEngine: :type cpuset_cpus: list """ vpp_config = self.create_base_vpp_startup_config(cpuset_cpus) - vpp_config.add_plugin(u"enable", u"crypto_ia32_plugin.so") + vpp_config.add_plugin(u"enable", u"crypto_native_plugin.so") vpp_config.add_plugin(u"enable", u"crypto_ipsecmb_plugin.so") vpp_config.add_plugin(u"enable", u"crypto_openssl_plugin.so") + vpp_config.add_plugin(u"enable", u"perfmon_plugin.so") + + # Apply configuration + self.execute(u"mkdir -p /etc/vpp/") + self.execute( + f'echo 
"{vpp_config.get_config_str()}" | tee /etc/vpp/startup.conf' + ) + + def create_vpp_startup_config_dma(self, dma_devices): + """Create startup configuration of VPP DMA. + + :param dma_devices: DMA devices list. + :type dma_devices: list + """ + vpp_config = self.create_base_vpp_startup_config() + vpp_config.add_plugin(u"enable", u"dma_intel_plugin.so") + vpp_config.add_dma_dev(dma_devices) # Apply configuration self.execute(u"mkdir -p /etc/vpp/") @@ -689,7 +819,7 @@ class ContainerEngine: running = u"/tmp/running.exec" template = f"{Constants.RESOURCES_TPL_CONTAINER}/{template_file}" - with open(template, "r") as src_file: + with open(template, u"rt") as src_file: src = Template(src_file.read()) self.execute(f'echo "{src.safe_substitute(**kwargs)}" > {running}') @@ -715,18 +845,6 @@ class ContainerEngine: :type name: str :raises RuntimeError: If applying cgroup settings via cgset failed. """ - ret, _, _ = self.container.ssh.exec_command_sudo( - u"cgset -r cpuset.cpu_exclusive=0 /" - ) - if int(ret) != 0: - raise RuntimeError(u"Failed to apply cgroup settings.") - - ret, _, _ = self.container.ssh.exec_command_sudo( - u"cgset -r cpuset.mem_exclusive=0 /" - ) - if int(ret) != 0: - raise RuntimeError(u"Failed to apply cgroup settings.") - ret, _, _ = self.container.ssh.exec_command_sudo( f"cgcreate -g cpuset:/{name}" ) @@ -734,13 +852,13 @@ class ContainerEngine: raise RuntimeError(u"Failed to copy cgroup settings from root.") ret, _, _ = self.container.ssh.exec_command_sudo( - f"cgset -r cpuset.cpu_exclusive=0 /{name}" + f"cgset -r cpuset.cpus=0 /{name}" ) if int(ret) != 0: raise RuntimeError(u"Failed to apply cgroup settings.") ret, _, _ = self.container.ssh.exec_command_sudo( - f"cgset -r cpuset.mem_exclusive=0 /{name}" + f"cgset -r cpuset.mems=0 /{name}" ) if int(ret) != 0: raise RuntimeError(u"Failed to apply cgroup settings.") @@ -771,7 +889,7 @@ class LXC(ContainerEngine): else u"amd64" image = self.container.image if self.container.image \ - else f"-d ubuntu -r 
bionic -a {target_arch}" + else f"-d ubuntu -r jammy -a {target_arch}" cmd = f"lxc-create -t download --name {self.container.name} " \ f"-- {image} --no-validate" @@ -993,13 +1111,13 @@ class Docker(ContainerEngine): else Constants.DOCKER_SUT_IMAGE_UBUNTU setattr(self.container, u"image", img) - cmd = f"docker pull {self.container.image}" - - ret, _, _ = self.container.ssh.exec_command_sudo(cmd, timeout=1800) - if int(ret) != 0: - raise RuntimeError( - f"Failed to create container {self.container.name}." - ) + if "/" in self.container.image: + cmd = f"docker pull {self.container.image}" + ret, _, _ = self.container.ssh.exec_command_sudo(cmd, timeout=1800) + if int(ret) != 0: + raise RuntimeError( + f"Failed to create container {self.container.name}." + ) if self.container.cpuset_cpus: self._configure_cgroup(u"docker") @@ -1036,8 +1154,8 @@ class Docker(ContainerEngine): if self.container.mnt else u"" cmd = f"docker run --privileged --detach --interactive --tty --rm " \ - f"--cgroup-parent docker {cpuset_cpus} {cpuset_mems} {publish} " \ - f"{env} {volume} --name {self.container.name} " \ + f"--cgroup-parent docker.slice {cpuset_cpus} {cpuset_mems} " \ + f"{publish} {env} {volume} --name {self.container.name} " \ f"{self.container.image} {command}" ret, _, _ = self.container.ssh.exec_command_sudo(cmd) @@ -1180,8 +1298,19 @@ class Container: except KeyError: # Creating new attribute if attr == u"node": + # Create and cache a connected SSH instance. self.__dict__[u"ssh"] = SSH() self.__dict__[u"ssh"].connect(value) + elif attr == u"name": + # Socket paths do not have mutable state, + # this just saves some horizontal space in callers. + # TODO: Rename the dir so other apps can add sockets easily. + # E.g. 
f"/tmp/app_sockets/{value}/vpp_api.sock" + path = f"/tmp/vpp_sockets/{value}" + self.__dict__[u"socket_dir"] = path + self.__dict__[u"api_socket"] = f"{path}/api.sock" + self.__dict__[u"cli_socket"] = f"{path}/cli.sock" + self.__dict__[u"stats_socket"] = f"{path}/stats.sock" self.__dict__[attr] = value else: # Updating attribute base of type