From: Peter Mikus Date: Thu, 30 Jan 2020 13:45:11 +0000 (+0000) Subject: T-Rex: CPU pinning X-Git-Url: https://gerrit.fd.io/r/gitweb?p=csit.git;a=commitdiff_plain;h=6bcf4d40d83bbf026f9fd0105bebf579423c65a6 T-Rex: CPU pinning + Detect NUMA + Pin based on numa location Signed-off-by: Peter Mikus Change-Id: Ife350f8c70e5437ac7c1413c7753f2a2f62777d9 --- diff --git a/resources/libraries/python/CpuUtils.py b/resources/libraries/python/CpuUtils.py index 842c16d7ef..e4fff010f1 100644 --- a/resources/libraries/python/CpuUtils.py +++ b/resources/libraries/python/CpuUtils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 Cisco and/or its affiliates. +# Copyright (c) 2020 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -141,7 +141,7 @@ class CpuUtils: @staticmethod def cpu_slice_of_list_per_node( node, cpu_node, skip_cnt=0, cpu_cnt=0, smt_used=False): - """Return string of node related list of CPU numbers. + """Return node related subset of list of CPU numbers. :param node: Node dictionary with cpuinfo. :param cpu_node: Numa node number. @@ -283,12 +283,6 @@ class CpuUtils: dtc_is_integer = isinstance(nf_dtc, int) if not smt_used and not dtc_is_integer: raise RuntimeError(u"Cannot allocate if SMT is not enabled!") - # TODO: Please reword the following todo if it is still relevant - # TODO: Workaround as we are using physical core as main unit, we must - # adjust number of physical dataplane cores in case of float for further - # array referencing. As rounding method in Py2.7 and Py3.x differs, we - # are using static mapping. This can be rewritten using flat arrays and - # different logic (from Physical core unit to Logical core unit). if not dtc_is_integer: nf_dtc = 1 @@ -361,3 +355,40 @@ class CpuUtils: nf_nodes=nf_nodes, nf_chain=nf_chain, nf_node=nf_node, nf_mtcr=nf_mtcr, nf_dtcr=nf_dtcr, nf_dtc=nf_dtc, skip_cnt=skip_cnt ) + + @staticmethod + def get_affinity_trex( + node, if1_pci, if2_pci, tg_mtc=1, tg_dtc=1, tg_ltc=1): + """Get affinity for T-Rex. Result will be used to pin T-Rex threads. + + :param node: TG node. + :param if1_pci: TG first interface. + :param if2_pci: TG second interface. + :param tg_mtc: TG main thread count. + :param tg_dtc: TG dataplane thread count. + :param tg_ltc: TG latency thread count. + :type node: dict + :type if1_pci: str + :type if2_pci: str + :type tg_mtc: int + :type tg_dtc: int + :type tg_ltc: int + :returns: List of CPUs allocated to T-Rex including numa node. + :rtype: int, int, int, list + """ + interface_list = [if1_pci, if2_pci] + cpu_node = Topology.get_interfaces_numa_node(node, *interface_list) + + master_thread_id = CpuUtils.cpu_slice_of_list_per_node( + node, cpu_node, skip_cnt=0, cpu_cnt=tg_mtc, + smt_used=False) + + threads = CpuUtils.cpu_slice_of_list_per_node( + node, cpu_node, skip_cnt=tg_mtc, cpu_cnt=tg_dtc, + smt_used=False) + + latency_thread_id = CpuUtils.cpu_slice_of_list_per_node( + node, cpu_node, skip_cnt=tg_mtc + tg_dtc, cpu_cnt=tg_ltc, + smt_used=False) + + return master_thread_id[0], latency_thread_id[0], cpu_node, threads diff --git a/resources/libraries/python/InterfaceUtil.py b/resources/libraries/python/InterfaceUtil.py index 0f18f8f807..3e2e38ef81 100644 --- a/resources/libraries/python/InterfaceUtil.py +++ b/resources/libraries/python/InterfaceUtil.py @@ -113,8 +113,6 @@ class RdmaMode(IntEnum): class InterfaceUtil: """General utilities for managing interfaces""" - __UDEV_IF_RULES_FILE = u"/etc/udev/rules.d/10-network.rules" - @staticmethod def pci_to_int(pci_str): """Convert PCI address from string format (0000:18:0a.0) to @@ -541,45 +539,6 @@ class InterfaceUtil: """ return DUTSetup.get_pci_dev_driver(node, pci_addr) - @staticmethod - def tg_set_interfaces_udev_rules(node): - """Set udev rules for interfaces. - - Create udev rules file in /etc/udev/rules.d where are rules for each - interface used by TG node, based on MAC interface has specific name. - So after unbind and bind again to kernel driver interface has same - name as before. This must be called after TG has set name for each - port in topology dictionary. - udev rule example - SUBSYSTEM=="net", ACTION=="add", ATTR{address}=="52:54:00:e1:8a:0f", - NAME="eth1" - - :param node: Node to set udev rules on (must be TG node). - :type node: dict - :raises RuntimeError: If setting of udev rules fails. - """ - ssh = SSH() - ssh.connect(node) - - cmd = f"rm -f {InterfaceUtil.__UDEV_IF_RULES_FILE}" - ret_code, _, _ = ssh.exec_command_sudo(cmd) - if int(ret_code) != 0: - raise RuntimeError(f"'{cmd}' failed on '{node[u'host']}'") - - for interface in node[u"interfaces"].values(): - rule = u'SUBSYSTEM==\\"net\\", ACTION==\\"add\\", ATTR{address}' + \ - u'==\\"' + interface[u"mac_address"] + u'\\", NAME=\\"' + \ - interface[u"name"] + u'\\"' - cmd = f"sh -c \"echo '{rule}'\" >> " \ - f"{InterfaceUtil.__UDEV_IF_RULES_FILE}'" - - ret_code, _, _ = ssh.exec_command_sudo(cmd) - if int(ret_code) != 0: - raise RuntimeError(f"'{cmd}' failed on '{node[u'host']}'") - - cmd = u"/etc/init.d/udev restart" - ssh.exec_command_sudo(cmd) - @staticmethod def tg_set_interfaces_default_driver(node): """Set interfaces default driver specified in topology yaml file. @@ -667,7 +626,7 @@ class InterfaceUtil: InterfaceUtil.update_nic_interface_names(node) @staticmethod - def update_tg_interface_data_on_node(node, skip_tg_udev=False): + def update_tg_interface_data_on_node(node): """Update interface name for TG/linux node in DICT__nodes. .. note:: @@ -679,9 +638,7 @@ class InterfaceUtil: "00:00:00:00:00:00": "lo" :param node: Node selected from DICT__nodes. - :param skip_tg_udev: Skip udev rename on TG node. :type node: dict - :type skip_tg_udev: bool :raises RuntimeError: If getting of interface name and MAC fails. """ # First setup interface driver specified in yaml file @@ -706,10 +663,6 @@ class InterfaceUtil: continue interface[u"name"] = name - # Set udev rules for interfaces - if not skip_tg_udev: - InterfaceUtil.tg_set_interfaces_udev_rules(node) - @staticmethod def iface_update_numa_node(node): """For all interfaces from topology file update numa node based on @@ -751,26 +704,9 @@ class InterfaceUtil: else: raise RuntimeError(f"Update numa node failed for: {if_pci}") - @staticmethod - def update_all_numa_nodes(nodes, skip_tg=False): - """For all nodes and all their interfaces from topology file update numa - node information based on information from the node. - - :param nodes: Nodes in the topology. - :param skip_tg: Skip TG node - :type nodes: dict - :type skip_tg: bool - :returns: Nothing. - """ - for node in nodes.values(): - if node[u"type"] == NodeType.DUT: - InterfaceUtil.iface_update_numa_node(node) - elif node[u"type"] == NodeType.TG and not skip_tg: - InterfaceUtil.iface_update_numa_node(node) - @staticmethod def update_all_interface_data_on_all_nodes( - nodes, skip_tg=False, skip_tg_udev=False, numa_node=False): + nodes, skip_tg=False, skip_vpp=False): """Update interface names on all nodes in DICT__nodes. This method updates the topology dictionary by querying interface lists @@ -778,25 +714,17 @@ class InterfaceUtil: :param nodes: Nodes in the topology. :param skip_tg: Skip TG node. - :param skip_tg_udev: Skip udev rename on TG node. - :param numa_node: Retrieve numa_node location. + :param skip_vpp: Skip VPP node. :type nodes: dict :type skip_tg: bool - :type skip_tg_udev: bool - :type numa_node: bool + :type skip_vpp: bool """ - for node_data in nodes.values(): - if node_data[u"type"] == NodeType.DUT: - InterfaceUtil.update_vpp_interface_data_on_node(node_data) - elif node_data[u"type"] == NodeType.TG and not skip_tg: - InterfaceUtil.update_tg_interface_data_on_node( - node_data, skip_tg_udev) - - if numa_node: - if node_data[u"type"] == NodeType.DUT: - InterfaceUtil.iface_update_numa_node(node_data) - elif node_data[u"type"] == NodeType.TG and not skip_tg: - InterfaceUtil.iface_update_numa_node(node_data) + for node in nodes.values(): + if node[u"type"] == NodeType.DUT and not skip_vpp: + InterfaceUtil.update_vpp_interface_data_on_node(node) + elif node[u"type"] == NodeType.TG and not skip_tg: + InterfaceUtil.update_tg_interface_data_on_node(node) + InterfaceUtil.iface_update_numa_node(node) @staticmethod def create_vlan_subinterface(node, interface, vlan): diff --git a/resources/libraries/python/TrafficGenerator.py b/resources/libraries/python/TrafficGenerator.py index 8976dff348..1b519d5713 100644 --- a/resources/libraries/python/TrafficGenerator.py +++ b/resources/libraries/python/TrafficGenerator.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 Cisco and/or its affiliates. +# Copyright (c) 2020 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -19,6 +19,7 @@ from robot.api import logger from robot.libraries.BuiltIn import BuiltIn from .Constants import Constants +from .CpuUtils import CpuUtils from .DropRateSearch import DropRateSearch from .MLRsearch.AbstractMeasurer import AbstractMeasurer from .MLRsearch.MultipleLossRatioSearch import MultipleLossRatioSearch @@ -278,6 +279,11 @@ class TrafficGenerator(AbstractMeasurer): if1_adj_addr, if2_adj_addr = if2_adj_addr, if1_adj_addr self._ifaces_reordered = True + master_thread_id, latency_thread_id, socket, threads = \ + CpuUtils.get_affinity_trex( + self._node, tg_if1, tg_if2, + tg_dtc=Constants.TREX_CORE_COUNT) + if osi_layer in (u"L2", u"L3"): dst_mac0 = f"0x{if1_adj_addr.replace(u':', u',0x')}" src_mac0 = f"0x{if1_addr.replace(u':', u',0x')}" @@ -287,6 +293,7 @@ class TrafficGenerator(AbstractMeasurer): self._node, f"sh -c 'cat << EOF > /etc/trex_cfg.yaml\n" f"- version: 2\n" + f" c: {len(threads)}\n" f" limit_memory: {Constants.TREX_LIMIT_MEMORY}\n" f" interfaces: [\"{if1_pci}\",\"{if2_pci}\"]\n" f" port_info:\n" @@ -294,14 +301,21 @@ class TrafficGenerator(AbstractMeasurer): f" src_mac: [{src_mac0}]\n" f" - dest_mac: [{dst_mac1}]\n" f" src_mac: [{src_mac1}]\n" + f" platform :\n" + f" master_thread_id: {master_thread_id}\n" + f" latency_thread_id: {latency_thread_id}\n" + f" dual_if:\n" + f" - socket: {socket}\n" + f" threads: {threads}\n" f"EOF'", - sudo=True, message=u"TRex config generation error" + sudo=True, message=u"TRex config generation!" ) elif osi_layer == u"L7": exec_cmd_no_error( self._node, f"sh -c 'cat << EOF > /etc/trex_cfg.yaml\n" f"- version: 2\n" + f" c: {len(threads)}\n" f" limit_memory: {Constants.TREX_LIMIT_MEMORY}\n" f" interfaces: [\"{if1_pci}\",\"{if2_pci}\"]\n" f" port_info:\n" @@ -309,11 +323,17 @@ class TrafficGenerator(AbstractMeasurer): f" default_gw: [{if1_adj_addr}]\n" f" - ip: [{if2_addr}]\n" f" default_gw: [{if2_adj_addr}]\n" + f" platform :\n" + f" master_thread_id: {master_thread_id}\n" + f" latency_thread_id: {latency_thread_id}\n" + f" dual_if:\n" + f" - socket: {socket}\n" + f" threads: {threads}\n" f"EOF'", - sudo=True, message=u"TRex config generation error" + sudo=True, message=u"TRex config generation!" ) else: - raise ValueError(u"Unknown Test Type") + raise ValueError(u"Unknown Test Type!") self._startup_trex(osi_layer) @@ -348,7 +368,6 @@ class TrafficGenerator(AbstractMeasurer): cd_cmd = f"cd '{Constants.TREX_INSTALL_DIR}/scripts/'" trex_cmd = OptionString([u"nohup", u"./t-rex-64"]) trex_cmd.add(u"-i") - trex_cmd.add(f"-c {Constants.TREX_CORE_COUNT}") trex_cmd.add(u"--prefix $(hostname)") trex_cmd.add(u"--hdrh") trex_cmd.add(u"--no-scapy-server") @@ -392,8 +411,6 @@ class TrafficGenerator(AbstractMeasurer): :rtype: bool :raises RuntimeError: If node type is not a TG. """ - # No need to check subtype, we know it is TREX. - ret, _, _ = exec_cmd(node, u"pidof t-rex", sudo=True) return bool(int(ret) == 0) diff --git a/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/tg.yaml b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/tg.yaml index f58cb59a1a..a026ec2acd 100644 --- a/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/tg.yaml +++ b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/tg.yaml @@ -5,6 +5,7 @@ import_tasks: kill_process.yaml vars: process: "_t-rex" + when: docker_tg is undefined tags: kill-process - name: Kill processes - WRK @@ -12,3 +13,4 @@ vars: process: "wrk" tags: kill-process + when: docker_tg is undefined \ No newline at end of file diff --git a/resources/tools/testbed-setup/ansible/roles/tg/files/csit-initialize-docker-tg.sh b/resources/tools/testbed-setup/ansible/roles/tg/files/csit-initialize-docker-tg.sh index 0c6dbee492..2d307b1c38 100755 --- a/resources/tools/testbed-setup/ansible/roles/tg/files/csit-initialize-docker-tg.sh +++ b/resources/tools/testbed-setup/ansible/roles/tg/files/csit-initialize-docker-tg.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Copyright (c) 2019 Cisco and/or its affiliates. +# Copyright (c) 2020 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -21,6 +21,7 @@ case "${1:-start}" in "start" ) # Run TG for cnt in $(seq 1 ${2:-1}); do + docker network create --driver bridge csit-nw-tg${cnt} # If the IMAGE is not already loaded then docker run will pull the # IMAGE, and all image dependencies, before it starts the container. dcr_image="snergster/csit-sut:latest" @@ -47,10 +48,11 @@ case "${1:-start}" in dcr_stc_params+="--volume /dev/hugepages:/dev/hugepages " params=(${dcr_stc_params} --name csit-tg-"${cnt}" "${dcr_image}") - docker run "${params[@]}" + docker run --network=csit-nw-tg${cnt} "${params[@]}" done ;; "stop" ) - docker rm --force $(docker ps --all --quiet --filter name=csit-tg) + docker rm --force $(docker ps --all --quiet --filter name=csit) + docker network rm $(docker network ls --filter name=csit --quiet) ;; -esac +esac \ No newline at end of file diff --git a/tests/dpdk/perf/__init__.robot b/tests/dpdk/perf/__init__.robot index 10878c2d19..096bc8865b 100644 --- a/tests/dpdk/perf/__init__.robot +++ b/tests/dpdk/perf/__init__.robot @@ -1,4 +1,4 @@ -# Copyright (c) 2019 Cisco and/or its affiliates. +# Copyright (c) 2020 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -20,11 +20,11 @@ | Library | resources.libraries.python.DPDK.DPDKTools | | Suite Setup | Run Keywords | Setup performance global Variables -| ... | AND | Setup Framework | ${nodes} -| ... | AND | Install DPDK test on all DUTs | ${nodes} -| ... | AND | Get CPU Info from All Nodes | ${nodes} -| ... | AND | Update All Numa Nodes -| ... | ${nodes} | skip_tg=${True} +| ... | AND | Setup Framework | ${nodes} +| ... | AND | Install DPDK test on all DUTs | ${nodes} +| ... | AND | Get CPU Info from All Nodes | ${nodes} +| ... | AND | Update All Interface Data on All Nodes | ${nodes} +| ... | skip_tg=${True} | skip_vpp=${True} | | Suite Teardown | Cleanup Framework | ${nodes} diff --git a/tests/vpp/device/__init__.robot b/tests/vpp/device/__init__.robot index f166c124c4..0d31fc1876 100644 --- a/tests/vpp/device/__init__.robot +++ b/tests/vpp/device/__init__.robot @@ -1,4 +1,4 @@ -# Copyright (c) 2019 Cisco and/or its affiliates. +# Copyright (c) 2020 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -26,7 +26,6 @@ | ... | AND | Verify Vpp on All Duts | ${nodes} | ... | AND | Get CPU Info from All Nodes | ${nodes} | ... | AND | Update All Interface Data on All Nodes | ${nodes} -| ... | skip_tg_udev=${True} | numa_node=${True} | | Suite Teardown | Cleanup Framework | ${nodes} diff --git a/tests/vpp/perf/__init__.robot b/tests/vpp/perf/__init__.robot index 90afe801cd..e5a2c751a0 100644 --- a/tests/vpp/perf/__init__.robot +++ b/tests/vpp/perf/__init__.robot @@ -1,4 +1,4 @@ -# Copyright (c) 2019 Cisco and/or its affiliates. +# Copyright (c) 2020 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -28,7 +28,7 @@ | ... | AND | Show Vpp Version on All Duts | ${nodes} | ... | AND | Get CPU Info from All Nodes | ${nodes} | ... | AND | Update All Interface Data on All Nodes | ${nodes} -| ... | skip_tg=${True} | numa_node=${True} +| ... | skip_tg=${True} | | Suite Teardown | Cleanup Framework | ${nodes}