perf: Fix broken hoststack tests
[csit.git] / resources / libraries / python / HoststackUtil.py
1 # Copyright (c) 2020 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
5 #
6 #     http://www.apache.org/licenses/LICENSE-2.0
7 #
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13
14 """Host Stack util library."""
15 import json
16 from time import sleep
17 from robot.api import logger
18
19 from resources.libraries.python.Constants import Constants
20 from resources.libraries.python.ssh import exec_cmd, exec_cmd_no_error
21 from resources.libraries.python.PapiExecutor import PapiSocketExecutor
22 from resources.libraries.python.DUTSetup import DUTSetup
23
class HoststackUtil():
    """Utilities for Host Stack tests."""

    @staticmethod
    def get_vpp_echo_command(vpp_echo_attributes):
        """Construct the vpp_echo command using the specified attributes.

        :param vpp_echo_attributes: vpp_echo test program attributes.
        :type vpp_echo_attributes: dict
        :returns: Command line components of the vpp_echo command
            'name' - program name
            'args' - command arguments.
        :rtype: dict
        """
        # TODO: Use a python class instead of dictionary for the return type
        attrs = vpp_echo_attributes
        proto = attrs[u"uri_protocol"]
        addr = attrs[u"uri_ip4_addr"]
        port = attrs[u"uri_port"]
        # Every component is joined by exactly one space, so an empty
        # 'json_output' value still leaves its separator in place.
        arg_parts = [
            f"{attrs[u'role']}",
            f"socket-name {attrs[u'vpp_api_socket']}",
            f"{attrs[u'json_output']}",
            f"uri {proto}://{addr}/{port}",
            f"nthreads {attrs[u'nthreads']}",
            f"mq-size {attrs[u'mq_size']}",
            f"nclients {attrs[u'nclients']}",
            f"quic-streams {attrs[u'quic_streams']}",
            f"time {attrs[u'time']}",
            f"fifo-size {attrs[u'fifo_size']}",
            f"TX={attrs[u'tx_bytes']}",
            f"RX={attrs[u'rx_bytes']}",
        ]
        if attrs[u"rx_results_diff"]:
            arg_parts.append(u"rx-results-diff")
        if attrs[u"tx_results_diff"]:
            arg_parts.append(u"tx-results-diff")
        return {u"name": u"vpp_echo", u"args": u" ".join(arg_parts)}

    @staticmethod
    def get_iperf3_command(iperf3_attributes):
        """Construct the iperf3 command using the specified attributes.

        :param iperf3_attributes: iperf3 test program attributes.
        :type iperf3_attributes: dict
        :returns: Command line components of the iperf3 command
            'env_vars' - environment variables
            'name' - program name
            'args' - command arguments.
        :rtype: dict
        """
        # TODO: Use a python class instead of dictionary for the return type
        env_vars = f"VCL_CONFIG={Constants.REMOTE_FW_DIR}/" \
            f"{Constants.RESOURCES_TPL_VCL}/" \
            f"{iperf3_attributes[u'vcl_config']}"
        if iperf3_attributes[u"ld_preload"]:
            env_vars += f" LD_PRELOAD={Constants.VCL_LDPRELOAD_LIBRARY}"
        if iperf3_attributes[u"transparent_tls"]:
            env_vars += u" LDP_ENV_TLS_TRANS=1"

        json_results = u" --json" if iperf3_attributes[u"json"] else u""
        # 'ip_address' is optional; when present it follows the role option.
        ip_address = f" {iperf3_attributes[u'ip_address']}" \
            if u"ip_address" in iperf3_attributes else u""
        # The IP version is appended to form iperf3's --version4/--version6.
        args = f"--{iperf3_attributes[u'role']}{ip_address} " \
            f"--interval 0{json_results} " \
            f"--version{iperf3_attributes[u'ip_version']}"

        if iperf3_attributes[u"role"] == u"server":
            args += u" --one-off"
        else:
            args += u" --get-server-output"
            if u"parallel" in iperf3_attributes:
                args += f" --parallel {iperf3_attributes[u'parallel']}"
            if u"time" in iperf3_attributes:
                args += f" --time {iperf3_attributes[u'time']}"
        return {u"env_vars": env_vars, u"name": u"iperf3", u"args": args}

    @staticmethod
    def set_hoststack_quic_fifo_size(node, fifo_size):
        """Set the QUIC protocol fifo size.

        :param node: Node to set the QUIC fifo size on.
        :param fifo_size: fifo size, passed to the quic set fifo-size command.
        :type node: dict
        :type fifo_size: str
        """
        PapiSocketExecutor.run_cli_cmd(
            node, f"quic set fifo-size {fifo_size}"
        )

    @staticmethod
    def set_hoststack_quic_crypto_engine(node, quic_crypto_engine,
                                         fail_on_error=False):
        """Set the Hoststack QUIC crypto engine on node.

        The value "nocrypto" is accepted and results in no CLI command
        being issued.

        :param node: Node to set the QUIC crypto engine on.
        :param quic_crypto_engine: Type of crypto engine, one of
            nocrypto, openssl, native, ipsecmb, picotls.
        :param fail_on_error: If True, an AssertionError raised while
            running a CLI command is propagated, otherwise it is logged
            and the remaining commands are still attempted.
        :type node: dict
        :type quic_crypto_engine: str
        :type fail_on_error: bool
        :raises ValueError: If the crypto engine is not a known one.
        """
        vpp_crypto_engines = {u"openssl", u"native", u"ipsecmb"}
        if quic_crypto_engine == u"nocrypto":
            logger.trace(u"No QUIC crypto engine.")
            return

        if quic_crypto_engine in vpp_crypto_engines:
            cmds = [u"quic set crypto api vpp",
                    f"set crypto handler aes-128-gcm {quic_crypto_engine}",
                    f"set crypto handler aes-256-gcm {quic_crypto_engine}"]
        elif quic_crypto_engine == u"picotls":
            cmds = [u"quic set crypto api picotls"]
        else:
            raise ValueError(f"Unknown QUIC crypto_engine {quic_crypto_engine}")

        for cmd in cmds:
            try:
                PapiSocketExecutor.run_cli_cmd(node, cmd)
            except AssertionError as err:
                if fail_on_error:
                    raise
                # Best effort by default, but leave a trace for debugging.
                logger.debug(
                    f"Ignoring failure of CLI command '{cmd}': {err!r}"
                )

    @staticmethod
    def get_hoststack_test_program_logs(node, program):
        """Get HostStack test program stdout and stderr logs.

        The logs are read from /tmp/<program name>_stdout.log and
        /tmp/<program name>_stderr.log on the node.

        :param node: DUT node.
        :param program: test program.
        :type node: dict
        :type program: dict
        :returns: Content of the stdout log and of the stderr log.
        :rtype: tuple
        """
        program_name = program[u"name"]
        logs = []
        for stream in (u"stdout", u"stderr"):
            cmd = f"sh -c 'cat /tmp/{program_name}_{stream}.log'"
            log, _ = exec_cmd_no_error(
                node, cmd, sudo=True,
                message=f"Get {program_name} {stream} log failed!"
            )
            logs.append(log)
        return tuple(logs)

    @staticmethod
    def start_hoststack_test_program(node, namespace, core_list, program):
        """Start the specified HostStack test program.

        :param node: DUT node.
        :param namespace: Net Namespace to run program in.
        :param core_list: List of cpu's to pass to taskset to pin the test
            program to a different set of cores on the same numa node as VPP.
        :param program: Test program.
        :type node: dict
        :type namespace: str
        :type core_list: str
        :type program: dict
        :returns: Process ID
        :rtype: int
        :raises RuntimeError: If node subtype is not a DUT or startup failed.
        """
        if node[u"type"] != u"DUT":
            raise RuntimeError(u"Node type is not a DUT!")

        program_name = program[u"name"]
        DUTSetup.kill_program(node, program_name, namespace)

        if namespace == u"default":
            shell_cmd = u"sh -c"
        else:
            shell_cmd = f"ip netns exec {namespace} sh -c"

        env_vars = f"{program[u'env_vars']} " if u"env_vars" in program else u""
        args = program[u"args"]
        # Run in background; output goes to the per-program log files that
        # get_hoststack_test_program_logs() reads back.
        cmd = f"nohup {shell_cmd} '{env_vars}taskset --cpu-list {core_list} " \
            f"{program_name} {args} >/tmp/{program_name}_stdout.log " \
            f"2>/tmp/{program_name}_stderr.log &'"
        try:
            exec_cmd_no_error(node, cmd, sudo=True)
            return DUTSetup.get_pid(node, program_name)[0]
        except RuntimeError as err:
            stdout_log, stderr_log = \
                HoststackUtil.get_hoststack_test_program_logs(node, program)
            # Keep the original failure as the cause of the new exception.
            raise RuntimeError(
                f"Start {program_name} failed!\nSTDERR:\n"
                f"{stderr_log}\nSTDOUT:\n{stdout_log}"
            ) from err

    @staticmethod
    def stop_hoststack_test_program(node, program, pid):
        """Stop the specified Hoststack test program.

        nginx is asked to quit via "nginx -s quit". Any other program is
        sent SIGTERM, but only if the given pid still shows the program
        name in the ps output.

        :param node: DUT node.
        :param program: Test program.
        :param pid: Process ID of test program.
        :type node: dict
        :type program: dict
        :type pid: int
        """
        program_name = program[u"name"]
        if program_name == u"nginx":
            cmd = u"nginx -s quit"
            errmsg = u"Quit nginx failed!"
        else:
            cmd = f'if [ -n "$(ps {pid} | grep {program_name})" ] ; ' \
                f'then kill -s SIGTERM {pid}; fi'
            errmsg = f"Kill {program_name} ({pid}) failed!"

        exec_cmd_no_error(node, cmd, message=errmsg, sudo=True)

    @staticmethod
    def hoststack_test_program_finished(node, program_pid):
        """Wait for the specified HostStack test program process to complete.

        :param node: DUT node.
        :param program_pid: test program pid.
        :type node: dict
        :type program_pid: str
        :raises RuntimeError: If node subtype is not a DUT.
        """
        if node[u"type"] != u"DUT":
            raise RuntimeError(u"Node type is not a DUT!")

        # strace is attached with no syscalls traced; it is used only to
        # block until the process goes away. The result is ignored.
        cmd = f"sh -c 'strace -qqe trace=none -p {program_pid}'"
        exec_cmd(node, cmd, sudo=True)
        # Wait a bit for stdout/stderr to be flushed to log files
        # TODO: see if sub-second sleep works e.g. sleep(0.1)
        sleep(1)

    @staticmethod
    def _extract_vpp_echo_json(program_stdout):
        """Parse the JSON statistics out of the vpp_echo stdout log.

        :param program_stdout: Content of the vpp_echo stdout log.
        :type program_stdout: str
        :returns: Parsed statistics, or None if the output reports a
            failure or does not contain parseable statistics.
        :rtype: dict or None
        """
        if u"JSON stats" not in program_stdout or \
                u'"has_failed": "0"' not in program_stdout:
            return None
        json_start = program_stdout.find(u"{")
        if json_start < 0:
            return None
        # TODO: Fix parsing once vpp_echo produces valid
        # JSON output. Truncate for now.
        json_end = program_stdout.find(u',\n  "closing"')
        if json_end >= 0:
            json_text = f"{program_stdout[json_start:json_end]}\n}}"
        else:
            # No "closing" section to cut off, take the output as it is.
            json_text = program_stdout[json_start:]
        try:
            return json.loads(json_text)
        except ValueError:
            return None

    @staticmethod
    def _extract_iperf3_json(program_stdout):
        """Parse the first interval summary out of the iperf3 JSON output.

        :param program_stdout: Content of the iperf3 stdout log.
        :type program_stdout: str
        :returns: The 'sum' item of the first reported interval, or None
            if the output is not JSON of the expected structure.
        :rtype: dict or None
        """
        try:
            return json.loads(program_stdout)[u"intervals"][0][u"sum"]
        except (ValueError, KeyError, IndexError, TypeError):
            return None

    @staticmethod
    def analyze_hoststack_test_program_output(
            node, role, nsim_attr, program):
        """Gather HostStack test program output and check for errors.

        The [defer_fail] return bool is used instead of failing immediately
        to allow the analysis of both the client and server instances of
        the test program for debugging a test failure.  When [defer_fail]
        is true, then the string returned is debug output instead of
        JSON formatted test program results.

        :param node: DUT node.
        :param role: Role (client|server) of test program.
        :param nsim_attr: Network Simulation Attributes.
        :param program: Test program.
        :type node: dict
        :type role: str
        :type nsim_attr: dict
        :type program: dict
        :returns: tuple of [defer_fail] bool and either JSON formatted hoststack
            test program output or failure debug output.
        :rtype: bool, str
        :raises RuntimeError: If node subtype is not a DUT.
        """
        if node[u"type"] != u"DUT":
            raise RuntimeError(u"Node type is not a DUT!")

        program_name = program[u"name"]
        program_stdout, program_stderr = \
            HoststackUtil.get_hoststack_test_program_logs(node, program)
        if not program_stdout and not program_stderr:
            # Both logs empty: read them one more time before analysis.
            logger.trace(f"Retrying {program_name} log retrieval")
            program_stdout, program_stderr = \
                HoststackUtil.get_hoststack_test_program_logs(node, program)

        env_vars = f"{program[u'env_vars']} " if u"env_vars" in program else u""
        program_cmd = f"{env_vars}{program_name} {program[u'args']}"
        test_results = f"Test Results of '{program_cmd}':\n"

        if nsim_attr[u"output_nsim_enable"] or \
                nsim_attr[u"xc_nsim_enable"]:
            if nsim_attr[u"output_nsim_enable"]:
                feature_name = u"output"
            else:
                feature_name = u"cross-connect"
            test_results += \
                f"NSIM({feature_name}): delay " \
                f"{nsim_attr[u'delay_in_usec']} usecs, " \
                f"avg-pkt-size {nsim_attr[u'average_packet_size']}, " \
                f"bandwidth {nsim_attr[u'bw_in_bits_per_second']} " \
                f"bits/sec, pkt-drop-rate {nsim_attr[u'packets_per_drop']} " \
                f"pkts/drop\n"

        # TODO: Incorporate show error stats into results analysis
        test_results += \
            f"\n{role} VPP 'show errors' on host {node[u'host']}:\n" \
            f"{PapiSocketExecutor.run_cli_cmd(node, u'show error')}\n"

        if u"error" in program_stderr.lower():
            test_results += f"ERROR DETECTED:\n{program_stderr}"
            return (True, test_results)
        if not program_stdout:
            test_results += f"\nNo {program_name} test data retrieved!\n"
            ls_stdout, _ = exec_cmd_no_error(node, u"ls -l /tmp/*.log",
                                             sudo=True)
            test_results += f"{ls_stdout}\n"
            return (True, test_results)

        if program_name == u"vpp_echo":
            program_json = HoststackUtil._extract_vpp_echo_json(program_stdout)
        elif program_name == u"iperf3":
            program_json = HoststackUtil._extract_iperf3_json(program_stdout)
        else:
            test_results += u"Unknown HostStack Test Program!\n" + \
                            program_stdout
            return (True, test_results)

        if program_json is None:
            # Unparseable output is a deferred failure, not an exception,
            # so the other side of the test can still be analyzed.
            test_results += u"Invalid test data output!\n" + program_stdout
            return (True, test_results)
        return (False, json.dumps(program_json))

    @staticmethod
    def hoststack_test_program_defer_fail(server_defer_fail, client_defer_fail):
        """Return True if either HostStack test program fail was deferred.

        :param server_defer_fail: server no results value.
        :param client_defer_fail: client no results value.
        :type server_defer_fail: bool
        :type client_defer_fail: bool
        :returns: True if the server or the client (or both) deferred a
            failure.
        :rtype: bool
        """
        return server_defer_fail or client_defer_fail