From cb265c6948658a10a46252afc889403f0baab31c Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Thu, 25 Jul 2019 10:03:51 +0000 Subject: [PATCH] tests: disable pg capture before enabling it In a lot of places within the unit tests pg_start() starts the capture with an already existing capture running for the same test. If the pcap file already exists, then it is renamed and there is no problem. However, there is a potential for race if the previous pg_start() has enabled the capture, but the check for renaming it happened just slightly earlier than the first packet has arrived. Then a second call to pg_start() will hit a check that a file exists, and will cause an error. This is especially visible when running the tests in parallel due to increased load. Solution: disable the capture before enabling it. This will flush the aready running capture and eliminate the race. The additional delay that flushing of the pcap creates has exposed several other race conditions: NAT tests: Some of the NAT reassembly tests verify that the entries were added to the reassembly data structures, but do so by comparing the quantities of entries. With the default timeout being 2s, some of the entries might timeout, resulting in a bogus test failure. Solution: Bump the timeout to 20s for the affected tests. Punt tests: nr_packets == 3 makes test intermittently fail, nr_packets > 3 make it reliably fail, and nr_packets = 2 works Solution: set nr_packets == 2 for the time being IGMP tests: the leave-group calls get a spurious packet from the time the new groups were configured Solution: add 1 second delay before starting to delete the groups Type: test Change-Id: I931182a7b2860cf670e030ee7da8038f6e87356d Signed-off-by: Andrew Yourtchenko --- test/test_igmp.py | 2 ++ test/test_nat.py | 22 ++++++++++++++++++++++ test/test_punt.py | 6 +++++- test/vpp_pg_interface.py | 3 ++- 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/test/test_igmp.py b/test/test_igmp.py index 68a3e4e41f3..f1c49acba4c 100644 --- a/test/test_igmp.py +++ b/test/test_igmp.py @@ -469,6 +469,8 @@ class TestIgmp(VppTestCase): h10.add_vpp_config() capture = self.pg0.get_capture(2, timeout=10) + # wait for a little bit + self.sleep(1) # # remove state, expect the report for the removal diff --git a/test/test_nat.py b/test/test_nat.py index a6786218bd3..773eb51d4ee 100644 --- a/test/test_nat.py +++ b/test/test_nat.py @@ -3670,10 +3670,22 @@ class TestNAT44(MethodHolder): sw_if_index=self.pg1.sw_if_index, is_add=1) + reas_cfg1 = self.vapi.nat_get_reass() + # this test was intermittently failing in some cases + # until we temporarily bump the reassembly timeouts + self.vapi.nat_set_reass(timeout=20, max_reass=1024, max_frag=5, + drop_frag=0) + self.frag_in_order(proto=IP_PROTOS.tcp) self.frag_in_order(proto=IP_PROTOS.udp) self.frag_in_order(proto=IP_PROTOS.icmp) + # restore the reassembly timeouts + self.vapi.nat_set_reass(timeout=reas_cfg1.ip4_timeout, + max_reass=reas_cfg1.ip4_max_reass, + max_frag=reas_cfg1.ip4_max_frag, + drop_frag=reas_cfg1.ip4_drop_frag) + def test_frag_forwarding(self): """ NAT44 forwarding fragment test """ self.vapi.nat44_add_del_interface_addr( @@ -4514,7 +4526,17 @@ class TestNAT44EndpointDependent(MethodHolder): sw_if_index=self.pg1.sw_if_index, is_add=1) self.vapi.nat44_forwarding_enable_disable(enable=True) + reas_cfg1 = self.vapi.nat_get_reass() + # this test was intermittently failing in some cases + # until we temporarily bump the reassembly timeouts + self.vapi.nat_set_reass(timeout=20, max_reass=1024, max_frag=5, + drop_frag=0) self.frag_in_order(proto=IP_PROTOS.tcp, dont_translate=True) + # restore the reassembly timeouts + self.vapi.nat_set_reass(timeout=reas_cfg1.ip4_timeout, + max_reass=reas_cfg1.ip4_max_reass, + max_frag=reas_cfg1.ip4_max_frag, + drop_frag=reas_cfg1.ip4_drop_frag) def test_frag_out_of_order(self): """ NAT44 translate fragments arriving out of order """ diff --git a/test/test_punt.py b/test/test_punt.py index 3ba1be4d5ed..0b4585a15c7 100644 --- a/test/test_punt.py +++ b/test/test_punt.py @@ -77,7 +77,9 @@ class TestPuntSocket(VppTestCase): ports = [1111, 2222, 3333, 4444] sock_servers = list() - nr_packets = 3 + # FIXME: nr_packets > 3 results in failure + # nr_packets = 3 makes the test unstable + nr_packets = 2 @classmethod def setUpClass(cls): @@ -679,6 +681,8 @@ class TestIP6PuntSocket(TestPuntSocket): self.pg0.add_stream(pkts) self.pg_enable_capture(self.pg_interfaces) self.pg_start() + # give a chance to punt socket to collect all packets + self.sleep(1) self.pg0.get_capture(0) rx = self.socket_client_close() diff --git a/test/vpp_pg_interface.py b/test/vpp_pg_interface.py index e6dae66feec..ed45b459251 100755 --- a/test/vpp_pg_interface.py +++ b/test/vpp_pg_interface.py @@ -146,7 +146,8 @@ class VppPGInterface(VppInterface): of at most n packets. If n < 0, this is no limit """ - + # disable the capture to flush the capture + self.disable_capture() self._rename_previous_capture_file(self.out_path, self.out_history_counter, self._out_file) -- 2.16.6