1 # Copyright (c) 2023 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
14 """Script for determining whether per-patch perf test votes -1.
16 This script expects a particular tree created on a filesystem by
17 per_patch_perf.sh bootstrap script, including test results
18 exported as json files according to a current model schema.
This script extracts the results (according to result type)
20 and joins them into one list of floats for parent and one for current.
22 This script then uses jumpavg library to determine whether there was
23 a regression, progression or no change for each testcase.
25 If the set of test names does not match, or there was a regression,
this script votes -1 (by exiting with a nonzero code), otherwise it votes +1 (exit 0).
33 from typing import Dict, List
35 from resources.libraries.python import jumpavg
def parse(dirpath: str, fake_value: float) -> Dict[str, List[float]]:
    """Look for test jsons, extract scalar results.

    Files other than .json are skipped, jsons without test_id are skipped.
    If the test failed, four fake values are used as a fake result.

    Units are ignored, as both parent and current are tested
    with the same CSIT code so the unit should be identical.

    The value extracted depends on the result type:
    all receive rate values for mrr, the lower bound of PDR for ndrpdr,
    the lower bound of critical rate for soak, the loss time for reconf
    and the bandwidth for hoststack.

    :param dirpath: Path to the directory tree to examine.
    :param fake_value: Fake value to use for test cases that failed.
    :type dirpath: str
    :type fake_value: float
    :returns: Mapping from test IDs to list of measured values.
    :rtype: Dict[str, List[float]]
    :raises RuntimeError: On duplicate test ID or unknown test type.
    """
    results = {}
    for root, _, files in os.walk(dirpath):
        for filename in files:
            if not filename.endswith(".json"):
                continue
            filepath = os.path.join(root, filename)
            with open(filepath, "rt", encoding="utf8") as file_in:
                data = json.load(file_in)
            if "test_id" not in data:
                # Not a test result (e.g. suite setup or teardown export).
                continue
            name = data["test_id"]
            if name in results:
                raise RuntimeError(f"Duplicate: {name}")
            if not data["passed"]:
                # Failed tests have no usable result, substitute fake values.
                results[name] = [fake_value] * 4
                continue
            result_object = data["result"]
            result_type = result_object["type"]
            if result_type == "mrr":
                results[name] = result_object["receive_rate"]["rate"]["values"]
            elif result_type == "ndrpdr":
                results[name] = [result_object["pdr"]["lower"]["rate"]["value"]]
            elif result_type == "soak":
                results[name] = [
                    result_object["critical_rate"]["lower"]["rate"]["value"]
                ]
            elif result_type == "reconf":
                results[name] = [result_object["loss"]["time"]["value"]]
            elif result_type == "hoststack":
                results[name] = [result_object["bandwidth"]["value"]]
            else:
                raise RuntimeError(f"Unknown result type: {result_type}")
    return results
def main() -> int:
    """Execute the main logic, return a number to return as the return code.

    Call parse to get parent and current data.
    Use higher fake value for parent, so changes that keep a test failing
    are marked as regressions.

    If there are multiple iterations, the value lists are joined.
    Iterations are read from csit_parent/<N> and csit_current/<N>,
    starting from N=0, until an iteration yields no parent results.
    For each test, call jumpavg.classify to detect possible regression.
    Tests are reported in sorted name order, so the log is reproducible.

    If there is at least one regression, return 3.

    :returns: Return code, 0 or 3 based on the comparison result.
    :rtype: int
    :raises AssertionError: If the set of test names differs between
        iterations, or between parent and current within an iteration.
    """
    iteration = -1
    parent_aggregate = {}
    current_aggregate = {}
    test_names = None
    while 1:
        iteration += 1
        parent_results = parse(f"csit_parent/{iteration}", fake_value=2.0)
        parent_names = set(parent_results.keys())
        if test_names is None:
            # The first iteration defines the expected set of tests.
            test_names = parent_names
        if not parent_names:
            # No more iterations.
            break
        assert parent_names == test_names, f"{parent_names} != {test_names}"
        current_results = parse(f"csit_current/{iteration}", fake_value=1.0)
        current_names = set(current_results.keys())
        assert (
            current_names == parent_names
        ), f"{current_names} != {parent_names}"
        for name in test_names:
            if name not in parent_aggregate:
                parent_aggregate[name] = []
            if name not in current_aggregate:
                current_aggregate[name] = []
            parent_aggregate[name].extend(parent_results[name])
            current_aggregate[name].extend(current_results[name])
    exit_code = 0
    # Set iteration order is not stable across runs, sort for stable logs.
    for name in sorted(test_names):
        print(f"Test name: {name}")
        parent_values = parent_aggregate[name]
        current_values = current_aggregate[name]
        print(f"Time-ordered MRR values for parent build: {parent_values}")
        print(f"Time-ordered MRR values for current build: {current_values}")
        parent_values = sorted(parent_values)
        current_values = sorted(current_values)
        max_value = max([1.0] + parent_values + current_values)
        parent_stats = jumpavg.AvgStdevStats.for_runs(parent_values)
        current_stats = jumpavg.AvgStdevStats.for_runs(current_values)
        parent_group_list = jumpavg.BitCountingGroupList(
            max_value=max_value
        ).append_group_of_runs([parent_stats])
        # The copy has to be taken before the second group is appended,
        # as the append below is applied to parent_group_list itself.
        combined_group_list = (
            parent_group_list.copy().extend_runs_to_last_group([current_stats])
        )
        separated_group_list = parent_group_list.append_group_of_runs(
            [current_stats]
        )
        print(f"Value-ordered MRR values for parent build: {parent_values}")
        print(f"Value-ordered MRR values for current build: {current_values}")
        # NOTE(review): this division raises ZeroDivisionError
        # if the parent average is zero - confirm that cannot happen.
        avg_diff = (current_stats.avg - parent_stats.avg) / parent_stats.avg
        print(f"Difference of averages relative to parent: {100 * avg_diff}%")
        print(f"Jumpavg representation of parent group: {parent_stats}")
        print(f"Jumpavg representation of current group: {current_stats}")
        print(
            f"Jumpavg representation of both as one group:"
            f" {combined_group_list[0].stats}"
        )
        bits_diff = separated_group_list.bits - combined_group_list.bits
        compared = "longer" if bits_diff >= 0 else "shorter"
        print(
            f"Separate groups are {compared} than single group"
            f" by {abs(bits_diff)} bits"
        )
        # TODO: Version of classify that takes max_value and list of stats?
        # That matters if only stats (not list of floats) are given.
        classified_list = jumpavg.classify([parent_values, current_values])
        if len(classified_list) < 2:
            # Parent and current ended up in a single group.
            print(f"Test {name}: normal (no anomaly)")
            continue
        anomaly = classified_list[1].comment
        if anomaly == "regression":
            print(f"Test {name}: anomaly regression")
            exit_code = 3  # 1 or 2 can be caused by other errors
            continue
        print(f"Test {name}: anomaly {anomaly}")
    print(f"Exit code: {exit_code}")
    return exit_code
185 if __name__ == "__main__":