1 # Copyright (c) 2024 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
"""Script for analyzing 3 result sets for "git bisect" purposes.

Jumpavg library is used for comparing description length of three groupings.
The mid result is grouped with early or late result, or as a separate group.
The jump we are looking for is between the mid and the smaller group
of the grouping with fewer bits.
Except when a grouping with all three sets as separate groups is the smallest.
In that case we choose the bigger difference in averages.
"""
26 from typing import List, Tuple
28 from resources.libraries.python import jumpavg
29 from resources.libraries.python.model.parse import parse
def read_from_dir(dirname: str) -> Tuple[List[float], float]:
    """Parse samples from dir, print them and stats, return them as list.

    In case there are more test cases, their results are concatenated.

    :param dirname: The directory name (maybe with path) to parse.
    :type dirname: str
    :returns: The samples, deserialized from json, and the average.
    :rtype: Tuple[List[float], float]
    :raises RuntimeError: On parsing error.
    """
    results = parse(dirname)
    # Start from an empty list so the samples of every test case
    # can be concatenated in the iteration order of the parsed results.
    samples = []
    for result in results.values():
        samples.extend(result)
    print(f"Read {dirname}: {samples!r}")
    stats = jumpavg.AvgStdevStats.for_runs(samples)
    print(f"Stats: {stats!r}")
    return samples, stats.avg
def main() -> int:
    """Execute the main logic, return the return code.

    Samples are read from the "csit_early", "csit_late" and "csit_mid"
    directories. Three groupings are then compared by their bit counts:
    mid grouped with early, mid grouped with late, and all three separate.
    The decision whether mid belongs to early or late is printed
    together with the resulting exit code.

    :returns: The return code, 0 or 3 depending on the comparison result.
    :rtype: int
    """
    early_results, early_avg = read_from_dir("csit_early")
    late_results, late_avg = read_from_dir("csit_late")
    mid_results, mid_avg = read_from_dir("csit_mid")
    # Relative difference of mid average to each bound, scaled by the larger
    # of the two averages involved. A zero scale yields zero difference
    # instead of a division error.
    abs_diff_early = abs(early_avg - mid_avg)
    abs_diff_late = abs(late_avg - mid_avg)
    max_early = max(early_avg, mid_avg)
    max_late = max(late_avg, mid_avg)
    rel_diff_early = abs_diff_early / max_early if max_early else 0.0
    rel_diff_late = abs_diff_late / max_late if max_late else 0.0
    max_value = max(early_results + mid_results + late_results)
    # Create a common group list with just the early group.
    # NOTE(review): max_value is computed above and not used elsewhere;
    # passing it as the "max_value" keyword is an assumption, confirm
    # against the jumpavg BitCountingGroupList signature.
    common_group_list = jumpavg.BitCountingGroupList(
        max_value=max_value
    ).append_group_of_runs(early_results)
    # Try grouping the mid with the early.
    early_group_list = common_group_list.copy()
    early_group_list.extend_runs_to_last_group(mid_results)
    early_group_list.append_group_of_runs(late_results)
    early_bits = early_group_list.bits
    print(f"Early group list bits: {early_bits}")
    # Now the same, but grouping the mid with the late.
    late_group_list = common_group_list.copy()
    late_group_list.append_group_of_runs(mid_results)
    late_group_list.extend_runs_to_last_group(late_results)
    late_bits = late_group_list.bits
    print(f"Late group list bits: {late_bits}")
    # Finally, group each separately, as if double anomaly happened.
    double_group_list = common_group_list.copy()
    double_group_list.append_group_of_runs(mid_results)
    double_group_list.append_group_of_runs(late_results)
    double_bits = double_group_list.bits
    print(f"Double group list bits: {double_bits}")
    single_bits = min(early_bits, late_bits)
    # NOTE(review): exit code 3 for "late" and 0 for "early" is inferred
    # from the docstring and the rc comments below, confirm with the caller.
    if double_bits <= single_bits:
        # In this case, comparing early_bits with late_bits is not the best,
        # as that would probably select based on stdev, not based on diff.
        # Example: mid (small stdev) is closer to early (small stdev),
        # and farther from late (big stdev).
        # As grouping mid with early would increase their combined stdev,
        # it is not selected. This means a noisy late bound can affect
        # what a human perceives as the more interesting region.
        # So we select only based on averages.
        print("Perhaps two different anomalies. Selecting by averages only.")
        diff = single_bits - double_bits
        print(f"Saved {diff} ({100*diff/single_bits}%) bits.")
        if rel_diff_early > rel_diff_late:
            print("The mid results are considered late.")
            print("Preferring relative difference of averages:")
            print(f"{100*rel_diff_early}% to {100*rel_diff_late}%.")
            # rc==1 is when command is not found.
            # rc==2 is when python interpreter does not find the script.
            exit_code = 3
        else:
            print("The mid results are considered early.")
            print("Preferring relative difference of averages:")
            print(f"{100*rel_diff_late}% to {100*rel_diff_early}%.")
            exit_code = 0
    else:
        # When difference of averages is within stdev,
        # we let jumpavg decide, as here difference in stdev
        # can be the more interesting signal.
        diff = early_bits - late_bits
        if early_bits > late_bits:
            print("The mid results are considered late.")
            print(f"Saved {diff} ({100*diff/early_bits}%) bits.")
            print(f"New relative difference is {100*rel_diff_early}%.")
            exit_code = 3
        else:
            print("The mid results are considered early.")
            print(f"Saved {-diff} ({-100*diff/late_bits}%) bits.")
            print(f"New relative difference is {100*rel_diff_late}%.")
            exit_code = 0
    print(f"Exit code {exit_code}")
    return exit_code
133 if __name__ == "__main__":