1 # Copyright (c) 2023 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
14 """Script for analyzing 3 result sets for "git bisect" purposes.
16 Jumpavg library is used for comparing description length of three groupings.
17 The mid result is grouped with early or late result, or as a separate group.
18 The jump we are looking for is between the mid and the smaller group
of the grouping with fewer bits.
20 Except when a grouping with all three sets as separate groups is the smallest.
In that case we choose the bigger difference in averages.
26 from typing import List, Tuple
28 from resources.libraries.python import jumpavg
29 from resources.libraries.python.model.parse import parse
def read_from_dir(dirname: str) -> Tuple[List[float], float]:
    """Parse samples from dir, print them and stats, return them as list.

    In case there are more test cases, their results are concatenated.

    :param dirname: The directory name (maybe with path) to parse.
    :type dirname: str
    :returns: The samples, deserialized from json, and the average.
    :rtype: Tuple[List[float], float]
    :raises RuntimeError: On parsing error.
    """
    results = parse(dirname)
    # Start from an empty list, so samples of every test case
    # can be concatenated into one flat list.
    samples: List[float] = []
    for result in results.values():
        samples.extend(result)
    print(f"Read {dirname}: {samples!r}")
    # The stats object is only used for logging and for its average.
    stats = jumpavg.AvgStdevStats.for_runs(samples)
    print(f"Stats: {stats!r}")
    return samples, stats.avg
def main() -> int:
    """Execute the main logic, return the return code.

    Samples are read from directories "csit_early", "csit_late"
    and "csit_mid". Three groupings are then compared by their bit count:
    mid joined with early, mid joined with late, and all three separate.
    The decision (and the reasoning behind it) is printed to stdout.

    :returns: The return code, 0 or 3 depending on the comparison result.
    :rtype: int
    """
    early_results, early_avg = read_from_dir("csit_early")
    late_results, late_avg = read_from_dir("csit_late")
    mid_results, mid_avg = read_from_dir("csit_mid")
    rel_diff_to_early = abs(early_avg - mid_avg) / max(early_avg, mid_avg)
    rel_diff_to_late = abs(late_avg - mid_avg) / max(late_avg, mid_avg)
    max_value = max(early_results + mid_results + late_results)
    # Create a common group list with just the early group.
    # NOTE(review): max_value is assumed to be the constructor argument
    # (it is not used anywhere else in this function) - confirm.
    common_group_list = jumpavg.BitCountingGroupList(
        max_value=max_value
    ).append_group_of_runs(early_results)
    # Try grouping the mid with the early.
    early_group_list = common_group_list.copy()
    early_group_list.extend_runs_to_last_group(mid_results)
    early_group_list.append_group_of_runs(late_results)
    early_bits = early_group_list.bits
    print(f"Early group list bits: {early_bits}")
    # Now the same, but grouping the mid with the late.
    late_group_list = common_group_list.copy()
    late_group_list.append_group_of_runs(mid_results)
    late_group_list.extend_runs_to_last_group(late_results)
    late_bits = late_group_list.bits
    print(f"Late group list bits: {late_bits}")
    # Finally, group each separately, as if double anomaly happened.
    double_group_list = common_group_list.copy()
    double_group_list.append_group_of_runs(mid_results)
    double_group_list.append_group_of_runs(late_results)
    double_bits = double_group_list.bits
    print(f"Double group list bits: {double_bits}")
    single_bits = min(early_bits, late_bits)
    if double_bits <= single_bits:
        # In this case, comparing early_bits with late_bits is not the best,
        # as that would probably select based on stdev, not based on diff.
        # Example: mid (small stdev) is closer to early (small stdev),
        # and farther from late (big stdev).
        # As grouping mid with early would increase their combined stdev,
        # it is not selected. This means a noisy late bound can affect
        # what human perceives as the more interesting region.
        # So we select only based on averages.
        print("Perhaps two different anomalies. Selecting by averages only.")
        diff = single_bits - double_bits
        print(f"Saved {diff} ({100*diff/single_bits}%) bits.")
        if rel_diff_to_early > rel_diff_to_late:
            print("The mid results are considered late.")
            print("Preferring relative difference of averages:")
            print(f"{100*rel_diff_to_early}% to {100*rel_diff_to_late}%.")
            # rc==1 is when command is not found.
            # rc==2 is when python interpreter does not find the script.
            # NOTE(review): "late" is assumed to map to the non-zero
            # code 3 and "early" to 0 - confirm against the bisect caller.
            exit_code = 3
        else:
            print("The mid results are considered early.")
            print("Preferring relative difference of averages:")
            print(f"{100*rel_diff_to_late}% to {100*rel_diff_to_early}%.")
            exit_code = 0
    else:
        # When difference of averages is within stdev,
        # we let jumpavg decide, as here difference in stdev
        # can be the more interesting signal.
        diff = early_bits - late_bits
        if early_bits > late_bits:
            print("The mid results are considered late.")
            print(f"Saved {diff} ({100*diff/early_bits}%) bits.")
            print(f"New relative difference is {100*rel_diff_to_early}%.")
            exit_code = 3
        else:
            print("The mid results are considered early.")
            print(f"Saved {-diff} ({-100*diff/late_bits}%) bits.")
            print(f"New relative difference is {100*rel_diff_to_late}%.")
            exit_code = 0
    print(f"Exit code {exit_code}")
    return exit_code


if __name__ == "__main__":
    # SystemExit propagates the return code to the calling process
    # without requiring any additional import.
    raise SystemExit(main())