Python3: resources and libraries

[csit.git] / resources / tools / scripts / compare_perpatch.py
diff --git a/resources/tools/scripts/compare_perpatch.py b/resources/tools/scripts/compare_perpatch.py

index 3bda338..c2c165c 100644 (file)
--- a/resources/tools/scripts/compare_perpatch.py
+++ b/resources/tools/scripts/compare_perpatch.py
@@ -32,6 +32,8 @@ from jumpavg.BitCountingClassifier import BitCountingClassifier
  def hack(value_list):
      """Return middle two quartiles, hoping to reduce influence of outliers.
  
+    Currently "middle two" is "all", but that can change in future.
+
      :param value_list: List to pick subset from.
      :type value_list: list of float
      :returns: New list containing middle values.
@@ -40,77 +42,85 @@ def hack(value_list):
      tmp = sorted(value_list)
      eight = len(tmp) / 8
      ret = tmp[3*eight:-eight]
-    return ret
+    return tmp # ret
+
  
  iteration = -1
  parent_iterations = list()
-new_iterations = list()
+current_iterations = list()
  num_tests = None
  while 1:
      iteration += 1
      parent_lines = list()
-    new_lines = list()
-    filename = "csit_parent/{iter}/results.txt".format(iter=iteration)
+    current_lines = list()
+    filename = f"csit_parent/{iteration}/results.txt"
      try:
          with open(filename) as parent_file:
              parent_lines = parent_file.readlines()
      except IOError:
          break
      num_lines = len(parent_lines)
-    filename = "csit_current/{iter}/results.txt".format(iter=iteration)
-    with open(filename) as new_file:
-        new_lines = new_file.readlines()
-    if num_lines != len(new_lines):
-        print "Number of tests does not match within iteration", iteration
+    filename = f"csit_current/{iteration}/results.txt"
+    with open(filename) as current_file:
+        current_lines = current_file.readlines()
+    if num_lines != len(current_lines):
+        print(f"Number of tests does not match within iteration {iteration}")
          sys.exit(1)
      if num_tests is None:
          num_tests = num_lines
      elif num_tests != num_lines:
-        print "Number of tests does not match previous at iteration", iteration
+        print(
+            f"Number of tests does not match previous at iteration {iteration}"
+        )
          sys.exit(1)
      parent_iterations.append(parent_lines)
-    new_iterations.append(new_lines)
+    current_iterations.append(current_lines)
  classifier = BitCountingClassifier()
  exit_code = 0
  for test_index in range(num_tests):
      val_max = 1.0
      parent_values = list()
-    new_values = list()
+    current_values = list()
      for iteration_index in range(len(parent_iterations)):
          parent_values.extend(
-            json.loads(parent_iterations[iteration_index][test_index]))
-        new_values.extend(
-            json.loads(new_iterations[iteration_index][test_index]))
-    print "TRACE pre-hack parent: {p}".format(p=parent_values)
-    print "TRACE pre-hack current: {n}".format(n=new_values)
+            json.loads(parent_iterations[iteration_index][test_index])
+        )
+        current_values.extend(
+            json.loads(current_iterations[iteration_index][test_index])
+        )
+    print(f"Time-ordered MRR values for parent build: {parent_values}")
+    print(f"Time-ordered MRR values for current build: {current_values}")
      parent_values = hack(parent_values)
-    new_values = hack(new_values)
+    current_values = hack(current_values)
      parent_max = BitCountingMetadataFactory.find_max_value(parent_values)
-    new_max = BitCountingMetadataFactory.find_max_value(new_values)
-    val_max = max(val_max, parent_max, new_max)
+    current_max = BitCountingMetadataFactory.find_max_value(current_values)
+    val_max = max(val_max, parent_max, current_max)
      factory = BitCountingMetadataFactory(val_max)
      parent_stats = factory.from_data(parent_values)
-    new_factory = BitCountingMetadataFactory(val_max, parent_stats.avg)
-    new_stats = new_factory.from_data(new_values)
-    print "TRACE parent: {p}".format(p=parent_values)
-    print "TRACE current: {n}".format(n=new_values)
-    print "DEBUG parent: {p}".format(p=parent_stats)
-    print "DEBUG current: {n}".format(n=new_stats)
-    common_max = max(parent_stats.avg, new_stats.avg)
-    difference = (new_stats.avg - parent_stats.avg) / common_max
-    print "DEBUG difference: {d}%".format(d=100 * difference)
-    classified_list = classifier.classify([parent_stats, new_stats])
+    current_factory = BitCountingMetadataFactory(val_max, parent_stats.avg)
+    current_stats = current_factory.from_data(current_values)
+    both_stats = factory.from_data(parent_values + current_values)
+    print(f"Value-ordered MRR values for parent build: {parent_values}")
+    print(f"Value-ordered MRR values for current build: {current_values}")
+    difference = (current_stats.avg - parent_stats.avg) / parent_stats.avg
+    print(f"Difference of averages relative to parent: {100 * difference}%")
+    print(f"Jumpavg representation of parent group: {parent_stats}")
+    print(f"Jumpavg representation of current group: {current_stats}")
+    print(f"Jumpavg representation of both as one group: {both_stats}")
+    bits = parent_stats.bits + current_stats.bits - both_stats.bits
+    compared = u"longer" if bits >= 0 else u"shorter"
+    print(
+        f"Separate groups are {compared} than single group by {abs(bits)} bits"
+    )
+    classified_list = classifier.classify([parent_stats, current_stats])
      if len(classified_list) < 2:
-        print "Test test_index {test_index}: normal (no anomaly)".format(
-            test_index=test_index)
+        print(f"Test test_index {test_index}: normal (no anomaly)")
          continue
      anomaly = classified_list[1].metadata.classification
-    if anomaly == "regression":
-        print "Test test_index {test_index}: anomaly regression".format(
-            test_index=test_index)
+    if anomaly == u"regression":
+        print(f"Test test_index {test_index}: anomaly regression")
          exit_code = 1
          continue
-    print "Test test_index {test_index}: anomaly {anomaly}".format(
-        test_index=test_index, anomaly=anomaly)
-print "DEBUG exit code {code}".format(code=exit_code)
+    print(f"Test test_index {test_index}: anomaly {anomaly}")
+print(f"Exit code {exit_code}")
  sys.exit(exit_code)