diff options
Diffstat (limited to 'resources/tools/integrated')
-rw-r--r-- | resources/tools/integrated/compare_bisect.py | 132 | ||||
-rw-r--r-- | resources/tools/integrated/compare_perpatch.py | 58 |
2 files changed, 134 insertions, 56 deletions
diff --git a/resources/tools/integrated/compare_bisect.py b/resources/tools/integrated/compare_bisect.py new file mode 100644 index 0000000000..247ba507ef --- /dev/null +++ b/resources/tools/integrated/compare_bisect.py @@ -0,0 +1,132 @@ +# Copyright (c) 2023 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Script for analyzing 3 result sets for "git bisect" purposes. + +Jumpavg library is used for comparing description length of three groupings. +The mid result is grouped with early or late result, or as a separate group. +The jump we are looking for is between the mid and the smaller group +of the grouping with fewer bits. +Except when a grouping with all three sets as separate groups is the smallest. +In that case we choose the bigger difference in averages. +""" + +import sys + +from typing import List, Tuple + +from resources.libraries.python import jumpavg +from resources.libraries.python.model.parse import parse + + +def read_from_dir(dirname: str) -> Tuple[List[float], float]: +    """Parse samples from dir, print them and stats, return them as list. + +    In case there are more test cases, their results are concatenated. + +    :param dirname: The directory name (maybe with path) to parse. +    :type dirname: str +    :returns: The samples, deserialized from json, and the average. +    :rtype: Tuple[List[float], float] +    :raises RuntimeError: On parsing error. 
+ """ + results = parse(dirname) + samples = [] + for result in results.values(): + samples.extend(result) + print(f"Read {dirname}: {samples!r}") + stats = jumpavg.AvgStdevStats.for_runs(samples) + print(f"Stats: {stats!r}") + return samples, stats.avg + + +def main() -> int: + """Execute the main logic, return the return code. + + :returns: The return code, 0 or 3 depending on the comparison result. + :rtype: int + """ + early_results, early_avg = read_from_dir("csit_early") + late_results, late_avg = read_from_dir("csit_late") + mid_results, mid_avg = read_from_dir("csit_mid") + rel_diff_to_early = abs(early_avg - mid_avg) / max(early_avg, mid_avg) + rel_diff_to_late = abs(late_avg - mid_avg) / max(late_avg, mid_avg) + max_value = max(early_results + mid_results + late_results) + # Create a common group list with just the early group. + common_group_list = jumpavg.BitCountingGroupList( + max_value=max_value + ).append_group_of_runs(early_results) + # Try grouping the mid with the early. + early_group_list = common_group_list.copy() + early_group_list.extend_runs_to_last_group(mid_results) + early_group_list.append_group_of_runs(late_results) + early_bits = early_group_list.bits + print(f"Early group list bits: {early_bits}") + # Now the same, but grouping the mid with the late. + late_group_list = common_group_list.copy() + late_group_list.append_group_of_runs(mid_results) + late_group_list.extend_runs_to_last_group(late_results) + late_bits = late_group_list.bits + print(f"Late group list bits: {late_bits}") + # Finally, group each separately, as if double anomaly happened. 
+ double_group_list = common_group_list.copy() + double_group_list.append_group_of_runs(mid_results) + double_group_list.append_group_of_runs(late_results) + double_bits = double_group_list.bits + print(f"Double group list bits: {double_bits}") + single_bits = min(early_bits, late_bits) + if double_bits <= single_bits: + # In this case, comparing early_bits with late_bits is not the best, + # as that would probably select based on stdev, not based on diff. + # Example: mid (small stdev) is closer to early (small stdev), + # and farther from late (big stdev). + # As grouping mid with early would increase their combined stdev, + # it is not selected. This means a noisy late bound can affect + # what human perceives as the more interesting region. + # So we select only based on averages. + print("Perhaps two different anomalies. Selecting by averages only.") + diff = single_bits - double_bits + print(f"Saved {diff} ({100*diff/single_bits}%) bits.") + if rel_diff_to_early > rel_diff_to_late: + print("The mid results are considered late.") + print("Preferring relative difference of averages:") + print(f"{100*rel_diff_to_early}% to {100*rel_diff_to_late}%.") + # rc==1 is when command is not found. + # rc==2 is when python interpreter does not find the script. + exit_code = 3 + else: + print("The mid results are considered early.") + print("Preferring relative difference of averages:") + print(f"{100*rel_diff_to_late}% to {100*rel_diff_to_early}%.") + exit_code = 0 + else: + # When difference of averages is within stdev, + # we let jumpavg decide, as here difference in stdev + # can be the more interesting signal. 
+ diff = early_bits - late_bits + if early_bits > late_bits: + print("The mid results are considered late.") + print(f"Saved {diff} ({100*diff/early_bits}%) bits.") + print(f"New relative difference is {100*rel_diff_to_early}%.") + exit_code = 3 + else: + print("The mid results are considered early.") + print(f"Saved {-diff} ({-100*diff/late_bits}%) bits.") + print(f"New relative difference is {100*rel_diff_to_late}%.") + exit_code = 0 + print(f"Exit code {exit_code}") + return exit_code + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/resources/tools/integrated/compare_perpatch.py b/resources/tools/integrated/compare_perpatch.py index 0adb6ae73e..9b04b7bdea 100644 --- a/resources/tools/integrated/compare_perpatch.py +++ b/resources/tools/integrated/compare_perpatch.py @@ -16,7 +16,7 @@ This script expects a particular tree created on a filesystem by per_patch_perf.sh bootstrap script, including test results exported as json files according to a current model schema. -This script extracts the results (according to tresult type) +This script extracts the results (according to result type) and joins them into one list of floats for parent and one for current. This script then uses jumpavg library to determine whether there was @@ -26,64 +26,10 @@ If the set of test names does not match, or there was a regression, this script votes -1 (by exiting with code 1), otherwise it votes +1 (exit 0). """ -import json -import os import sys -from typing import Dict, List - from resources.libraries.python import jumpavg - - -def parse(dirpath: str, fake_value: float) -> Dict[str, List[float]]: - """Looks for test jsons, extract scalar results. - - Files other than .json are skipped, jsons without test_id are skipped. - If the test failed, four fake values are used as a fake result. - - Units are ignored, as both parent and current are tested - with the same CSIT code so the unit should be identical. - - :param dirpath: Path to the directory tree to examine. 
- :param fail_value: Fake value to use for test cases that failed. - :type dirpath: str - :returns: Mapping from test IDs to list of measured values. - :rtype: Dict[str, List[float]] - :raises RuntimeError: On duplicate test ID or unknown test type. - """ - results = {} - for root, _, files in os.walk(dirpath): - for filename in files: - if not filename.endswith(".json"): - continue - filepath = os.path.join(root, filename) - with open(filepath, "rt", encoding="utf8") as file_in: - data = json.load(file_in) - if "test_id" not in data: - continue - name = data["test_id"] - if name in results: - raise RuntimeError(f"Duplicate: {name}") - if not data["passed"]: - results[name] = [fake_value] * 4 - continue - result_object = data["result"] - result_type = result_object["type"] - if result_type == "mrr": - results[name] = result_object["receive_rate"]["rate"]["values"] - elif result_type == "ndrpdr": - results[name] = [result_object["pdr"]["lower"]["rate"]["value"]] - elif result_type == "soak": - results[name] = [ - result_object["critical_rate"]["lower"]["rate"]["value"] - ] - elif result_type == "reconf": - results[name] = [result_object["loss"]["time"]["value"]] - elif result_type == "hoststack": - results[name] = [result_object["bandwidth"]["value"]] - else: - raise RuntimeError(f"Unknown result type: {result_type}") - return results +from resources.libraries.python.model.parse import parse def main() -> int: |