Diffstat (limited to 'resources/tools/integrated')
-rw-r--r--  resources/tools/integrated/__init__.py            2
-rw-r--r--  resources/tools/integrated/check_crc.py           7
-rw-r--r--  resources/tools/integrated/compare_bisect.py    134
-rw-r--r--  resources/tools/integrated/compare_perpatch.py  129
4 files changed, 205 insertions, 67 deletions
diff --git a/resources/tools/integrated/__init__.py b/resources/tools/integrated/__init__.py
index a708b16817..fed3209ecf 100644
--- a/resources/tools/integrated/__init__.py
+++ b/resources/tools/integrated/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 Cisco and/or its affiliates.
+# Copyright (c) 2021 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
diff --git a/resources/tools/integrated/check_crc.py b/resources/tools/integrated/check_crc.py
index 1d98a467e2..01a557c30c 100644
--- a/resources/tools/integrated/check_crc.py
+++ b/resources/tools/integrated/check_crc.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -47,6 +47,7 @@ def main():
     checker = VppApiCrcChecker(api_dir)
     try:
         checker.report_initial_conflicts(report_missing=True)
+        checker.print_warnings()
     except RuntimeError as err:
         stderr_lines = [
             f"{err!r}",
@@ -62,8 +63,8 @@ def main():
             u"",
             u"In the former case, please consult the following document",
             u"to see how to make CSIT accept the .api editing change.",
-            u"https://github.com/FDio/csit/blob/master/docs/automating_vpp_api_flag_day.rst"
-            u"",
+            u"https://github.com/FDio/csit/blob/master/docs/"
+            u"automating_vpp_api_flag_day.rst",
             u"For the latter case, please rebase the patch to see",
             u"if that fixes the problem. If repeated rebases do not help",
             u"send and email to csit-dev@lists.fd.io asking to investigate.",
diff --git a/resources/tools/integrated/compare_bisect.py b/resources/tools/integrated/compare_bisect.py
new file mode 100644
index 0000000000..7a48bcc2af
--- /dev/null
+++ b/resources/tools/integrated/compare_bisect.py
@@ -0,0 +1,134 @@
+# Copyright (c) 2024 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Script for analyzing 3 result sets for "git bisect" purposes.
+
+Jumpavg library is used for comparing description length of three groupings.
+The mid result is grouped with early or late result, or as a separate group.
+The jump we are looking for is between the mid and the smaller group
+of the grouping with less bits.
+Except when a grouping with all three sets as separate groups is the smallest.
+In that case we choose the bigger difference in averages.
+"""
+
+import sys
+
+from typing import List, Tuple
+
+from resources.libraries.python import jumpavg
+from resources.libraries.python.model.parse import parse
+
+
+def read_from_dir(dirname: str) -> Tuple[List[float], float]:
+    """Parse samples from dir, print them and stats, return them as list.
+
+    In case there are more test cases, their results are concatenated.
+
+    :param dirname: The directory name (maybe with path) to parse.
+    :type dirname: str
+    :returns: The samples, deserialized from json, and the average.
+    :rtype: Tuple[List[float], float]
+    :raises RuntimeError: On parsing error.
+    """
+    results = parse(dirname)
+    samples = []
+    for result in results.values():
+        samples.extend(result)
+    print(f"Read {dirname}: {samples!r}")
+    stats = jumpavg.AvgStdevStats.for_runs(samples)
+    print(f"Stats: {stats!r}")
+    return samples, stats.avg
+
+
+def main() -> int:
+    """Execute the main logic, return the return code.
+
+    :returns: The return code, 0 or 3 depending on the comparison result.
+    :rtype: int
+    """
+    early_results, early_avg = read_from_dir("csit_early")
+    late_results, late_avg = read_from_dir("csit_late")
+    mid_results, mid_avg = read_from_dir("csit_mid")
+    max_early, abs_diff_late = max(early_avg, mid_avg), abs(late_avg - mid_avg)
+    max_late, abs_diff_early = max(late_avg, mid_avg), abs(early_avg - mid_avg)
+    rel_diff_early = abs_diff_early / max_early if max_early else 0.0
+    rel_diff_late = abs_diff_late / max_late if max_late else 0.0
+    max_value = max(early_results + mid_results + late_results)
+    # Create a common group list with just the early group.
+    common_group_list = jumpavg.BitCountingGroupList(
+        max_value=max_value
+    ).append_group_of_runs(early_results)
+    # Try grouping the mid with the early.
+    early_group_list = common_group_list.copy()
+    early_group_list.extend_runs_to_last_group(mid_results)
+    early_group_list.append_group_of_runs(late_results)
+    early_bits = early_group_list.bits
+    print(f"Early group list bits: {early_bits}")
+    # Now the same, but grouping the mid with the late.
+    late_group_list = common_group_list.copy()
+    late_group_list.append_group_of_runs(mid_results)
+    late_group_list.extend_runs_to_last_group(late_results)
+    late_bits = late_group_list.bits
+    print(f"Late group list bits: {late_bits}")
+    # Finally, group each separately, as if double anomaly happened.
+    double_group_list = common_group_list.copy()
+    double_group_list.append_group_of_runs(mid_results)
+    double_group_list.append_group_of_runs(late_results)
+    double_bits = double_group_list.bits
+    print(f"Double group list bits: {double_bits}")
+    single_bits = min(early_bits, late_bits)
+    if double_bits <= single_bits:
+        # In this case, comparing early_bits with late_bits is not the best,
+        # as that would probably select based on stdev, not based on diff.
+        # Example: mid (small stdev) is closer to early (small stdev),
+        # and farther from late (big stdev).
+        # As grouping mid with early would increase their combined stdev,
+        # it is not selected. This means a noisy late bound can affect
+        # what human perceives as the more interesting region.
+        # So we select only based on averages.
+        print("Perhaps two different anomalies. Selecting by averages only.")
+        diff = single_bits - double_bits
+        print(f"Saved {diff} ({100*diff/single_bits}%) bits.")
+        if rel_diff_early > rel_diff_late:
+            print("The mid results are considered late.")
+            print("Preferring relative difference of averages:")
+            print(f"{100*rel_diff_early}% to {100*rel_diff_late}%.")
+            # rc==1 is when command is not found.
+            # rc==2 is when python interpreter does not find the script.
+            exit_code = 3
+        else:
+            print("The mid results are considered early.")
+            print("Preferring relative difference of averages:")
+            print(f"{100*rel_diff_late}% to {100*rel_diff_early}%.")
+            exit_code = 0
+    else:
+        # When difference of averages is within stdev,
+        # we let jumpavg decide, as here difference in stdev
+        # can be the more interesting signal.
+        diff = early_bits - late_bits
+        if early_bits > late_bits:
+            print("The mid results are considered late.")
+            print(f"Saved {diff} ({100*diff/early_bits}%) bits.")
+            print(f"New relative difference is {100*rel_diff_early}%.")
+            exit_code = 3
+        else:
+            print("The mid results are considered early.")
+            print(f"Saved {-diff} ({-100*diff/late_bits}%) bits.")
+            print(f"New relative difference is {100*rel_diff_late}%.")
+            exit_code = 0
+    print(f"Exit code {exit_code}")
+    return exit_code
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/resources/tools/integrated/compare_perpatch.py b/resources/tools/integrated/compare_perpatch.py
index 62e55629f6..59ea7e5d0f 100644
--- a/resources/tools/integrated/compare_perpatch.py
+++ b/resources/tools/integrated/compare_perpatch.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -13,72 +13,72 @@
 """Script for determining whether per-patch perf test votes -1.
 
-This script assumes there exist two text files with processed BMRR results,
-located at hardcoded relative paths (subdirs thereof), having several lines
-of json-parseable lists of float values, corresponding to testcase results.
+This script expects a particular tree created on a filesystem by
+per_patch_perf.sh bootstrap script, including test results
+exported as json files according to a current model schema.
+This script extracts the results (according to result type)
+and joins them into one list of floats for parent and one for current.
+
 This script then uses jumpavg library to determine whether there was
 a regression, progression or no change for each testcase.
-If number of tests does not match, or there was a regression,
+
+If the set of test names does not match, or there was a regression,
 this script votes -1 (by exiting with code 1), otherwise it votes +1 (exit 0).
 """
 
-import json
 import sys
 
 from resources.libraries.python import jumpavg
+from resources.libraries.python.model.parse import parse
+
+
+def main() -> int:
+    """Execute the main logic, return a number to return as the return code.
+    Call parse to get parent and current data.
+    Use higher fake value for parent, so changes that keep a test failing
+    are marked as regressions.
 
 
-def main():
-    """Execute the main logic, return the code to return as return code.
+    If there are multiple iterations, the value lists are joined.
+    For each test, call jumpavg.classify to detect possible regression.
+
+    If there is at least one regression, return 3.
 
     :returns: Return code, 0 or 3 based on the comparison result.
     :rtype: int
     """
     iteration = -1
-    parent_iterations = list()
-    current_iterations = list()
-    num_tests = None
+    parent_aggregate = {}
+    current_aggregate = {}
+    test_names = None
     while 1:
         iteration += 1
-        parent_lines = list()
-        current_lines = list()
-        filename = f"csit_parent/{iteration}/results.txt"
-        try:
-            with open(filename) as parent_file:
-                parent_lines = parent_file.readlines()
-        except IOError:
+        parent_results = {}
+        current_results = {}
+        parent_results = parse(f"csit_parent/{iteration}", fake_value=2.0)
+        parent_names = list(parent_results)
+        if test_names is None:
+            test_names = parent_names
+        if not parent_names:
+            # No more iterations.
             break
-        num_lines = len(parent_lines)
-        filename = f"csit_current/{iteration}/results.txt"
-        with open(filename) as current_file:
-            current_lines = current_file.readlines()
-        if num_lines != len(current_lines):
-            print(
-                f"Number of tests does not match within iteration {iteration}",
-                file=sys.stderr
-            )
-            return 1
-        if num_tests is None:
-            num_tests = num_lines
-        elif num_tests != num_lines:
-            print(
-                f"Number of tests does not match previous at iteration "
-                f"{iteration}", file=sys.stderr
-            )
-            return 1
-        parent_iterations.append(parent_lines)
-        current_iterations.append(current_lines)
+        assert parent_names == test_names, f"{parent_names} != {test_names}"
+        current_results = parse(f"csit_current/{iteration}", fake_value=1.0)
+        current_names = list(current_results)
+        assert (
+            current_names == parent_names
+        ), f"{current_names} != {parent_names}"
+        for name in test_names:
+            if name not in parent_aggregate:
+                parent_aggregate[name] = []
+            if name not in current_aggregate:
+                current_aggregate[name] = []
+            parent_aggregate[name].extend(parent_results[name])
+            current_aggregate[name].extend(current_results[name])
     exit_code = 0
-    for test_index in range(num_tests):
-        parent_values = list()
-        current_values = list()
-        for iteration_index in range(len(parent_iterations)):
-            parent_values.extend(
-                json.loads(parent_iterations[iteration_index][test_index])
-            )
-            current_values.extend(
-                json.loads(current_iterations[iteration_index][test_index])
-            )
+    for name in test_names:
+        parent_values = parent_aggregate[name]
+        current_values = current_aggregate[name]
         print(f"Time-ordered MRR values for parent build: {parent_values}")
         print(f"Time-ordered MRR values for current build: {current_values}")
         parent_values = sorted(parent_values)
@@ -87,11 +87,14 @@ def main():
         parent_stats = jumpavg.AvgStdevStats.for_runs(parent_values)
         current_stats = jumpavg.AvgStdevStats.for_runs(current_values)
         parent_group_list = jumpavg.BitCountingGroupList(
-            max_value=max_value).append_group_of_runs([parent_stats])
-        combined_group_list = parent_group_list.copy(
-        ).extend_runs_to_last_group([current_stats])
+            max_value=max_value
+        ).append_group_of_runs([parent_stats])
+        combined_group_list = (
+            parent_group_list.copy().extend_runs_to_last_group([current_stats])
+        )
         separated_group_list = parent_group_list.append_group_of_runs(
-            [current_stats])
+            [current_stats]
+        )
         print(f"Value-ordered MRR values for parent build: {parent_values}")
         print(f"Value-ordered MRR values for current build: {current_values}")
         avg_diff = (current_stats.avg - parent_stats.avg) / parent_stats.avg
@@ -103,7 +106,7 @@ def main():
             f" {combined_group_list[0].stats}"
         )
         bits_diff = separated_group_list.bits - combined_group_list.bits
-        compared = u"longer" if bits_diff >= 0 else u"shorter"
+        compared = "longer" if bits_diff >= 0 else "shorter"
         print(
             f"Separate groups are {compared} than single group"
             f" by {abs(bits_diff)} bits"
@@ -111,17 +114,17 @@ def main():
         # TODO: Version of classify that takes max_value and list of stats?
         # That matters if only stats (not list of floats) are given.
         classified_list = jumpavg.classify([parent_values, current_values])
-        if len(classified_list) < 2:
-            print(f"Test test_index {test_index}: normal (no anomaly)")
-            continue
-        anomaly = classified_list[1].comment
-        if anomaly == u"regression":
-            print(f"Test test_index {test_index}: anomaly regression")
-            exit_code = 3  # 1 or 2 can be caused by other errors
-            continue
-        print(f"Test test_index {test_index}: anomaly {anomaly}")
+        anomaly_name = "normal (no anomaly)"
+        if len(classified_list) > 1:
+            anomaly = classified_list[1].comment
+            anomaly_name = "anomaly progression"
+            if anomaly == "regression":
+                anomaly_name = "anomaly regression"
+                exit_code = 3  # 1 or 2 can be caused by other errors
+        print(f"Test name {name}: {anomaly_name}")
     print(f"Exit code: {exit_code}")
     return exit_code
 
-if __name__ == u"__main__":
+
+if __name__ == "__main__":
     sys.exit(main())
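
For orientation, below is a minimal stand-alone sketch (not part of the patch) of the jumpavg call pattern that the new compare_perpatch.py code relies on. The sample values are hypothetical, and the import assumes the CSIT repository root is on PYTHONPATH; only the classify() call and the group .comment attribute shown in the diff above are used.

# Hypothetical illustration of the comparison step used by compare_perpatch.py.
# Assumes the CSIT repo root is on PYTHONPATH so the jumpavg package imports.
from resources.libraries.python import jumpavg

parent_values = sorted([2.1e6, 2.0e6, 2.2e6])   # made-up parent MRR samples
current_values = sorted([1.5e6, 1.6e6, 1.4e6])  # made-up current MRR samples

# classify() groups the value-ordered runs; if the current build ends up in a
# second group commented as "regression", the script above exits with code 3.
classified_list = jumpavg.classify([parent_values, current_values])
if len(classified_list) > 1 and classified_list[1].comment == "regression":
    print("anomaly regression")
else:
    print("no regression vote")

compare_bisect.py drives the same library through BitCountingGroupList, comparing the .bits description length of the candidate groupings as shown in the new file above.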