Diffstat (limited to 'resources/tools/integrated')
-rw-r--r--  resources/tools/integrated/__init__.py            2
-rw-r--r--  resources/tools/integrated/check_crc.py           7
-rw-r--r--  resources/tools/integrated/compare_bisect.py    134
-rw-r--r--  resources/tools/integrated/compare_perpatch.py  129
4 files changed, 205 insertions, 67 deletions
diff --git a/resources/tools/integrated/__init__.py b/resources/tools/integrated/__init__.py
index a708b16817..fed3209ecf 100644
--- a/resources/tools/integrated/__init__.py
+++ b/resources/tools/integrated/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 Cisco and/or its affiliates.
+# Copyright (c) 2021 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
diff --git a/resources/tools/integrated/check_crc.py b/resources/tools/integrated/check_crc.py
index 1d98a467e2..01a557c30c 100644
--- a/resources/tools/integrated/check_crc.py
+++ b/resources/tools/integrated/check_crc.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -47,6 +47,7 @@ def main():
     checker = VppApiCrcChecker(api_dir)
     try:
         checker.report_initial_conflicts(report_missing=True)
+        checker.print_warnings()
     except RuntimeError as err:
         stderr_lines = [
             f"{err!r}",
@@ -62,8 +63,8 @@ def main():
             u"",
             u"In the former case, please consult the following document",
             u"to see how to make CSIT accept the .api editing change.",
-            u"https://github.com/FDio/csit/blob/master/docs/automating_vpp_api_flag_day.rst"
-            u"",
+            u"https://github.com/FDio/csit/blob/master/docs/"
+            u"automating_vpp_api_flag_day.rst",
             u"For the latter case, please rebase the patch to see",
             u"if that fixes the problem. If repeated rebases do not help",
             u"send and email to csit-dev@lists.fd.io asking to investigate.",
diff --git a/resources/tools/integrated/compare_bisect.py b/resources/tools/integrated/compare_bisect.py
new file mode 100644
index 0000000000..7a48bcc2af
--- /dev/null
+++ b/resources/tools/integrated/compare_bisect.py
@@ -0,0 +1,134 @@
+# Copyright (c) 2024 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Script for analyzing 3 result sets for "git bisect" purposes.
+
+Jumpavg library is used for comparing description length of three groupings.
+The mid result is grouped with early or late result, or as a separate group.
+The jump we are looking for is between the mid and the smaller group
+of the grouping with less bits.
+Except when a grouping with all three sets as separate groups is the smallest.
+In that case we choose the bigger difference in averages.
+"""
+
+import sys
+
+from typing import List, Tuple
+
+from resources.libraries.python import jumpavg
+from resources.libraries.python.model.parse import parse
+
+
+def read_from_dir(dirname: str) -> Tuple[List[float], float]:
+    """Parse samples from dir, print them and stats, return them as list.
+
+    In case there are more test cases, their results are concatenated.
+
+    :param dirname: The directory name (maybe with path) to parse.
+    :type dirname: str
+    :returns: The samples, deserialized from json, and the average.
+    :rtype: Tuple[List[float], float]
+    :raises RuntimeError: On parsing error.
+    """
+    results = parse(dirname)
+    samples = []
+    for result in results.values():
+        samples.extend(result)
+    print(f"Read {dirname}: {samples!r}")
+    stats = jumpavg.AvgStdevStats.for_runs(samples)
+    print(f"Stats: {stats!r}")
+    return samples, stats.avg
+
+
+def main() -> int:
+    """Execute the main logic, return the return code.
+
+    :returns: The return code, 0 or 3 depending on the comparison result.
+    :rtype: int
+    """
+    early_results, early_avg = read_from_dir("csit_early")
+    late_results, late_avg = read_from_dir("csit_late")
+    mid_results, mid_avg = read_from_dir("csit_mid")
+    max_early, abs_diff_late = max(early_avg, mid_avg), abs(late_avg - mid_avg)
+    max_late, abs_diff_early = max(late_avg, mid_avg), abs(early_avg - mid_avg)
+    rel_diff_early = abs_diff_early / max_early if max_early else 0.0
+    rel_diff_late = abs_diff_late / max_late if max_late else 0.0
+    max_value = max(early_results + mid_results + late_results)
+    # Create a common group list with just the early group.
+    common_group_list = jumpavg.BitCountingGroupList(
+        max_value=max_value
+    ).append_group_of_runs(early_results)
+    # Try grouping the mid with the early.
+    early_group_list = common_group_list.copy()
+    early_group_list.extend_runs_to_last_group(mid_results)
+    early_group_list.append_group_of_runs(late_results)
+    early_bits = early_group_list.bits
+    print(f"Early group list bits: {early_bits}")
+    # Now the same, but grouping the mid with the late.
+    late_group_list = common_group_list.copy()
+    late_group_list.append_group_of_runs(mid_results)
+    late_group_list.extend_runs_to_last_group(late_results)
+    late_bits = late_group_list.bits
+    print(f"Late group list bits: {late_bits}")
+    # Finally, group each separately, as if double anomaly happened.
+    double_group_list = common_group_list.copy()
+    double_group_list.append_group_of_runs(mid_results)
+    double_group_list.append_group_of_runs(late_results)
+    double_bits = double_group_list.bits
+    print(f"Double group list bits: {double_bits}")
+    single_bits = min(early_bits, late_bits)
+    if double_bits <= single_bits:
+        # In this case, comparing early_bits with late_bits is not the best,
+        # as that would probably select based on stdev, not based on diff.
+        # Example: mid (small stdev) is closer to early (small stdev),
+        # and farther from late (big stdev).
+        # As grouping mid with early would increase their combined stdev,
+        # it is not selected. This means a noisy late bound can affect
+        # what human perceives as the more interesting region.
+        # So we select only based on averages.
+        print("Perhaps two different anomalies. Selecting by averages only.")
+        diff = single_bits - double_bits
+        print(f"Saved {diff} ({100*diff/single_bits}%) bits.")
+        if rel_diff_early > rel_diff_late:
+            print("The mid results are considered late.")
+            print("Preferring relative difference of averages:")
+            print(f"{100*rel_diff_early}% to {100*rel_diff_late}%.")
+            # rc==1 is when command is not found.
+            # rc==2 is when python interpreter does not find the script.
+            exit_code = 3
+        else:
+            print("The mid results are considered early.")
+            print("Preferring relative difference of averages:")
+            print(f"{100*rel_diff_late}% to {100*rel_diff_early}%.")
+            exit_code = 0
+    else:
+        # When difference of averages is within stdev,
+        # we let jumpavg decide, as here difference in stdev
+        # can be the more interesting signal.
+        diff = early_bits - late_bits
+        if early_bits > late_bits:
+            print("The mid results are considered late.")
+            print(f"Saved {diff} ({100*diff/early_bits}%) bits.")
+            print(f"New relative difference is {100*rel_diff_early}%.")
+            exit_code = 3
+        else:
+            print("The mid results are considered early.")
+            print(f"Saved {-diff} ({-100*diff/late_bits}%) bits.")
+            print(f"New relative difference is {100*rel_diff_late}%.")
+            exit_code = 0
+    print(f"Exit code {exit_code}")
+    return exit_code
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/resources/tools/integrated/compare_perpatch.py b/resources/tools/integrated/compare_perpatch.py
index 62e55629f6..59ea7e5d0f 100644
--- a/resources/tools/integrated/compare_perpatch.py
+++ b/resources/tools/integrated/compare_perpatch.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -13,72 +13,72 @@
 """Script for determining whether per-patch perf test votes -1.
 
-This script assumes there exist two text files with processed BMRR results,
-located at hardcoded relative paths (subdirs thereof), having several lines
-of json-parseable lists of float values, corresponding to testcase results.
+This script expects a particular tree created on a filesystem by
+per_patch_perf.sh bootstrap script, including test results
+exported as json files according to a current model schema.
+This script extracts the results (according to result type)
+and joins them into one list of floats for parent and one for current.
+
 This script then uses jumpavg library to determine whether there was
 a regression, progression or no change for each testcase.
-If number of tests does not match, or there was a regression,
+
+If the set of test names does not match, or there was a regression,
 this script votes -1 (by exiting with code 1), otherwise it votes +1 (exit 0).
 """
 
-import json
 import sys
 
 from resources.libraries.python import jumpavg
+from resources.libraries.python.model.parse import parse
+
+
+def main() -> int:
+    """Execute the main logic, return a number to return as the return code.
+    Call parse to get parent and current data.
+    Use higher fake value for parent, so changes that keep a test failing
+    are marked as regressions.
 
 
-def main():
-    """Execute the main logic, return the code to return as return code.
+    If there are multiple iterations, the value lists are joined.
+    For each test, call jumpavg.classify to detect possible regression.
+
+    If there is at least one regression, return 3.
 
     :returns: Return code, 0 or 3 based on the comparison result.
     :rtype: int
     """
     iteration = -1
-    parent_iterations = list()
-    current_iterations = list()
-    num_tests = None
+    parent_aggregate = {}
+    current_aggregate = {}
+    test_names = None
     while 1:
         iteration += 1
-        parent_lines = list()
-        current_lines = list()
-        filename = f"csit_parent/{iteration}/results.txt"
-        try:
-            with open(filename) as parent_file:
-                parent_lines = parent_file.readlines()
-        except IOError:
+        parent_results = {}
+        current_results = {}
+        parent_results = parse(f"csit_parent/{iteration}", fake_value=2.0)
+        parent_names = list(parent_results)
+        if test_names is None:
+            test_names = parent_names
+        if not parent_names:
+            # No more iterations.
             break
-        num_lines = len(parent_lines)
-        filename = f"csit_current/{iteration}/results.txt"
-        with open(filename) as current_file:
-            current_lines = current_file.readlines()
-        if num_lines != len(current_lines):
-            print(
-                f"Number of tests does not match within iteration {iteration}",
-                file=sys.stderr
-            )
-            return 1
-        if num_tests is None:
-            num_tests = num_lines
-        elif num_tests != num_lines:
-            print(
-                f"Number of tests does not match previous at iteration "
-                f"{iteration}", file=sys.stderr
-            )
-            return 1
-        parent_iterations.append(parent_lines)
-        current_iterations.append(current_lines)
+        assert parent_names == test_names, f"{parent_names} != {test_names}"
+        current_results = parse(f"csit_current/{iteration}", fake_value=1.0)
+        current_names = list(current_results)
+        assert (
+            current_names == parent_names
+        ), f"{current_names} != {parent_names}"
+        for name in test_names:
+            if name not in parent_aggregate:
+                parent_aggregate[name] = []
+            if name not in current_aggregate:
+                current_aggregate[name] = []
+            parent_aggregate[name].extend(parent_results[name])
+            current_aggregate[name].extend(current_results[name])
     exit_code = 0
-    for test_index in range(num_tests):
-        parent_values = list()
-        current_values = list()
-        for iteration_index in range(len(parent_iterations)):
-            parent_values.extend(
-                json.loads(parent_iterations[iteration_index][test_index])
-            )
-            current_values.extend(
-                json.loads(current_iterations[iteration_index][test_index])
-            )
+    for name in test_names:
+        parent_values = parent_aggregate[name]
+        current_values = current_aggregate[name]
         print(f"Time-ordered MRR values for parent build: {parent_values}")
         print(f"Time-ordered MRR values for current build: {current_values}")
         parent_values = sorted(parent_values)
@@ -87,11 +87,14 @@ def main():
         parent_stats = jumpavg.AvgStdevStats.for_runs(parent_values)
         current_stats = jumpavg.AvgStdevStats.for_runs(current_values)
         parent_group_list = jumpavg.BitCountingGroupList(
-            max_value=max_value).append_group_of_runs([parent_stats])
-        combined_group_list = parent_group_list.copy(
-        ).extend_runs_to_last_group([current_stats])
+            max_value=max_value
+        ).append_group_of_runs([parent_stats])
+        combined_group_list = (
+            parent_group_list.copy().extend_runs_to_last_group([current_stats])
+        )
         separated_group_list = parent_group_list.append_group_of_runs(
-            [current_stats])
+            [current_stats]
+        )
         print(f"Value-ordered MRR values for parent build: {parent_values}")
         print(f"Value-ordered MRR values for current build: {current_values}")
         avg_diff = (current_stats.avg - parent_stats.avg) / parent_stats.avg
@@ -103,7 +106,7 @@ def main():
             f" {combined_group_list[0].stats}"
         )
         bits_diff = separated_group_list.bits - combined_group_list.bits
-        compared = u"longer" if bits_diff >= 0 else u"shorter"
+        compared = "longer" if bits_diff >= 0 else "shorter"
         print(
             f"Separate groups are {compared} than single group"
             f" by {abs(bits_diff)} bits"
@@ -111,17 +114,17 @@ def main():
         # TODO: Version of classify that takes max_value and list of stats?
         # That matters if only stats (not list of floats) are given.
         classified_list = jumpavg.classify([parent_values, current_values])
-        if len(classified_list) < 2:
-            print(f"Test test_index {test_index}: normal (no anomaly)")
-            continue
-        anomaly = classified_list[1].comment
-        if anomaly == u"regression":
-            print(f"Test test_index {test_index}: anomaly regression")
-            exit_code = 3  # 1 or 2 can be caused by other errors
-            continue
-        print(f"Test test_index {test_index}: anomaly {anomaly}")
+        anomaly_name = "normal (no anomaly)"
+        if len(classified_list) > 1:
+            anomaly = classified_list[1].comment
+            anomaly_name = "anomaly progression"
+            if anomaly == "regression":
+                anomaly_name = "anomaly regression"
+                exit_code = 3  # 1 or 2 can be caused by other errors
+        print(f"Test name {name}: {anomaly_name}")
     print(f"Exit code: {exit_code}")
     return exit_code
 
-if __name__ == u"__main__":
+
+if __name__ == "__main__":
     sys.exit(main())
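
For orientation, below is a minimal stand-alone sketch (not part of the patch) of the jumpavg call pattern that the new compare_perpatch.py code relies on. The sample values are hypothetical, and the import assumes the CSIT repository root is on PYTHONPATH; only the classify() call and the group .comment attribute shown in the diff above are used.

# Hypothetical illustration of the comparison step used by compare_perpatch.py.
# Assumes the CSIT repo root is on PYTHONPATH so the jumpavg package imports.
from resources.libraries.python import jumpavg

parent_values = sorted([2.1e6, 2.0e6, 2.2e6])   # made-up parent MRR samples
current_values = sorted([1.5e6, 1.6e6, 1.4e6])  # made-up current MRR samples

# classify() groups the value-ordered runs; if the current build ends up in a
# second group commented as "regression", the script above exits with code 3.
classified_list = jumpavg.classify([parent_values, current_values])
if len(classified_list) > 1 and classified_list[1].comment == "regression":
    print("anomaly regression")
else:
    print("no regression vote")

compare_bisect.py drives the same library through BitCountingGroupList, comparing the .bits description length of the candidate groupings as shown in the new file above.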