# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Script for determining whether per-patch perf test votes -1.

This script expects a particular tree created on a filesystem by
per_patch_perf.sh bootstrap script, including test results
exported as json files according to a current model schema.
This script extracts the results (according to tresult type)
and joins them into one list of floats for parent and one for current.

This script then uses jumpavg library to determine whether there was
a regression, progression or no change for each testcase.

If the set of test names does not match, or there was a regression,
this script votes -1 (by exiting with code 1), otherwise it votes +1 (exit 0).
"""

import json
import os
import sys

from typing import Dict, List

from resources.libraries.python import jumpavg


def parse(dirpath: str, fake_value: float) -> Dict[str, List[float]]:
    """Looks for test jsons, extract scalar results.

    Files other than .json are skipped, jsons without test_id are skipped.
    If the test failed, four fake values are used as a fake result.

    Units are ignored, as both parent and current are tested
    with the same CSIT code so the unit should be identical.

    :param dirpath: Path to the directory tree to examine.
    :param fail_value: Fake value to use for test cases that failed.
    :type dirpath: str
    :returns: Mapping from test IDs to list of measured values.
    :rtype: Dict[str, List[float]]
    :raises RuntimeError: On duplicate test ID or unknown test type.
    """
    results = {}
    for root, _, files in os.walk(dirpath):
        for filename in files:
            if not filename.endswith(".json"):
                continue
            filepath = os.path.join(root, filename)
            with open(filepath, "rt", encoding="utf8") as file_in:
                data = json.load(file_in)
            if "test_id" not in data:
                continue
            name = data["test_id"]
            if name in results:
                raise RuntimeError(f"Duplicate: {name}")
            if not data["passed"]:
                results[name] = [fake_value] * 4
                continue
            result_object = data["result"]
            result_type = result_object["type"]
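            # A hypothetical minimal mrr export accepted here; the field
            # names mirror the access paths below, the values are invented
            # for illustration only:
            # {"test_id": "...", "passed": true, "result": {"type": "mrr",
            #  "receive_rate": {"rate": {"values": [1e6, 1.1e6]}}}}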
            if result_type == "mrr":
                results[name] = result_object["receive_rate"]["rate"]["values"]
            elif result_type == "ndrpdr":
                results[name] = [result_object["pdr"]["lower"]["rate"]["value"]]
            elif result_type == "soak":
                results[name] = [
                    result_object["critical_rate"]["lower"]["rate"]["value"]
                ]
            elif result_type == "reconf":
                results[name] = [result_object["loss"]["time"]["value"]]
            elif result_type == "hoststack":
                results[name] = [result_object["bandwidth"]["value"]]
            else:
                raise RuntimeError(f"Unknown result type: {result_type}")
    return results


def main() -> int:
    """Execute the main logic, return a number to return as the return code.

    Call parse to get parent and current data.
    Use higher fake value for parent, so changes that keep a test failing
    are marked as regressions.

    If there are multiple iterations, the value lists are joined.
    For each test, call jumpavg.classify to detect possible regression.

    If there is at least one regression, return 3.

    :returns: Return code, 0 or 3 based on the comparison result.
    :rtype: int
    """
    iteration = -1
    parent_aggregate = {}
    current_aggregate = {}
    test_names = None
    while True:
        iteration += 1
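        # Parent gets the higher fake value (2.0 vs 1.0), so a test failing
        # in both builds still shows up as a regression, as noted in the
        # docstring above.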
        parent_results = parse(f"csit_parent/{iteration}", fake_value=2.0)
        parent_names = set(parent_results.keys())
        if test_names is None:
            test_names = parent_names
        if not parent_names:
            # No more iterations.
            break
        assert parent_names == test_names, f"{parent_names} != {test_names}"
        current_results = parse(f"csit_current/{iteration}", fake_value=1.0)
        current_names = set(current_results.keys())
        assert (
            current_names == parent_names
        ), f"{current_names} != {parent_names}"
        for name in test_names:
            if name not in parent_aggregate:
                parent_aggregate[name] = []
            if name not in current_aggregate:
                current_aggregate[name] = []
            parent_aggregate[name].extend(parent_results[name])
            current_aggregate[name].extend(current_results[name])
    exit_code = 0
    for name in test_names:
        print(f"Test name: {name}")
        parent_values = parent_aggregate[name]
        current_values = current_aggregate[name]
        print(f"Time-ordered MRR values for parent build: {parent_values}")
        print(f"Time-ordered MRR values for current build: {current_values}")
        parent_values = sorted(parent_values)
        current_values = sorted(current_values)
        max_value = max([1.0] + parent_values + current_values)
        parent_stats = jumpavg.AvgStdevStats.for_runs(parent_values)
        current_stats = jumpavg.AvgStdevStats.for_runs(current_values)
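        # Three descriptions of the same data are built below: parent runs
        # alone, parent and current merged into one group, and parent and
        # current as two separate groups.  Comparing their bit counts is
        # jumpavg's information-based way of asking whether splitting the
        # runs into two groups pays for itself.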
        parent_group_list = jumpavg.BitCountingGroupList(
            max_value=max_value
        ).append_group_of_runs([parent_stats])
        combined_group_list = (
            parent_group_list.copy().extend_runs_to_last_group([current_stats])
        )
        separated_group_list = parent_group_list.append_group_of_runs(
            [current_stats]
        )
        print(f"Value-ordered MRR values for parent build: {parent_values}")
        print(f"Value-ordered MRR values for current build: {current_values}")
        avg_diff = (current_stats.avg - parent_stats.avg) / parent_stats.avg
        print(f"Difference of averages relative to parent: {100 * avg_diff}%")
        print(f"Jumpavg representation of parent group: {parent_stats}")
        print(f"Jumpavg representation of current group: {current_stats}")
        print(
            f"Jumpavg representation of both as one group:"
            f" {combined_group_list[0].stats}"
        )
        bits_diff = separated_group_list.bits - combined_group_list.bits
        compared = "longer" if bits_diff >= 0 else "shorter"
        print(
            f"Separate groups are {compared} than single group"
            f" by {abs(bits_diff)} bits"
        )
        # TODO: Version of classify that takes max_value and list of stats?
        # That matters if only stats (not list of floats) are given.
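        # classify() returns a list of groups; a single group means parent
        # and current samples are best described together (no anomaly),
        # while the second group's comment names the direction
        # ("regression" or "progression").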
        classified_list = jumpavg.classify([parent_values, current_values])
        if len(classified_list) < 2:
            print(f"Test {name}: normal (no anomaly)")
            continue
        anomaly = classified_list[1].comment
        if anomaly == "regression":
            print(f"Test {name}: anomaly regression")
            exit_code = 3  # 1 or 2 can be caused by other errors
            continue
        print(f"Test {name}: anomaly {anomaly}")
    print(f"Exit code: {exit_code}")
    return exit_code


if __name__ == "__main__":
    sys.exit(main())