# resources/tools/integrated/compare_perpatch.py

# Copyright (c) 2019 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Script for determining whether per-patch perf test votes -1.

This script assumes there exist two text files with processed BMRR results,
located at hardcoded relative paths (subdirs thereof), having several lines
of json-parseable lists of float values, corresponding to testcase results.
This script then uses jumpavg library to determine whether there was
a regression, progression or no change for each testcase.
If number of tests does not match (exit code 1), or there was a regression
(exit code 3), this script votes -1, otherwise it votes +1 (exit code 0).
"""

import json
import sys

from resources.libraries.python import jumpavg


def main():
    """Compare parent and current results, return the code to exit with.

    Result files are read from csit_parent/{N}/results.txt and
    csit_current/{N}/results.txt for N = 0, 1, 2, ... until the first N
    for which the parent file cannot be opened. Each line of a file is
    parsed as a json list of values belonging to one testcase; values
    of the same testcase are concatenated across iterations.

    For each testcase several diagnostic lines are printed to stdout,
    and jumpavg.classify is used to decide whether the current values
    form a regression relative to the parent values.

    :returns: Return code. 0 if no regression was detected,
        1 if result files are missing or their line counts do not match,
        3 if at least one testcase is classified as a regression.
    :rtype: int
    """
    parent_iterations = list()
    current_iterations = list()
    num_tests = None
    iteration = -1
    while True:
        iteration += 1
        filename = f"csit_parent/{iteration}/results.txt"
        try:
            with open(filename) as parent_file:
                parent_lines = parent_file.readlines()
        except IOError:
            # The first unreadable parent file marks the end of iterations.
            break
        filename = f"csit_current/{iteration}/results.txt"
        try:
            with open(filename) as current_file:
                current_lines = current_file.readlines()
        except IOError as err:
            # Parent has this iteration but current does not; report it
            # explicitly instead of dying with a traceback.
            print(
                f"Cannot read current results {filename}: {err}",
                file=sys.stderr
            )
            return 1
        num_lines = len(parent_lines)
        if num_lines != len(current_lines):
            print(
                f"Number of tests does not match within iteration {iteration}",
                file=sys.stderr
            )
            return 1
        if num_tests is None:
            num_tests = num_lines
        elif num_tests != num_lines:
            print(
                f"Number of tests does not match previous at iteration "
                f"{iteration}", file=sys.stderr
            )
            return 1
        parent_iterations.append(parent_lines)
        current_iterations.append(current_lines)
    if num_tests is None:
        # Not a single iteration was found, so there is nothing to compare.
        # Without this guard range(None) below would raise TypeError.
        print(u"No results found, nothing to compare", file=sys.stderr)
        return 1
    exit_code = 0
    for test_index in range(num_tests):
        parent_values = list()
        current_values = list()
        for parent_lines, current_lines in zip(
                parent_iterations, current_iterations):
            parent_values.extend(json.loads(parent_lines[test_index]))
            current_values.extend(json.loads(current_lines[test_index]))
        print(f"Time-ordered MRR values for parent build: {parent_values}")
        print(f"Time-ordered MRR values for current build: {current_values}")
        parent_values = sorted(parent_values)
        current_values = sorted(current_values)
        # The 1.0 keeps max() defined and positive even for empty input.
        max_value = max([1.0] + parent_values + current_values)
        parent_stats = jumpavg.AvgStdevStats.for_runs(parent_values)
        current_stats = jumpavg.AvgStdevStats.for_runs(current_values)
        parent_group_list = jumpavg.BitCountingGroupList(
            max_value=max_value).append_group_of_runs([parent_stats])
        # The copy has to be taken before the second append below,
        # as that call is applied to parent_group_list itself.
        combined_group_list = parent_group_list.copy(
            ).extend_runs_to_last_group([current_stats])
        separated_group_list = parent_group_list.append_group_of_runs(
            [current_stats])
        print(f"Value-ordered MRR values for parent build: {parent_values}")
        print(f"Value-ordered MRR values for current build: {current_values}")
        parent_avg = parent_stats.avg
        if parent_avg:
            avg_diff = (current_stats.avg - parent_avg) / parent_avg
        else:
            # Relative difference is undefined for zero parent average;
            # print nan instead of raising ZeroDivisionError.
            avg_diff = float(u"nan")
        print(f"Difference of averages relative to parent: {100 * avg_diff}%")
        print(f"Jumpavg representation of parent group: {parent_stats}")
        print(f"Jumpavg representation of current group: {current_stats}")
        print(
            f"Jumpavg representation of both as one group:"
            f" {combined_group_list[0].stats}"
        )
        bits_diff = separated_group_list.bits - combined_group_list.bits
        compared = u"longer" if bits_diff >= 0 else u"shorter"
        print(
            f"Separate groups are {compared} than single group"
            f" by {abs(bits_diff)} bits"
        )
        # TODO: Version of classify that takes max_value and list of stats?
        # That matters if only stats (not list of floats) are given.
        classified_list = jumpavg.classify([parent_values, current_values])
        if len(classified_list) < 2:
            # Fewer than two items in the result is reported as no anomaly.
            print(f"Test test_index {test_index}: normal (no anomaly)")
            continue
        anomaly = classified_list[1].comment
        if anomaly == u"regression":
            print(f"Test test_index {test_index}: anomaly regression")
            exit_code = 3  # 1 or 2 can be caused by other errors
            continue
        print(f"Test test_index {test_index}: anomaly {anomaly}")
    print(f"Exit code: {exit_code}")
    return exit_code

# Script entry point: the value returned by main() becomes the exit status.
if __name__ == u"__main__":
    raise SystemExit(main())