resources/tools/scripts/compare_perpatch.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131

# Copyright (c) 2019 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Script for determining whether per-patch perf test votes -1.

This script assumes there exist two text files with processed BMRR results,
located at hardcoded relative paths (subdirs thereof), having several lines
of json-parseable lists of float values, corresponding to testcase results.
This script then uses jumpavg library to determine whether there was
a regression, progression or no change for each testcase.
If number of tests does not match, or there was a regression,
this script votes -1 (by exiting with code 1), otherwise it votes +1 (exit 0).
"""

import json
import sys

from jumpavg.BitCountingMetadataFactory import BitCountingMetadataFactory
from jumpavg.BitCountingClassifier import BitCountingClassifier


def hack(value_list):
    """Return middle two quartiles, hoping to reduce influence of outliers.

    Currently "middle two" is "all", but that can change in future.

    :param value_list: List to pick subset from.
    :type value_list: list of float
    :returns: New list containing middle values.
    :rtype: list of float
    """
    tmp = sorted(value_list)
    eight = len(tmp) / 8
    ret = tmp[3*eight:-eight]
    return tmp # ret

iteration = -1
parent_iterations = list()
current_iterations = list()
num_tests = None
while 1:
    iteration += 1
    parent_lines = list()
    current_lines = list()
    filename = "csit_parent/{iter}/results.txt".format(iter=iteration)
    try:
        with open(filename) as parent_file:
            parent_lines = parent_file.readlines()
    except IOError:
        break
    num_lines = len(parent_lines)
    filename = "csit_current/{iter}/results.txt".format(iter=iteration)
    with open(filename) as current_file:
        current_lines = current_file.readlines()
    if num_lines != len(current_lines):
        print "Number of tests does not match within iteration", iteration
        sys.exit(1)
    if num_tests is None:
        num_tests = num_lines
    elif num_tests != num_lines:
        print "Number of tests does not match previous at iteration", iteration
        sys.exit(1)
    parent_iterations.append(parent_lines)
    current_iterations.append(current_lines)
classifier = BitCountingClassifier()
exit_code = 0
for test_index in range(num_tests):
    val_max = 1.0
    parent_values = list()
    current_values = list()
    for iteration_index in range(len(parent_iterations)):
        parent_values.extend(
            json.loads(parent_iterations[iteration_index][test_index]))
        current_values.extend(
            json.loads(current_iterations[iteration_index][test_index]))
    print "Time-ordered MRR values for parent build: {p}".format(
        p=parent_values)
    print "Time-ordered MRR values for current build: {c}".format(
        c=current_values)
    parent_values = hack(parent_values)
    current_values = hack(current_values)
    parent_max = BitCountingMetadataFactory.find_max_value(parent_values)
    current_max = BitCountingMetadataFactory.find_max_value(current_values)
    val_max = max(val_max, parent_max, current_max)
    factory = BitCountingMetadataFactory(val_max)
    parent_stats = factory.from_data(parent_values)
    current_factory = BitCountingMetadataFactory(val_max, parent_stats.avg)
    current_stats = current_factory.from_data(current_values)
    both_stats = factory.from_data(parent_values + current_values)
    print "Value-ordered MRR values for parent build: {p}".format(
        p=parent_values)
    print "Value-ordered MRR values for current build: {c}".format(
        c=current_values)
    difference = (current_stats.avg - parent_stats.avg) / parent_stats.avg
    print "Difference of averages relative to parent: {d}%".format(
        d=100 * difference)
    print "Jumpavg representation of parent group: {p}".format(
        p=parent_stats)
    print "Jumpavg representation of current group: {c}".format(
        c=current_stats)
    print "Jumpavg representation of both as one group: {b}".format(
        b=both_stats)
    bits = parent_stats.bits + current_stats.bits - both_stats.bits
    compared = "longer" if bits >= 0 else "shorter"
    print "Separate groups are {cmp} than single group by {bit} bits".format(
        cmp=compared, bit=abs(bits))
    classified_list = classifier.classify([parent_stats, current_stats])
    if len(classified_list) < 2:
        print "Test test_index {test_index}: normal (no anomaly)".format(
            test_index=test_index)
        continue
    anomaly = classified_list[1].metadata.classification
    if anomaly == "regression":
        print "Test test_index {test_index}: anomaly regression".format(
            test_index=test_index)
        exit_code = 1
        continue
    print "Test test_index {test_index}: anomaly {anomaly}".format(
        test_index=test_index, anomaly=anomaly)
print "Exit code {code}".format(code=exit_code)
sys.exit(exit_code)