aboutsummaryrefslogtreecommitdiffstats
path: root/resources/tools/scripts/compare_perpatch.py
blob: cc9ffd89923e7a5abd7e0fdd8afb45d397665889 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Copyright (c) 2018 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Script for determining whether per-patch perf test votes -1.

This script assumes there exist two text files with processed BMRR results,
located at hardcoded relative paths, having several lines
of json-parseable lists of float values, corresponding to testcase results.
This script then uses jumpavg library to determine whether there was
a regression, progression or no change for each testcase.
If number of tests does not match, or there was a regression,
this script votes -1 (by exiting with code 1), otherwise it votes +1 (exit 0).
"""

import json
import sys

from jumpavg.BitCountingMetadataFactory import BitCountingMetadataFactory
from jumpavg.BitCountingClassifier import BitCountingClassifier


def hack(value_list):
    """Return middle two quartiles, hoping to reduce influence of outliers.

    The values are sorted, then one quarter of the items (rounded down)
    is dropped from each end. Inputs with fewer than four items have
    nothing trimmed and are returned sorted in full.

    :param value_list: List to pick subset from.
    :type value_list: list of float
    :returns: New sorted list containing the middle values.
    :rtype: list of float
    """
    tmp = sorted(value_list)
    # Floor division keeps the slice index an integer on Python 2 and 3.
    quarter = len(tmp) // 4
    # Use an explicit end index: "-quarter" would be "-0" for short lists,
    # which slices to an empty list instead of trimming nothing.
    return tmp[quarter:len(tmp) - quarter]

# Read both result files in full; line N of the parent file is compared
# with line N of the new file.
with open("csit_parent/results.txt") as parent_file:
    parent_lines = parent_file.readlines()
with open("csit_new/results.txt") as new_file:
    new_lines = new_file.readlines()
# A differing line count means the two runs cannot be paired up: vote -1.
if len(parent_lines) != len(new_lines):
    print("Number of passed tests does not match!")
    sys.exit(1)
classifier = BitCountingClassifier()
exit_code = 0
line_pairs = zip(parent_lines, new_lines)
for index, (parent_line, new_line) in enumerate(line_pairs):
    # Each line is a json list of values; only the middle values are used.
    parent_values = hack(json.loads(parent_line))
    new_values = hack(json.loads(new_line))
    # Both factories are given the larger of the two maxima found.
    parent_max = BitCountingMetadataFactory.find_max_value(parent_values)
    new_max = BitCountingMetadataFactory.find_max_value(new_values)
    cmax = max(parent_max, new_max)
    factory = BitCountingMetadataFactory(cmax)
    parent_stats = factory.from_data(parent_values)
    # The second factory additionally receives the parent average.
    # NOTE(review): how jumpavg uses that argument is not visible here.
    factory = BitCountingMetadataFactory(cmax, parent_stats.avg)
    new_stats = factory.from_data(new_values)
    print("DEBUG parent: {p}".format(p=parent_stats))
    print("DEBUG new: {n}".format(n=new_stats))
    # The relative difference is only printed, it does not affect the vote.
    # Guard the division so a zero average cannot abort the whole script.
    common_max = max(parent_stats.avg, new_stats.avg)
    delta = new_stats.avg - parent_stats.avg
    difference = delta / common_max if common_max else 0.0
    print("DEBUG difference: {d}%".format(d=100 * difference))
    classified_list = classifier.classify([parent_stats, new_stats])
    # Fewer than two classified items is reported as "no anomaly".
    if len(classified_list) < 2:
        print("Test index {index}: normal (no anomaly)".format(
            index=index))
        continue
    anomaly = classified_list[1].metadata.classification
    if anomaly == "regression":
        # Any regression turns the final vote into -1 (exit code 1),
        # but the remaining tests are still evaluated and reported.
        print("Test index {index}: anomaly regression".format(index=index))
        exit_code = 1
        continue
    print("Test index {index}: anomaly {anomaly}".format(
        index=index, anomaly=anomaly))
print("DEBUG exit code {code}".format(code=exit_code))
sys.exit(exit_code)