From 661035ac4ce6e51649f302fe2b7a8218257c0587 Mon Sep 17 00:00:00 2001 From: Vratko Polak Date: Wed, 10 Apr 2019 10:33:17 +0200 Subject: Increase reliability of per patch perf job CSIT-1485 - Measurement results are inconsistent, creating false positives. - To avoid false positives, sensitivity has to be lowered. + Measurements are no more interleaved. + Measurement result are no longer filtered. + The job now runs faster. - Ip6base regressions smaller than 4% are tolerated. + Ip6base false positives can happen, but less than 10% of the time. - Part of job parameters is set on ci-management level. + Voter script output is improved. Change-Id: I0ec7d5df8b397daf7ff0277a1137ee9f36d8d866 Signed-off-by: Vratko Polak --- resources/tools/scripts/compare_perpatch.py | 67 ++++++++++++++++++----------- 1 file changed, 41 insertions(+), 26 deletions(-) (limited to 'resources/tools/scripts') diff --git a/resources/tools/scripts/compare_perpatch.py b/resources/tools/scripts/compare_perpatch.py index 3bda338d49..9c9bd20023 100644 --- a/resources/tools/scripts/compare_perpatch.py +++ b/resources/tools/scripts/compare_perpatch.py @@ -32,6 +32,8 @@ from jumpavg.BitCountingClassifier import BitCountingClassifier def hack(value_list): """Return middle two quartiles, hoping to reduce influence of outliers. + Currently "middle two" is "all", but that can change in future. + :param value_list: List to pick subset from. :type value_list: list of float :returns: New list containing middle values. @@ -40,16 +42,16 @@ def hack(value_list): tmp = sorted(value_list) eight = len(tmp) / 8 ret = tmp[3*eight:-eight] - return ret + return tmp # ret iteration = -1 parent_iterations = list() -new_iterations = list() +current_iterations = list() num_tests = None while 1: iteration += 1 parent_lines = list() - new_lines = list() + current_lines = list() filename = "csit_parent/{iter}/results.txt".format(iter=iteration) try: with open(filename) as parent_file: @@ -58,9 +60,9 @@ while 1: break num_lines = len(parent_lines) filename = "csit_current/{iter}/results.txt".format(iter=iteration) - with open(filename) as new_file: - new_lines = new_file.readlines() - if num_lines != len(new_lines): + with open(filename) as current_file: + current_lines = current_file.readlines() + if num_lines != len(current_lines): print "Number of tests does not match within iteration", iteration sys.exit(1) if num_tests is None: @@ -69,37 +71,50 @@ while 1: print "Number of tests does not match previous at iteration", iteration sys.exit(1) parent_iterations.append(parent_lines) - new_iterations.append(new_lines) + current_iterations.append(current_lines) classifier = BitCountingClassifier() exit_code = 0 for test_index in range(num_tests): val_max = 1.0 parent_values = list() - new_values = list() + current_values = list() for iteration_index in range(len(parent_iterations)): parent_values.extend( json.loads(parent_iterations[iteration_index][test_index])) - new_values.extend( - json.loads(new_iterations[iteration_index][test_index])) - print "TRACE pre-hack parent: {p}".format(p=parent_values) - print "TRACE pre-hack current: {n}".format(n=new_values) + current_values.extend( + json.loads(current_iterations[iteration_index][test_index])) + print "Time-ordered MRR values for parent build: {p}".format( + p=parent_values) + print "Time-ordered MRR values for current build: {c}".format( + c=current_values) parent_values = hack(parent_values) - new_values = hack(new_values) + current_values = hack(current_values) parent_max = BitCountingMetadataFactory.find_max_value(parent_values) - new_max = BitCountingMetadataFactory.find_max_value(new_values) - val_max = max(val_max, parent_max, new_max) + current_max = BitCountingMetadataFactory.find_max_value(current_values) + val_max = max(val_max, parent_max, current_max) factory = BitCountingMetadataFactory(val_max) parent_stats = factory.from_data(parent_values) - new_factory = BitCountingMetadataFactory(val_max, parent_stats.avg) - new_stats = new_factory.from_data(new_values) - print "TRACE parent: {p}".format(p=parent_values) - print "TRACE current: {n}".format(n=new_values) - print "DEBUG parent: {p}".format(p=parent_stats) - print "DEBUG current: {n}".format(n=new_stats) - common_max = max(parent_stats.avg, new_stats.avg) - difference = (new_stats.avg - parent_stats.avg) / common_max - print "DEBUG difference: {d}%".format(d=100 * difference) - classified_list = classifier.classify([parent_stats, new_stats]) + current_factory = BitCountingMetadataFactory(val_max, parent_stats.avg) + current_stats = current_factory.from_data(current_values) + both_stats = factory.from_data(parent_values + current_values) + print "Value-ordered MRR values for parent build: {p}".format( + p=parent_values) + print "Value-ordered MRR values for current build: {c}".format( + c=current_values) + difference = (current_stats.avg - parent_stats.avg) / parent_stats.avg + print "Difference of averages relative to parent: {d}%".format( + d=100 * difference) + print "Jumpavg representation of parent group: {p}".format( + p=parent_stats) + print "Jumpavg representation of current group: {c}".format( + c=current_stats) + print "Jumpavg representation of both as one group: {b}".format( + b=both_stats) + bits = parent_stats.bits + current_stats.bits - both_stats.bits + compared = "longer" if bits >= 0 else "shorter" + print "Separate groups are {cmp} than single group by {bit} bits".format( + cmp=compared, bit=abs(bits)) + classified_list = classifier.classify([parent_stats, current_stats]) if len(classified_list) < 2: print "Test test_index {test_index}: normal (no anomaly)".format( test_index=test_index) @@ -112,5 +127,5 @@ for test_index in range(num_tests): continue print "Test test_index {test_index}: anomaly {anomaly}".format( test_index=test_index, anomaly=anomaly) -print "DEBUG exit code {code}".format(code=exit_code) +print "Exit code {code}".format(code=exit_code) sys.exit(exit_code) -- cgit 1.2.3-korg