author    Vratko Polak <vrpolak@cisco.com>  2019-04-10 10:33:17 +0200
committer Vratko Polak <vrpolak@cisco.com>  2019-07-22 15:41:59 +0200
commit    a8e84e5082d63c21fdfb691216ec54e549958840 (patch)
tree      fbcfce0a14a643af10888a503dbdf08b48c54bd4
parent    b9b89800946b33a8bbdedf197a2105eed9735e00 (diff)
Increase reliability of per patch perf job
CSIT-1485

 - Measurement results are inconsistent, creating false positives.
 - To avoid false positives, sensitivity has to be lowered.
 + Measurements are no longer interleaved.
 + Measurement results are no longer filtered.
 + The job now runs faster.
 - Ip6base regressions smaller than 4% are tolerated.
 + Ip6base false positives can happen, but less than 10% of the time.
 - Part of the job parameters is set at the ci-management level.
 + Voter script output is improved.

Change-Id: I0ec7d5df8b397daf7ff0277a1137ee9f36d8d866
Signed-off-by: Vratko Polak <vrpolak@cisco.com>
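The reworked voter compares the two builds mainly through the average of the collected MRR samples and prints their relative difference instead of the earlier filtered TRACE/DEBUG output. A minimal sketch of that computation, using made-up MRR values rather than output of any real run:

    # Hypothetical per-build MRR samples, for illustration only.
    parent_values = [11.9e6, 12.1e6, 12.0e6]
    current_values = [11.5e6, 11.4e6, 11.6e6]

    parent_avg = sum(parent_values) / float(len(parent_values))
    current_avg = sum(current_values) / float(len(current_values))

    # Same formula as the updated compare_perpatch.py:
    # negative means the current build is slower than the parent.
    difference = (current_avg - parent_avg) / parent_avg
    print("Difference of averages relative to parent: {d}%".format(
        d=100 * difference))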
-rw-r--r-- resources/libraries/bash/entry/per_patch_perf.sh | 12
-rw-r--r-- resources/tools/scripts/compare_perpatch.py | 67
2 files changed, 49 insertions, 30 deletions
diff --git a/resources/libraries/bash/entry/per_patch_perf.sh b/resources/libraries/bash/entry/per_patch_perf.sh
index d3aabfae17..82dadc2ed6 100644
--- a/resources/libraries/bash/entry/per_patch_perf.sh
+++ b/resources/libraries/bash/entry/per_patch_perf.sh
@@ -58,17 +58,21 @@ select_tags || die
compose_pybot_arguments || die
generate_tests || die
archive_tests || die
-iterations=8
+# Support for interleaved measurements is kept for future.
+iterations=1 # 8
for ((iter=0; iter<iterations; iter++)); do
- # TODO: Use less heavy way to avoid apt remove falilures.
- # Also, reserve_testbed has already called cleanup once.
- cleanup_topo
+ if ((iter)); then
+ # Reserve testbed has already cleaned it once,
+ # but we need to clean it explicitly on subsequent iterations.
+ cleanup_topo
+ fi
select_build "build_parent" || die
check_download_dir || die
run_pybot || die
copy_archives || die
archive_parse_test_results "csit_parent/${iter}" || die
die_on_pybot_error || die
+ # TODO: Use less heavy way to avoid apt remove failures.
cleanup_topo
select_build "build_current" || die
check_download_dir || die
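With iterations=1, each build is measured once and its results archived under csit_parent/0 and csit_current/0; compare_perpatch.py then probes those per-iteration directories until one is missing, so the layout keeps working if iterations is raised again. A simplified sketch of that discovery step, assuming the same workspace-relative paths and using os.path.isfile where the script wraps open() in try/except:

    import os

    def result_files(kind):
        """Yield existing results.txt paths for 'csit_parent' or 'csit_current'."""
        iteration = 0
        while True:
            path = "{kind}/{iter}/results.txt".format(kind=kind, iter=iteration)
            if not os.path.isfile(path):
                break
            yield path
            iteration += 1

    # With iterations=1 this finds only index 0 for each build.
    for path in result_files("csit_parent"):
        print(path)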
diff --git a/resources/tools/scripts/compare_perpatch.py b/resources/tools/scripts/compare_perpatch.py
index 3bda338d49..9c9bd20023 100644
--- a/resources/tools/scripts/compare_perpatch.py
+++ b/resources/tools/scripts/compare_perpatch.py
@@ -32,6 +32,8 @@ from jumpavg.BitCountingClassifier import BitCountingClassifier
def hack(value_list):
"""Return middle two quartiles, hoping to reduce influence of outliers.
+ Currently "middle two" is "all", but that can change in future.
+
:param value_list: List to pick subset from.
:type value_list: list of float
:returns: New list containing middle values.
@@ -40,16 +42,16 @@ def hack(value_list):
tmp = sorted(value_list)
eight = len(tmp) / 8
ret = tmp[3*eight:-eight]
- return ret
+ return tmp # ret
iteration = -1
parent_iterations = list()
-new_iterations = list()
+current_iterations = list()
num_tests = None
while 1:
iteration += 1
parent_lines = list()
- new_lines = list()
+ current_lines = list()
filename = "csit_parent/{iter}/results.txt".format(iter=iteration)
try:
with open(filename) as parent_file:
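As an aside on the now-bypassed filtering in hack() above: when re-enabled, the slice keeps the sorted samples from the 3/8 position up to the 7/8 position, i.e. it drops the lowest three eighths and the top eighth. A tiny worked example with made-up numbers:

    # 8 made-up samples, so eight == 1 and tmp[3:-1] keeps indices 3..6.
    tmp = sorted([7.0, 1.0, 5.0, 8.0, 3.0, 6.0, 4.0, 2.0])
    eight = len(tmp) // 8   # floor division, matching the Python 2 script
    print(tmp[3 * eight:-eight])   # -> [4.0, 5.0, 6.0, 7.0]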
@@ -58,9 +60,9 @@ while 1:
break
num_lines = len(parent_lines)
filename = "csit_current/{iter}/results.txt".format(iter=iteration)
- with open(filename) as new_file:
- new_lines = new_file.readlines()
- if num_lines != len(new_lines):
+ with open(filename) as current_file:
+ current_lines = current_file.readlines()
+ if num_lines != len(current_lines):
print "Number of tests does not match within iteration", iteration
sys.exit(1)
if num_tests is None:
@@ -69,37 +71,50 @@ while 1:
print "Number of tests does not match previous at iteration", iteration
sys.exit(1)
parent_iterations.append(parent_lines)
- new_iterations.append(new_lines)
+ current_iterations.append(current_lines)
classifier = BitCountingClassifier()
exit_code = 0
for test_index in range(num_tests):
val_max = 1.0
parent_values = list()
- new_values = list()
+ current_values = list()
for iteration_index in range(len(parent_iterations)):
parent_values.extend(
json.loads(parent_iterations[iteration_index][test_index]))
- new_values.extend(
- json.loads(new_iterations[iteration_index][test_index]))
- print "TRACE pre-hack parent: {p}".format(p=parent_values)
- print "TRACE pre-hack current: {n}".format(n=new_values)
+ current_values.extend(
+ json.loads(current_iterations[iteration_index][test_index]))
+ print "Time-ordered MRR values for parent build: {p}".format(
+ p=parent_values)
+ print "Time-ordered MRR values for current build: {c}".format(
+ c=current_values)
parent_values = hack(parent_values)
- new_values = hack(new_values)
+ current_values = hack(current_values)
parent_max = BitCountingMetadataFactory.find_max_value(parent_values)
- new_max = BitCountingMetadataFactory.find_max_value(new_values)
- val_max = max(val_max, parent_max, new_max)
+ current_max = BitCountingMetadataFactory.find_max_value(current_values)
+ val_max = max(val_max, parent_max, current_max)
factory = BitCountingMetadataFactory(val_max)
parent_stats = factory.from_data(parent_values)
- new_factory = BitCountingMetadataFactory(val_max, parent_stats.avg)
- new_stats = new_factory.from_data(new_values)
- print "TRACE parent: {p}".format(p=parent_values)
- print "TRACE current: {n}".format(n=new_values)
- print "DEBUG parent: {p}".format(p=parent_stats)
- print "DEBUG current: {n}".format(n=new_stats)
- common_max = max(parent_stats.avg, new_stats.avg)
- difference = (new_stats.avg - parent_stats.avg) / common_max
- print "DEBUG difference: {d}%".format(d=100 * difference)
- classified_list = classifier.classify([parent_stats, new_stats])
+ current_factory = BitCountingMetadataFactory(val_max, parent_stats.avg)
+ current_stats = current_factory.from_data(current_values)
+ both_stats = factory.from_data(parent_values + current_values)
+ print "Value-ordered MRR values for parent build: {p}".format(
+ p=parent_values)
+ print "Value-ordered MRR values for current build: {c}".format(
+ c=current_values)
+ difference = (current_stats.avg - parent_stats.avg) / parent_stats.avg
+ print "Difference of averages relative to parent: {d}%".format(
+ d=100 * difference)
+ print "Jumpavg representation of parent group: {p}".format(
+ p=parent_stats)
+ print "Jumpavg representation of current group: {c}".format(
+ c=current_stats)
+ print "Jumpavg representation of both as one group: {b}".format(
+ b=both_stats)
+ bits = parent_stats.bits + current_stats.bits - both_stats.bits
+ compared = "longer" if bits >= 0 else "shorter"
+ print "Separate groups are {cmp} than single group by {bit} bits".format(
+ cmp=compared, bit=abs(bits))
+ classified_list = classifier.classify([parent_stats, current_stats])
if len(classified_list) < 2:
print "Test test_index {test_index}: normal (no anomaly)".format(
test_index=test_index)
@@ -112,5 +127,5 @@ for test_index in range(num_tests):
continue
print "Test test_index {test_index}: anomaly {anomaly}".format(
test_index=test_index, anomaly=anomaly)
-print "DEBUG exit code {code}".format(code=exit_code)
+print "Exit code {code}".format(code=exit_code)
sys.exit(exit_code)
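The new "Separate groups are ... bits" line compares two descriptions of the same data: encoding parent and current samples as two groups versus as one merged group. The real code lengths come from the jumpavg library; the toy below is not jumpavg's encoding, only a rough Gaussian two-part-code stand-in (MODEL_COST, RESOLUTION and the MRR samples are arbitrary assumptions) meant to show why a shorter separate encoding points at an anomaly:

    import math

    MODEL_COST = 16.0   # assumed flat cost (bits) of describing one group's model
    RESOLUTION = 1.0    # assumed measurement resolution, keeps the log argument sane

    def group_bits(values):
        """Rough two-part code length (bits) for encoding values as one group."""
        n = len(values)
        mean = sum(values) / float(n)
        var = max(sum((v - mean) ** 2 for v in values) / n, RESOLUTION ** 2)
        return MODEL_COST + 0.5 * n * math.log(2 * math.pi * math.e * var, 2)

    parent = [11.8e6, 12.1e6, 11.9e6, 12.0e6, 12.2e6, 12.0e6]   # made-up samples
    current = [9.8e6, 10.1e6, 9.9e6, 10.0e6, 10.2e6, 10.0e6]    # clearly slower

    bits = group_bits(parent) + group_bits(current) - group_bits(parent + current)
    compared = "longer" if bits >= 0 else "shorter"
    print("Separate groups are {cmp} than single group by {bit} bits".format(
        cmp=compared, bit=abs(bits)))
    # Shorter separate encoding means the two builds look like different
    # populations, which is the situation the classifier reports as an anomaly;
    # for similar builds the single-group encoding wins and no anomaly is flagged.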