diff options
author | Vratko Polak <vrpolak@cisco.com> | 2019-04-10 10:33:17 +0200 |
---|---|---|
committer | Vratko Polak <vrpolak@cisco.com> | 2019-07-22 15:41:59 +0200 |
commit | a8e84e5082d63c21fdfb691216ec54e549958840 (patch) | |
tree | fbcfce0a14a643af10888a503dbdf08b48c54bd4 | |
parent | b9b89800946b33a8bbdedf197a2105eed9735e00 (diff) |
Increase reliability of per patch perf job
CSIT-1485
- Measurement results are inconsistent, creating false positives.
- To avoid false positives, sensitivity has to be lowered.
+ Measurements are no longer interleaved.
+ Measurement results are no longer filtered.
+ The job now runs faster.
- Ip6base regressions smaller than 4% are tolerated.
+ Ip6base false positives can happen, but less than 10% of the time.
- Some of the job parameters are set at the ci-management level.
+ Voter script output is improved.
Change-Id: I0ec7d5df8b397daf7ff0277a1137ee9f36d8d866
Signed-off-by: Vratko Polak <vrpolak@cisco.com>
-rw-r--r-- | resources/libraries/bash/entry/per_patch_perf.sh | 12 | ||||
-rw-r--r-- | resources/tools/scripts/compare_perpatch.py | 67 |
2 files changed, 49 insertions, 30 deletions
diff --git a/resources/libraries/bash/entry/per_patch_perf.sh b/resources/libraries/bash/entry/per_patch_perf.sh index d3aabfae17..82dadc2ed6 100644 --- a/resources/libraries/bash/entry/per_patch_perf.sh +++ b/resources/libraries/bash/entry/per_patch_perf.sh @@ -58,17 +58,21 @@ select_tags || die compose_pybot_arguments || die generate_tests || die archive_tests || die -iterations=8 +# Support for interleaved measurements is kept for future. +iterations=1 # 8 for ((iter=0; iter<iterations; iter++)); do - # TODO: Use less heavy way to avoid apt remove falilures. - # Also, reserve_testbed has already called cleanup once. - cleanup_topo + if ((iter)); then + # Reserve testbed has already cleaned it once, + # but we need to clean it explicitly on subsequent iterations. + cleanup_topo + fi select_build "build_parent" || die check_download_dir || die run_pybot || die copy_archives || die archive_parse_test_results "csit_parent/${iter}" || die die_on_pybot_error || die + # TODO: Use less heavy way to avoid apt remove failures. cleanup_topo select_build "build_current" || die check_download_dir || die diff --git a/resources/tools/scripts/compare_perpatch.py b/resources/tools/scripts/compare_perpatch.py index 3bda338d49..9c9bd20023 100644 --- a/resources/tools/scripts/compare_perpatch.py +++ b/resources/tools/scripts/compare_perpatch.py @@ -32,6 +32,8 @@ from jumpavg.BitCountingClassifier import BitCountingClassifier def hack(value_list): """Return middle two quartiles, hoping to reduce influence of outliers. + Currently "middle two" is "all", but that can change in future. + :param value_list: List to pick subset from. :type value_list: list of float :returns: New list containing middle values. 
@@ -40,16 +42,16 @@ def hack(value_list): tmp = sorted(value_list) eight = len(tmp) / 8 ret = tmp[3*eight:-eight] - return ret + return tmp # ret iteration = -1 parent_iterations = list() -new_iterations = list() +current_iterations = list() num_tests = None while 1: iteration += 1 parent_lines = list() - new_lines = list() + current_lines = list() filename = "csit_parent/{iter}/results.txt".format(iter=iteration) try: with open(filename) as parent_file: @@ -58,9 +60,9 @@ while 1: break num_lines = len(parent_lines) filename = "csit_current/{iter}/results.txt".format(iter=iteration) - with open(filename) as new_file: - new_lines = new_file.readlines() - if num_lines != len(new_lines): + with open(filename) as current_file: + current_lines = current_file.readlines() + if num_lines != len(current_lines): print "Number of tests does not match within iteration", iteration sys.exit(1) if num_tests is None: @@ -69,37 +71,50 @@ while 1: print "Number of tests does not match previous at iteration", iteration sys.exit(1) parent_iterations.append(parent_lines) - new_iterations.append(new_lines) + current_iterations.append(current_lines) classifier = BitCountingClassifier() exit_code = 0 for test_index in range(num_tests): val_max = 1.0 parent_values = list() - new_values = list() + current_values = list() for iteration_index in range(len(parent_iterations)): parent_values.extend( json.loads(parent_iterations[iteration_index][test_index])) - new_values.extend( - json.loads(new_iterations[iteration_index][test_index])) - print "TRACE pre-hack parent: {p}".format(p=parent_values) - print "TRACE pre-hack current: {n}".format(n=new_values) + current_values.extend( + json.loads(current_iterations[iteration_index][test_index])) + print "Time-ordered MRR values for parent build: {p}".format( + p=parent_values) + print "Time-ordered MRR values for current build: {c}".format( + c=current_values) parent_values = hack(parent_values) - new_values = hack(new_values) + current_values = 
hack(current_values) parent_max = BitCountingMetadataFactory.find_max_value(parent_values) - new_max = BitCountingMetadataFactory.find_max_value(new_values) - val_max = max(val_max, parent_max, new_max) + current_max = BitCountingMetadataFactory.find_max_value(current_values) + val_max = max(val_max, parent_max, current_max) factory = BitCountingMetadataFactory(val_max) parent_stats = factory.from_data(parent_values) - new_factory = BitCountingMetadataFactory(val_max, parent_stats.avg) - new_stats = new_factory.from_data(new_values) - print "TRACE parent: {p}".format(p=parent_values) - print "TRACE current: {n}".format(n=new_values) - print "DEBUG parent: {p}".format(p=parent_stats) - print "DEBUG current: {n}".format(n=new_stats) - common_max = max(parent_stats.avg, new_stats.avg) - difference = (new_stats.avg - parent_stats.avg) / common_max - print "DEBUG difference: {d}%".format(d=100 * difference) - classified_list = classifier.classify([parent_stats, new_stats]) + current_factory = BitCountingMetadataFactory(val_max, parent_stats.avg) + current_stats = current_factory.from_data(current_values) + both_stats = factory.from_data(parent_values + current_values) + print "Value-ordered MRR values for parent build: {p}".format( + p=parent_values) + print "Value-ordered MRR values for current build: {c}".format( + c=current_values) + difference = (current_stats.avg - parent_stats.avg) / parent_stats.avg + print "Difference of averages relative to parent: {d}%".format( + d=100 * difference) + print "Jumpavg representation of parent group: {p}".format( + p=parent_stats) + print "Jumpavg representation of current group: {c}".format( + c=current_stats) + print "Jumpavg representation of both as one group: {b}".format( + b=both_stats) + bits = parent_stats.bits + current_stats.bits - both_stats.bits + compared = "longer" if bits >= 0 else "shorter" + print "Separate groups are {cmp} than single group by {bit} bits".format( + cmp=compared, bit=abs(bits)) + classified_list 
= classifier.classify([parent_stats, current_stats]) if len(classified_list) < 2: print "Test test_index {test_index}: normal (no anomaly)".format( test_index=test_index) @@ -112,5 +127,5 @@ for test_index in range(num_tests): continue print "Test test_index {test_index}: anomaly {anomaly}".format( test_index=test_index, anomaly=anomaly) -print "DEBUG exit code {code}".format(code=exit_code) +print "Exit code {code}".format(code=exit_code) sys.exit(exit_code) |