author | Vratko Polak <vrpolak@cisco.com> | 2018-10-19 12:21:47 +0200
committer | Maciek Konstantynowicz <mkonstan@cisco.com> | 2018-10-22 21:17:26 +0000
commit | d465d9fba33a323703a2bf40c499d74d0f017091 (patch)
tree | 3678767f68a7d9faf22a93f4be0d5f9b7b807869
parent | 8e7582edf682a7ba7edcb5ec0a45d00e361ec868 (diff)
Per patch: multiple BMRR calls
This is to reduce sensitivity to unstable testbed performance.
Also add topo_cleanup before every pybot run
to avoid issues with VPP uninstall.
TRACE prints are left there to simplify investigation
of false positives if/when they happen.
Change-Id: I9b0cdcfbbe4aa0735a0596746ac32c9e93af0897
Signed-off-by: Vratko Polak <vrpolak@cisco.com>
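The core idea of the change, in miniature: with a single long run per build, any slow drift in testbed performance is indistinguishable from a patch-induced change, while interleaving eight shorter parent/new runs cancels most of the drift. A rough, self-contained illustration with made-up numbers (not part of the patch):

```python
# Illustration only (made-up numbers, not part of the patch): why
# interleaving parent and new runs is less sensitive to testbed drift
# than running all parent trials first and all new trials second.
import random

random.seed(31)

def trial(t):
    """One BMRR-like sample at time t: flat true rate, slow drift, noise."""
    return 100.0 - 0.05 * t + random.gauss(0, 0.5)

# Old scheme: 80 parent trials, then 80 new trials (drift mimics a change).
parent_old = [trial(t) for t in range(80)]
new_old = [trial(t) for t in range(80, 160)]

# New scheme: 8 iterations, each running 10 parent then 10 new trials.
parent_new, new_new = [], []
for it in range(8):
    parent_new += [trial(20 * it + t) for t in range(10)]
    new_new += [trial(20 * it + 10 + t) for t in range(10)]

def avg(vals):
    return sum(vals) / len(vals)

print("old-scheme apparent change: {:+.2f}".format(avg(new_old) - avg(parent_old)))
print("new-scheme apparent change: {:+.2f}".format(avg(new_new) - avg(parent_new)))
```

With these numbers the old scheme reports a spurious change of roughly -4.0 although parent and new perform identically; the interleaved scheme shrinks that bias by an order of magnitude.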
-rw-r--r-- | resources/libraries/bash/entry/per_patch_perf.sh | 34
-rw-r--r-- | resources/libraries/bash/function/common.sh | 22
-rw-r--r-- | resources/libraries/bash/function/per_patch.sh | 137
-rw-r--r-- | resources/tools/scripts/compare_perpatch.py | 81
4 files changed, 167 insertions, 107 deletions
diff --git a/resources/libraries/bash/entry/per_patch_perf.sh b/resources/libraries/bash/entry/per_patch_perf.sh
index 9fb266d106..abb1001652 100644
--- a/resources/libraries/bash/entry/per_patch_perf.sh
+++ b/resources/libraries/bash/entry/per_patch_perf.sh
@@ -24,7 +24,7 @@ set -exuo pipefail
 # + Everything needed to build VPP is already installed locally.
 # Consequences:
 # + At the end, VPP repo has parent commit checked out and built.
-# + Directories build_root, dpdk and csit are reset during the run.
+# + Directories build_root, build and csit are reset during the run.
 # + The following directories (relative to VPP repo) are (re)created:
 # ++ csit_new, csit_parent, build_new, build_parent,
 # ++ archive, csit/archive, csit_download_dir.
@@ -44,9 +44,10 @@ set_perpatch_vpp_dir || die
 build_vpp_ubuntu_amd64 "NEW" || die
 prepare_build_parent || die
 build_vpp_ubuntu_amd64 "PARENT" || die
-prepare_test_new || die
+prepare_test || die
 ## Replace previous 4 lines with this to speed up testing.
-#download_builds "REPLACE_WITH_URL" || die
+#download_builds "https://jenkins.fd.io/sandbox/job/vpp-csit-verify-perf-master-2n-skx/2/artifact/*zip*/archive.zip" || die
+initialize_csit_dirs || die
 get_test_tag_string || die
 get_test_code "${1-}" || die
 set_perpatch_dut || die
@@ -55,15 +56,24 @@ activate_virtualenv "${VPP_DIR}" || die
 reserve_testbed || die
 select_tags || die
 compose_pybot_arguments || die
-check_download_dir || die
-run_pybot "10" || die
-copy_archives || die
-die_on_pybot_error || die
-prepare_test_parent || die
-check_download_dir || die
-run_pybot "10" || die
+iterations=8
+for ((iter=0; iter<iterations; iter++)); do
+    # TODO: Use less heavy way to avoid apt remove failures.
+    cleanup_topo
+    select_build "build_parent" || die
+    check_download_dir || die
+    run_pybot || die
+    copy_archives || die
+    archive_parse_test_results "csit_parent/${iter}" || die
+    die_on_pybot_error || die
+    cleanup_topo
+    select_build "build_new" || die
+    check_download_dir || die
+    run_pybot || die
+    copy_archives || die
+    archive_parse_test_results "csit_new/${iter}" || die
+    die_on_pybot_error || die
+done
 untrap_and_unreserve_testbed || die
-copy_archives || die
-die_on_pybot_error || die
 compare_test_results
 # The error code becomes this script's error code.
 # TODO: After merging, make sure archiving works as expected.
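Each pass of the loop above archives one run's robot output into csit_parent/<iter> or csit_new/<iter>, so after eight iterations compare_perpatch.py finds csit_parent/0..7/results.txt and csit_new/0..7/results.txt, each holding one JSON list of per-trial MRR values per testcase. A hypothetical reader for that layout (names assumed, for illustration only):

```python
# Hypothetical reader (not part of the patch) for the layout the loop
# leaves behind: csit_parent/0..7/results.txt and csit_new/0..7/results.txt,
# each file holding one JSON list of per-trial MRR values per testcase.
import json
import os

def load_results(root):
    """Return a list of iterations, each a list of per-testcase value lists."""
    iterations = []
    index = 0
    while os.path.isfile(os.path.join(root, str(index), "results.txt")):
        with open(os.path.join(root, str(index), "results.txt")) as result_file:
            iterations.append([json.loads(line) for line in result_file])
        index += 1
    return iterations

# Example: pool all samples of testcase 0 from the parent build.
# samples = [v for it in load_results("csit_parent") for v in it[0]]
```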
diff --git a/resources/libraries/bash/function/common.sh b/resources/libraries/bash/function/common.sh
index 8b83ab4e7e..69c70935c7 100644
--- a/resources/libraries/bash/function/common.sh
+++ b/resources/libraries/bash/function/common.sh
@@ -82,6 +82,20 @@ function check_download_dir () {
 }
 
 
+function cleanup_topo () {
+
+    set -exuo pipefail
+
+    # Variables read:
+    # - WORKING_TOPOLOGY - Path to topology yaml file of the reserved testbed.
+    # - PYTHON_SCRIPTS_DIR - Path to directory holding the reservation script.
+
+    python "${PYTHON_SCRIPTS_DIR}/topo_cleanup.py" -t "${WORKING_TOPOLOGY}"
+    # Not using "|| die" as some callers might want to ignore errors,
+    # e.g. in teardowns, such as unreserve.
+}
+
+
 function common_dirs () {
 
     set -exuo pipefail
@@ -178,7 +192,7 @@ function copy_archives () {
     # automatically archived to logs.fd.io.
     if [[ -n "${WORKSPACE-}" ]]; then
         mkdir -p "${WORKSPACE}/archives/" || die "Archives dir create failed."
-        cp -r "${ARCHIVE_DIR}"/* "${WORKSPACE}/archives" || die "Copy failed."
+        cp -rf "${ARCHIVE_DIR}"/* "${WORKSPACE}/archives" || die "Copy failed."
     fi
 }
@@ -310,7 +324,7 @@ function reserve_testbed () {
             }
             die "Trap attempt failed, unreserve succeeded. Aborting."
         }
-        python "${PYTHON_SCRIPTS_DIR}/topo_cleanup.py" -t "${topo}" || {
+        cleanup_topo || {
             die "Testbed cleanup failed."
         }
         break
@@ -630,9 +644,9 @@ function untrap_and_unreserve_testbed () {
     wt="${WORKING_TOPOLOGY}" # Just to avoid too long lines.
     if [[ -z "${wt-}" ]]; then
         set -eu
-        echo "Testbed looks unreserved already. Trap removal failed before?"
+        warn "Testbed looks unreserved already. Trap removal failed before?"
     else
-        python "${PYTHON_SCRIPTS_DIR}/topo_cleanup.py" -t "${wt}" || true
+        cleanup_topo || true
         python "${PYTHON_SCRIPTS_DIR}/topo_reservation.py" -c -t "${wt}" || {
             die "${1:-FAILED TO UNRESERVE, FIX MANUALLY.}" 2
         }
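Note that cleanup_topo deliberately omits `|| die`, leaving the exit-code policy to each caller: reserve_testbed treats a cleanup failure as fatal, while untrap_and_unreserve_testbed ignores it. A Python analogue of that calling convention (paths and names assumed, for illustration only):

```python
# Python analogue (names assumed, for illustration) of the cleanup_topo
# convention above: the helper reports failure via return code and lets
# each caller pick its own policy, instead of dying itself.
import subprocess

def cleanup_topo(python_scripts_dir, working_topology):
    """Run the testbed cleanup script; return its exit code, never raise."""
    command = [
        "python", python_scripts_dir + "/topo_cleanup.py",
        "-t", working_topology,
    ]
    return subprocess.call(command)

# reserve_testbed-style caller: failure is fatal.
#     if cleanup_topo(scripts, topo) != 0:
#         raise RuntimeError("Testbed cleanup failed.")
# unreserve-style caller: best effort, like "cleanup_topo || true".
#     cleanup_topo(scripts, topo)
```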
diff --git a/resources/libraries/bash/function/per_patch.sh b/resources/libraries/bash/function/per_patch.sh
index 4353a5e1a5..244b6f448c 100644
--- a/resources/libraries/bash/function/per_patch.sh
+++ b/resources/libraries/bash/function/per_patch.sh
@@ -21,6 +21,35 @@ set -exuo pipefail
 # TODO: Add a link to bash style guide.
 
 
+function archive_parse_test_results () {
+
+    set -exuo pipefail
+
+    # Arguments:
+    # - ${1}: Directory to archive to. Required. Parent has to exist.
+    # Variables read:
+    # - ARCHIVE_DIR - Path to where robot result files are created in.
+    # - VPP_DIR - Path to existing directory, root for relative paths.
+    # Directories updated:
+    # - ${1} - Created, and robot and parsing files are moved/created there.
+    # Functions called:
+    # - die - Print to stderr and exit, defined in common.sh
+    # - parse_bmrr_results - See definition in this file.
+
+    cd "${VPP_DIR}" || die "Change directory command failed."
+    target="$(readlink -f "$1")"
+    mkdir -p "${target}" || die "Directory creation failed."
+    for filename in "output.xml" "log.html" "report.html"; do
+        mv "${ARCHIVE_DIR}/${filename}" "${target}/${filename}" || {
+            die "Attempt to move '${filename}' failed."
+        }
+    done
+    parse_bmrr_results "${target}" || {
+        die "The function should have died on error."
+    }
+}
+
+
 function build_vpp_ubuntu_amd64 () {
 
     set -exuo pipefail
@@ -50,7 +79,7 @@ function build_vpp_ubuntu_amd64 () {
     # On the other hand, if parent uses different dpdk version,
     # The new vpp-ext-deps is built, but the old one is not removed
     # from the build directory if present. (Further functions move both,
-    # and during test dpkg decides on its own which version gets installed.)
+    # and during test dpkg would decide randomly which version gets installed.)
     # As per_patch is too dumb (yet) to detect any of that,
     # the only safe solution is to clean build directory and force rebuild.
     # TODO: Make this function smarter and skip DPDK rebuilds if possible.
@@ -70,7 +99,6 @@ function build_vpp_ubuntu_amd64 () {
     mv -v "build/external/vpp-ext-deps"*".deb" "build-root"/ || {
         die "*.deb move failed."
     }
-
     echo "*******************************************************************"
     echo "* VPP ${1-} BUILD SUCCESSFULLY COMPLETED" || {
         die "Argument not found."
@@ -98,17 +126,6 @@ function compare_test_results () {
     # - 1 - If the comparison utility sees a regression (or data error).
 
     cd "${VPP_DIR}" || die "Change directory operation failed."
-    rm -rf "csit_parent" || die "Remove operation failed."
-    mkdir -p "csit_parent" || die "Directory creation failed."
-    for filename in "output.xml" "log.html" "report.html"; do
-        mv "${ARCHIVE_DIR}/${filename}" "csit_parent/${filename}" || {
-            die "Attempt to move '${filename}' failed."
-        }
-    done
-    parse_bmrr_results "csit_parent" || {
-        die "The function should have died on error."
-    }
-
     # Reusing CSIT main virtualenv.
     pip install -r "${PYTHON_SCRIPTS_DIR}/perpatch_requirements.txt" || {
         die "Perpatch Python requirements installation failed."
@@ -128,12 +145,10 @@ function download_builds () {
     # - ${1} - URL to download VPP builds from.
     # Variables read:
     # - VPP_DIR - Path to WORKSPACE, parent of created directories.
-    # - DOWNLOAD_DIR - Path to directory pybot takes the build to test from.
     # Directories created:
     # - archive - Ends up empty, not to be confused with ${ARCHIVE_DIR}.
     # - build_new - Holding built artifacts of the patch under test (PUT).
     # - built_parent - Holding built artifacts of parent of PUT.
-    # - csit_new - (Re)set to a symlink to archive robot results on failure.
     # Functions called:
     # - die - Print to stderr and exit, defined in common.sh
 
@@ -145,11 +160,30 @@ function download_builds () {
     unzip "archive.zip" || die "Archive extraction failed."
     mv "archive/build_parent" ./ || die "Move operation failed."
     mv "archive/build_new" ./ || die "Move operation failed."
-    cp -r "build_new"/*".deb" "${DOWNLOAD_DIR}" || {
-        die "Copy operation failed."
+}
+
+
+function initialize_csit_dirs () {
+
+    set -exuo pipefail
+
+    # This could be in prepare_test, but download_builds also needs this.
+    #
+    # Variables read:
+    # - VPP_DIR - Path to WORKSPACE, parent of created directories.
+    # Directories created:
+    # - csit_new - Holding test results of the patch under test (PUT).
+    # - csit_parent - Holding test results of parent of PUT.
+    # Functions called:
+    # - die - Print to stderr and exit, defined in common.sh
+
+    cd "${VPP_DIR}" || die "Change directory operation failed."
+    rm -rf "csit_new" "csit_parent" || {
+        die "Directory deletion failed."
+    }
+    mkdir -p "csit_new" "csit_parent" || {
+        die "Directory creation failed."
     }
-    # Create symlinks so that if job fails on robot, results can be archived.
-    ln -s "${ARCHIVE_DIR}" "csit_new" || die "Symbolic link creation failed."
 }
 
 
@@ -172,11 +206,9 @@ function parse_bmrr_results () {
     rel_dir="$(readlink -e "${1}")" || die "Readlink failed."
     in_file="${rel_dir}/output.xml"
     out_file="${rel_dir}/results.txt"
-    # TODO: Do we need to check echo exit code explicitly?
     echo "Parsing ${in_file} putting results into ${out_file}"
     echo "TODO: Re-use parts of PAL when they support subsample test parsing."
-
     pattern='Maximum Receive Rate trial results in packets'
     pattern+=' per second: .*\]</status>'
     grep -o "${pattern}" "${in_file}" | grep -o '\[.*\]' > "${out_file}" || {
@@ -204,10 +236,9 @@ function prepare_build_parent () {
     mkdir -p "build_new" || die "Directory creation failed."
     mv "build-root"/*".deb" "build_new"/ || die "Move operation failed."
     # The previous build could have left some incompatible leftovers,
-    # e.g. DPDK artifacts of different version.
-    # "make -C dpdk clean" does not actually remove such .deb file.
+    # e.g. DPDK artifacts of different version (in build/external).
     # Also, there usually is a copy of dpdk artifact in build-root.
-    git clean -dffx "dpdk"/ "build-root"/ || die "Git clean operation failed."
+    git clean -dffx "build"/ "build-root"/ || die "Git clean operation failed."
     # Finally, check out the parent commit.
     git checkout HEAD~ || die "Git checkout operation failed."
     # Display any other leftovers.
 }
@@ -215,74 +246,48 @@
-function prepare_test_new () {
+function prepare_test () {
 
     set -exuo pipefail
 
     # Variables read:
     # - VPP_DIR - Path to existing directory, parent of accessed directories.
-    # - DOWNLOAD_DIR - Path to directory where Robot takes builds to test from.
-    # - ARCHIVE_DIR - Path to where robot result files are created in.
     # Directories read:
     # - build-root - Existing directory with built VPP artifacts (also DPDK).
     # Directories updated:
-    # - build_parent - Old directory removed, build-root moved to become this.
-    # - ${DOWNLOAD_DIR} - Old content removed, files from build_new copied here.
-    # - csit_new - Currently a symlink to to archive robot results on failure.
+    # - build_parent - Old directory removed, build-root debs moved here.
     # Functions called:
     # - die - Print to stderr and exit, defined in common.sh
 
-    cd "${VPP_DIR}" || die "Change directory operationf failed."
-    rm -rf "build_parent" "csit_new" "${DOWNLOAD_DIR}"/* || die "Remove failed."
+    cd "${VPP_DIR}" || die "Change directory operation failed."
+    rm -rf "build_parent" || die "Remove failed."
     mkdir -p "build_parent" || die "Directory creation operation failed."
     mv "build-root"/*".deb" "build_parent"/ || die "Move operation failed."
-    cp "build_new"/*".deb" "${DOWNLOAD_DIR}" || die "Copy operation failed."
-    # Create symlinks so that if job fails on robot, results can be archived.
-    ln -s "${ARCHIVE_DIR}" "csit_new" || die "Symbolic link creation failed."
 }
 
 
-function prepare_test_parent () {
+function select_build () {
 
     set -exuo pipefail
 
+    # Arguments:
+    # - ${1} - Path to directory to copy VPP artifacts from. Required.
     # Variables read:
-    # - VPP_DIR - Path to existing directory, parent of accessed directories.
-    # - CSIT_DIR - Path to existing root of local CSIT git repository.
-    # - ARCHIVE_DIR and DOWNLOAD_DIR - Paths to directories to update.
+    # - DOWNLOAD_DIR - Path to directory where Robot takes builds to test from.
+    # - VPP_DIR - Path to existing directory, root for relative paths.
     # Directories read:
-    # - build_parent - Build artifacts (to test next) are copied from here.
+    # - ${1} - Existing directory with built new VPP artifacts (and DPDK).
     # Directories updated:
-    # - csit_new - Deleted, then recreated and latest robot results copied here.
-    # - ${CSIT_DIR} - Subjected to git reset and git clean.
-    # - ${ARCHIVE_DIR} - Created if not existing (if deleted by git clean).
-    # - ${DOWNLOAD_DIR} - Created after git clean, parent build copied here.
-    # - csit_parent - Currently a symlink to csit/ to archive robot results.
+    # - ${DOWNLOAD_DIR} - Old content removed, .deb files from ${1} copied here.
     # Functions called:
     # - die - Print to stderr and exit, defined in common.sh
-    # - parse_bmrr_results - See definition in this file.
 
     cd "${VPP_DIR}" || die "Change directory operation failed."
-    rm -rf "csit_new" "csit_parent" || die "Remove operation failed."
-    mkdir -p "csit_new" || die "Create directory operation failed."
-    for filename in "output.xml" "log.html" "report.html"; do
-        mv "${ARCHIVE_DIR}/${filename}" "csit_new/${filename}" || {
-            die "Move operation of '${filename}' failed."
-        }
-    done
-    parse_bmrr_results "csit_new" || {
-        die "The function should have died on error."
-    }
-
-    pushd "${CSIT_DIR}" || die "Change directory operation failed."
-    git reset --hard HEAD || die "Git reset operation failed."
-    git clean -dffx || die "Git clean operation failed."
-    popd || die "Change directory operation failed."
-    mkdir -p "${ARCHIVE_DIR}" "${DOWNLOAD_DIR}" || die "Dir creation failed."
-
-    cp "build_parent"/*".deb" "${DOWNLOAD_DIR}"/ || die "Copy failed."
-    # Create symlinks so that if job fails on robot, results can be archived.
-    ln -s "${ARCHIVE_DIR}" "csit_parent" || die "Symlink creation failed."
+    source_dir="$(readlink -e "$1")"
+    rm -rf "${DOWNLOAD_DIR}"/* || die "Cleanup of download dir failed."
+    cp "${source_dir}"/*".deb" "${DOWNLOAD_DIR}" || die "Copy operation failed."
+    # TODO: Is there a nice way to create symlinks,
+    # so that if job fails on robot, results can be archived?
 }
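For context before the comparison script: parse_bmrr_results (touched above only for comment cleanup) greps each run's output.xml for the bracketed list of MRR trial values and writes one line per testcase into results.txt. A rough Python equivalent of that extraction, assuming the same status-message format:

```python
# Rough Python equivalent (assumed, for illustration) of the grep pipeline
# in parse_bmrr_results: extract the bracketed per-trial value list from
# each test's <status> message in output.xml.
import re

PATTERN = re.compile(
    r"Maximum Receive Rate trial results in packets"
    r" per second: (\[[^]]*\])</status>")

def parse_bmrr_results(xml_text):
    """Return one '[...]' string per matching test, in document order."""
    return [match.group(1) for match in PATTERN.finditer(xml_text)]

sample = ('<status status="PASS">Maximum Receive Rate trial results'
          ' in packets per second: [12100000.0, 12093000.0]</status>')
print(parse_bmrr_results(sample))  # ['[12100000.0, 12093000.0]']
```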
diff --git a/resources/tools/scripts/compare_perpatch.py b/resources/tools/scripts/compare_perpatch.py
index cc9ffd8992..1f8a1cf892 100644
--- a/resources/tools/scripts/compare_perpatch.py
+++ b/resources/tools/scripts/compare_perpatch.py
@@ -14,7 +14,7 @@
 """Script for determining whether per-patch perf test votes -1.
 
 This script assumes there exist two text files with processed BMRR results,
-located at hardcoded relative paths, having several lines
+located at hardcoded relative paths (subdirs thereof), having several lines
 of json-parseable lists of float values, corresponding to testcase results.
 This script then uses jumpavg library to determine whether there was
 a regression, progression or no change for each testcase.
@@ -38,32 +38,62 @@ def hack(value_list):
     :rtype: list of float
     """
     tmp = sorted(value_list)
-    quarter = len(tmp) / 4
-    ret = tmp[quarter:-quarter]
+    eight = len(tmp) / 8
+    ret = tmp[3*eight:-eight]
     return ret
 
-parent_lines = list()
-new_lines = list()
-with open("csit_parent/results.txt") as parent_file:
-    parent_lines = parent_file.readlines()
-with open("csit_new/results.txt") as new_file:
-    new_lines = new_file.readlines()
-if len(parent_lines) != len(new_lines):
-    print "Number of passed tests does not match!"
-    sys.exit(1)
+iteration = -1
+parent_iterations = list()
+new_iterations = list()
+num_tests = None
+while 1:
+    iteration += 1
+    parent_lines = list()
+    new_lines = list()
+    filename = "csit_parent/{iter}/results.txt".format(iter=iteration)
+    try:
+        with open(filename) as parent_file:
+            parent_lines = parent_file.readlines()
+    except IOError:
+        break
+    num_lines = len(parent_lines)
+    filename = "csit_new/{iter}/results.txt".format(iter=iteration)
+    with open(filename) as new_file:
+        new_lines = new_file.readlines()
+    if num_lines != len(new_lines):
+        print "Number of tests does not match within iteration", iteration
+        sys.exit(1)
+    if num_tests is None:
+        num_tests = num_lines
+    elif num_tests != num_lines:
+        print "Number of tests does not match previous at iteration", iteration
+        sys.exit(1)
+    parent_iterations.append(parent_lines)
+    new_iterations.append(new_lines)
 classifier = BitCountingClassifier()
-num_tests = len(parent_lines)
 exit_code = 0
-for index in range(num_tests):
-    parent_values = hack(json.loads(parent_lines[index]))
-    new_values = hack(json.loads(new_lines[index]))
+for test_index in range(num_tests):
+    val_max = 1.0
+    parent_values = list()
+    new_values = list()
+    for iteration_index in range(len(parent_iterations)):
+        parent_values.extend(
+            json.loads(parent_iterations[iteration_index][test_index]))
+        new_values.extend(
+            json.loads(new_iterations[iteration_index][test_index]))
+    print "TRACE pre-hack parent: {p}".format(p=parent_values)
+    print "TRACE pre-hack new: {n}".format(n=new_values)
+    parent_values = hack(parent_values)
+    new_values = hack(new_values)
     parent_max = BitCountingMetadataFactory.find_max_value(parent_values)
     new_max = BitCountingMetadataFactory.find_max_value(new_values)
-    cmax = max(parent_max, new_max)
-    factory = BitCountingMetadataFactory(cmax)
+    val_max = max(val_max, parent_max, new_max)
+    factory = BitCountingMetadataFactory(val_max)
     parent_stats = factory.from_data(parent_values)
-    factory = BitCountingMetadataFactory(cmax, parent_stats.avg)
-    new_stats = factory.from_data(new_values)
+    new_factory = BitCountingMetadataFactory(val_max, parent_stats.avg)
+    new_stats = new_factory.from_data(new_values)
+    print "TRACE parent: {p}".format(p=parent_values)
+    print "TRACE new: {n}".format(n=new_values)
     print "DEBUG parent: {p}".format(p=parent_stats)
     print "DEBUG new: {n}".format(n=new_stats)
     common_max = max(parent_stats.avg, new_stats.avg)
@@ -71,15 +101,16 @@ for index in range(num_tests):
     print "DEBUG difference: {d}%".format(d=100 * difference)
     classified_list = classifier.classify([parent_stats, new_stats])
     if len(classified_list) < 2:
-        print "Test index {index}: normal (no anomaly)".format(
-            index=index)
+        print "Test test_index {test_index}: normal (no anomaly)".format(
+            test_index=test_index)
         continue
     anomaly = classified_list[1].metadata.classification
     if anomaly == "regression":
-        print "Test index {index}: anomaly regression".format(index=index)
+        print "Test test_index {test_index}: anomaly regression".format(
+            test_index=test_index)
         exit_code = 1
         continue
-    print "Test index {index}: anomaly {anomaly}".format(
-        index=index, anomaly=anomaly)
+    print "Test test_index {test_index}: anomaly {anomaly}".format(
+        test_index=test_index, anomaly=anomaly)
 print "DEBUG exit code {code}".format(code=exit_code)
 sys.exit(exit_code)
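The other substantive change in the script: hack() no longer trims a quarter from each end of the sorted samples; with the larger pooled sample it keeps the slice from 3/8 to 7/8, discarding the lowest three eighths and the highest eighth, presumably because low outliers are dominated by testbed noise while the very top can be lucky spikes. A worked example of the new slicing (made-up values):

```python
# Worked example (made-up values) of the revised hack() trimming: on a
# sorted sample it drops the lowest three eighths and the highest eighth,
# keeping the band just under the maximum.
def hack(value_list):
    tmp = sorted(value_list)
    eighth = len(tmp) // 8  # '/' in the Python 2 original is also floor here
    return tmp[3 * eighth:-eighth]

values = [91, 100, 97, 88, 99, 95, 102, 96]  # 8 hypothetical trial results
# sorted: [88, 91, 95, 96, 97, 99, 100, 102]; eighth == 1; slice [3:-1]
print(hack(values))  # [96, 97, 99, 100]
```

Note that with fewer than eight samples the slice `tmp[0:-0]` would be empty; the pooled data of eight iterations keeps the sample count well above that threshold.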