diff options
Diffstat (limited to 'resources/libraries/bash')
-rw-r--r-- | resources/libraries/bash/entry/bootstrap_verify_perf.sh | 2 | ||||
-rw-r--r-- | resources/libraries/bash/entry/per_patch_perf.sh | 4 | ||||
-rw-r--r-- | resources/libraries/bash/function/common.sh | 36 |
3 files changed, 34 insertions, 8 deletions
diff --git a/resources/libraries/bash/entry/bootstrap_verify_perf.sh b/resources/libraries/bash/entry/bootstrap_verify_perf.sh index 0b46ff1e57..b9299b1422 100644 --- a/resources/libraries/bash/entry/bootstrap_verify_perf.sh +++ b/resources/libraries/bash/entry/bootstrap_verify_perf.sh @@ -42,7 +42,7 @@ check_download_dir || die activate_virtualenv || die generate_tests || die archive_tests || die -reserve_testbed || die +reserve_and_cleanup_testbed || die ansible_hosts "calibration" || die select_tags || die compose_pybot_arguments || die diff --git a/resources/libraries/bash/entry/per_patch_perf.sh b/resources/libraries/bash/entry/per_patch_perf.sh index b5e7bb9117..195d1a70e0 100644 --- a/resources/libraries/bash/entry/per_patch_perf.sh +++ b/resources/libraries/bash/entry/per_patch_perf.sh @@ -56,14 +56,14 @@ select_os || die activate_virtualenv "${VPP_DIR}" || die generate_tests || die archive_tests || die -reserve_testbed || die +reserve_and_cleanup_testbed || die select_tags || die compose_pybot_arguments || die # Support for interleaved measurements is kept for future. iterations=1 # 8 for ((iter=0; iter<iterations; iter++)); do if ((iter)); then - # Reserve testbed has already cleaned it once, + # Function reserve_and_cleanup_testbed has already cleaned it once, # but we need to clean it explicitly on subsequent iterations. cleanup_topo fi diff --git a/resources/libraries/bash/function/common.sh b/resources/libraries/bash/function/common.sh index 810500eb6b..6d078e5a8b 100644 --- a/resources/libraries/bash/function/common.sh +++ b/resources/libraries/bash/function/common.sh @@ -504,29 +504,35 @@ function get_test_tag_string () { } -function reserve_testbed () { +function reserve_and_cleanup_testbed () { set -exuo pipefail # Reserve physical testbed, perform cleanup, register trap to unreserve. + # When cleanup fails, remove from topologies and keep retrying + # until all topologies are removed. # # Variables read: # - TOPOLOGIES - Array of paths to topology yaml to attempt reservation on. # - PYTHON_SCRIPTS_DIR - Path to directory holding the reservation script. # Variables set: + # - TOPOLOGIES - Array of paths to topologies, with failed cleanups removed. # - WORKING_TOPOLOGY - Path to topology yaml file of the reserved testbed. # Functions called: # - die - Print to stderr and exit. # Traps registered: # - EXIT - Calls cancel_all for ${WORKING_TOPOLOGY}. - while true; do + while [[ ${TOPOLOGIES[@]} ]]; do for topo in "${TOPOLOGIES[@]}"; do set +e python "${PYTHON_SCRIPTS_DIR}/topo_reservation.py" -t "${topo}" result="$?" set -e if [[ "${result}" == "0" ]]; then + # Trap unreservation before cleanup check, + # so multiple jobs showing failed cleanup improve chances + # of humans to notice and fix. WORKING_TOPOLOGY="${topo}" echo "Reserved: ${WORKING_TOPOLOGY}" trap "untrap_and_unreserve_testbed" EXIT || { @@ -536,9 +542,28 @@ function reserve_testbed () { } die "Trap attempt failed, unreserve succeeded. Aborting." } - cleanup_topo || { - die "Testbed cleanup failed." - } + # Cleanup check. + set +e + cleanup_topo + result="$?" + set -e + if [[ "${result}" == "0" ]]; then + break + fi + warn "Testbed cleanup failed: ${topo}" + untrap_and_unreserve_testbed "Fail of unreserve after cleanup." + # WORKING_TOPOLOGY is now empty again. + # Build new topology array. + # TOPOLOGIES=("${TOPOLOGIES[@]/$topo}") + # does not really work, see: + # https://stackoverflow.com/questions/16860877/remove-an-element-from-a-bash-array + new_topologies=() + for item in "${TOPOLOGIES[@]}"; do + if [[ "${item}" != "${topo}" ]]; then + new_topologies+=("${item}") + fi + done + TOPOLOGIES=("${new_topologies[@]}") break fi done @@ -555,6 +580,7 @@ function reserve_testbed () { echo "Sleeping ${sleep_time}" sleep "${sleep_time}" || die "Sleep failed." done + die "Run out of operational testbeds!" } |