aboutsummaryrefslogtreecommitdiffstats
path: root/resources
diff options
context:
space:
mode:
authorVratko Polak <vrpolak@cisco.com>2019-08-08 15:11:38 +0200
committerVratko Polak <vrpolak@cisco.com>2019-08-09 14:36:09 +0000
commit6f1ff57c83763556450e8bfcb4571a9c017abe70 (patch)
treed4dd20a29b4f6a83700db36d6ef8737fb5795384 /resources
parent5b28bc8c52a5f7ef72f03d03806f61ba45ecc623 (diff)
Skip bad testbeds in reservation
Previously, ssh-inaccesible testbeds were handled the same way as reserved, never ending the wait loop. With this, if no testbed is accessible, the job fails early. Change-Id: I01bc79094fe0232a47d795e53e3daa52e8742bac Signed-off-by: Vratko Polak <vrpolak@cisco.com>
Diffstat (limited to 'resources')
-rw-r--r--resources/libraries/bash/function/common.sh61
-rwxr-xr-xresources/tools/scripts/topo_reservation.py15
2 files changed, 54 insertions, 22 deletions
diff --git a/resources/libraries/bash/function/common.sh b/resources/libraries/bash/function/common.sh
index a03e90422c..4a560ff98c 100644
--- a/resources/libraries/bash/function/common.sh
+++ b/resources/libraries/bash/function/common.sh
@@ -553,6 +553,36 @@ function installed () {
}
+function remove_topo () {
+
+ # Remove the argument from list of available topologies.
+ #
+ # Just a de-duplicated block of code
+ #
+ # Argument:
+ # - ${1} - The topology item to remove. Required.
+ # Variable read and re-written:
+ # - TOPOLOGIES - Array of paths to topologies, with failed cleanups removed.
+
+ set -exuo pipefail
+
+ warn "Testbed ${topo} seems unsuitable, removing from the list."
+
+ # Build new topology array.
+ # TOPOLOGIES=("${TOPOLOGIES[@]/$topo}")
+ # does not really work, see:
+ # https://stackoverflow.com/questions/16860877/remove-an-element-from-a-bash-array
+
+ new_topologies=()
+ for item in "${TOPOLOGIES[@]}"; do
+ if [[ "${item}" != "${1}" ]]; then
+ new_topologies+=("${item}")
+ fi
+ done
+ TOPOLOGIES=("${new_topologies[@]}")
+}
+
+
function reserve_and_cleanup_testbed () {
# Reserve physical testbed, perform cleanup, register trap to unreserve.
@@ -576,7 +606,7 @@ function reserve_and_cleanup_testbed () {
set -exuo pipefail
- while [[ ${TOPOLOGIES[@]} ]]; do
+ while true; do
for topo in "${TOPOLOGIES[@]}"; do
set +e
scrpt="${PYTHON_SCRIPTS_DIR}/topo_reservation.py"
@@ -609,38 +639,31 @@ function reserve_and_cleanup_testbed () {
warn "Testbed cleanup failed: ${topo}"
untrap_and_unreserve_testbed "Fail of unreserve after cleanup."
# WORKING_TOPOLOGY is now empty again.
- # Build new topology array.
- # TOPOLOGIES=("${TOPOLOGIES[@]/$topo}")
- # does not really work, see:
- # https://stackoverflow.com/questions/16860877/remove-an-element-from-a-bash-array
- new_topologies=()
- for item in "${TOPOLOGIES[@]}"; do
- if [[ "${item}" != "${topo}" ]]; then
- new_topologies+=("${item}")
- fi
- done
- TOPOLOGIES=("${new_topologies[@]}")
- break
+ remove_topo "${topo}"
+ elif [[ "${result}" != "2" ]]; then
+ # 1 or unexpected return code, testbed is probably unusable.
+ remove_topo "${topo}"
fi
+ # Else testbed is accessible but currently reserved, moving on.
done
if [[ -n "${WORKING_TOPOLOGY-}" ]]; then
# Exit the infinite while loop if we made a reservation.
+ warn "Reservation and cleanup successful."
break
fi
+ if [[ "${#TOPOLOGIES[@]}" == "0" ]]; then
+ die "Run out of operational testbeds!"
+ fi
+
# Wait ~3minutes before next try.
- sleep_time="$[ ( $RANDOM % 20 ) + 180 ]s" || {
+ sleep_time="$[ ( ${RANDOM} % 20 ) + 180 ]s" || {
die "Sleep time calculation failed."
}
echo "Sleeping ${sleep_time}"
sleep "${sleep_time}" || die "Sleep failed."
done
- if [[ ${TOPOLOGIES[@]} ]]; then
- echo "Reservation and cleanup successful."
- else
- die "Run out of operational testbeds!"
- fi
}
diff --git a/resources/tools/scripts/topo_reservation.py b/resources/tools/scripts/topo_reservation.py
index a04709b2d5..9f26677f42 100755
--- a/resources/tools/scripts/topo_reservation.py
+++ b/resources/tools/scripts/topo_reservation.py
@@ -74,6 +74,15 @@ def main():
via a graphical terminal, which does not allow copying of text,
as they need less keypresses to identify the test run holding the testbed.
Also, the listing shows timestamps, which is useful for both audiences.
+
+ This all assumes the target system accepts ssh connections.
+ If it does not, the caller probably wants to stop trying
+ to reserve this system. Therefore this script can return 3 different codes.
+ Return code 0 means the reservation was successful.
+ Return code 1 means the system is inaccessible (or similarly unsuitable).
+ Return code 2 means the system is accessible, but already reserved.
+ The reason unsuitable systems return 1 is because that is also the value
+ Python returns on encountering and unexcepted exception.
"""
parser = argparse.ArgumentParser()
parser.add_argument("-t", "--topo", required=True,
@@ -117,8 +126,8 @@ def main():
ret, _, err = exec_cmd(tgn, "mkdir '{dir}'".format(dir=RESERVATION_DIR))
# Critical section is over.
if ret:
- print "Reservation unsuccessful:\n{}".format(err)
- return ret
+ print "Already reserved by another job:\n{}".format(err)
+ return 2
# Here the script knows it is the only owner of the testbed.
print "Success, writing test run info to reservation dir."
# TODO: Add optional argument to exec_cmd_no_error to print message
@@ -129,7 +138,7 @@ def main():
if ret2:
print "Writing test run info failed, but continuing anyway:\n{}".format(
err)
- return ret
+ return 0
if __name__ == "__main__":