diff options
author | Peter Mikus <pmikus@cisco.com> | 2019-08-16 06:47:53 +0000 |
---|---|---|
committer | Peter Mikus <pmikus@cisco.com> | 2019-08-19 05:22:03 +0000 |
commit | db24a2e63a447599b5125da4b6f93f0f9184bfcc (patch) | |
tree | 6b07fb0413e9b9bc898341669dabb73f860c6958 /resources | |
parent | bbcacad812c4e4144b97ec480e7db428b248ab22 (diff) |
FIX: Topology reservation
The last attempt [0] is doing more harm than good. If a testbed is down
but recovers quickly enough (reboot, temporary SSH connectivity
issue, ...), it never gets back into the pool, leaving the other testbeds
overloaded and jobs queued.
This patch is supposed to partially revert to the previous behavior until
a proper patch follows.
[0] https://gerrit.fd.io/r/c/csit/+/21148
Signed-off-by: Peter Mikus <pmikus@cisco.com>
Change-Id: I8203946d10d3e7dd51e97519d679246b5dae59e3
Diffstat (limited to 'resources')
-rw-r--r-- | resources/libraries/bash/function/common.sh | 35 | ||||
-rwxr-xr-x | resources/tools/scripts/topo_reservation.py | 35 |
2 files changed, 19 insertions, 51 deletions
diff --git a/resources/libraries/bash/function/common.sh b/resources/libraries/bash/function/common.sh index 4352724924..7a55d56871 100644 --- a/resources/libraries/bash/function/common.sh +++ b/resources/libraries/bash/function/common.sh @@ -556,36 +556,6 @@ function installed () { } -function remove_topo () { - - # Remove the argument from list of available topologies. - # - # Just a de-duplicated block of code - # - # Argument: - # - ${1} - The topology item to remove. Required. - # Variable read and re-written: - # - TOPOLOGIES - Array of paths to topologies, with failed cleanups removed. - - set -exuo pipefail - - warn "Testbed ${topo} seems unsuitable, removing from the list." - - # Build new topology array. - # TOPOLOGIES=("${TOPOLOGIES[@]/$topo}") - # does not really work, see: - # https://stackoverflow.com/questions/16860877/remove-an-element-from-a-bash-array - - new_topologies=() - for item in "${TOPOLOGIES[@]}"; do - if [[ "${item}" != "${1}" ]]; then - new_topologies+=("${item}") - fi - done - TOPOLOGIES=("${new_topologies[@]}") -} - - function reserve_and_cleanup_testbed () { # Reserve physical testbed, perform cleanup, register trap to unreserve. @@ -638,11 +608,6 @@ function reserve_and_cleanup_testbed () { fi warn "Testbed cleanup failed: ${topo}" untrap_and_unreserve_testbed "Fail of unreserve after cleanup." - # WORKING_TOPOLOGY is now empty again. - remove_topo "${topo}" - elif [[ "${result}" != "2" ]]; then - # 1 or unexpected return code, testbed is probably unusable. - remove_topo "${topo}" fi # Else testbed is accessible but currently reserved, moving on. 
done diff --git a/resources/tools/scripts/topo_reservation.py b/resources/tools/scripts/topo_reservation.py index 77d84efeba..e7e1ff6bab 100755 --- a/resources/tools/scripts/topo_reservation.py +++ b/resources/tools/scripts/topo_reservation.py @@ -28,6 +28,7 @@ from resources.libraries.python.ssh import exec_cmd RESERVATION_DIR = "/tmp/reservation_dir" +RESERVATION_NODE = "TG" def diag_cmd(node, cmd): @@ -38,9 +39,9 @@ def diag_cmd(node, cmd): :type ssh: dict :type cmd: str """ - print "+", cmd + print('+ {cmd}'.format(cmd=cmd)) _, stdout, _ = exec_cmd(node, cmd) - print stdout + print(stdout) def main(): @@ -88,37 +89,39 @@ def main(): # we are using it, because testing shows SSH access to DUT # during test affects its performance (bursts of lost packets). try: - tgn = topology["TG"] + node = topology[RESERVATION_NODE] except KeyError: - print "Topology file does not contain 'TG' node" + print("Topology file does not contain '{node}' node". + format(node=RESERVATION_NODE)) return 1 # For system reservation we use mkdir it is an atomic operation and we can # store additional data (time, client_ID, ..) within reservation directory. if args.cancel: - ret, _, err = exec_cmd(tgn, "rm -r {}".format(RESERVATION_DIR)) + ret, _, err = exec_cmd(node, "rm -r {dir}".format(dir=RESERVATION_DIR)) if ret: - print "Cancellation unsuccessful:\n{}".format(err) + print("Cancellation unsuccessful:\n{err}".format(err=err)) return ret # Before critical section, output can be outdated already. print("Diagnostic commands:") # -d and * are to supress "total <size>", see https://askubuntu.com/a/61190 - diag_cmd(tgn, "ls --full-time -cd '{dir}'/*".format(dir=RESERVATION_DIR)) - print("Attempting reservation.") + diag_cmd(node, "ls --full-time -cd '{dir}'/*".format(dir=RESERVATION_DIR)) + print("Attempting testbed reservation.") # Entering critical section. 
- ret, _, err = exec_cmd(tgn, "mkdir '{dir}'".format(dir=RESERVATION_DIR)) + ret, _, _ = exec_cmd(node, "mkdir '{dir}'".format(dir=RESERVATION_DIR)) # Critical section is over. if ret: - print("Already reserved by another job:\n{}".format(err)) + _, stdo, _ = exec_cmd(node, "ls '{dir}'/*".format(dir=RESERVATION_DIR)) + print("Testbed already reserved by:\n{stdo}".format(stdo=stdo)) return 2 # Here the script knows it is the only owner of the testbed. - print("Success, writing test run info to reservation dir.") - ret2, _, err = exec_cmd( - tgn, "touch '{dir}/{runtag}'"\ + print("Reservation success, writing additional info to reservation dir.") + ret, _, err = exec_cmd( + node, "touch '{dir}/{runtag}'"\ .format(dir=RESERVATION_DIR, runtag=args.runtag)) - if ret2: - print("Writing test run info failed, but continuing anyway:\n{}".format( - err)) + if ret: + print("Writing test run info failed, but continuing anyway:\n{err}". + format(err=err)) return 0 |