From db24a2e63a447599b5125da4b6f93f0f9184bfcc Mon Sep 17 00:00:00 2001 From: Peter Mikus Date: Fri, 16 Aug 2019 06:47:53 +0000 Subject: [PATCH] FIX: Topology reservation Last attempt [0] is doing more harm than good. If testbed is down but recovered quickly enough (reboot, temporary ssh connectivity issue, ...) it never gets back into pool making other testbeds overloaded and queued. This patch is supposed to partially revert previous behavior until proper patch will follow. [0] https://gerrit.fd.io/r/c/csit/+/21148 Signed-off-by: Peter Mikus Change-Id: I8203946d10d3e7dd51e97519d679246b5dae59e3 --- resources/libraries/bash/function/common.sh | 35 ----------------------------- resources/tools/scripts/topo_reservation.py | 35 ++++++++++++++++------------- 2 files changed, 19 insertions(+), 51 deletions(-) diff --git a/resources/libraries/bash/function/common.sh b/resources/libraries/bash/function/common.sh index 4352724924..7a55d56871 100644 --- a/resources/libraries/bash/function/common.sh +++ b/resources/libraries/bash/function/common.sh @@ -556,36 +556,6 @@ function installed () { } -function remove_topo () { - - # Remove the argument from list of available topologies. - # - # Just a de-duplicated block of code - # - # Argument: - # - ${1} - The topology item to remove. Required. - # Variable read and re-written: - # - TOPOLOGIES - Array of paths to topologies, with failed cleanups removed. - - set -exuo pipefail - - warn "Testbed ${topo} seems unsuitable, removing from the list." - - # Build new topology array. - # TOPOLOGIES=("${TOPOLOGIES[@]/$topo}") - # does not really work, see: - # https://stackoverflow.com/questions/16860877/remove-an-element-from-a-bash-array - - new_topologies=() - for item in "${TOPOLOGIES[@]}"; do - if [[ "${item}" != "${1}" ]]; then - new_topologies+=("${item}") - fi - done - TOPOLOGIES=("${new_topologies[@]}") -} - - function reserve_and_cleanup_testbed () { # Reserve physical testbed, perform cleanup, register trap to unreserve. 
@@ -638,11 +608,6 @@ function reserve_and_cleanup_testbed () { fi warn "Testbed cleanup failed: ${topo}" untrap_and_unreserve_testbed "Fail of unreserve after cleanup." - # WORKING_TOPOLOGY is now empty again. - remove_topo "${topo}" - elif [[ "${result}" != "2" ]]; then - # 1 or unexpected return code, testbed is probably unusable. - remove_topo "${topo}" fi # Else testbed is accessible but currently reserved, moving on. done diff --git a/resources/tools/scripts/topo_reservation.py b/resources/tools/scripts/topo_reservation.py index 77d84efeba..e7e1ff6bab 100755 --- a/resources/tools/scripts/topo_reservation.py +++ b/resources/tools/scripts/topo_reservation.py @@ -28,6 +28,7 @@ from resources.libraries.python.ssh import exec_cmd RESERVATION_DIR = "/tmp/reservation_dir" +RESERVATION_NODE = "TG" def diag_cmd(node, cmd): @@ -38,9 +39,9 @@ def diag_cmd(node, cmd): :type ssh: dict :type cmd: str """ - print "+", cmd + print('+ {cmd}'.format(cmd=cmd)) _, stdout, _ = exec_cmd(node, cmd) - print stdout + print(stdout) def main(): @@ -88,37 +89,39 @@ def main(): # we are using it, because testing shows SSH access to DUT # during test affects its performance (bursts of lost packets). try: - tgn = topology["TG"] + node = topology[RESERVATION_NODE] except KeyError: - print "Topology file does not contain 'TG' node" + print("Topology file does not contain '{node}' node". + format(node=RESERVATION_NODE)) return 1 # For system reservation we use mkdir it is an atomic operation and we can # store additional data (time, client_ID, ..) within reservation directory. if args.cancel: - ret, _, err = exec_cmd(tgn, "rm -r {}".format(RESERVATION_DIR)) + ret, _, err = exec_cmd(node, "rm -r {dir}".format(dir=RESERVATION_DIR)) if ret: - print "Cancellation unsuccessful:\n{}".format(err) + print("Cancellation unsuccessful:\n{err}".format(err=err)) return ret # Before critical section, output can be outdated already. 
print("Diagnostic commands:") # -d and * are to supress "total ", see https://askubuntu.com/a/61190 - diag_cmd(tgn, "ls --full-time -cd '{dir}'/*".format(dir=RESERVATION_DIR)) - print("Attempting reservation.") + diag_cmd(node, "ls --full-time -cd '{dir}'/*".format(dir=RESERVATION_DIR)) + print("Attempting testbed reservation.") # Entering critical section. - ret, _, err = exec_cmd(tgn, "mkdir '{dir}'".format(dir=RESERVATION_DIR)) + ret, _, _ = exec_cmd(node, "mkdir '{dir}'".format(dir=RESERVATION_DIR)) # Critical section is over. if ret: - print("Already reserved by another job:\n{}".format(err)) + _, stdo, _ = exec_cmd(node, "ls '{dir}'/*".format(dir=RESERVATION_DIR)) + print("Testbed already reserved by:\n{stdo}".format(stdo=stdo)) return 2 # Here the script knows it is the only owner of the testbed. - print("Success, writing test run info to reservation dir.") - ret2, _, err = exec_cmd( - tgn, "touch '{dir}/{runtag}'"\ + print("Reservation success, writing additional info to reservation dir.") + ret, _, err = exec_cmd( + node, "touch '{dir}/{runtag}'"\ .format(dir=RESERVATION_DIR, runtag=args.runtag)) - if ret2: - print("Writing test run info failed, but continuing anyway:\n{}".format( - err)) + if ret: + print("Writing test run info failed, but continuing anyway:\n{err}". + format(err=err)) return 0 -- 2.16.6