aboutsummaryrefslogtreecommitdiffstats
path: root/resources/tools/scripts/topo_reservation.py
blob: 9f26677f427f9fa8dbc563c985ee5baf823552a6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env python2

# Copyright (c) 2019 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Script managing reservation and un-reservation of testbeds.

This script provides a simple reservation mechanism to avoid
simultaneous use of nodes listed in a topology file.
The TG node from the topology file is used as the source of truth.
"""

import sys
import argparse
import yaml

from resources.libraries.python.ssh import exec_cmd


# Directory on the TG node whose existence marks the testbed as reserved:
# main() creates it with mkdir to reserve and removes it with rm -r to cancel.
RESERVATION_DIR = "/tmp/reservation_dir"


def diag_cmd(node, cmd):
    """Execute cmd on node, print the cmd and its stdout; return None.

    The command line is printed first, prefixed with "+ ", followed by
    the stdout returned by exec_cmd. Stderr and the return code
    are discarded, so a failing command does not affect the caller.

    :param node: Node object as parsed from topology file to execute cmd on.
    :param cmd: Command to execute.
    :type node: dict
    :type cmd: str
    """
    # Single-argument parenthesized print gives the same output
    # under Python 2 (print statement) and Python 3 (print function).
    print("+ " + cmd)
    _, stdout, _ = exec_cmd(node, cmd)
    print(stdout)


def _shell_single_quoted(text):
    """Return text as one single-quoted word for a POSIX shell command line.

    Each embedded single quote is replaced by the sequence '\\'' (close
    the quoted string, emit an escaped quote, reopen the quoted string).
    For text without single quotes the result is just 'text'.

    :param text: Arbitrary string to embed into a shell command.
    :type text: str
    :returns: The quoted word, including the surrounding single quotes.
    :rtype: str
    """
    return "'" + text.replace("'", "'\\''") + "'"


def main():
    """Parse arguments, perform the action, write useful output, propagate RC.

    If the intended action is cancellation, reservation dir is deleted
    and the return code of the remote "rm -r" command is returned.

    If the intended action is reservation, the list is longer:
    1. List contents of reservation dir.
    2. List contents of run.url file in the dir.
    3. Create reservation dir.
    4. Touch file according to -r option.
    5. Put -u option string to file run.url
    From these 5 steps, 1 and 2 are performed always, their RC ignored.
    RC of step 3 gives the overall result.
    If the result is success, steps 4-5 are executed; their failure
    is reported on output, but it does not change the overall result.

    The two files in reservation dir are there for reporting
    which test run holds the reservation, so people can manually fix the testbed
    if the test run has been aborted, or otherwise failed to unregister.

    The two files have different audiences.

    The URL content is useful for people scheduling their test runs
    and wondering why the reservation takes so long.
    For them, a URL (if available) to copy and paste into browser
    to see which test runs are blocking testbeds is the most convenient.

    The "run tag" as a filename is useful for admins accessing the testbed
    via a graphical terminal, which does not allow copying of text,
    as they need less keypresses to identify the test run holding the testbed.
    Also, the listing shows timestamps, which is useful for both audiences.

    This all assumes the target system accepts ssh connections.
    If it does not, the caller probably wants to stop trying
    to reserve this system. Therefore this script can return 3 different codes.
    Return code 0 means the reservation was successful.
    Return code 1 means the system is inaccessible (or similarly unsuitable).
    Return code 2 means the system is accessible, but already reserved.
    The reason unsuitable systems return 1 is because that is also the value
    Python returns on encountering an unexpected exception.

    :returns: Exit code for the process, as described above.
    :rtype: int
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-t", "--topo", required=True,
                        help="Topology file")
    parser.add_argument("-c", "--cancel", help="Cancel reservation",
                        action="store_true")
    parser.add_argument("-r", "--runtag", required=False, default="Unknown",
                        help="Identifier for test run suitable as filename")
    parser.add_argument("-u", "--url", required=False, default="Unknown",
                        help="Identifier for test run suitable as URL")
    args = parser.parse_args()

    with open(args.topo, "r") as topo_file:
        # safe_load builds only plain YAML types; yaml.load without an explicit
        # Loader can instantiate arbitrary Python objects and is deprecated.
        topology = yaml.safe_load(topo_file)['nodes']

    # Even if TG is not guaranteed to be a Linux host,
    # we are using it, because testing shows SSH access to DUT
    # during test affects its performance (bursts of lost packets).
    try:
        tgn = topology["TG"]
    except KeyError:
        print("Topology file does not contain 'TG' node")
        return 1

    # For system reservation we use mkdir, as it is an atomic operation
    # and we can store additional data (time, client_ID, ..)
    # within the reservation directory.
    if args.cancel:
        ret, _, err = exec_cmd(tgn, "rm -r {}".format(RESERVATION_DIR))
        if ret:
            print("Cancellation unsuccessful:\n{}".format(err))
        return ret
    # Before critical section, output can be outdated already.
    print("Diagnostic commands:")
    # -d and * are to suppress "total <size>", see https://askubuntu.com/a/61190
    diag_cmd(tgn, "ls --full-time -cd '{dir}'/*".format(dir=RESERVATION_DIR))
    diag_cmd(tgn, "head -1 '{dir}/run.url'".format(dir=RESERVATION_DIR))
    print("Attempting reservation.")
    # Entering critical section.
    # TODO: Add optional argument to exec_cmd_no_error to make it
    # sys.exit(ret) instead raising? We do not want to deal with stacktrace.
    ret, _, err = exec_cmd(tgn, "mkdir '{dir}'".format(dir=RESERVATION_DIR))
    # Critical section is over.
    if ret:
        print("Already reserved by another job:\n{}".format(err))
        return 2
    # Here the script knows it is the only owner of the testbed.
    print("Success, writing test run info to reservation dir.")
    # The run tag and URL come from the command line, so any single quote
    # in them is escaped; otherwise it would terminate the quoting
    # and the rest of the value would be interpreted by the remote shell.
    tag_path = _shell_single_quoted(
        "{dir}/{runtag}".format(dir=RESERVATION_DIR, runtag=args.runtag))
    url_word = _shell_single_quoted(args.url)
    # TODO: Add optional argument to exec_cmd_no_error to print message
    # to console instead raising? We do not want to deal with stacktrace.
    ret2, _, err = exec_cmd(
        tgn, "touch {tag_path} && ( echo {url} > '{dir}/run.url' )".format(
            tag_path=tag_path, url=url_word, dir=RESERVATION_DIR))
    if ret2:
        print("Writing test run info failed, but continuing anyway:\n{}".format(
            err))
    return 0


if __name__ == "__main__":
    # Hand the value returned by main() to the caller as the exit status.
    exit_status = main()
    sys.exit(exit_status)