Fix start-testcase script on VIRL
[csit.git] / resources / tools / virl / bin / start-testcase
1 #!/usr/bin/python
2
3 # Copyright (c) 2016 Cisco and/or its affiliates.
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at:
7 #
8 #     http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15
16 """This script is handling starting of VIRL simulations."""
17
18 __author__ = 'ckoester@cisco.com'
19
20 import sys
21 import re
22 import os
23 import argparse
24 import tempfile
25 import shutil
26 import time
27 import paramiko
28 import netifaces
29
30 import requests
31
def indent(lines, amount, fillchar=' '):
    """Prefix every line of a string with a run of fill characters.

    The input is split on newline characters only; each resulting piece
    (including an empty trailing piece, if the string ends with a newline)
    receives the same prefix.

    :param lines: String to indent, possibly spanning several lines.
    :param amount: How many fill characters to put in front of each line.
    :param fillchar: Character (or string) the prefix is built from.
    :type lines: str
    :type amount: int
    :type fillchar: str
    :returns: The input with every line prefixed.
    :rtype: str
    """
    prefix = fillchar * amount
    return '\n'.join(prefix + row for row in lines.split('\n'))
46
def print_to_stderr(msg, end='\n'):
    """Write a message to standard error on a best-effort basis.

    The message is converted with str() and followed by the terminator.
    A ValueError raised while converting or writing is deliberately
    ignored, so reporting a problem never aborts the script.

    :param msg: Message to print.
    :param end: Terminator appended to the message, a new line by default.
    :type msg: str
    :type end: str
    """
    try:
        stream = sys.stderr
        stream.write(str(msg) + end)
    except ValueError:
        # Best effort only: the message is dropped silently.
        return
59
#
# FIXME: main() is still coded like a shell script, as one long sequence of
# steps. Only the failure clean-up has been factored out into helpers so far;
# the remaining steps should be broken down into functions as well.
#
def _stop_simulation(args, session_id):
    """Ask the VIRL server to stop a simulation.

    A failing request is reported on stderr instead of being raised, so
    that the caller can carry on with the rest of its clean-up.

    :param args: Parsed command line arguments (virl_ip, username and
        password are used).
    :param session_id: Session ID of the simulation to stop.
    :type args: argparse.Namespace
    :type session_id: str
    """
    try:
        requests.get('http://' + args.virl_ip + '/simengine/rest/stop/' +
                     session_id, auth=(args.username, args.password))
    except requests.exceptions.RequestException as ex_error:
        print_to_stderr("ERROR: Stopping simulation {} failed: {}"
                        .format(session_id, ex_error))

def _abort_simulation(args, session_id, scratch_directory):
    """Clean up after a failure and terminate the script with status 1.

    Unless --keep was given, the simulation is stopped and its scratch
    directory is removed. The session ID is printed to stdout in every
    case before exiting.

    :param args: Parsed command line arguments.
    :param session_id: Session ID of the failed simulation.
    :param scratch_directory: Scratch directory of the simulation.
    :type args: argparse.Namespace
    :type session_id: str
    :type scratch_directory: str
    """
    if not args.keep:
        _stop_simulation(args, session_id)
        try:
            shutil.rmtree(scratch_directory)
        except (OSError, shutil.Error):
            # shutil.rmtree reports failures as OSError; shutil.Error alone
            # would let them escape as a traceback.
            print_to_stderr("ERROR: Removing scratch directory")
    print("{}".format(session_id))
    sys.exit(1)

def main():
    """Start a VIRL simulation and prepare it for testing.

    The steps are executed in this order: parse the command line, check
    that the topology files and the VPP packages exist, launch the
    topology on the VIRL server, set the simulation expiry (skipped with
    --keep), populate the scratch directory, wait for all nodes to report
    the ACTIVE state, write the ansible hosts file and the topology YAML
    file, wait until an entry named after every node shows up in the
    scratch directory, install the VPP packages on all nodes whose type is
    not 'tg', and write a start time stamp.

    The session ID is printed to stdout on success, and also on every
    handled failure that happens after the simulation was launched.
    Handled failures terminate the script with exit status 1.
    """
    #
    # Get our default interface IP address. This will become the default
    # value for the "NFS Server IP" option.
    #
    gws = netifaces.gateways()
    addrs = netifaces.ifaddresses(gws['default'][netifaces.AF_INET][1])
    default_addr = addrs[netifaces.AF_INET][0]['addr']

    #
    # Verify CLI parameters
    #
    parser = argparse.ArgumentParser()
    parser.add_argument("topology", help="the base topology to be started")
    parser.add_argument("packages", help="Path to the VPP .deb(s) or .rpm(s) " +
                        "that is/are to be installed", nargs='+')
    parser.add_argument("-c", "--copy", help="Copy the VPP packages, " +
                        "leaving the originals in place. Default is to " +
                        "move them.", action='store_true')
    parser.add_argument("-k", "--keep", help="Keep (do not delete) the " +
                        "simulation in case of error", action='store_true')
    parser.add_argument("-v", "--verbosity", action="count", default=0)
    parser.add_argument("-nip", "--nfs-server-ip", help="NFS server (our) IP " +
                        "default is derived from routing table: " +
                        "{}".format(default_addr), default=default_addr)
    parser.add_argument("-ns", "--nfs-scratch-directory",
                        help="Server location for NFS scratch directory",
                        default="/nfs/scratch")
    parser.add_argument("-nc", "--nfs-common-directory",
                        help="Server location for NFS common (read-only) " +
                        "directory", default="/nfs/common")
    parser.add_argument("-wc", "--wait-count",
                        help="number of intervals to wait for simulation to " +
                        "be ready", type=int, default=48)
    parser.add_argument("-wt", "--wait-time",
                        help="length of a single interval to wait for " +
                        "simulation to be ready", type=int, default=5)
    parser.add_argument("-vip", "--virl-ip",
                        help="VIRL IP and Port (e.g. 127.0.0.1:19399)",
                        default="127.0.0.1:19399")
    parser.add_argument("-u", "--username", help="VIRL username",
                        default="tb4-virl")
    parser.add_argument("-au", "--admin-username", help="VIRL admin username",
                        default="uwmadmin")
    parser.add_argument("-p", "--password", help="VIRL password",
                        default="Cisco1234")
    parser.add_argument("-su", "--ssh-user", help="SSH username",
                        default="cisco")
    parser.add_argument("-e", "--expiry", help="Simulation expiry",
                        default="120")
    parser.add_argument("-spr", "--ssh-privkey", help="SSH private keyfile",
                        default="/home/jenkins-in/.ssh/id_rsa_virl")
    parser.add_argument("-spu", "--ssh-pubkey", help="SSH public keyfile",
                        default="/home/jenkins-in/.ssh/id_rsa_virl.pub")
    parser.add_argument("-r", "--release", help="VM disk image/release " +
                        "(ex. \"csit-ubuntu-16.04.1_2016-12-19_1.6\")",
                        default="csit-ubuntu-16.04.1_2016-12-19_1.6")
    parser.add_argument("--topology-directory", help="Topology directory",
                        default="/home/jenkins-in/testcase-infra/topologies")

    args = parser.parse_args()

    # Shared pieces of every request sent to the VIRL server.
    virl_url = 'http://' + args.virl_ip + '/simengine/rest/'
    user_auth = (args.username, args.password)

    #
    # Check if topology and template exist
    #
    if args.verbosity >= 2:
        print_to_stderr("DEBUG: Running with topology {}"
                        .format(args.topology))

    topology_virl_filename = os.path.join(args.topology_directory,
                                          args.topology + ".virl")
    topology_yaml_filename = os.path.join(args.topology_directory,
                                          args.topology + ".yaml")

    if not os.path.isfile(topology_virl_filename):
        print_to_stderr("ERROR: Topology VIRL file {} does not exist"
                        .format(topology_virl_filename))
        sys.exit(1)
    if not os.path.isfile(topology_yaml_filename):
        print_to_stderr("ERROR: Topology YAML file {} does not exist"
                        .format(topology_yaml_filename))
        sys.exit(1)

    #
    # Check if VPP package exists
    #
    for package in args.packages:
        if args.verbosity >= 2:
            print_to_stderr("DEBUG: Checking if file {} exists"
                            .format(package))
        if not os.path.isfile(package):
            print_to_stderr("ERROR: Debian package {} does not exist"
                            .format(package))
            sys.exit(1)

    #
    # Start VIRL topology
    #
    if args.verbosity >= 1:
        print_to_stderr("DEBUG: Starting VIRL topology")
    # Read the key before creating the temporary file, so that a missing
    # key file does not leave a stray temporary file behind.
    with open(args.ssh_pubkey, 'r') as pubkey_file:
        pub_key = pubkey_file.read().replace('\n', '')
    temp_handle, temp_topology = tempfile.mkstemp()
    # Fill in the placeholders of the topology template. Writing through
    # the descriptor returned by mkstemp also takes care of closing it.
    with os.fdopen(temp_handle, 'w') as new_file, \
            open(topology_virl_filename, 'r') as old_file:
        for line in old_file:
            line = line.replace("  - VIRL-USER-SSH-PUBLIC-KEY", "  - "+pub_key)
            line = line.replace("$$NFS_SERVER_SCRATCH$$",
                                args.nfs_server_ip+":"+args.nfs_scratch_directory)
            line = line.replace("$$NFS_SERVER_COMMON$$",
                                args.nfs_server_ip+":"+args.nfs_common_directory)
            line = line.replace("$$VM_IMAGE$$", "server-"+args.release)
            new_file.write(line)

    try:
        # The context manager makes sure the upload handle is closed again.
        with open(temp_topology, 'rb') as topology_data:
            req = requests.post(virl_url + 'launch', auth=user_auth,
                                data=topology_data)
        if args.verbosity >= 2:
            print_to_stderr("DEBUG: - Request URL {}"
                            .format(req.url))
            print_to_stderr("{}"
                            .format(req.text))
            print_to_stderr("DEBUG: - Response Code {}"
                            .format(req.status_code))
        if req.status_code != 200:
            raise RuntimeError("ERROR: Launching VIRL simulation - "
                               "Status other than 200 HTTP OK:\n{}"
                               .format(req.content))
    except (requests.exceptions.RequestException,
            RuntimeError) as ex_error:
        print_to_stderr(ex_error)
        os.remove(temp_topology)
        sys.exit(1)

    # If we got here, we had a good response. The response content is the
    # session ID.
    session_id = req.content
    if args.verbosity >= 1:
        print_to_stderr("DEBUG: VIRL simulation session-id: {}"
                        .format(session_id))

    # Set session expiry to autokill sessions if not done from jenkins
    if not args.keep:
        if args.verbosity >= 1:
            print_to_stderr("DEBUG: Setting expire for session-id: {}"
                            .format(session_id))
        try:
            req = requests.put(virl_url + 'admin-update/' + session_id +
                               '/expiry',
                               auth=(args.admin_username, args.password),
                               params={'user': args.username,
                                       'expires': args.expiry})
            if args.verbosity >= 2:
                print_to_stderr("DEBUG: - Request URL {}"
                                .format(req.url))
                print_to_stderr("{}"
                                .format(req.text))
                print_to_stderr("DEBUG: - Response Code {}"
                                .format(req.status_code))
            if req.status_code != 200:
                raise RuntimeError("ERROR: Setting expiry to simulation - "
                                   "Status other than 200 HTTP OK:\n{}"
                                   .format(req.content))
        except (requests.exceptions.RequestException,
                RuntimeError) as ex_error:
            print_to_stderr(ex_error)
            # The scratch directory does not exist yet, only the temporary
            # topology file has to be cleaned up here.
            _stop_simulation(args, session_id)
            os.remove(temp_topology)
            print("{}".format(session_id))
            sys.exit(1)

    #
    # Create simulation scratch directory. Move topology file into that
    # directory. Copy or move debian packages into that directory.
    #
    scratch_directory = os.path.join(args.nfs_scratch_directory, session_id)
    os.mkdir(scratch_directory)
    shutil.move(temp_topology, os.path.join(scratch_directory,
                                            "virl_topology.virl"))
    os.mkdir(os.path.join(scratch_directory, "vpp"))
    transfer = shutil.copy if args.copy else shutil.move
    for package in args.packages:
        transfer(package, os.path.join(scratch_directory, "vpp",
                                       os.path.basename(package)))

    #
    # Wait for simulation to become active
    #
    if args.verbosity >= 1:
        print_to_stderr("DEBUG: Waiting for simulation to become active")

    sim_is_started = False
    nodelist = []
    # Stays None when no poll is performed at all (wait count of zero).
    data = None

    for attempt in range(1, args.wait_count + 1):
        time.sleep(args.wait_time)

        req = requests.get(virl_url + 'nodes/' + session_id, auth=user_auth)
        data = req.json()

        active = 0

        # Flush the node list every time, keep the last one
        nodelist = []

        # Hosts are the keys of the inner dictionary
        nodes = data[session_id]
        for key in nodes:
            if nodes[key]['management-proxy'] == "self":
                continue
            nodelist.append(key)
            if nodes[key]['state'] == "ACTIVE":
                active += 1
        total = len(nodelist)
        if args.verbosity >= 2:
            print_to_stderr("DEBUG: - Attempt {} out of {}, total {} hosts, "
                            "{} active".format(attempt, args.wait_count,
                                               total, active))
        if active == total:
            sim_is_started = True
            break

    if not sim_is_started:
        print_to_stderr("ERROR: Simulation nodes never changed to ACTIVE state")
        print_to_stderr("Last VIRL response:")
        print_to_stderr(data)
        _abort_simulation(args, session_id, scratch_directory)

    if args.verbosity >= 2:
        print_to_stderr("DEBUG: Nodes: {}"
                        .format(", ".join(nodelist)))

    #
    # Fetch simulation's IPs and create files
    # (ansible hosts file, topology YAML file)
    #
    try:
        req = requests.get(virl_url + 'interfaces/' + session_id,
                           auth=user_auth, params={'fetch-state': '1'})
        if args.verbosity >= 2:
            print_to_stderr("DEBUG: - Request URL {}"
                            .format(req.url))
            print_to_stderr("DEBUG: - Request Text")
            print_to_stderr("{}".format(req.text))
            print_to_stderr("DEBUG: - Response Code {}"
                            .format(req.status_code))
        if req.status_code != 200:
            raise RuntimeError("ERROR:Fetching IP's of simulation - "
                               "Status other than 200 HTTP OK:\n{}"
                               .format(req.content))
    except (requests.exceptions.RequestException,
            RuntimeError) as ex_error:
        print_to_stderr(ex_error)
        _abort_simulation(args, session_id, scratch_directory)
    data = req.json()

    # Populate node addresses
    nodeaddrs = {}
    topology = {}
    for key in nodelist:
        node_interfaces = data[session_id][key]
        # The node type is the part of the node name before the first digit.
        nodetype = re.split('[0-9]', key)[0]
        # Keep only the part of the management address before the '/'.
        mgmt_addr = node_interfaces['management']['ip-address'].split('/')[0]
        nodeaddrs.setdefault(nodetype, {})[key] = mgmt_addr
        if args.verbosity >= 2:
            print_to_stderr("DEBUG: Node {} is of type {} and has mgmt IP {}"
                            .format(key, nodetype, mgmt_addr))

        topology[key] = {}
        for key2 in node_interfaces:
            nic = node_interfaces[key2]
            topology[key]["nic-"+key2] = nic
            if 'ip-address' in nic and nic['ip-address'] is not None:
                nic['ip-addr'] = nic['ip-address'].split('/')[0]

    # Write ansible file
    with open(os.path.join(scratch_directory, 'ansible-hosts'), 'w') as \
            ansiblehosts:
        for key1 in nodeaddrs:
            ansiblehosts.write("[{}]\n".format(key1))
            for key2 in nodeaddrs[key1]:
                ansiblehosts.write("{} hostname={}\n"
                                   .format(nodeaddrs[key1][key2], key2))

    # Process topology YAML template
    with open(args.ssh_privkey, 'r') as privkey_file:
        priv_key = indent(privkey_file.read(), 6)

    with open(os.path.join(scratch_directory, "topology.yaml"), 'w') as \
            new_file, open(topology_yaml_filename, 'r') as old_file:
        for line in old_file:
            new_file.write(line.format(priv_key=priv_key, topology=topology))

    #
    # Wait for hosts to become reachable over SSH
    #
    if args.verbosity >= 1:
        print_to_stderr("DEBUG: Waiting for hosts to become reachable over SSH")

    missing = -1
    for attempt in range(1, args.wait_count + 1):
        time.sleep(args.wait_time)

        # A host counts as missing until an entry with its name exists in
        # the scratch directory.
        missing = 0
        for key in nodelist:
            if not os.path.exists(os.path.join(scratch_directory, key)):
                missing += 1
        if args.verbosity >= 2:
            print_to_stderr("DEBUG: Attempt {} out of {}, waiting for {} hosts"
                            .format(attempt, args.wait_count, missing))
        if missing == 0:
            break

    if missing != 0:
        print_to_stderr("ERROR: Simulation started OK but {} hosts never "
                        "mounted their NFS directory".format(missing))
        _abort_simulation(args, session_id, scratch_directory)

    #
    # Upgrade VPP
    #
    if args.verbosity >= 1:
        print_to_stderr("DEBUG: Uprading VPP")

    for key1 in nodeaddrs:
        if key1 == 'tg':
            continue
        for key2 in nodeaddrs[key1]:
            ipaddr = nodeaddrs[key1][key2]
            if args.verbosity >= 2:
                print_to_stderr("DEBUG: Upgrading VPP on node {}"
                                .format(ipaddr))
            paramiko.util.log_to_file(os.path.join(scratch_directory,
                                                   "ssh.log"))
            client = paramiko.SSHClient()
            try:
                client.load_system_host_keys()
                client.load_host_keys("/dev/null")
                client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                client.connect(ipaddr, username=args.ssh_user,
                               key_filename=args.ssh_privkey)
                if 'centos' in args.topology:
                    if args.verbosity >= 1:
                        print_to_stderr("DEBUG: Installing RPM packages")
                    vpp_install_command = 'sudo rpm -ivh /scratch/vpp/*.rpm'
                elif 'trusty' in args.topology or 'xenial' in args.topology:
                    if args.verbosity >= 1:
                        print_to_stderr("DEBUG: Installing DEB packages")
                    vpp_install_command = 'sudo dpkg -i --force-all ' \
                                          '/scratch/vpp/*.deb'
                else:
                    print_to_stderr("ERROR: Unsupported OS requested: {}"
                                    .format(args.topology))
                    vpp_install_command = ''
                _, stdout, stderr = \
                    client.exec_command(vpp_install_command)
                c_stdout = stdout.read()
                c_stderr = stderr.read()
            finally:
                # Release the SSH connection even if the upgrade failed.
                client.close()
            if args.verbosity >= 2:
                print_to_stderr("DEBUG: Command output was:")
                print_to_stderr(c_stdout)
                print_to_stderr("DEBUG: Command stderr was:")
                print_to_stderr(c_stderr)

    #
    # Write a file with timestamp to scratch directory. We can use this to track
    # how long a simulation has been running.
    #
    with open(os.path.join(scratch_directory, 'start_time'), 'a') as \
            timestampfile:
        timestampfile.write('{}\n'.format(int(time.time())))

    #
    # Declare victory
    #
    if args.verbosity >= 1:
        print_to_stderr("SESSION ID: {}".format(session_id))

    print("{}".format(session_id))
488
if __name__ == "__main__":
    # Executed as a script: run main() and hand its return value to
    # sys.exit() as the exit status of the process.
    exit_status = main()
    sys.exit(exit_status)