#!/usr/bin/python # Copyright (c) 2017 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """This script is handling starting of VIRL simulations.""" import argparse import netifaces import os import paramiko import random import re import shutil import sys import tempfile import time import requests IPS_PER_SIMULATION = 5 def indent(lines, amount, fillchar=' '): """Indent the string by amount of fill chars. :param lines: String to indent. :param amount: Number of fill chars. :param fillchar: Filling character. :type lines: str :type amount: int :type fillchar: str :returns: Indented string. :rtype: str """ padding = amount * fillchar return padding + ('\n'+padding).join(lines.split('\n')) def print_to_stderr(msg, end='\n'): """Writes any text to stderr. :param msg: Message to print. :param end: By default print new line at the end. :type msg: str :type end: str """ try: sys.stderr.write(str(msg) + end) except ValueError: pass def get_assigned_interfaces(args, network="flat"): """Retrieve assigned interfaces in openstack network. :param args: Command line params. :param network: Openstack network. :type args: ArgumentParser :type network: str :returns: Assigned interfaces. :rtype: list :raises RuntimeError: If response is not 200. """ req = requests.get('http://{}/openstack/rest/ports/{}' .format(args.virl_ip, network), auth=(args.username, args.password)) if req.status_code == 200: return req.json() else: raise RuntimeError("ERROR: Retrieving ports in use - " "Status other than 200 HTTP OK:\n{}" .format(req.content)) def get_assigned_interfaces_count(args, network="flat"): """Count assigned interfaces in openstack network. :param args: Command line params. :param network: Openstack network. :type args: ArgumentParser :type network: str :returns: Assigned interfaces count. :rtype: int """ return len(get_assigned_interfaces(args, network=network)) def check_ip_addresses(args): """Check IP address availability. :param args: Command line params. :type args: ArgumentParser :raises RuntimeError: If not enough free addresses available. """ for i in range(args.wait_count): if args.quota - \ get_assigned_interfaces_count(args) >= IPS_PER_SIMULATION: break if args.verbosity >= 2: print_to_stderr("DEBUG: - Attempt {} out of {}, waiting for free " "IP addresses".format(i, args.wait_count)) # Wait random amount of time within range 1-3 minutes time.sleep(random.randint(60, 180)) else: raise RuntimeError("ERROR: Not enough IP addresses to run simulation") def check_virl_resources(args): """Check virl resources availability. :param args: Command line params. :type args: ArgumentParser """ check_ip_addresses(args) # # FIXME: Right now, this is really coded like a shell script, as one big # function executed in sequence. This should be broken down into multiple # functions. # def main(): """ Main function.""" # # Get our default interface IP address. This will become the default # value for the "NFS Server IP" option. # gws = netifaces.gateways() addrs = netifaces.ifaddresses(gws['default'][netifaces.AF_INET][1]) default_addr = addrs[netifaces.AF_INET][0]['addr'] # # Verify CLI parameters and try to download our VPP image into a temporary # file first # parser = argparse.ArgumentParser() parser.add_argument("topology", help="the base topology to be started") parser.add_argument("packages", help="Path to the tldk package file that " + "is/are to be installed", nargs='+') parser.add_argument("-c", "--copy", help="Copy the tldk packages, " + "leaving the originals in place. Default is to " + "move them.", action='store_true') parser.add_argument("-k", "--keep", help="Keep (do not delete) the " + "simulation in case of error", action='store_true') parser.add_argument("-v", "--verbosity", action="count", default=0) parser.add_argument("-nip", "--nfs-server-ip", help="NFS server (our) IP " + "default is derived from routing table: " + "{}".format(default_addr), default=default_addr) parser.add_argument("-ns", "--nfs-scratch-directory", help="Server location for NFS scratch diretory", default="/nfs/scratch") parser.add_argument("-nc", "--nfs-common-directory", help="Server location for NFS common (read-only) " + "directory", default="/nfs/common") parser.add_argument("-wc", "--wait-count", help="number of intervals to wait for simulation to " + "be ready", type=int, default=24) parser.add_argument("-wt", "--wait-time", help="length of a single interval to wait for " + "simulation to be ready", type=int, default=5) parser.add_argument("-vip", "--virl-ip", help="VIRL IP and Port (e.g. 127.0.0.1:19399)", default="127.0.0.1:19399") parser.add_argument("-u", "--username", help="VIRL username", default="tb4-virl") parser.add_argument("-au", "--admin-username", help="VIRL admin username", default="uwmadmin") parser.add_argument("-p", "--password", help="VIRL password", default="Cisco1234") parser.add_argument("-su", "--ssh-user", help="SSH username", default="cisco") parser.add_argument("-e", "--expiry", help="Simulation expiry", default="120") parser.add_argument("-spr", "--ssh-privkey", help="SSH private keyfile", default="/home/jenkins-in/.ssh/id_rsa_virl") parser.add_argument("-spu", "--ssh-pubkey", help="SSH public keyfile", default="/home/jenkins-in/.ssh/id_rsa_virl.pub") parser.add_argument("-r", "--release", help="VM disk image/release " + "(ex. \"csit-ubuntu-14.04.4_2016-05-25_1.0\")", default="csit-ubuntu-14.04.4_2016-05-25_1.0") parser.add_argument("--topology-directory", help="Topology directory", default="/home/jenkins-in/testcase-infra/topologies") parser.add_argument("-q", "--quota", help="VIRL quota for max number of allowed IPs", type=int, default=74) args = parser.parse_args() # # Check if topology and template exist # if args.verbosity >= 2: print_to_stderr("DEBUG: Running with topology {}" .format(args.topology)) topology_virl_filename = os.path.join(args.topology_directory, args.topology + ".virl") topology_yaml_filename = os.path.join(args.topology_directory, args.topology + ".yaml") if not os.path.isfile(topology_virl_filename): print_to_stderr("ERROR: Topology VIRL file {} does not exist" .format(topology_virl_filename)) sys.exit(1) if not os.path.isfile(topology_yaml_filename): print_to_stderr("ERROR: Topology YAML file {} does not exist" .format(topology_yaml_filename)) sys.exit(1) # # Check if TLDK package exists # for package in args.packages: if args.verbosity >= 2: print_to_stderr("DEBUG: Checking if file {} exists" .format(package)) if not os.path.isfile(package): print_to_stderr("ERROR: TLDK package {} does not exist." .format(package)) sys.exit(1) # # Start VIRL topology # if args.verbosity >= 1: print "DEBUG: Starting VIRL topology" temp_handle, temp_topology = tempfile.mkstemp() with open(args.ssh_pubkey, 'r') as pubkey_file: pub_key = pubkey_file.read().replace('\n', '') with open(temp_topology, 'w') as new_file, \ open(topology_virl_filename, 'r') as old_file: for line in old_file: line = line.replace(" - VIRL-USER-SSH-PUBLIC-KEY", " - "+pub_key) line = line.replace("$$NFS_SERVER_SCRATCH$$", \ args.nfs_server_ip+":"+args.nfs_scratch_directory) line = line.replace("$$NFS_SERVER_COMMON$$", \ args.nfs_server_ip+":"+args.nfs_common_directory) line = line.replace("$$VM_IMAGE$$", "server-"+args.release) new_file.write(line) os.close(temp_handle) try: data = open(temp_topology, 'rb') check_virl_resources(args) req = requests.post('http://' + args.virl_ip + '/simengine/rest/launch', auth=(args.username, args.password), data=data) if args.verbosity >= 2: print_to_stderr("DEBUG: - Request URL {}" .format(req.url)) print_to_stderr("{}" .format(req.text)) print_to_stderr("DEBUG: - Response Code {}" .format(req.status_code)) new_file.close() if req.status_code != 200: raise RuntimeError("ERROR: Launching VIRL simulation - " "Status other than 200 HTTP OK:\n{}" .format(req.content)) except (requests.exceptions.RequestException, RuntimeError) as ex_error: print_to_stderr(ex_error) os.remove(temp_topology) sys.exit(1) # If we got here, we had a good response. The response content is the # session ID. session_id = req.content if args.verbosity >= 1: print_to_stderr("DEBUG: VIRL simulation session-id: {}" .format(session_id)) # Set session expiry to autokill sessions if not done from jenkins if not args.keep: if args.verbosity >= 1: print_to_stderr("DEBUG: Setting expire for session-id: {}" .format(session_id)) try: req = requests.put('http://' + args.virl_ip + '/simengine/rest/admin-update/' + session_id + '/expiry', auth=(args.admin_username, args.password), params={'user': args.username, 'expires': args.expiry}) if args.verbosity >= 2: print_to_stderr("DEBUG: - Request URL {}" .format(req.url)) print_to_stderr("{}" .format(req.text)) print_to_stderr("DEBUG: - Response Code {}" .format(req.status_code)) if req.status_code != 200: raise RuntimeError("ERROR: Setting expiry to simulation - " "Status other than 200 HTTP OK:\n{}" .format(req.content)) except (requests.exceptions.RequestException, RuntimeError) as ex_error: print_to_stderr(ex_error) req = requests.get('http://' + args.virl_ip + '/simengine/rest/stop/' + session_id, auth=(args.username, args.password)) os.remove(temp_topology) print "{}".format(session_id) sys.exit(1) # # Create simulation scratch directory. Move topology file into that # directory. Copy or move TLDK packages into that directory. # scratch_directory = os.path.join(args.nfs_scratch_directory, session_id) os.mkdir(scratch_directory) shutil.move(temp_topology, os.path.join(scratch_directory, "virl_topology.virl")) os.mkdir(os.path.join(scratch_directory, "tldktest")) for package in args.packages: if args.copy: shutil.copy(package, os.path.join(scratch_directory, "tldktest", os.path.basename(package))) else: shutil.move(package, os.path.join(scratch_directory, "tldktest", os.path.basename(package))) # # Wait for simulation to become active # if args.verbosity >= 1: print_to_stderr("DEBUG: Waiting for simulation to become active") sim_is_started = False nodelist = [] count = args.wait_count while (count > 0) and not sim_is_started: time.sleep(args.wait_time) count -= 1 req = requests.get('http://' + args.virl_ip + '/simengine/rest/nodes/' + session_id, auth=(args.username, args.password)) data = req.json() active = 0 total = 0 # Flush the node list every time, keep the last one nodelist = [] # Hosts are the keys of the inner dictionary for key in data[session_id].keys(): if data[session_id][key]['management-proxy'] == "self": continue nodelist.append(key) total += 1 if data[session_id][key]['state'] == "ACTIVE": active += 1 if args.verbosity >= 2: print_to_stderr("DEBUG: - Attempt {} out of {}, total {} hosts, " "{} active".format(args.wait_count-count, args.wait_count, total, active)) if active == total: sim_is_started = True if not sim_is_started: print_to_stderr("ERROR: Simulation nodes never changed to ACTIVE state") print_to_stderr("Last VIRL response:") print_to_stderr(data) if not args.keep: req = requests.get('http://' + args.virl_ip + '/simengine/rest/stop/' + session_id, auth=(args.username, args.password)) try: shutil.rmtree(scratch_directory) except shutil.Error: print_to_stderr("ERROR: Removing scratch directory") print "{}".format(session_id) sys.exit(1) if args.verbosity >= 2: print_to_stderr("DEBUG: Nodes: {}" .format(", ".join(nodelist))) # # Fetch simulation's IPs and create files # (ansible hosts file, topology YAML file) # try: req = requests.get('http://' + args.virl_ip + '/simengine/rest/interfaces/' + session_id, auth=(args.username, args.password), params={'fetch-state': '1'}) if args.verbosity >= 2: print_to_stderr("DEBUG: - Request URL {}" .format(req.url)) print_to_stderr("DEBUG: - Request Text") print_to_stderr("{}".format(req.text)) print_to_stderr("DEBUG: - Response Code {}" .format(req.status_code)) if req.status_code != 200: raise RuntimeError("ERROR:Fetching IP's of simulation - " "Status other than 200 HTTP OK:\n{}" .format(req.content)) except (requests.exceptions.RequestException, RuntimeError) as ex_error: print_to_stderr(ex_error) if not args.keep: req = requests.get('http://' + args.virl_ip + '/simengine/rest/stop/' + session_id, auth=(args.username, args.password)) try: shutil.rmtree(scratch_directory) except shutil.Error: print_to_stderr("ERROR: Removing scratch directory") print "{}".format(session_id) sys.exit(1) data = req.json() # Populate node addresses nodeaddrs = {} topology = {} for key in nodelist: nodetype = re.split('[0-9]', key)[0] if not nodetype in nodeaddrs: nodeaddrs[nodetype] = {} nodeaddrs[nodetype][key] = re.split('\\/', \ data[session_id][key]['management']['ip-address'])[0] if args.verbosity >= 2: print_to_stderr("DEBUG: Node {} is of type {} and has mgmt IP {}" .format(key, nodetype, nodeaddrs[nodetype][key])) topology[key] = {} for key2 in data[session_id][key]: topology[key]["nic-"+key2] = data[session_id][key][key2] if 'ip-address' in topology[key]["nic-"+key2]: if topology[key]["nic-"+key2]['ip-address'] is not None: topology[key]["nic-"+key2]['ip-addr'] = re.split('\\/', \ topology[key]["nic-"+key2]['ip-address'])[0] # Write ansible file ansiblehosts = open(os.path.join(scratch_directory, 'ansible-hosts'), 'w') for key1 in nodeaddrs: ansiblehosts.write("[{}]\n".format(key1)) for key2 in nodeaddrs[key1]: ansiblehosts.write("{} hostname={}\n".format(nodeaddrs[key1][key2], key2)) ansiblehosts.close() # Process topology YAML template with open(args.ssh_privkey, 'r') as privkey_file: priv_key = indent(privkey_file.read(), 6) with open(os.path.join(scratch_directory, "topology.yaml"), 'w') as \ new_file, open(topology_yaml_filename, 'r') as old_file: for line in old_file: new_file.write(line.format(priv_key=priv_key, topology=topology)) # # Wait for hosts to become reachable over SSH # if args.verbosity >= 1: print_to_stderr("DEBUG: Waiting for hosts to become reachable over SSH") missing = -1 count = args.wait_count while (count > 0) and missing != 0: time.sleep(args.wait_time) count -= 1 missing = 0 for key in nodelist: if not os.path.exists(os.path.join(scratch_directory, key)): missing += 1 if args.verbosity >= 2: print_to_stderr("DEBUG: Attempt {} out of {}, waiting for {} hosts" .format(args.wait_count-count, args.wait_count, missing)) if missing != 0: print_to_stderr("ERROR: Simulation started OK but {} hosts never " "mounted their NFS directory".format(missing)) if not args.keep: req = requests.get('http://' + args.virl_ip + '/simengine/rest/stop/' + session_id, auth=(args.username, args.password)) try: shutil.rmtree(scratch_directory) except shutil.Error: print_to_stderr("ERROR: Removing scratch directory") print "{}".format(session_id) sys.exit(1) # # just decompress the TLDK tar packages # if args.verbosity >= 1: print_to_stderr("DEBUG: Uprading TLDK") for key1 in nodeaddrs: if not key1 == 'tg': for key2 in nodeaddrs[key1]: ipaddr = nodeaddrs[key1][key2] if args.verbosity >= 2: print_to_stderr("DEBUG: Upgrading TLDK on node {}" .format(ipaddr)) paramiko.util.log_to_file(os.path.join(scratch_directory, "ssh.log")) client = paramiko.SSHClient() client.load_system_host_keys() client.load_host_keys("/dev/null") client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) client.connect(ipaddr, username=args.ssh_user, key_filename=args.ssh_privkey) _, stdout, stderr = \ client.exec_command('cd /scratch/tldktest/ && sudo tar zxf tldk_depends.tar.gz') c_stdout = stdout.read() c_stderr = stderr.read() if args.verbosity >= 2: print_to_stderr("DEBUG: Command output was:") print_to_stderr(c_stdout) print_to_stderr("DEBUG: Command stderr was:") print_to_stderr(c_stderr) # # Write a file with timestamp to scratch directory. We can use this to track # how long a simulation has been running. # with open(os.path.join(scratch_directory, 'start_time'), 'a') as \ timestampfile: timestampfile.write('{}\n'.format(int(time.time()))) # # Declare victory # if args.verbosity >= 1: print_to_stderr("SESSION ID: {}".format(session_id)) print "{}".format(session_id) if __name__ == "__main__": sys.exit(main())