From: Tibor Frank Date: Wed, 29 Jun 2016 14:44:34 +0000 (+0200) Subject: Add automated deployment of Honeycomb on DUTs X-Git-Url: https://gerrit.fd.io/r/gitweb?p=csit.git;a=commitdiff_plain;h=bff480ceb6e7dd9f624f3c8e39f7d7ece1f9248e Add automated deployment of Honeycomb on DUTs JIRA: CSIT-56 - create bootstrap_vpp_honeycomb.sh script which deploys VPP and Honeycomb on DUTs - Modify start-testcase script Change-Id: I88511479cb8681168675c934f4fccbea83fa34bc Signed-off-by: Tibor Frank --- diff --git a/bootstrap-vpp-honeycomb.sh b/bootstrap-vpp-honeycomb.sh new file mode 100644 index 0000000000..2e89d9d747 --- /dev/null +++ b/bootstrap-vpp-honeycomb.sh @@ -0,0 +1,173 @@ +#!/bin/bash +# Copyright (c) 2016 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +cat /etc/hostname +cat /etc/hosts + +export DEBIAN_FRONTEND=noninteractive +sudo apt-get -y update +sudo apt-get -y install libpython2.7-dev python-virtualenv + +# Source the VIRL server parameters: +source virl_params.sh + +SSH_OPTIONS="-i ${VIRL_PKEY} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o BatchMode=yes -o LogLevel=error" + +function ssh_do() { + echo + echo "### " ssh $@ + ssh ${SSH_OPTIONS} $@ +} + +rm -f ${VIRL_PKEY} +cat > ${VIRL_PKEY} <&1) + echo VIRL HOST $virl_server_candidate status is \"$virl_server_status\" + if [ "$virl_server_status" == "$VIRL_SERVER_EXPECTED_STATUS" ] + then + # Candidate is in good status. Select this server. 
+ VIRL_SERVER="$virl_server_candidate" + else + # Candidate is in bad status. Remove from array. + VIRL_SERVERS=("${VIRL_SERVERS[@]:0:$element}" "${VIRL_SERVERS[@]:$[$element+1]}") + fi +done + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +cat ${VIRL_PKEY} + +# Copy start-honeycomb-testcase to VIRL +START_FILE="resources/tools/virl/bin/start-honeycomb-testcase" +DST_DIR_1="/home/jenkins-in/testcase-infra/bin/" +DST_DIR_2="/home/jenkins-in/bin/" + +scp ${SSH_OPTIONS} ${START_FILE} ${VIRL_USERNAME}@${VIRL_SERVER}:${DST_DIR_1} +scp ${SSH_OPTIONS} ${START_FILE} ${VIRL_USERNAME}@${VIRL_SERVER}:${DST_DIR_2} + +# Start a simulation on VIRL server +echo "Starting simulation on VIRL server" + +function stop_virl_simulation { + ssh ${SSH_OPTIONS} ${VIRL_USERNAME}@${VIRL_SERVER}\ + "stop-testcase ${VIRL_SID}" +} + +VIRL_SID=$(ssh ${SSH_OPTIONS} \ + ${VIRL_USERNAME}@${VIRL_SERVER} \ + "start-honeycomb-testcase double-ring-nested") +retval=$? # saved at once: the assignment itself resets $? to 0 +if [ "${retval}" -ne "0" ]; then + echo "VIRL simulation start failed" + exit ${retval} +fi + +if [[ ! "${VIRL_SID}" =~ session-[a-zA-Z0-9_]{6} ]]; then + echo "No VIRL session ID reported." + exit 127 +fi + +# Upon script exit, cleanup the simulation execution +trap stop_virl_simulation EXIT +echo ${VIRL_SID} + +ssh_do ${VIRL_USERNAME}@${VIRL_SERVER} cat /scratch/${VIRL_SID}/topology.yaml + +# Download the topology file from virl session +scp ${SSH_OPTIONS} \ + ${VIRL_USERNAME}@${VIRL_SERVER}:/scratch/${VIRL_SID}/topology.yaml \ + topologies/enabled/topology.yaml + +retval=$? # saved at once: the assignment itself resets $? to 0 +if [ "${retval}" -ne "0" ]; then + echo "Failed to copy topology file from VIRL simulation" + exit ${retval} +fi + +virtualenv --system-site-packages env +. 
env/bin/activate + +echo pip install +pip install -r ${SCRIPT_DIR}/requirements.txt + +pykwalify -s ${SCRIPT_DIR}/resources/topology_schemas/3_node_topology.sch.yaml \ + -s ${SCRIPT_DIR}/resources/topology_schemas/topology.sch.yaml \ + -d ${SCRIPT_DIR}/topologies/enabled/topology.yaml \ + -vvv + +if [ "$?" -ne "0" ]; then + echo "Topology schema validation failed." + echo "However, the tests will start." +fi + +PYTHONPATH=`pwd` pybot -L TRACE -W 136\ + -v TOPOLOGY_PATH:${SCRIPT_DIR}/topologies/enabled/topology.yaml \ + --suite "tests.func" \ + --include honeycomb_sanity \ + --noncritical EXPECTED_FAILING \ + tests/ diff --git a/resources/libraries/python/honeycomb/HoneycombSetup.py b/resources/libraries/python/honeycomb/HoneycombSetup.py index 1c232f08a5..04af9a5a8b 100644 --- a/resources/libraries/python/honeycomb/HoneycombSetup.py +++ b/resources/libraries/python/honeycomb/HoneycombSetup.py @@ -53,6 +53,9 @@ class HoneycombSetup(object): :type nodes: list :raises HoneycombError: If Honeycomb fails to start. """ + + HoneycombSetup.print_environment(nodes) + logger.console("\nStarting Honeycomb service ...") cmd = "{0}/bin/start".format(Const.REMOTE_HC_DIR) @@ -123,6 +126,7 @@ class HoneycombSetup(object): for node in nodes: if node['type'] == NodeType.DUT: + HoneycombSetup.print_ports(node) status_code, _ = HTTPRequest.get(node, path, timeout=10, enable_logging=False) if status_code == HTTPCodes.OK: @@ -185,3 +189,53 @@ class HoneycombSetup(object): logger.info("Honeycomb on node {0} has stopped". format(node['host'])) return True + + @staticmethod + def print_environment(nodes): + """Print information about the nodes to log. The information is defined + by commands in cmds tuple at the beginning of this method. + + :param nodes: List of DUT nodes to get information about. + :type nodes: list + """ + + # TODO: When everything is set and running in VIRL env, transform this + # method to a keyword checking the environment. 
+ + cmds = ("uname -a", + "df -lh", + "echo $JAVA_HOME", + "echo $PATH", + "which java", + "java -version", + "dpkg --list | grep openjdk", + "ls -la /opt/honeycomb", + "ls -la /opt/honeycomb/v3po-karaf-1.0.0-SNAPSHOT") + + for node in nodes: + if node['type'] == NodeType.DUT: + logger.info("Checking node {} ...".format(node['host'])) + for cmd in cmds: + logger.info("Command: {}".format(cmd)) + ssh = SSH() + ssh.connect(node) + ssh.exec_command_sudo(cmd) + + @staticmethod + def print_ports(node): + """Uses "sudo netstat -anp | grep java" to print port where a java + application listens. + + :param node: Honeycomb node where we want to print the ports. + :type node: dict + """ + + cmds = ("netstat -anp | grep java", + "ps -ef | grep karaf") + + logger.info("Checking node {} ...".format(node['host'])) + for cmd in cmds: + logger.info("Command: {}".format(cmd)) + ssh = SSH() + ssh.connect(node) + ssh.exec_command_sudo(cmd) diff --git a/resources/tools/testbed-setup/playbooks/03-virl-post-install.yaml b/resources/tools/testbed-setup/playbooks/03-virl-post-install.yaml index 59bcfe9d65..b0e857ca57 100644 --- a/resources/tools/testbed-setup/playbooks/03-virl-post-install.yaml +++ b/resources/tools/testbed-setup/playbooks/03-virl-post-install.yaml @@ -41,6 +41,10 @@ command: ln -sf /home/jenkins-in/testcase-infra/bin/start-testcase /home/jenkins-in/bin/start-testcase args: creates: /home/jenkins-in/bin/start-testcase + - name: Link start-honeycomb-testcase executable + command: ln -sf /home/jenkins-in/testcase-infra/bin/start-honeycomb-testcase /home/jenkins-in/bin/start-honeycomb-testcase + args: + creates: /home/jenkins-in/bin/start-honeycomb-testcase - name: Link stop-testcase executable command: ln -sf /home/jenkins-in/testcase-infra/bin/stop-testcase /home/jenkins-in/bin/stop-testcase args: diff --git a/resources/tools/virl/bin/start-honeycomb-testcase b/resources/tools/virl/bin/start-honeycomb-testcase new file mode 100755 index 0000000000..f38d1a783b --- /dev/null 
+++ b/resources/tools/virl/bin/start-honeycomb-testcase @@ -0,0 +1,375 @@ +#!/usr/bin/python + +# Copyright (c) 2016 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__author__ = 'ckoester@cisco.com' + +import sys +import requests +import re +import os +import argparse +import tempfile +import shutil +import time +import paramiko +import netifaces + +# Commands needed to install VPP and Honeycomb from repository +inst_cmds = [ + 'sudo rm /etc/apt/sources.list.d/99fd.io.list || true', + 'echo "deb [trusted=yes] https://nexus.fd.io/content/repositories/' + 'fd.io.master.ubuntu.trusty.main/ ./" | ' + 'sudo tee -a /etc/apt/sources.list.d/99fd.io.list', + 'sudo apt-get -y update', + 'sudo apt-get -y install vpp vpp-dbg vpp-dev vpp-dpdk-dev vpp-dpdk-dkms ' + 'vpp-lib honeycomb' +] + +# +# Helper function to indent a text string +# +def indent(lines, amount, fillchar=' '): + padding = amount * fillchar + return padding + ('\n'+padding).join(lines.split('\n')) + +def perform_remote_command(ssh_client, cmd, verbosity): + """This function performs a command on the specified remote host using given + ssh client. Depending on the level of verbosity, it prints stdout and + stderr. + + :param ssh_client: SSH client used for communication. + :param cmd: Command to be performed on the remote client. + :param verbosity: Verbosity level. 
+ :type ssh_client: paramiko.SSHClient + :type cmd: str + :type verbosity: int + """ + + _, stdout, stderr = ssh_client.exec_command(cmd) + c_stdout = stdout.read() + c_stderr = stderr.read() + if verbosity >= 2: + print("DEBUG: Command output was:\n{0}".format(c_stdout)) + print("DEBUG: Command stderr was:\n{0}".format(c_stderr)) + +# +# Main function. +# FIXME: Right now, this is really coded like a shell script, as one big +# function executed in sequence. This should be broken down into multiple +# functions. +# +def main(): + # + # Get our default interface IP address. This will become the default + # value for the "NFS Server IP" option. + # + gws = netifaces.gateways() + addrs = netifaces.ifaddresses(gws['default'][netifaces.AF_INET][1]) + default_addr = addrs[netifaces.AF_INET][0]['addr'] + + # + # Verify CLI parameters and try to download our VPP image into a temporary + # file first + # + parser = argparse.ArgumentParser() + parser.add_argument("topology", help="the base topology to be started") + parser.add_argument("-k", "--keep", help="Keep (do not delete) the " + + "simulation in case of error", action='store_true') + parser.add_argument("-v", "--verbosity", action="count", default=0) + parser.add_argument("-nip", "--nfs-server-ip", help="NFS server (our) IP " + + "default is derived from routing table: " + + "{}".format(default_addr), default=default_addr) + parser.add_argument("-ns", "--nfs-scratch-directory", + help="Server location for NFS scratch diretory", + default="/nfs/scratch") + parser.add_argument("-nc", "--nfs-common-directory", + help="Server location for NFS common (read-only) " + + "directory", default="/nfs/common") + parser.add_argument("-wc", "--wait-count", + help="number of intervals to wait for simulation to " + + "be ready", type=int, default=24) + parser.add_argument("-wt", "--wait-time", + help="length of a single interval to wait for " + + "simulation to be ready", type=int, default=5) + parser.add_argument("-vip", 
"--virl-ip", + help="VIRL IP and Port (e.g. 127.0.0.1:19399)", + default="127.0.0.1:19399") + parser.add_argument("-u", "--username", help="VIRL username", + default="tb4-virl") + parser.add_argument("-p", "--password", help="VIRL password", + default="Cisco1234") + parser.add_argument("-su", "--ssh-user", help="SSH username", + default="cisco") + parser.add_argument("-spr", "--ssh-privkey", help="SSH private keyfile", + default="/home/jenkins-in/.ssh/id_rsa_virl") + parser.add_argument("-spu", "--ssh-pubkey", help="SSH public keyfile", + default="/home/jenkins-in/.ssh/id_rsa_virl.pub") + parser.add_argument("-r", "--release", help="VM disk image/release " + + "(ex. \"csit-ubuntu-14.04.4_2016-05-25_1.0\")", + default="csit-ubuntu-14.04.4_2016-05-25_1.0") + parser.add_argument("--topology-directory", help="Topology directory", + default="/home/jenkins-in/testcase-infra/topologies") + + args = parser.parse_args() + + # + # Check if topology and template exist + # + if args.verbosity >= 2: + print "DEBUG: Running with topology {}".format(args.topology) + + topology_virl_filename = os.path.join(args.topology_directory, + args.topology + ".virl") + topology_yaml_filename = os.path.join(args.topology_directory, + args.topology + ".yaml") + + if not os.path.isfile(topology_virl_filename): + print "ERROR: Topology VIRL file {} does not exist".\ + format(topology_virl_filename) + sys.exit(1) + if not os.path.isfile(topology_yaml_filename): + print "ERROR: Topology YAML file {} does not exist".\ + format(topology_yaml_filename) + sys.exit(1) + + # + # Start VIRL topology + # + if args.verbosity >= 1: + print "DEBUG: Starting VIRL topology" + temp_handle, temp_topology = tempfile.mkstemp() + with open(args.ssh_pubkey, 'r') as pubkey_file: + pub_key = pubkey_file.read().replace('\n', '') + with open(temp_topology, 'w') as new_file, \ + open(topology_virl_filename, 'r') as old_file: + for line in old_file: + line = line.replace(" - VIRL-USER-SSH-PUBLIC-KEY", " - "+pub_key) + 
line = line.replace("$$NFS_SERVER_SCRATCH$$", \ + args.nfs_server_ip+":"+args.nfs_scratch_directory) + line = line.replace("$$NFS_SERVER_COMMON$$", \ + args.nfs_server_ip+":"+args.nfs_common_directory) + line = line.replace("$$VM_IMAGE$$", "server-"+args.release) + new_file.write(line) + os.close(temp_handle) + + try: + new_file = open(temp_topology, 'rb') + headers = {'Content-Type': 'text/xml'} + req = requests.post('http://' + args.virl_ip + '/simengine/rest/launch', + headers=headers, + auth=(args.username, args.password), data=new_file) + if args.verbosity >= 2: + print "DEBUG: - Response Code {}".format(req.status_code) + new_file.close() + + except: + print "ERROR: Launching VIRL simulation - received invalid response" + print req + os.remove(temp_topology) + sys.exit(1) + + if req.status_code != 200: + print "ERROR: Launching VIRL simulation - received status other " + \ + "than 200 HTTP OK" + print "Status was: {} \n".format(req.status_code) + print "Response content was: " + print req.content + os.remove(temp_topology) + sys.exit(1) + + # If we got here, we had a good response. The response content is the + # session ID. + session_id = req.content + + # + # Create simulation scratch directory. Move topology file into that + # directory. Copy or move debian packages into that directory. 
+ # + scratch_directory = os.path.join(args.nfs_scratch_directory, session_id) + os.mkdir(scratch_directory) + shutil.move(temp_topology, os.path.join(scratch_directory, + "virl_topology.virl")) + os.mkdir(os.path.join(scratch_directory, "vpp")) + + # + # Wait for simulation to become active + # + if args.verbosity >= 1: + print "DEBUG: Waiting for simulation to become active" + + sim_is_started = False + nodelist = [] + + count = args.wait_count + while (count > 0) and not sim_is_started: + time.sleep(args.wait_time) + count -= 1 + + req = requests.get('http://' + args.virl_ip + '/simengine/rest/nodes/' + + session_id, auth=(args.username, args.password)) + data = req.json() + + active = 0 + total = 0 + + # Flush the node list every time, keep the last one + nodelist = [] + + # Hosts are the keys of the inner dictionary + for key in data[session_id].keys(): + if data[session_id][key]['management-proxy'] == "self": + continue + nodelist.append(key) + total += 1 + if data[session_id][key]['state'] == "ACTIVE": + active += 1 + if args.verbosity >= 2: + print "DEBUG: - Attempt {} out of {}, total {} hosts, {} active".\ + format(args.wait_count-count, args.wait_count, total, active) + if active == total: + sim_is_started = True + + if not sim_is_started: + print "ERROR: Simulation started OK but devices never changed to " + \ + "ACTIVE state" + print "Last VIRL response:" + print data + if not args.keep: + shutil.rmtree(scratch_directory) + req = requests.get('http://' + args.virl_ip + + '/simengine/rest/stop/' + session_id, + auth=(args.username, args.password)) + + if args.verbosity >= 2: + print "DEBUG: Nodes: " + ", ".join(nodelist) + + # + # Fetch simulation's IPs and create files + # (ansible hosts file, topology YAML file) + # + req = requests.get('http://' + args.virl_ip + + '/simengine/rest/interfaces/' + session_id + + '?fetch-state=1', auth=(args.username, args.password)) + data = req.json() + + # Populate node addresses + nodeaddrs = {} + topology = {} + 
for key in nodelist: + nodetype = re.split('[0-9]', key)[0] + if not nodetype in nodeaddrs: + nodeaddrs[nodetype] = {} + nodeaddrs[nodetype][key] = re.split('\\/', \ + data[session_id][key]['management']['ip-address'])[0] + if args.verbosity >= 2: + print "DEBUG: Node {} is of type {} and has management IP {}".\ + format(key, nodetype, nodeaddrs[nodetype][key]) + + topology[key] = {} + for key2 in data[session_id][key]: + topology[key]["nic-"+key2] = data[session_id][key][key2] + if 'ip-address' in topology[key]["nic-"+key2]: + if topology[key]["nic-"+key2]['ip-address'] is not None: + topology[key]["nic-"+key2]['ip-addr'] = re.split('\\/', \ + topology[key]["nic-"+key2]['ip-address'])[0] + + # Write ansible file + ansiblehosts = open(os.path.join(scratch_directory, 'ansible-hosts'), 'w') + for key1 in nodeaddrs: + ansiblehosts.write("[{}]\n".format(key1)) + for key2 in nodeaddrs[key1]: + ansiblehosts.write("{} hostname={}\n".format(nodeaddrs[key1][key2], + key2)) + ansiblehosts.close() + + # Process topology YAML template + with open(args.ssh_privkey, 'r') as privkey_file: + priv_key = indent(privkey_file.read(), 6) + + with open(os.path.join(scratch_directory, "topology.yaml"), 'w') as \ + new_file, open(topology_yaml_filename, 'r') as old_file: + for line in old_file: + new_file.write(line.format(priv_key=priv_key, topology=topology)) + + # + # Wait for hosts to become reachable over SSH + # + if args.verbosity >= 1: + print "DEBUG: Waiting for hosts to become reachable using SSH" + + missing = -1 + count = args.wait_count + while (count > 0) and missing != 0: + time.sleep(args.wait_time) + count -= 1 + + missing = 0 + for key in nodelist: + if not os.path.exists(os.path.join(scratch_directory, key)): + missing += 1 + if args.verbosity >= 2: + print "DEBUG: - Attempt {} out of {}, waiting for {} hosts".\ + format(args.wait_count-count, args.wait_count, missing) + + if missing != 0: + print "ERROR: Simulation started OK but {} hosts ".format(missing) + \ + "never 
mounted their NFS directory" + if not args.keep: + shutil.rmtree(scratch_directory) + req = requests.get('http://' + args.virl_ip + + '/simengine/rest/stop/' + session_id, + auth=(args.username, args.password)) + + # + # Upgrade VPP + # + if args.verbosity >= 1: + print("DEBUG: Uprading VPP") + + for key1 in nodeaddrs: + if not key1 == 'tg': + for key2 in nodeaddrs[key1]: + ipaddr = nodeaddrs[key1][key2] + if args.verbosity >= 2: + print("DEBUG: Upgrading VPP on node {}".format(ipaddr)) + paramiko.util.log_to_file(os.path.join(scratch_directory, + "ssh.log")) + client = paramiko.SSHClient() + client.load_system_host_keys() + client.load_host_keys("/dev/null") + client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + client.connect(ipaddr, username=args.ssh_user, + key_filename=args.ssh_privkey) + for cmd in inst_cmds: + perform_remote_command(client, cmd, args.verbosity) + + # + # Write a file with timestamp to scratch directory. We can use this to track + # how long a simulation has been running. + # + with open(os.path.join(scratch_directory, 'start_time'), 'a') as \ + timestampfile: + timestampfile.write('{}\n'.format(int(time.time()))) + + # + # Declare victory + # + if args.verbosity >= 1: + print "SESSION ID: {}".format(session_id) + + print "{}".format(session_id) + +if __name__ == "__main__": + sys.exit(main()) diff --git a/virl_params.sh b/virl_params.sh new file mode 100644 index 0000000000..e36058d06d --- /dev/null +++ b/virl_params.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# Copyright (c) 2016 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# VIRL server parameters used by bootstrap scripts: +VIRL_SERVERS=("10.30.51.28" "10.30.51.29" "10.30.51.30") +VIRL_SERVER="" + +VIRL_USERNAME=jenkins-in +VIRL_PKEY=priv_key +VIRL_SERVER_STATUS_FILE="status" +VIRL_SERVER_EXPECTED_STATUS="PRODUCTION"