Tolerate failures when setting MTU
[csit.git] / bootstrap.sh
1 #!/bin/bash
2 # Copyright (c) 2018 Cisco and/or its affiliates.
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
6 #
7 #     http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
# Trace every command from here on.
set -x

# Show the identity of the host the script is running on.
cat /etc/hostname
cat /etc/hosts

# Absolute directory holding this script; also exported as PYTHONPATH.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
export PYTHONPATH=${SCRIPT_DIR}

# Distribution id and version from os-release, with double quotes removed.
OS_ID=$(grep '^ID=' /etc/os-release | cut -f2- -d= | tr -d '"')
OS_VERSION_ID=$(grep '^VERSION_ID=' /etc/os-release | cut -f2- -d= | tr -d '"')

# Choose the distro label and package format, then install the Python
# development files and virtualenv. Any other distribution aborts the script.
case "${OS_ID}" in
    centos)
        DISTRO="CENTOS"
        PACKAGE="rpm"
        sudo yum install -y python-devel python-virtualenv
        ;;
    ubuntu)
        DISTRO="UBUNTU"
        PACKAGE="deb"
        export DEBIAN_FRONTEND=noninteractive
        sudo apt-get -y update
        sudo apt-get -y install libpython2.7-dev python-virtualenv
        ;;
    *)
        echo "$OS_ID is not yet supported."
        exit 1
        ;;
esac
40
# Temporarily download VPP and DPDK packages from nexus.fd.io
#
# Without arguments: read the stable VPP version for this distro and call
# download_artifacts from the sourced artifacts.sh library.
# With arguments: print the first one and select the skip_patch /
# skip_vpp_patch tag expression for SKIP_PATCH; nothing is downloaded.
if [ "${#}" -eq "0" ]; then
    VPP_VERSION=$(< ${SCRIPT_DIR}/VPP_STABLE_VER_${DISTRO})
    CSIT_DIR=${SCRIPT_DIR}
    source "${SCRIPT_DIR}/resources/libraries/bash/function/artifacts.sh"
    download_artifacts
    # Need to revert -euo as the rest of script is not optimized for this.
    # NOTE(review): presumably the sourced library enabled them - confirm.
    set +euo pipefail
else
    arr=(${@})
    echo ${arr[0]}
    SKIP_PATCH="skip_patchORskip_vpp_patch"
fi
54
# Directory on the VIRL hosts into which the VPP packages are copied.
VIRL_DIR_LOC="/tmp/"

# VPP package files matched in the current directory, and the same names
# prefixed with the remote directory.
VPP_PKGS=(*vpp*.$PACKAGE)
VPP_PKGS_FULL=("${VPP_PKGS[@]/#/${VIRL_DIR_LOC}}")
echo ${VPP_PKGS[@]}

# Topology and release names are kept in per-distro files next to the script.
VIRL_TOPOLOGY=$(< ${SCRIPT_DIR}/VIRL_TOPOLOGY_${DISTRO})
VIRL_RELEASE=$(< ${SCRIPT_DIR}/VIRL_RELEASE_${DISTRO})

# Candidate VIRL hosts.
VIRL_SERVERS=(
    "10.30.51.28"
    "10.30.51.29"
    "10.30.51.30"
)

# "<server>:<limit>" entries looked up by get_max_ip_nr.
IPS_PER_VIRL=(
    "10.30.51.28:252"
    "10.30.51.29:252"
    "10.30.51.30:252"
)

# "<server>:<limit>" entries looked up by get_max_sim_nr.
SIMS_PER_VIRL=(
    "10.30.51.28:13"
    "10.30.51.29:13"
    "10.30.51.30:13"
)

# Multiplier that converts a simulation limit into an IP count.
IPS_PER_SIMULATION=5
70
#######################################
# Print the IP limit configured for a VIRL server.
# Globals:   IPS_PER_VIRL (read) - entries of the form "<server>:<limit>"
# Arguments: $1 - VIRL server address to look up
# Outputs:   the limit of the first matching entry on stdout, or "0" when
#            the server has no entry
#######################################
function get_max_ip_nr() {
    # All working variables are local so a direct (non-subshell) call does
    # not overwrite same-named variables of the caller.
    local virl_server=$1
    local item
    local ip_value="0"
    for item in "${IPS_PER_VIRL[@]}"; do
        # Text before the first ':' is the server, text after it the limit.
        if [[ "${item%%:*}" == "${virl_server}" ]]; then
            ip_value=${item#*:}
            break
        fi
    done
    echo "${ip_value}"
}
83
#######################################
# Print the simulation limit configured for a VIRL server.
# Globals:   SIMS_PER_VIRL (read) - entries of the form "<server>:<limit>"
# Arguments: $1 - VIRL server address to look up
# Outputs:   the limit of the first matching entry on stdout, or "0" when
#            the server has no entry
#######################################
function get_max_sim_nr() {
    # All working variables are local so a direct (non-subshell) call does
    # not overwrite same-named variables of the caller.
    local virl_server=$1
    local item
    local sim_value="0"
    for item in "${SIMS_PER_VIRL[@]}"; do
        # Text before the first ':' is the server, text after it the limit.
        if [[ "${item%%:*}" == "${virl_server}" ]]; then
            sim_value=${item#*:}
            break
        fi
    done
    echo "${sim_value}"
}
96
# Account and private-key file used for every SSH/SCP call to the VIRL hosts.
VIRL_USERNAME=jenkins-in
VIRL_PKEY=priv_key
# A host is used only if this remote file contains the expected status.
VIRL_SERVER_STATUS_FILE="status"
VIRL_SERVER_EXPECTED_STATUS="PRODUCTION"

# Flat option string; it is expanded unquoted on purpose at the call sites.
SSH_OPTIONS="-i ${VIRL_PKEY} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o BatchMode=yes -o LogLevel=error"

# One comma-separated suite list per parallel test run.
TEST_GROUPS=("ip6,ip4_tunnels,l2bd" "ip4,ip6_tunnels,l2xc")
SUITE_PATH="tests.vpp.func"
# Tag expression passed to "--exclude". Keep a value that was already chosen
# earlier in the script and only fall back to the default when none is set;
# an unconditional assignment here would silently discard that choice.
SKIP_PATCH="${SKIP_PATCH:-SKIP_PATCH}"

# Create tmp dir (-p: a leftover directory from a previous run is fine)
mkdir -p "${SCRIPT_DIR}/tmp"

# Use tmp dir to store log files
LOG_PATH="${SCRIPT_DIR}/tmp"

# Use tmp dir for tarballs
export TMPDIR="${SCRIPT_DIR}/tmp"
116
#######################################
# Log and run an ssh command with the shared SSH options.
# Globals:   SSH_OPTIONS (read) - flat string of ssh options
# Arguments: the ssh destination followed by the remote command words
# Outputs:   an empty line, a "### ssh ..." line, then whatever ssh prints
# Returns:   the exit status of ssh
#######################################
function ssh_do() {
    echo
    # "$@" is quoted so arguments are not re-split or glob-expanded against
    # the local working directory before they reach ssh.
    echo "### "  ssh "$@"
    # SSH_OPTIONS is a single string of several options; splitting it into
    # words is intended.
    # shellcheck disable=SC2086
    ssh ${SSH_OPTIONS} "$@"
}
122
123 rm -f ${VIRL_PKEY}
124 cat > ${VIRL_PKEY} <<EOF
125 -----BEGIN RSA PRIVATE KEY-----
126 MIIEpQIBAAKCAQEA+IHXq87GcqMR1C47rzx6Cbip5Ghq8pKrbqKrP5Nf41HcYrT6
127 GOXl9nFWKsMOzIlIn+8y7Il27eZh7csQGApbg8QLiHMtcYEmWNzKZpkqg4nuAPxX
128 VXwlKgnKX902SrET9Gp9TDayiHtCRWVfrlPPPSA0UEXW6BjLN/uHJ+W/Xzrrab+9
129 asBVa05vT2W6n0KJ66zfCaeDM912mQ6SttscAwFoWDmdHlegiVqrlIG2ABxOvxxz
130 L3dM3iSmlmQlzv9bThjo+nI4KFYh6m5wrZmAo5r/4q9CIJc21HVnTqkGOWJIZz6J
131 73lePJVSq5gYqaoGw3swFEA/MDkOx7baWKSoLQIDAQABAoIBAQCNBeolNp+JWJ76
132 gQ4fwLsknyXSV6sxYyhkDW4PEwwcTU06uqce0AAzXVffxne0fMe48x47+zqBgPbb
133 4huM+Pu8B9nfojUMr5TaYtl9Zbgpk3F8H7dT7LKOa6XrxvZTZrADSRc30+Z26zPN
134 e9zTaf42Gvt0/l0Zs1BHwbaOXqO+XuwJ3/F9Sf3PQYWXD3EOWjpHDP/X/1vAs6lV
135 SLkm6J/9KKE1m6I6LTYjIXuYt4SXybW6N2TSy54hhQtYcDUnIU2hR/PHVWKrGA0J
136 kELgrtTNTdbML27O5gFWU4PLUEYTZ9fN11D6qUZKxLcPOiPPHXkiILMRCCnG5DYI
137 ksBAU/YlAoGBAPxZO9VO18TYc8THV1nLKcvT2+1oSs1UcA2wNQMU55t910ZYinRa
138 MRwUhMOf8Mv5wOeiZaRICQB1PnVWtDVmGECgPpK6jUxqAwn8rgJcnoafLGL5YKMY
139 RVafTe6N5LXgCaOcJrk21wxs6v7ninEbUxxc575urOvZMBkymDw91dwbAoGBAPwa
140 YRhKhrzFKZzdK0RadVjnxKvolUllpoqqg3XuvmeAJHAOAnaOgVWq68NAcp5FZJv0
141 2D2Up7TX8pjf9MofP1SJbcraKBpK4NzfNkA0dSdEi+FhVofAJ9umB2o5LW1n7sab
142 UIrjsdzSJK/9Zb9yTTHPyibYzNEgaJV1HsbxfEFXAoGAYO2RmvRm0phll18OQVJV
143 IpKk9kLKAKZ/R/K32hAsikBC8SVPQTPniyaifFWx81diblalff2hX4ipTf7Yx24I
144 wMIMZuW7Im/R7QMef4+94G3Bad7p7JuE/qnAEHJ2OBnu+eYfxaK35XDsrq6XMazS
145 NqHE7hOq3giVfgg+C12hCKMCgYEAtu9dbYcG5owbehxzfRI2/OCRsjz/t1bv1seM
146 xVMND4XI6xb/apBWAZgZpIFrqrWoIBM3ptfsKipZe91ngBPUnL9s0Dolx452RVAj
147 yctHB8uRxWYgqDkjsxtzXf1HnZBBkBS8CUzYj+hdfuddoeKLaY3invXLCiV+PpXS
148 U4KAK9kCgYEAtSv0m5+Fg74BbAiFB6kCh11FYkW94YI6B/E2D/uVTD5dJhyEUFgZ
149 cWsudXjMki8734WSpMBqBp/J8wG3C9ZS6IpQD+U7UXA+roB7Qr+j4TqtWfM+87Rh
150 maOpG56uAyR0w5Z9BhwzA3VakibVk9KwDgZ29WtKFzuATLFnOtCS46E=
151 -----END RSA PRIVATE KEY-----
152 EOF
153 chmod 600 ${VIRL_PKEY}
154
#
# Keep only the servers that can be reached over SSH and whose "status"
# file contains exactly "PRODUCTION". The survivors replace VIRL_SERVERS.
#
# Abort with exit code 127 if no server qualifies.
#
VIRL_PROD_SERVERS=()
for candidate in "${VIRL_SERVERS[@]}"; do
    # stderr is captured too, so an SSH failure simply yields a status text
    # that does not match the expected one.
    virl_server_status=$(ssh ${SSH_OPTIONS} ${VIRL_USERNAME}@${candidate} cat $VIRL_SERVER_STATUS_FILE 2>&1)
    echo VIRL HOST ${candidate} status is \"$virl_server_status\"
    if [ "$virl_server_status" != "$VIRL_SERVER_EXPECTED_STATUS" ]; then
        continue
    fi
    # Candidate is in good status. Add to array.
    VIRL_PROD_SERVERS+=(${candidate})
done

VIRL_SERVERS=("${VIRL_PROD_SERVERS[@]}")
echo "VIRL servers in production: ${VIRL_SERVERS[@]}"
num_hosts=${#VIRL_SERVERS[@]}
if [ $num_hosts == 0 ]; then
    echo "No more VIRL candidate hosts available, failing."
    exit 127
fi
183
# Get the LOAD of each server based on number of active simulations (testcases)
#
# A reply that is not a plain number (for example an empty string after an
# SSH failure) is replaced by UNKNOWN_LOAD so that every server keeps its own
# slot in VIRL_SERVER_LOAD and an unqueryable server is chosen only as a
# last resort.
UNKNOWN_LOAD=1000000
VIRL_SERVER_LOAD=()
for index in "${!VIRL_SERVERS[@]}"; do
    reported_load=$(ssh ${SSH_OPTIONS} "${VIRL_USERNAME}@${VIRL_SERVERS[$index]}" "list-testcases | grep session | wc -l")
    if [[ "${reported_load}" =~ ^[[:space:]]*([0-9]+)[[:space:]]*$ ]]; then
        # 10# forces base 10 so a zero-padded count is not read as octal.
        VIRL_SERVER_LOAD[${index}]=$((10#${BASH_REMATCH[1]}))
    else
        echo "Could not read the load of VIRL host ${VIRL_SERVERS[$index]}, treating it as fully loaded."
        VIRL_SERVER_LOAD[${index}]=${UNKNOWN_LOAD}
    fi
done

# Pick for each TEST_GROUP least loaded server
VIRL_SERVER=()
for group_idx in "${!TEST_GROUPS[@]}"; do
    # Linear scan for the minimum; the strict "<" keeps the first server on
    # a tie.
    best_idx=0
    for index in "${!VIRL_SERVER_LOAD[@]}"; do
        if (( VIRL_SERVER_LOAD[index] < VIRL_SERVER_LOAD[best_idx] )); then
            best_idx=${index}
        fi
    done
    # least_load_server_idx stays 1-based, as it has always been.
    least_load_server_idx=$((best_idx + 1))
    least_load_server=${VIRL_SERVERS[${best_idx}]}
    VIRL_SERVER+=("${least_load_server}")
    # Adjusting load as we are not going run simulation immediately
    VIRL_SERVER_LOAD[${best_idx}]=$((VIRL_SERVER_LOAD[best_idx] + 1))
done

echo "Selected VIRL servers: ${VIRL_SERVER[*]}"
201
# Never print the private key itself into the job log; listing the file is
# enough to confirm that it exists and to show its permissions.
ls -l "${VIRL_PKEY}"
203
# Copy the files to VIRL hosts
#
# DONE lists the hosts that already received the packages, so a host that
# was selected for several test groups is copied to only once.
DONE=()
for index in "${!VIRL_SERVER[@]}"; do
    target_host=${VIRL_SERVER[${index}]}

    # Do not copy files in case they have already been copied to the VIRL host
    # (exact comparison: one address must not match as a prefix of another).
    copy=1
    for copied_host in "${DONE[@]}"; do
        if [[ "${copied_host}" == "${target_host}" ]]; then
            copy=0
            break
        fi
    done

    if [ "${copy}" -eq "0" ]; then
        echo "VPP packages have already been copied to the VIRL host ${target_host}"
    else
        # SSH_OPTIONS is a flat option string; word splitting is intended.
        # shellcheck disable=SC2086
        scp ${SSH_OPTIONS} "${VPP_PKGS[@]}" \
            "${VIRL_USERNAME}@${target_host}:${VIRL_DIR_LOC}"

        result=$?
        if [ "${result}" -ne "0" ]; then
            echo "Failed to copy VPP packages to VIRL host ${target_host}"
            echo ${result}
            exit ${result}
        else
            echo "VPP packages successfully copied to the VIRL host ${target_host}"
        fi
        DONE+=("${target_host}")
    fi
done
227
228 # Start a simulation on VIRL server
229
#######################################
# Stop every simulation this script started.
# Globals:   VIRL_SERVER (read) - selected host per simulation
#            VIRL_SID (read)    - session id per simulation, same indices
#            SSH_OPTIONS, VIRL_USERNAME (read)
# Arguments: none
#######################################
function stop_virl_simulation {
    local index
    for index in "${!VIRL_SERVER[@]}"; do
        # No session id recorded means the simulation never started, so
        # there is nothing to stop on that host.
        if [[ -z "${VIRL_SID[${index}]:-}" ]]; then
            continue
        fi
        # SSH_OPTIONS is a flat option string; word splitting is intended.
        # shellcheck disable=SC2086
        ssh ${SSH_OPTIONS} "${VIRL_USERNAME}@${VIRL_SERVER[${index}]}" \
            "stop-testcase ${VIRL_SID[${index}]}"
    done
}
236
# Upon script exit, cleanup the simulation execution
trap stop_virl_simulation EXIT

# Start one simulation per selected server and fetch its topology file.
for index in "${!VIRL_SERVER[@]}"; do
    sim_host=${VIRL_SERVER[${index}]}
    echo "Starting simulation nr. ${index} on VIRL server ${sim_host}"
    # Get given VIRL server limits for max. number of VMs and IPs
    max_ips=$(get_max_ip_nr "${sim_host}")
    max_ips_from_sims=$(($(get_max_sim_nr "${sim_host}")*IPS_PER_SIMULATION))
    # Set quota to lower value
    if [ "${max_ips}" -le "${max_ips_from_sims}" ]; then
        IP_QUOTA=${max_ips}
    else
        IP_QUOTA=${max_ips_from_sims}
    fi
    # Start the simulation; its stdout is kept as the session id.
    # TODO: remove param ${VPP_PKGS_FULL[*]} when start-testcase script is
    # updated on all virl servers
    # shellcheck disable=SC2086
    VIRL_SID[${index}]=$(ssh ${SSH_OPTIONS} "${VIRL_USERNAME}@${sim_host}" \
        "start-testcase -vv \
            --quota ${IP_QUOTA} \
            --copy ${VIRL_TOPOLOGY} \
            --expiry 180 \
            --release ${VIRL_RELEASE} \
            ${VPP_PKGS_FULL[*]}")
    # Must directly follow the assignment: it carries the ssh exit status.
    retval=$?
    if [ ${retval} -ne "0" ]; then
        echo "VIRL simulation start failed on ${sim_host}"
        exit ${retval}
    fi
    if [[ ! "${VIRL_SID[${index}]}" =~ session-[a-zA-Z0-9_]{6} ]]; then
        echo "No VIRL session ID reported."
        exit 127
    fi
    echo "VIRL simulation nr. ${index} started on ${sim_host}"

    ssh_do "${VIRL_USERNAME}@${sim_host}" \
        cat "/scratch/${VIRL_SID[${index}]}/topology.yaml"

    # Download the topology file from VIRL session and rename it.
    # The destination is anchored at SCRIPT_DIR because that is where the
    # schema validation and the test runs read the file from.
    # shellcheck disable=SC2086
    scp ${SSH_OPTIONS} \
        "${VIRL_USERNAME}@${sim_host}:/scratch/${VIRL_SID[${index}]}/topology.yaml" \
        "${SCRIPT_DIR}/topologies/enabled/topology${index}.yaml"

    retval=$?
    if [ ${retval} -ne "0" ]; then
        echo "Failed to copy topology file from VIRL simulation nr. ${index} on VIRL server ${sim_host}"
        exit ${retval}
    fi
done

echo ${VIRL_SID[@]}
284
# Create a virtualenv that can also see the system site-packages and
# switch to it.
virtualenv --system-site-packages env
source env/bin/activate

echo pip install
pip install -r ${SCRIPT_DIR}/requirements.txt

# Validate every downloaded topology file against the schemas. A failed
# validation is only reported; the script carries on.
for index in "${!VIRL_SERVER[@]}"; do
    if ! pykwalify \
            -s ${SCRIPT_DIR}/resources/topology_schemas/3_node_topology.sch.yaml \
            -s ${SCRIPT_DIR}/resources/topology_schemas/topology.sch.yaml \
            -d ${SCRIPT_DIR}/topologies/enabled/topology${index}.yaml \
            -vvv
    then
        echo "Topology${index} schema validation failed."
        echo "However, the tests will start."
    fi
done
301
#######################################
# Run pybot for one test group and record its log and return code.
# Intended to be started in the background, one instance per test group:
# it redirects all further output of the calling shell into the log file.
# Globals:   LOG_PATH, SCRIPT_DIR, TEST_GROUPS, SUITE_PATH, SKIP_PATCH (read)
# Arguments: $1 - index of the test group; it also selects topology<N>.yaml
# Outputs:   ${LOG_PATH}/test_run<N>.log   - timestamped stdout and stderr
#            ${LOG_PATH}/rc_test_run<N>    - pybot exit status
#            ${LOG_PATH}/log_test_set_run<N> is passed to pybot as --output
#######################################
function run_test_set() {
    set +x
    local nr=$1
    local line suite local_run_rc
    local suite_str=""
    local -a suites=()

    rm -f "${LOG_PATH}/test_run${nr}.log"
    # Prefix every output line with the time of day. "IFS= read -r" keeps
    # backslashes and surrounding whitespace of the line untouched, and the
    # "|| [[ -n ... ]]" part keeps a final line that has no newline.
    exec &> >(while IFS= read -r line || [[ -n "${line}" ]]; do
        echo "$(date +'%H:%M:%S') $line" >> "${LOG_PATH}/test_run${nr}.log"
    done)

    # Split the comma-separated group without touching the global IFS.
    IFS="," read -r -a suites <<< "${TEST_GROUPS[${nr}]}"
    for suite in "${suites[@]}"; do
        suite_str="${suite_str} --suite ${SUITE_PATH}.${suite}"
    done

    # Log the command line that is about to be executed.
    echo "PYTHONPATH=$(pwd) pybot -L TRACE -W 136\
        -v TOPOLOGY_PATH:${SCRIPT_DIR}/topologies/enabled/topology${nr}.yaml \
        ${suite_str} \
        --include vm_envAND3_node_single_link_topo \
        --include vm_envAND3_node_double_link_topo \
        --exclude PERFTEST \
        --exclude SOFTWIRE \
        --exclude ${SKIP_PATCH} \
        --exclude SKIP_TEST \
        --noncritical EXPECTED_FAILING \
        --output ${LOG_PATH}/log_test_set_run${nr} \
        tests/"

    # suite_str holds several "--suite <name>" pairs; it is expanded
    # unquoted on purpose so that each word becomes its own argument.
    # shellcheck disable=SC2086
    PYTHONPATH=$(pwd) pybot -L TRACE -W 136\
        -v "TOPOLOGY_PATH:${SCRIPT_DIR}/topologies/enabled/topology${nr}.yaml" \
        ${suite_str} \
        --include vm_envAND3_node_single_link_topo \
        --include vm_envAND3_node_double_link_topo \
        --exclude PERFTEST \
        --exclude SOFTWIRE \
        --exclude "${SKIP_PATCH}" \
        --exclude SKIP_TEST \
        --noncritical EXPECTED_FAILING \
        --output "${LOG_PATH}/log_test_set_run${nr}" \
        tests/

    # Capture the pybot status before any other command overwrites $?.
    local_run_rc=$?
    set -x
    echo "${local_run_rc}" > "${LOG_PATH}/rc_test_run${nr}"
}
346
set +x
# Start one background run_test_set per selected server. The pids array is
# indexed by process id and stores the test set number as the value.
for index in "${!VIRL_SERVER[@]}"; do
    run_test_set ${index} &
    pid=$!
    echo "Sent to background: Test_set${index} (pid=$pid)"
    pids[$pid]=$index
done

echo
echo -n "Waiting..."

# Poll the background processes: print ten dots at one-second intervals,
# then drop every pid that ps no longer finds. Stop when none is left.
while [ -n "${pids[*]}" ]; do
    for (( tick = 0; tick < 10; tick++ )); do
        sleep 1
        echo -n "."
    done
    for pid in "${!pids[@]}"; do
        if ps "$pid" >/dev/null; then
            continue
        fi
        echo -e "\n"
        echo "Test_set${pids[$pid]} with PID $pid finished."
        unset "pids[$pid]"
    done
    [ -n "${!pids[*]}" ] || break
    echo -n -e "\nStill waiting for test set(s): ${pids[*]} ..."
done

echo
echo "All test set runs finished."
echo
383
set -x

# Print the log of every test set and add up the recorded return codes.
# A missing or empty return-code file aborts the script with status 1.
RC=0
for index in "${!VIRL_SERVER[@]}"; do
    echo "Test_set${index} log:"
    cat ${LOG_PATH}/test_run${index}.log
    RC_PARTIAL_RUN=$(< ${LOG_PATH}/rc_test_run${index})
    if [ -z "$RC_PARTIAL_RUN" ]; then
        echo "Failed to retrieve return code from test run ${index}"
        exit 1
    fi
    RC=$(( RC + RC_PARTIAL_RUN ))
    # The per-run files are no longer needed once they have been read.
    rm -f ${LOG_PATH}/rc_test_run${index} ${LOG_PATH}/test_run${index}.log
    echo
done
400
# Log the final result
# Tracing is switched off around each banner so only the banner text shows.
if ! [ "${RC}" -eq "0" ]; then
    # Singular wording for exactly one, plural otherwise.
    if ! [ "${RC}" -eq "1" ]; then
        HLP_STR="tests have"
    else
        HLP_STR="test has"
    fi
    set +x
    echo
    echo "========================================================================================================================================"
    echo "Final result of all test loops:                                                                                                 | FAIL |"
    echo "${RC} critical ${HLP_STR} failed."
    echo "========================================================================================================================================"
    echo
    set -x
else
    set +x
    echo
    echo "========================================================================================================================================"
    echo "Final result of all test loops:                                                                                                 | PASS |"
    echo "All critical tests have passed."
    echo "========================================================================================================================================"
    echo
    set -x
fi
426
echo Post-processing test data...

# Space-separated list of the per-run XML outputs, one per test set.
partial_logs=""
for index in "${!VIRL_SERVER[@]}"; do
    partial_logs+=" ${LOG_PATH}/log_test_set_run${index}.xml"
done

# Rebot output post-processing: combine the partial outputs into output.xml
rebot --noncritical EXPECTED_FAILING \
      --output output.xml ${partial_logs}

# Remove unnecessary log files
rm -f ${partial_logs}

echo Post-processing finished.

# Exit with 0 when the summed return code is zero, otherwise with 1.
RETURN_STATUS=1
if [ ${RC} -eq 0 ]; then
    RETURN_STATUS=0
fi

exit ${RETURN_STATUS}
448
449 exit ${RETURN_STATUS}