From: Vratko Polak
Date: Fri, 10 Aug 2018 08:20:30 +0000 (+0200)
Subject: CSIT-1135: Scripts for VPP per-patch testing
X-Git-Url: https://gerrit.fd.io/r/gitweb?p=csit.git;a=commitdiff_plain;h=7db6faf25da39820d321222f7f8fcb191585add9

CSIT-1135: Scripts for VPP per-patch testing

+ Scripts do not rely on (other) bootstraps.
+ Perf verify bootstrap is also the new-style script now.
+ Scripts are divided into functions for better re-use.
+ Functions are sourced from a small number of large "library" files.
- Still using jumpavg from PyPI.
- Perpatch has specific simplified parsing (instead of the PAL one).
- Bash style document is in a separate Change.

Change-Id: If88fa528ce155ea86b614e3d77c0550b91bbdf11
Signed-off-by: Vratko Polak
---

diff --git a/bootstrap-verify-perf.sh b/bootstrap-verify-perf.sh
index c72d6ec7dd..afcfd1eca9 100755
--- a/bootstrap-verify-perf.sh
+++ b/bootstrap-verify-perf.sh
@@ -12,400 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-set -xo pipefail
+set -exuo pipefail
 
-# FUNCTIONS
-function warn () {
-    # Prints the message to standard error.
-    echo "$@" >&2
-}
-
-function die () {
-    # Prints the message to standard error end exit with error code specified
-    # by first argument.
-    status="$1"
-    shift
-    warn "$@"
-    exit "$status"
-}
-
-function help () {
-    # Displays help message.
-    die 1 "Usage: `basename $0` csit-[dpdk|vpp|ligato]-[2n-skx|3n-skx|3n-hsw]"
-}
-
-function cancel_all () {
-    # Trap function to get into consistent state.
-    python ${SCRIPT_DIR}/resources/tools/scripts/topo_cleanup.py -t $1 || {
-        die 1 "Failure during execution of topology cleanup script!"
-    }
-    python ${SCRIPT_DIR}/resources/tools/scripts/topo_reservation.py -c -t $1 || {
-        die 1 "Failure during execution of topology un-reservation script!"
-    }
-}
-
-# VARIABLES
-# Space separated list of available testbeds, described by topology files
-TOPOLOGIES_3N_HSW=(topologies/available/lf_3n_hsw_testbed1.yaml
-                   topologies/available/lf_3n_hsw_testbed2.yaml
-                   topologies/available/lf_3n_hsw_testbed3.yaml)
-TOPOLOGIES_2N_SKX=(topologies/available/lf_2n_skx_testbed21.yaml
-                   topologies/available/lf_2n_skx_testbed24.yaml)
-TOPOLOGIES_3N_SKX=(topologies/available/lf_3n_skx_testbed31.yaml
-                   topologies/available/lf_3n_skx_testbed32.yaml)
-
-SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-export PYTHONPATH=${SCRIPT_DIR}
-
-RESERVATION_DIR="/tmp/reservation_dir"
-DOWNLOAD_DIR="${SCRIPT_DIR}/download_dir"
-ARCHIVE_DIR="${SCRIPT_DIR}/archive"
-
-mkdir -p ${DOWNLOAD_DIR} || {
-    die 1 "Failed to create download dir!"
-}
-mkdir -p ${ARCHIVE_DIR} || {
-    die 1 "Failed to create archive dir!"
-}
-
-# Get test code.
-TEST_CODE=${JOB_NAME-}
-if [[ -z ${TEST_CODE} ]]; then
-    TEST_CODE=${1}
-    shift
-fi
-
-# TOPOLOGY SELECTION
-case "$TEST_CODE" in
-    *2n-skx*)
-        TOPOLOGIES=${TOPOLOGIES_2N_SKX[@]}
-        TOPOLOGIES_TAGS="2_node_*_link_topo"
-        ;;
-    *3n-skx*)
-        TOPOLOGIES=${TOPOLOGIES_3N_SKX[@]}
-        TOPOLOGIES_TAGS="3_node_*_link_topo"
-        ;;
-    *)
-        # Fallback to 3-node Haswell by default (backward compatibility)
-        TOPOLOGIES=${TOPOLOGIES_3N_HSW[@]}
-        TOPOLOGIES_TAGS="3_node_*_link_topo"
-        ;;
-esac
-
-if [[ -z "${TOPOLOGIES}" ]]; then
-    die 1 "No applicable topology found!"
-fi
-
-cd ${DOWNLOAD_DIR}
-case "$TEST_CODE" in
-    *hc2vpp*)
-        DUT="hc2vpp"
-        ;;
-    *vpp*)
-        DUT="vpp"
-
-        case "$TEST_CODE" in
-            csit-vpp-*)
-                # Use downloaded packages with specific version
-                if [[ "$TEST_CODE" == *daily* ]] || \
-                   [[ "$TEST_CODE" == *weekly* ]] || \
-                   [[ "$TEST_CODE" == *timed* ]];
-                then
-                    echo Downloading latest VPP packages from NEXUS...
-                    bash ${SCRIPT_DIR}/resources/tools/scripts/download_install_vpp_pkgs.sh \
-                        --skip-install || {
-                        die 1 "Failed to get VPP packages!"
-                    }
-                else
-                    echo Downloading VPP packages of specific version from NEXUS...
-                    DPDK_STABLE_VER=$(cat ${SCRIPT_DIR}/DPDK_STABLE_VER)
-                    VPP_STABLE_VER=$(cat ${SCRIPT_DIR}/VPP_STABLE_VER_UBUNTU)
-                    bash ${SCRIPT_DIR}/resources/tools/scripts/download_install_vpp_pkgs.sh \
-                        --skip-install --vpp ${VPP_STABLE_VER} --dkms ${DPDK_STABLE_VER} || {
-                        die 1 "Failed to get VPP packages!"
-                    }
-                fi
-                ;;
-            vpp-csit-*)
-                # Use local built packages.
-                mv ../${DUT}*.deb ${DOWNLOAD_DIR}/
-                ;;
-            *)
-                die 1 "Unable to identify job type from: ${TEST_CODE}!"
-                ;;
-        esac
-        ;;
-    *ligato*)
-        DUT="kubernetes"
-
-        case "$TEST_CODE" in
-            csit-*)
-                # Use downloaded packages with specific version
-                if [[ "$TEST_CODE" == *daily* ]] || \
-                   [[ "$TEST_CODE" == *weekly* ]] || \
-                   [[ "$TEST_CODE" == *timed* ]];
-                then
-                    echo Downloading latest VPP packages from NEXUS...
-                    bash ${SCRIPT_DIR}/resources/tools/scripts/download_install_vpp_pkgs.sh \
-                        --skip-install || {
-                        die 1 "Failed to get VPP packages!"
-                    }
-                else
-                    echo Downloading VPP packages of specific version from NEXUS...
-                    DPDK_STABLE_VER=$(cat ${SCRIPT_DIR}/DPDK_STABLE_VER)
-                    VPP_STABLE_VER=$(cat ${SCRIPT_DIR}/VPP_STABLE_VER_UBUNTU)
-                    bash ${SCRIPT_DIR}/resources/tools/scripts/download_install_vpp_pkgs.sh \
-                        --skip-install --vpp ${VPP_STABLE_VER} --dkms ${DPDK_STABLE_VER} || {
-                        die 1 "Failed to get VPP packages!"
-                    }
-                fi
-                ;;
-            vpp-csit-*)
-                # Use local builded packages.
-                mv ../${DUT}*.deb ${DOWNLOAD_DIR}/
-                ;;
-            *)
-                die 1 "Unable to identify job type from: ${TEST_CODE}!"
-                ;;
-        esac
-        # Extract VPP API to specific folder
-        dpkg -x ${DOWNLOAD_DIR}/vpp_*.deb /tmp/vpp || {
-            die 1 "Failed to extract ${DUT} package!"
-        }
-
-        LIGATO_REPO_URL="https://github.com/ligato/"
-        VPP_AGENT_STABLE_VER=$(cat ${SCRIPT_DIR}/VPP_AGENT_STABLE_VER)
-        DOCKER_DEB="docker-ce_18.03.0~ce-0~ubuntu_amd64.deb"
-
-        # Clone & checkout stable vnf-agent
-        cd ../..
-        git clone -b ${VPP_AGENT_STABLE_VER} --single-branch \
-            ${LIGATO_REPO_URL}/vpp-agent vpp-agent || {
-            die 1 "Failed to run: git clone ${LIGATO_REPO_URL}/vpp-agent!"
-        }
-        cd vpp-agent
-
-        # Install Docker
-        wget -q https://download.docker.com/linux/ubuntu/dists/xenial/pool/stable/amd64/${DOCKER_DEB} || {
-            die 1 "Failed to download Docker package!"
-        }
-
-        sudo dpkg -i ${DOCKER_DEB} || {
-            die 1 "Failed to install Docker!"
-        }
-
-        # Pull ligato/dev_vpp_agent docker image and re-tag as local
-        sudo docker pull ligato/dev-vpp-agent:${VPP_AGENT_STABLE_VER} || {
-            die 1 "Failed to pull Docker image!"
-        }
-
-        sudo docker tag ligato/dev-vpp-agent:${VPP_AGENT_STABLE_VER}\
-            dev_vpp_agent:latest || {
-            die 1 "Failed to tag Docker image!"
-        }
-
-        # Start dev_vpp_agent container as daemon
-        sudo docker run --rm -itd --name agentcnt dev_vpp_agent bash || {
-            die 1 "Failed to run Docker image!"
-        }
-
-        # Copy latest vpp api into running container
-        sudo docker cp /tmp/vpp/usr/share/vpp/api agentcnt:/usr/share/vpp || {
-            die 1 "Failed to copy files Docker image!"
-        }
-
-        for f in ${DOWNLOAD_DIR}/*; do
-            sudo docker cp $f agentcnt:/opt/vpp-agent/dev/vpp/build-root/ || {
-                die 1 "Failed to copy files Docker image!"
-            }
-        done
-
-        # Recompile vpp-agent
-        sudo docker exec -i agentcnt \
-            script -qec '. ~/.bashrc; cd /go/src/github.com/ligato/vpp-agent && make generate && make install' || {
-            die 1 "Failed to build vpp-agent in Docker image!"
-        }
-        # Save container state
-        sudo docker commit `sudo docker ps -q` dev_vpp_agent:latest || {
-            die 1 "Failed to commit state of Docker image!"
-        }
-
-        # Build prod_vpp_agent docker image
-        cd docker/prod/ &&\
-            sudo docker build --tag prod_vpp_agent --no-cache . || {
-            die 1 "Failed to build Docker image!"
-        }
-        # Export Docker image
-        sudo docker save prod_vpp_agent | gzip > prod_vpp_agent.tar.gz || {
-            die 1 "Failed to save Docker image!"
-        }
-        DOCKER_IMAGE="$( readlink -f prod_vpp_agent.tar.gz | tr '\n' ' ' )"
-        rm -r ${DOWNLOAD_DIR}/vpp*
-        mv ${DOCKER_IMAGE} ${DOWNLOAD_DIR}/
-        ;;
-    *dpdk*)
-        DUT="dpdk"
-
-        DPDK_REPO='https://fast.dpdk.org/rel/'
-        # Use downloaded packages with specific version
-        if [[ "$TEST_CODE" == *daily* ]] || \
-           [[ "$TEST_CODE" == *weekly* ]] || \
-           [[ "$TEST_CODE" == *timed* ]];
-        then
-            echo "Downloading latest DPDK packages from repo..."
-            DPDK_STABLE_VER=$(wget --no-check-certificate --quiet -O - ${DPDK_REPO} | \
-                grep -v '2015' | grep -Eo 'dpdk-[^\"]+xz' | tail -1)
-        else
-            echo "Downloading DPDK packages of specific version from repo..."
-            DPDK_STABLE_VER='dpdk-18.05.tar.xz'
-        fi
-        if [[ ! -f ${DPDK_STABLE_VER} ]]; then
-            wget --no-check-certificate ${DPDK_REPO}${DPDK_STABLE_VER} || {
-                die 1 "Failed to get DPDK package from ${DPDK_REPO}!"
-            }
-        fi
-        ;;
-    *)
-        die 1 "Unable to identify DUT type from: ${TEST_CODE}!"
-        ;;
-esac
-cd ${SCRIPT_DIR}
-
-if [[ ! "$(ls -A ${DOWNLOAD_DIR})" ]]; then
-    die 1 "No artifacts downloaded!"
-fi
-
-# ENVIRONMENT PREPARATION
-rm -rf env
-
-pip install virtualenv || {
-    die 1 "Failed to install virtual env!"
-}
-virtualenv --system-site-packages env || {
-    die 1 "Failed to create virtual env!"
-}
-source env/bin/activate || {
-    die 1 "Failed to activate virtual env!"
-}
-pip install -r requirements.txt || {
-    die 1 "Failed to install requirements to virtual env!"
-}
-
-# We iterate over available topologies and wait until we reserve topology.
-while :; do
-    for TOPOLOGY in ${TOPOLOGIES};
-    do
-        python ${SCRIPT_DIR}/resources/tools/scripts/topo_reservation.py -t ${TOPOLOGY}
-        if [ $? -eq 0 ]; then
-            WORKING_TOPOLOGY=${TOPOLOGY}
-            echo "Reserved: ${WORKING_TOPOLOGY}"
-            # On script exit we clean testbed.
-            trap "cancel_all ${WORKING_TOPOLOGY}" EXIT
-            break
-        fi
-    done
-
-    if [ -n "${WORKING_TOPOLOGY}" ]; then
-        # Exit the infinite while loop if we made a reservation.
-        break
-    fi
-
-    # Wait ~3 minutes before next try.
-    SLEEP_TIME=$[ ( $RANDOM % 20 ) + 180 ]s
-    echo "Sleeping ${SLEEP_TIME}"
-    sleep ${SLEEP_TIME}
-done
-
-# Clean testbed before execution.
-python ${SCRIPT_DIR}/resources/tools/scripts/topo_cleanup.py -t ${WORKING_TOPOLOGY} || {
-    die 1 "Failed to cleanup topologies!"
-}
-
-# CSIT EXECUTION
-PYBOT_ARGS="--outputdir ${ARCHIVE_DIR} --loglevel TRACE --variable TOPOLOGY_PATH:${WORKING_TOPOLOGY} --suite tests.${DUT}.perf"
-
-# NIC SELECTION
-# All topologies NICs
-TOPOLOGIES_NICS=($(grep -hoPR "model: \K.*" topologies/available/* | sort -u))
-# Selected topology NICs
-TOPOLOGY_NICS=($(grep -hoPR "model: \K.*" ${WORKING_TOPOLOGY} | sort -u))
-# All topologies NICs - Selected topology NICs
-EXCLUDE_NICS=($(comm -13 <(printf '%s\n' "${TOPOLOGY_NICS[@]}") <(printf '%s\n' "${TOPOLOGIES_NICS[@]}")))
-
-case "$TEST_CODE" in
-    # Select specific performance tests based on jenkins job type variable.
-    *ndrpdr-weekly* )
-        TEST_TAG_ARRAY=(ndrpdrAND64bAND1c
-                        ndrpdrAND78bAND1c)
-        ;;
-    *mrr-daily* | *mrr-weekly* )
-        TEST_TAG_ARRAY=(mrrAND64bAND1c
-                        mrrAND64bAND2c
-                        mrrAND64bAND4c
-                        mrrAND78bAND1c
-                        mrrAND78bAND2c
-                        mrrAND78bAND4c
-                        mrrANDimixAND1cANDvhost
-                        mrrANDimixAND2cANDvhost
-                        mrrANDimixAND4cANDvhost
-                        mrrANDimixAND1cANDmemif
-                        mrrANDimixAND2cANDmemif
-                        mrrANDimixAND4cANDmemif)
-        ;;
-    * )
-        if [[ -z "$TEST_TAG_STRING" ]]; then
-            # If nothing is specified, we will run pre-selected tests by
-            # following tags. Items of array will be concatenated by OR in Robot
-            # Framework.
-            TEST_TAG_ARRAY=(mrrANDnic_intel-x710AND1cAND64bANDip4base
-                            mrrANDnic_intel-x710AND1cAND78bANDip6base
-                            mrrANDnic_intel-x710AND1cAND64bANDl2bdbase)
-        else
-            # If trigger contains tags, split them into array.
-            TEST_TAG_ARRAY=(${TEST_TAG_STRING//:/ })
-        fi
-        ;;
-esac
-
-# We will add excluded NICs.
-TEST_TAG_ARRAY+=("${EXCLUDE_NICS[@]/#/!NIC_}")
-
-TAGS=()
-
-# We will prefix with perftest to prevent running other tests (e.g. Functional).
-prefix="perftestAND"
-if [[ ${TEST_CODE} == vpp-* ]]; then
-    # Automatic prefixing for VPP jobs to limit the NIC used and
-    # traffic evaluation to MRR.
-    prefix="${prefix}mrrANDnic_intel-x710AND"
-fi
-for TAG in "${TEST_TAG_ARRAY[@]}"; do
-    if [[ ${TAG} == "!"* ]]; then
-        # Exclude tags are not prefixed.
-        TAGS+=("${TAG}")
-    else
-        TAGS+=("$prefix${TAG}")
-    fi
-done
-
-# Catenate TAG selections
-EXPANDED_TAGS=()
-for TAG in "${TAGS[@]}"; do
-    if [[ ${TAG} == "!"* ]]; then
-        EXPANDED_TAGS+=(" --exclude ${TAG#$"!"} ")
-    else
-        EXPANDED_TAGS+=(" --include ${TOPOLOGIES_TAGS}AND${TAG} ")
-    fi
-done
-
-# Execute the test
-pybot ${PYBOT_ARGS}${EXPANDED_TAGS[@]} tests/
-RETURN_STATUS=$(echo $?)
-
-# We will create additional archive if workspace variable is set. This way if
-# script is running in jenkins all will be automatically archived to logs.fd.io.
-if [[ -n ${WORKSPACE-} ]]; then
-    cp -r ${ARCHIVE_DIR}/ $WORKSPACE/archives/
-fi
-
-exit ${RETURN_STATUS}
+# TODO: Delete this file, perhaps replacing it with a symlink.
+here=$(dirname $(readlink -e "${BASH_SOURCE[0]}"))
+source "${here}/resources/libraries/bash/entry/bootstrap_verify_perf.sh"
diff --git a/resources/libraries/bash/entry/README.txt b/resources/libraries/bash/entry/README.txt
new file mode 100644
index 0000000000..1f6cf522bb
--- /dev/null
+++ b/resources/libraries/bash/entry/README.txt
@@ -0,0 +1,3 @@
+Scripts in this directory are to be executed (or sourced)
+directly from Jenkins executor (or manually by user when testing locally),
+as opposed to indirectly from other CSIT scripts (or from Robot).
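
For illustration, a minimal sketch of running such an entry script manually
from a CSIT checkout (the job name below is hypothetical; any string matching
the supported testbed and DUT patterns works, and the test code can also be
passed as the first argument instead of via JOB_NAME):

    cd csit
    JOB_NAME="csit-vpp-perf-mrr-daily-master" \
        bash resources/libraries/bash/entry/bootstrap_verify_perf.sh
    # Equivalent, using the argument instead of the environment variable:
    bash resources/libraries/bash/entry/bootstrap_verify_perf.sh \
        "csit-vpp-perf-mrr-daily-master"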
diff --git a/resources/libraries/bash/entry/bootstrap_verify_perf.sh b/resources/libraries/bash/entry/bootstrap_verify_perf.sh
new file mode 100644
index 0000000000..74edc4cea8
--- /dev/null
+++ b/resources/libraries/bash/entry/bootstrap_verify_perf.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2018 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -exuo pipefail
+
+# Assumptions:
+# + There is a directory holding CSIT code to use (this script is there).
+# + At least one of the following is true:
+# ++ JOB_NAME environment variable is set,
+# ++ or this entry script has access to arguments.
+# Consequences (and specific assumptions) are multiple,
+# examine tree of functions for current description.
+
+# FIXME: Define API contract (as opposed to just help) for bootstrap.
+
+# "set -eu" handles failures from the following two lines.
+BASH_ENTRY_DIR="$(dirname $(readlink -e "${BASH_SOURCE[0]}"))"
+BASH_FUNCTION_DIR="$(readlink -e "${BASH_ENTRY_DIR}/../function")"
+source "${BASH_FUNCTION_DIR}/common.sh" || {
+    echo "Source failed." >&2
+    exit 1
+}
+source "${BASH_FUNCTION_DIR}/gather.sh" || die "Source failed."
+common_dirs || die
+get_test_tag_string || die
+get_test_code "${1-}" || die
+select_topology || die
+gather_build || die
+check_download_dir || die
+activate_virtualenv "${CSIT_DIR}" || die
+reserve_testbed || die
+select_tags || die
+compose_pybot_arguments || die
+run_pybot || die
+untrap_and_unreserve_testbed || die
+copy_archives || die
+die_on_pybot_error || die
diff --git a/resources/libraries/bash/entry/per_patch_perf.sh b/resources/libraries/bash/entry/per_patch_perf.sh
new file mode 100644
index 0000000000..5f438a9abc
--- /dev/null
+++ b/resources/libraries/bash/entry/per_patch_perf.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2018 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -exuo pipefail
+
+# Assumptions:
+# + There is a directory holding VPP repo with patch under test checked out.
+# + It contains csit subdirectory with CSIT code to use (this script is there).
+# + Everything needed to build VPP is already installed locally.
+# Consequences:
+# + At the end, VPP repo has parent commit checked out and built.
+# + Directories build_root, dpdk and csit are reset during the run.
+# + The following directories (relative to VPP repo) are (re)created:
+# ++ csit_new, csit_parent, build_new, build_parent,
+# ++ archive, csit/archive, csit_download_dir.
+# This entry script currently does not need any environment variable set.
+
+# TODO: Implement some kind of VPP build caching.
+
+# "set -eu" handles failures from the following two lines.
+BASH_ENTRY_DIR="$(dirname $(readlink -e "${BASH_SOURCE[0]}"))"
+BASH_FUNCTION_DIR="$(readlink -e "${BASH_ENTRY_DIR}/../function")"
+source "${BASH_FUNCTION_DIR}/common.sh" || {
+    echo "Source failed." >&2
+    exit 1
+}
+source "${BASH_FUNCTION_DIR}/per_patch.sh" || die "Source failed."
+common_dirs || die
+set_perpatch_vpp_dir || die
+build_vpp_ubuntu_amd64 "NEW" || die
+prepare_build_parent || die
+build_vpp_ubuntu_amd64 "PARENT" || die
+prepare_test_new || die
+## Replace previous 4 lines with this to speed up testing.
+#download_builds "REPLACE_WITH_URL" || die
+get_test_tag_string || die
+get_test_code "${1-}" || die
+set_perpatch_dut || die
+select_topology || die
+activate_virtualenv "${VPP_DIR}" || die
+reserve_testbed || die
+select_tags || die
+compose_pybot_arguments || die
+check_download_dir || die
+run_pybot "10" || die
+copy_archives || die
+die_on_pybot_error || die
+prepare_test_parent || die
+check_download_dir || die
+run_pybot "10" || die
+untrap_and_unreserve_testbed || die
+copy_archives || die
+die_on_pybot_error || die
+compare_test_results  # The error code becomes this script's error code.
+# TODO: After merging, make sure archiving works as expected.
diff --git a/resources/libraries/bash/function/README.txt b/resources/libraries/bash/function/README.txt
new file mode 100644
index 0000000000..055ebb4cdc
--- /dev/null
+++ b/resources/libraries/bash/function/README.txt
@@ -0,0 +1,7 @@
+Files in this directory system are to be executed indirectly,
+sourced from other scripts.
+
+In fact, the files should only define functions,
+except perhaps some minimal logic needed to import dependencies.
+The originating function calls should be executed from elsewhere,
+typically from entry scripts.
diff --git a/resources/libraries/bash/function/common.sh b/resources/libraries/bash/function/common.sh
new file mode 100644
index 0000000000..b3a06d497b
--- /dev/null
+++ b/resources/libraries/bash/function/common.sh
@@ -0,0 +1,562 @@
+# Copyright (c) 2018 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -exuo pipefail
+
+# This library defines functions used by multiple entry scripts.
+# Keep functions ordered alphabetically, please.
+
+# TODO: Add a link to bash style guide.
+# TODO: Consider putting every die into a {} block,
+# the code might become more readable (but longer).
+
+
+function activate_virtualenv () {
+
+    set -exuo pipefail
+
+    # Arguments:
+    # - ${1} - Non-empty path to existing directory for creating virtualenv in.
+    # Variables read:
+    # - CSIT_DIR - Path to existing root of local CSIT git repository.
+    # Variables set:
+    # - ENV_DIR - Path to the created virtualenv subdirectory.
+    # Variables exported:
+    # - PYTHONPATH - CSIT_DIR, as CSIT Python scripts usually need this.
+    # Functions called:
+    # - die - Print to stderr and exit.
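+
+    # A usage sketch, as the entry scripts invoke it (assuming common_dirs
+    # has already set CSIT_DIR):
+    #   activate_virtualenv "${CSIT_DIR}" || die
+    # Afterwards, "python" and "pip" resolve inside ${CSIT_DIR}/env.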
+
+    # TODO: Do we really need to have ENV_DIR available as a global variable?
+
+    if [[ "${1-}" == "" ]]; then
+        die "Root location of virtualenv to create is not specified."
+    fi
+    ENV_DIR="${1}/env"
+    rm -rf "${ENV_DIR}" || die "Failed to clean previous virtualenv."
+
+    pip install --upgrade virtualenv || {
+        die "Virtualenv package install failed."
+    }
+    virtualenv --system-site-packages "${ENV_DIR}" || {
+        die "Virtualenv creation failed."
+    }
+    set +u
+    source "${ENV_DIR}/bin/activate" || die "Virtualenv activation failed."
+    set -u
+    pip install -r "${CSIT_DIR}/requirements.txt" || {
+        die "CSIT requirements installation failed."
+    }
+
+    # Most CSIT Python scripts assume PYTHONPATH is set and exported.
+    export PYTHONPATH="${CSIT_DIR}" || die "Export failed."
+}
+
+
+function check_download_dir () {
+
+    set -exuo pipefail
+
+    # Fail if there are no files visible in ${DOWNLOAD_DIR}.
+    # TODO: Do we need this as a function, if it is (almost) a one-liner?
+    #
+    # Variables read:
+    # - DOWNLOAD_DIR - Path to directory pybot takes the build to test from.
+    # Directories read:
+    # - ${DOWNLOAD_DIR} - Has to be non-empty to proceed.
+    # Functions called:
+    # - die - Print to stderr and exit.
+
+    if [[ ! "$(ls -A "${DOWNLOAD_DIR}")" ]]; then
+        die "No artifacts downloaded!"
+    fi
+}
+
+
+function common_dirs () {
+
+    set -exuo pipefail
+
+    # Variables set:
+    # - BASH_FUNCTION_DIR - Path to existing directory this file is located in.
+    # - CSIT_DIR - Path to existing root of local CSIT git repository.
+    # - TOPOLOGIES_DIR - Path to existing directory with available topologies.
+    # - RESOURCES_DIR - Path to existing CSIT subdirectory "resources".
+    # - TOOLS_DIR - Path to existing resources subdirectory "tools".
+    # - PYTHON_SCRIPTS_DIR - Path to existing tools subdirectory "scripts".
+    # - ARCHIVE_DIR - Path to created CSIT subdirectory "archive".
+    # - DOWNLOAD_DIR - Path to created CSIT subdirectory "download_dir".
+    # Functions called:
+    # - die - Print to stderr and exit.
+
+    BASH_FUNCTION_DIR="$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")" || {
+        die "Some error during localizing this source directory."
+    }
+    # Current working directory could be in a different repo, e.g. VPP.
+    pushd "${BASH_FUNCTION_DIR}" || die "Pushd failed."
+    CSIT_DIR="$(readlink -e "$(git rev-parse --show-toplevel)")" || {
+        die "Readlink or git rev-parse failed."
+    }
+    popd || die "Popd failed."
+    TOPOLOGIES_DIR="$(readlink -e "${CSIT_DIR}/topologies/available")" || {
+        die "Readlink failed."
+    }
+    RESOURCES_DIR="$(readlink -e "${CSIT_DIR}/resources")" || {
+        die "Readlink failed."
+    }
+    TOOLS_DIR="$(readlink -e "${RESOURCES_DIR}/tools")" || {
+        die "Readlink failed."
+    }
+    PYTHON_SCRIPTS_DIR="$(readlink -e "${TOOLS_DIR}/scripts")" || {
+        die "Readlink failed."
+    }
+
+    ARCHIVE_DIR="$(readlink -f "${CSIT_DIR}/archive")" || {
+        die "Readlink failed."
+    }
+    mkdir -p "${ARCHIVE_DIR}" || die "Mkdir failed."
+    DOWNLOAD_DIR="$(readlink -f "${CSIT_DIR}/download_dir")" || {
+        die "Readlink failed."
+    }
+    mkdir -p "${DOWNLOAD_DIR}" || die "Mkdir failed."
+}
+
+
+function compose_pybot_arguments () {
+
+    set -exuo pipefail
+
+    # Variables read:
+    # - WORKING_TOPOLOGY - Path to topology yaml file of the reserved testbed.
+    # - DUT - CSIT test/ subdirectory, set while processing tags.
+    # - TAGS - Array variable holding selected tag boolean expressions.
+    # - TOPOLOGIES_TAGS - Tag boolean expression filtering tests for topology.
+    # Variables set:
+    # - PYBOT_ARGS - Array holding part of all arguments for pybot.
+    # - EXPANDED_TAGS - Array of strings (pybot arguments) compiled from tags.
+
+    # No explicit check needed with "set -u".
+    PYBOT_ARGS=("--loglevel" "TRACE" "--variable" "TOPOLOGY_PATH:${WORKING_TOPOLOGY}")
+    PYBOT_ARGS+=("--suite" "tests.${DUT}.perf")
+
+    EXPANDED_TAGS=()
+    for tag in "${TAGS[@]}"; do
+        if [[ ${tag} == "!"* ]]; then
+            EXPANDED_TAGS+=("--exclude" "${tag#$"!"}")
+        else
+            EXPANDED_TAGS+=("--include" "${TOPOLOGIES_TAGS}AND${tag}")
+        fi
+    done
+}
+
+
+function copy_archives () {
+
+    set -exuo pipefail
+
+    # Variables read:
+    # - WORKSPACE - Jenkins workspace, copy only if the value is not empty.
+    #   Can be unset, then it speeds up manual testing.
+    # - ARCHIVE_DIR - Path to directory with content to be copied.
+    # Directories updated:
+    # - ${WORKSPACE}/archives/ - Created if does not exist.
+    #   Content of ${ARCHIVE_DIR}/ is copied here.
+    # Functions called:
+    # - die - Print to stderr and exit.
+
+    # We will create additional archive if workspace variable is set.
+    # This way if script is running in jenkins all will be
+    # automatically archived to logs.fd.io.
+    if [[ -n "${WORKSPACE-}" ]]; then
+        mkdir -p "${WORKSPACE}/archives/" || die "Archives dir create failed."
+        cp -r "${ARCHIVE_DIR}"/* "${WORKSPACE}/archives" || die "Copy failed."
+    fi
+}
+
+
+function die () {
+    # Print the message to standard error and exit with error code specified
+    # by the second argument.
+    #
+    # Hardcoded values:
+    # - The default error message.
+    # Arguments:
+    # - ${1} - The whole error message, be sure to quote. Optional.
+    # - ${2} - The code to exit with, default: 1.
+
+    set -x
+    set +eu
+    warn "${1:-Unspecified run-time error occurred!}"
+    exit "${2:-1}"
+}
+
+
+function die_on_pybot_error () {
+
+    set -exuo pipefail
+
+    # Source this fragment if you want to abort on any failed test case.
+    #
+    # Variables read:
+    # - PYBOT_EXIT_STATUS - Set by a pybot running fragment.
+    # Functions called:
+    # - die - Print to stderr and exit.
+
+    if [[ "${PYBOT_EXIT_STATUS}" != "0" ]]; then
+        die "Test failures are present!" "${PYBOT_EXIT_STATUS}"
+    fi
+}
+
+
+function get_test_code () {
+
+    set -exuo pipefail
+
+    # Arguments:
+    # - ${1} - Optional, argument of entry script (or empty as unset).
+    #   Test code value to override job name from environment.
+    # Variables read:
+    # - JOB_NAME - String affecting test selection, default if no argument.
+    # Variables set:
+    # - TEST_CODE - The test selection string from environment or argument.
+    # - NODENESS - Node multiplicity of desired testbed.
+    # - FLAVOR - Node flavor string, usually describing the processor.
+
+    TEST_CODE="${1-}" || die "Reading optional argument failed, somehow."
+    if [[ -z "${TEST_CODE}" ]]; then
+        TEST_CODE="${JOB_NAME-}" || die "Reading job name failed, somehow."
+    fi
+
+    case "${TEST_CODE}" in
+        *"2n-skx"*)
+            NODENESS="2n"
+            FLAVOR="skx"
+            ;;
+        *"3n-skx"*)
+            NODENESS="3n"
+            FLAVOR="skx"
+            ;;
+        *)
+            # Fallback to 3-node Haswell by default (backward compatibility)
+            NODENESS="3n"
+            FLAVOR="hsw"
+            ;;
+    esac
+}
+
+
+function get_test_tag_string () {
+
+    set -exuo pipefail
+
+    # Variables read:
+    # - GERRIT_EVENT_TYPE - Event type set by gerrit, can be unset.
+    # - GERRIT_EVENT_COMMENT_TEXT - Comment text, read for "comment-added" type.
+    # Variables set:
+    # - TEST_TAG_STRING - The string following "perftest" in gerrit comment,
+    #   or empty.
+
+    # TODO: ci-management scripts no longer need to perform this.
+
+    trigger=""
+    if [[ "${GERRIT_EVENT_TYPE-}" == "comment-added" ]]; then
+        # On parsing error, ${trigger} stays empty.
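+        # For example, a (hypothetical) comment text containing the line
+        # "perftest l2bdbase" makes the grep below emit that line,
+        # so TEST_TAG_STRING ends up as " l2bdbase" (select_tags splits it).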
+        trigger="$(echo "${GERRIT_EVENT_COMMENT_TEXT}" \
+            | grep -oE '(perftest$|perftest[[:space:]].+$)')" || true
+    fi
+    # Set test tags as string.
+    TEST_TAG_STRING="${trigger#$"perftest"}"
+}
+
+
+function reserve_testbed () {
+
+    set -exuo pipefail
+
+    # Reserve physical testbed, perform cleanup, register trap to unreserve.
+    #
+    # Variables read:
+    # - TOPOLOGIES - Array of paths to topology yaml to attempt reservation on.
+    # - PYTHON_SCRIPTS_DIR - Path to directory holding the reservation script.
+    # Variables set:
+    # - WORKING_TOPOLOGY - Path to topology yaml file of the reserved testbed.
+    # Functions called:
+    # - die - Print to stderr and exit.
+    # Traps registered:
+    # - EXIT - Calls untrap_and_unreserve_testbed for ${WORKING_TOPOLOGY}.
+
+    while true; do
+        for topo in "${TOPOLOGIES[@]}"; do
+            set +e
+            python "${PYTHON_SCRIPTS_DIR}/topo_reservation.py" -t "${topo}"
+            result="$?"
+            set -e
+            if [[ "${result}" == "0" ]]; then
+                WORKING_TOPOLOGY="${topo}"
+                echo "Reserved: ${WORKING_TOPOLOGY}"
+                python "${PYTHON_SCRIPTS_DIR}/topo_cleanup.py" -t "${topo}" || {
+                    die "Testbed cleanup failed."
+                }
+                trap "untrap_and_unreserve_testbed" EXIT || {
+                    message="TRAP ATTEMPT AND UNRESERVE FAILED, FIX MANUALLY."
+                    untrap_and_unreserve_testbed "${message}" || {
+                        die "Teardown should have died, not failed."
+                    }
+                    die "Trap attempt failed, unreserve succeeded. Aborting."
+                }
+                break
+            fi
+        done
+
+        if [[ -n "${WORKING_TOPOLOGY-}" ]]; then
+            # Exit the infinite while loop if we made a reservation.
+            break
+        fi
+
+        # Wait ~3 minutes before next try.
+        sleep_time="$[ ( $RANDOM % 20 ) + 180 ]s" || {
+            die "Sleep time calculation failed."
+        }
+        echo "Sleeping ${sleep_time}"
+        sleep "${sleep_time}" || die "Sleep failed."
+    done
+}
+
+
+function run_pybot () {
+
+    set -exuo pipefail
+
+    # Currently, VPP-1361 causes occasional test failures.
+    # If real result is more important than time, we can retry a few times.
+    # TODO: We should be retrying on test case level instead.
+
+    # Arguments:
+    # - ${1} - Optional number of pybot invocations to try to avoid failures.
+    #   Default: 1.
+    # Variables read:
+    # - CSIT_DIR - Path to existing root of local CSIT git repository.
+    # - ARCHIVE_DIR - Path to store robot result files in.
+    # - PYBOT_ARGS, EXPANDED_TAGS - See compose_pybot_arguments.
+    # Variables set:
+    # - PYBOT_EXIT_STATUS - Exit status of most recent pybot invocation.
+    # Functions called:
+    # - die - Print to stderr and exit.
+
+    # Set ${tries} as an integer variable, to fail on non-numeric input.
+    local -i "tries" || die "Setting type of variable failed."
+    tries="${1:-1}" || die "Argument evaluation failed."
+    all_options=("--outputdir" "${ARCHIVE_DIR}" "${PYBOT_ARGS[@]}")
+    all_options+=("${EXPANDED_TAGS[@]}")
+
+    while true; do
+        if [[ "${tries}" -le 0 ]]; then
+            break
+        else
+            tries="$((${tries} - 1))"
+        fi
+        pushd "${CSIT_DIR}" || die "Change directory operation failed."
+        set +e
+        # TODO: Make robot tests not require "$(pwd)" == "${CSIT_DIR}".
+        pybot "${all_options[@]}" "${CSIT_DIR}/tests/"
+        PYBOT_EXIT_STATUS="$?"
+        set -e
+        popd || die "Change directory operation failed."
+        if [[ "${PYBOT_EXIT_STATUS}" == "0" ]]; then
+            break
+        fi
+    done
+}
+
+
+function select_tags () {
+
+    set -exuo pipefail
+
+    # Variables read:
+    # - WORKING_TOPOLOGY - Path to topology yaml file of the reserved testbed.
+    # - TEST_CODE - String affecting test selection, usually jenkins job name.
+    # - TEST_TAG_STRING - String selecting tags, from gerrit comment.
+    #   Can be unset.
+    # - TOPOLOGIES_DIR - Path to existing directory with available topologies.
+    # Variables set:
+    # - TAGS - Array of processed tag boolean expressions.
+
+    # TODO: Empty exclude_nics (with failing grep) is expected,
+    # but other possible errors could be checked explicitly.
+    # NIC SELECTION
+    # All topologies NICs
+    available=$(grep -hoPR "model: \K.*" "${TOPOLOGIES_DIR}"/* | sort -u)
+    # Selected topology NICs
+    reserved=$(grep -hoPR "model: \K.*" "${WORKING_TOPOLOGY}" | sort -u)
+    # All topologies NICs - Selected topology NICs
+    exclude_nics=($(comm -13 <(echo "${reserved}") <(echo "${available}")))
+
+    case "${TEST_CODE}" in
+        # Select specific performance tests based on jenkins job type variable.
+        *"ndrpdr-weekly"* )
+            test_tag_array=("ndrpdrAND64bAND1c"
+                            "ndrpdrAND78bAND1c")
+            ;;
+        *"mrr-daily"* | *"mrr-weekly"* )
+            test_tag_array=("mrrAND64bAND1c"
+                            "mrrAND64bAND2c"
+                            "mrrAND64bAND4c"
+                            "mrrAND78bAND1c"
+                            "mrrAND78bAND2c"
+                            "mrrAND78bAND4c"
+                            "mrrANDimixAND1cANDvhost"
+                            "mrrANDimixAND2cANDvhost"
+                            "mrrANDimixAND4cANDvhost"
+                            "mrrANDimixAND1cANDmemif"
+                            "mrrANDimixAND2cANDmemif"
+                            "mrrANDimixAND4cANDmemif")
+            ;;
+        * )
+            if [[ -z "${TEST_TAG_STRING-}" ]]; then
+                # If nothing is specified, we will run pre-selected tests by
+                # following tags. Items of array will be concatenated by OR
+                # in Robot Framework.
+                test_tag_array=("mrrANDnic_intel-x710AND1cAND64bANDip4base"
+                                "mrrANDnic_intel-x710AND1cAND78bANDip6base"
+                                "mrrANDnic_intel-x710AND1cAND64bANDl2bdbase"
+                                "mrrANDnic_intel-x710AND1cAND64bANDl2xcbase")
+            else
+                # If trigger contains tags, split them into array.
+                test_tag_array=(${TEST_TAG_STRING//:/ })
+            fi
+            ;;
+    esac
+
+    # We will add excluded NICs.
+    test_tag_array+=("${exclude_nics[@]/#/!NIC_}")
+
+    TAGS=()
+
+    # We will prefix with perftest to prevent running other tests
+    # (e.g. Functional).
+    prefix="perftestAND"
+    if [[ "${TEST_CODE}" == "vpp-"* ]]; then
+        # Automatic prefixing for VPP jobs to limit the NIC used and
+        # traffic evaluation to MRR.
+        prefix="${prefix}mrrANDnic_intel-x710AND"
+    fi
+    for tag in "${test_tag_array[@]}"; do
+        if [[ ${tag} == "!"* ]]; then
+            # Exclude tags are not prefixed.
+            TAGS+=("${tag}")
+        else
+            TAGS+=("${prefix}${tag}")
+        fi
+    done
+}
+
+
+function select_topology () {
+
+    set -exuo pipefail
+
+    # Variables read:
+    # - NODENESS - Node multiplicity of testbed, either "2n" or "3n".
+    # - FLAVOR - Node flavor string, currently either "hsw" or "skx".
+    # - CSIT_DIR - Path to existing root of local CSIT git repository.
+    # - TOPOLOGIES_DIR - Path to existing directory with available topologies.
+    # Variables set:
+    # - TOPOLOGIES - Array of paths to suitable topology yaml files.
+    # - TOPOLOGIES_TAGS - Tag expression selecting tests for the topology.
+    # Functions called:
+    # - die - Print to stderr and exit.
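+
+    # For example, when get_test_code has set NODENESS="2n" and
+    # FLAVOR="skx", the case below offers the two active 2-node Skylake
+    # testbeds, and TOPOLOGIES_TAGS="2_node_*_link_topo" later narrows
+    # the test selection down to suites for that topology.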
+
+    case_text="${NODENESS}_${FLAVOR}"
+    case "${case_text}" in
+        "3n_hsw")
+            TOPOLOGIES=(
+                "${TOPOLOGIES_DIR}/lf_3n_hsw_testbed1.yaml"
+                "${TOPOLOGIES_DIR}/lf_3n_hsw_testbed2.yaml"
+                "${TOPOLOGIES_DIR}/lf_3n_hsw_testbed3.yaml"
+            )
+            TOPOLOGIES_TAGS="3_node_*_link_topo"
+            ;;
+        "2n_skx")
+            TOPOLOGIES=(
+                "${TOPOLOGIES_DIR}/lf_2n_skx_testbed21.yaml"
+                #"${TOPOLOGIES_DIR}/lf_2n_skx_testbed22.yaml"
+                #"${TOPOLOGIES_DIR}/lf_2n_skx_testbed23.yaml"
+                "${TOPOLOGIES_DIR}/lf_2n_skx_testbed24.yaml"
+            )
+            TOPOLOGIES_TAGS="2_node_*_link_topo"
+            ;;
+        "3n_skx")
+            TOPOLOGIES=(
+                "${TOPOLOGIES_DIR}/lf_3n_skx_testbed31.yaml"
+                "${TOPOLOGIES_DIR}/lf_3n_skx_testbed32.yaml"
+            )
+            TOPOLOGIES_TAGS="3_node_*_link_topo"
+            ;;
+        *)
+            # No falling back to 3n_hsw default, that should have been done
+            # by the function which has set NODENESS and FLAVOR.
+            die "Unknown specification: ${case_text}"
+    esac
+
+    if [[ -z "${TOPOLOGIES-}" ]]; then
+        die "No applicable topology found!"
+    fi
+}
+
+
+function untrap_and_unreserve_testbed () {
+    # Use this as a trap function to ensure testbed does not remain reserved.
+    # Perhaps call directly before script exit, to free testbed for other jobs.
+    # This function is smart enough to avoid multiple unreservations (so safe).
+    # Topo cleanup is executed (call it best practice), ignoring failures.
+    #
+    # Hardcoded values:
+    # - default message to die with if testbed might remain reserved.
+    # Arguments:
+    # - ${1} - Message to die with if unreservation fails. Default hardcoded.
+    # Variables read (by inner function):
+    # - WORKING_TOPOLOGY - Path to topology yaml file of the reserved testbed.
+    # - PYTHON_SCRIPTS_DIR - Path to directory holding Python scripts.
+    # Variables written:
+    # - WORKING_TOPOLOGY - Set to empty string on successful unreservation.
+    # Trap unregistered:
+    # - EXIT - Failure to untrap is reported, but ignored otherwise.
+    # Functions called:
+    # - die - Print to stderr and exit.
+
+    set -xo pipefail
+    set +eu  # We do not want to exit early in a "teardown" function.
+    trap - EXIT || echo "Trap deactivation failed, continuing anyway."
+    wt="${WORKING_TOPOLOGY}"  # Just to avoid too long lines.
+    if [[ -z "${wt-}" ]]; then
+        set -eu
+        echo "Testbed looks unreserved already. Trap removal failed before?"
+    else
+        python "${PYTHON_SCRIPTS_DIR}/topo_cleanup.py" -t "${wt}" || true
+        python "${PYTHON_SCRIPTS_DIR}/topo_reservation.py" -c -t "${wt}" || {
+            die "${1:-FAILED TO UNRESERVE, FIX MANUALLY.}" 2
+        }
+        WORKING_TOPOLOGY=""
+        set -eu
+    fi
+}
+
+
+function warn () {
+    # Print the message to standard error.
+    #
+    # Arguments:
+    # - ${@} - The text of the message.
+
+    echo "$@" >&2
+}
diff --git a/resources/libraries/bash/function/gather.sh b/resources/libraries/bash/function/gather.sh
new file mode 100644
index 0000000000..f490c80110
--- /dev/null
+++ b/resources/libraries/bash/function/gather.sh
@@ -0,0 +1,307 @@
+# Copyright (c) 2018 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -exuo pipefail
+
+# This library defines functions used mainly by "bootstrap" entry scripts.
+# Generally, the functions assume "common.sh" library has been sourced already.
+
+# Keep functions ordered alphabetically, please.
+
+# TODO: Add a link to bash style guide.
+
+
+function gather_build () {
+
+    set -exuo pipefail
+
+    # Variables read:
+    # - TEST_CODE - String affecting test selection, usually jenkins job name.
+    # - DOWNLOAD_DIR - Path to directory pybot takes the build to test from.
+    # Variables set:
+    # - DUT - CSIT test/ subdirectory containing suites to execute.
+    # Directories updated:
+    # - ${DOWNLOAD_DIR} - Files needed by tests are gathered here.
+    # Functions called:
+    # - die - Print to stderr and exit, defined in common.sh
+    # - gather_dpdk, gather_vpp, gather_ligato - See their definitions.
+    # Multiple other side effects are possible,
+    # see functions called from here for their current description.
+
+    # TODO: Separate DUT-from-TEST_CODE from gather-for-DUT,
+    # when the first one becomes relevant for per_patch.
+
+    pushd "${DOWNLOAD_DIR}" || die "Pushd failed."
+    case "${TEST_CODE}" in
+        *"hc2vpp"*)
+            DUT="hc2vpp"
+            # FIXME: Avoid failing on empty ${DOWNLOAD_DIR}.
+            ;;
+        *"vpp"*)
+            DUT="vpp"
+            gather_vpp || die "The function should have died on error."
+            ;;
+        *"ligato"*)
+            DUT="kubernetes"
+            gather_ligato || die "The function should have died on error."
+            ;;
+        *"dpdk"*)
+            DUT="dpdk"
+            gather_dpdk || die "The function should have died on error."
+            ;;
+        *)
+            die "Unable to identify DUT type from: ${TEST_CODE}"
+            ;;
+    esac
+    popd || die "Popd failed."
+}
+
+
+function gather_dpdk () {
+
+    set -exuo pipefail
+
+    # Ensure latest DPDK archive is downloaded.
+    #
+    # Variables read:
+    # - TEST_CODE - The test selection string from environment or argument.
+    # Hardcoded:
+    # - dpdk archive name to download if TEST_CODE is not time based.
+    # Directories updated:
+    # - ./ - Assumed ${DOWNLOAD_DIR}, dpdk-*.tar.xz is downloaded if not there.
+    # Functions called:
+    # - die - Print to stderr and exit, defined in common.sh
+
+    dpdk_repo="https://fast.dpdk.org/rel"
+    # Use downloaded packages with specific version
+    if [[ "${TEST_CODE}" == *"daily"* ]] || \
+       [[ "${TEST_CODE}" == *"weekly"* ]] || \
+       [[ "${TEST_CODE}" == *"timed"* ]];
+    then
+        echo "Downloading latest DPDK packages from repo..."
+        # URL is not in quotes, calling command from variable keeps them.
+        wget_command=("wget" "--no-check-certificate" "-nv" "-O" "-")
+        wget_command+=("${dpdk_repo}")
+        dpdk_stable_ver="$("${wget_command[@]}" | grep -v "2015"\
+            | grep -Eo 'dpdk-[^\"]+xz' | tail -1)" || {
+            die "Composite piped command failed."
+        }
+    else
+        echo "Downloading DPDK packages of specific version from repo..."
+        # TODO: Can we autodetect this based on what CSIT-stable VPP uses?
+        dpdk_stable_ver="dpdk-18.08.tar.xz"
+    fi
+    # TODO: Use "wget -N" instead checking for file presence?
+    if [[ ! -f "${dpdk_stable_ver}" ]]; then
+        wget -nv --no-check-certificate "${dpdk_repo}/${dpdk_stable_ver}" || {
+            die "Failed to get DPDK package from: ${dpdk_repo}"
+        }
+    fi
+}
+
+
+function gather_ligato () {
+
+    set -exuo pipefail
+
+    # Build docker image (with vpp, ligato and vpp-agent),
+    # and put it to ${DOWNLOAD_DIR}/.
+    #
+    # Access rights needed for:
+    # - "wget", "git clone", "dpkg -x", "cd" above ${CSIT_DIR}.
+    # - "sudo" without password.
+    # - With sudo:
+    #   - "dpkg -i" is allowed.
+    #   - "docker" commands have everything they need.
+    # Variables read:
+    # - DOWNLOAD_DIR - Path to directory pybot takes the build to test from.
+    # - CSIT_DIR - Path to existing root of local CSIT git repository.
+    # Files read:
+    # - ${CSIT_DIR}/VPP_AGENT_STABLE_VER - Vpp agent version to use.
+    # Directories updated:
+    # - ${DOWNLOAD_DIR} - Docker image stored, VPP *.deb stored and deleted.
+    # - /tmp/vpp - VPP is unpacked there, not cleaned afterwards.
+    # - ${CSIT_DIR}/vpp-agent - Created, vpp-agent git repo is cloned there.
+    #   - Also, various temporary files are stored there.
+    # System consequences:
+    # - Docker package is installed.
+    # - Presumably dockerd process is started.
+    # - The ligato/dev-vpp-agent docker image is downloaded.
+    # - Results of subsequent image manipulation are probably left lingering.
+    # Other hardcoded values:
+    # - Docker .deb file name to download and install.
+    # Functions called:
+    # - die - Print to stderr and exit, defined in common.sh
+    # - gather_vpp - See eponymous fragment file assumed to be sourced already.
+    # TODO: What is the best order of description items?
+
+    # TODO: Many of the following comments act as abstraction.
+    # But the abstracted blocks are mostly one-liners (plus "|| die"),
+    # so maybe it is not worth introducing fragments/functions for the blocks.
+    # TODO: This fragment is too long anyway, split it up.
+
+    gather_vpp || die "The function should have died on error."
+
+    # Extract VPP API to specific folder
+    # FIXME: Make sure /tmp/vpp/ exists. Should we clean it?
+    dpkg -x "${DOWNLOAD_DIR}/vpp_"*".deb" "/tmp/vpp" || {
+        die "Failed to extract VPP packages for kubernetes!"
+    }
+
+    ligato_repo_url="https://github.com/ligato/"
+    vpp_agent_stable_ver="$(cat "${CSIT_DIR}/VPP_AGENT_STABLE_VER")" || {
+        die "Cat failed."
+    }
+    docker_deb="docker-ce_18.03.0~ce-0~ubuntu_amd64.deb"
+
+    # Clone & checkout stable vpp-agent
+    cd "${CSIT_DIR}" || die "Change directory failed."
+    git clone -b "${vpp_agent_stable_ver}" --single-branch \
+        "${ligato_repo_url}/vpp-agent" "vpp-agent" || {
+        die "Failed to run: git clone ${ligato_repo_url}/vpp-agent!"
+    }
+    cd "vpp-agent" || die "Change directory failed."
+
+    # Install Docker
+    url_prefix="https://download.docker.com/linux/ubuntu/dists/xenial/pool"
+    # URL is not in quotes, calling command from variable keeps them.
+    wget_command=("wget" "-nv" "${url_prefix}/stable/amd64/${docker_deb}")
+    "${wget_command[@]}" || die "Failed to download Docker package!"
+
+    sudo dpkg -i "${docker_deb}" || die "Failed to install Docker!"
+
+    # Pull ligato/dev_vpp_agent docker image and re-tag as local
+    sudo docker pull "ligato/dev-vpp-agent:${vpp_agent_stable_ver}" || {
+        die "Failed to pull Docker image!"
+    }
+
+    first_arg="ligato/dev-vpp-agent:${vpp_agent_stable_ver}"
+    sudo docker tag "${first_arg}" "dev_vpp_agent:latest" || {
+        die "Failed to tag Docker image!"
+    }
+
+    # Start dev_vpp_agent container as daemon
+    sudo docker run --rm -itd --name "agentcnt" "dev_vpp_agent" bash || {
+        die "Failed to run Docker image!"
+    }
+
+    # Copy latest vpp api into running container
+    sudo docker exec agentcnt rm -rf "agentcnt:/usr/share/vpp/api" || {
+        die "Failed to remove previous API!"
+    }
+    sudo docker cp "/tmp/vpp/usr/share/vpp/api" "agentcnt:/usr/share/vpp" || {
+        die "Failed to copy files Docker image!"
+    }
+
+    # Recompile vpp-agent
+    script_arg=". ~/.bashrc; cd /go/src/github.com/ligato/vpp-agent"
+    script_arg+=" && make generate && make install"
+    sudo docker exec -i agentcnt script -qec "${script_arg}" || {
+        die "Failed to recompile vpp-agent in Docker image!"
+    }
+    # Make sure .deb files of other version are not present.
+    rm_cmd="rm -vf /opt/vpp-agent/dev/vpp/build-root/vpp*.deb /opt/vpp/*.deb"
+    sudo docker exec agentcnt bash -c "${rm_cmd}" || {
+        die "Failed to remove VPP debian packages!"
+    }
+    for f in "${DOWNLOAD_DIR}"/*; do
+        sudo docker cp "$f" "agentcnt:/opt/vpp-agent/dev/vpp/build-root"/ || {
+            die "Failed to copy files Docker image!"
+        }
+    done
+    # Save container state
+    sudo docker commit "$(sudo docker ps -q)" "dev_vpp_agent:latest" || {
+        die "Failed to commit state of Docker image!"
+    }
+
+    # Build prod_vpp_agent docker image
+    cd "docker/prod" || die "Change directory failed."
+    sudo docker build --tag "prod_vpp_agent" --no-cache "." || {
+        die "Failed to build Docker image!"
+    }
+    # Export Docker image
+    sudo docker save "prod_vpp_agent" | gzip > "prod_vpp_agent.tar.gz" || {
+        die "Failed to save Docker image!"
+    }
+    docker_image="$(readlink -e "prod_vpp_agent.tar.gz")" || {
+        die "Readlink failed."
+    }
+    rm -r "${DOWNLOAD_DIR}/vpp"* || die "Rm failed."
+    mv "${docker_image}" "${DOWNLOAD_DIR}"/ || die "Mv failed."
+}
+
+
+function gather_vpp () {
+
+    set -exuo pipefail
+
+    # Variables read:
+    # - TEST_CODE - The test selection string from environment or argument.
+    # - DOWNLOAD_DIR - Path to directory pybot takes the build to test from.
+    # - CSIT_DIR - Path to existing root of local CSIT git repository.
+    # Files read:
+    # - ${CSIT_DIR}/DPDK_STABLE_VER - DPDK version to use
+    #   by csit-vpp not-timed jobs.
+    # - ${CSIT_DIR}/VPP_STABLE_VER_UBUNTU - VPP version to use by those.
+    # - ../vpp*.deb - Relative to ${DOWNLOAD_DIR}, copied for vpp-csit jobs.
+    # Directories updated:
+    # - ${DOWNLOAD_DIR}, vpp-*.deb files are copied here for vpp-csit jobs.
+    # - ./ - Assumed ${DOWNLOAD_DIR}, vpp-*.deb files
+    #   are downloaded here for csit-vpp.
+    # Functions called:
+    # - die - Print to stderr and exit, defined in common.sh
+    # Bash scripts executed:
+    # - ${CSIT_DIR}/resources/tools/scripts/download_install_vpp_pkgs.sh
+    #   - Should download and extract requested files to ./.
+
+    case "${TEST_CODE}" in
+        # Not csit-vpp as this code is re-used by ligato gathering.
+        "csit-"*)
+            install_script="${CSIT_DIR}/resources/tools/scripts/"
+            install_script+="download_install_vpp_pkgs.sh"
+            # Use downloaded packages with specific version
+            if [[ "${TEST_CODE}" == *"daily"* ]] || \
+               [[ "${TEST_CODE}" == *"weekly"* ]] || \
+               [[ "${TEST_CODE}" == *"timed"* ]];
+            then
+                echo "Downloading latest VPP packages from NEXUS..."
+                # TODO: Can we source?
+                bash "${install_script}" --skip-install || {
+                    die "Failed to get VPP packages!"
+                }
+            else
+                echo "Downloading VPP packages of specific version from NEXUS."
+                dpdk_stable_ver="$(cat "${CSIT_DIR}/DPDK_STABLE_VER")" || {
+                    die "Cat failed."
+                }
+                vpp_stable_ver="$(cat "${CSIT_DIR}/VPP_STABLE_VER_UBUNTU")" || {
+                    die "Cat failed."
+                }
+                install_args=("--skip-install" "--vpp" "${vpp_stable_ver}")
+                install_args+=("--dkms" "${dpdk_stable_ver}")
+                bash "${install_script}" "${install_args[@]}" || {
+                    die "Failed to get VPP packages!"
+                }
+            fi
+            ;;
+        "vpp-csit-"*)
+            # Use locally built packages.
+            mv "${DOWNLOAD_DIR}"/../"vpp"*".deb" "${DOWNLOAD_DIR}"/ || {
+                die "Move command failed."
+            }
+            ;;
+        *)
+            die "Unable to identify job type from: ${TEST_CODE}"
+            ;;
+    esac
+}
diff --git a/resources/libraries/bash/function/per_patch.sh b/resources/libraries/bash/function/per_patch.sh
new file mode 100644
index 0000000000..15eaf028ee
--- /dev/null
+++ b/resources/libraries/bash/function/per_patch.sh
@@ -0,0 +1,314 @@
+# Copyright (c) 2018 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -exuo pipefail
+
+# This library defines functions used mainly by "per_patch_perf.sh" entry script.
+# Generally, the functions assume "common.sh" library has been sourced already.
+
+# Keep functions ordered alphabetically, please.
+
+# TODO: Add a link to bash style guide.
+
+
+function build_vpp_ubuntu_amd64 () {
+
+    set -exuo pipefail
+
+    # TODO: Make sure whether this works on other distros/archs too.
+
+    # Arguments:
+    # - ${1} - String identifier for echo, can be unset.
+    # Variables read:
+    # - VPP_DIR - Path to existing directory, parent to accessed directories.
+    # Directories updated:
+    # - ${VPP_DIR} - Whole subtree, many files (re)created by the build process.
+    # - ${VPP_DIR}/build-root - Final build artifacts for CSIT end up here.
+    # - ${VPP_DIR}/dpdk - The dpdk artifact is built, but moved to build-root/.
+    # Functions called:
+    # - die - Print to stderr and exit, defined in common.sh
+
+    cd "${VPP_DIR}" || die "Change directory command failed."
+    echo 'Building using "make build-root/vagrant/build.sh"'
+    # TODO: Do we want to support "${DRYRUN}" == "True"?
+    make UNATTENDED=yes install-dep || die "Make install-dep failed."
+    # The per_patch script calls this function twice, first for the new commit,
+    # then for its parent commit. On Jenkins, no dpdk is installed at first,
+    # locally it might have been installed. New dpdk is installed second call.
+    # If make detects installed vpp-dpdk-dev with matching version,
+    # it skips building vpp-dpdk-dkms entirely, but we need that file.
+    # On the other hand, if parent uses different dpdk version,
+    # the new vpp-dpdk-dkms is built, but the old one is not removed
+    # from the build directory if present. (Further functions move both,
+    # and during test dpkg decides on its own which version gets installed.)
+    # As per_patch is too dumb (yet) to detect any of that,
+    # the only safe solution is to clean build directory and force rebuild.
+    # TODO: Make this function smarter and skip DPDK rebuilds if possible.
+    cmd=("dpkg-query" "--showformat='$${Version}'" "--show" "vpp-dpdk-dev")
+    installed_deb_ver="$(sudo "${cmd[@]}" || true)"
+    if [[ -n "${installed_deb_ver}" ]]; then
+        sudo dpkg --purge "vpp-dpdk-dev" || {
+            die "Dpdk package uninstallation failed."
+        }
+    fi
+    make UNATTENDED=yes dpdk-install-dev || {
+        die "Make dpdk-install-dev failed."
+    }
+    build-root/vagrant/"build.sh" || die "Vagrant VPP build script failed."
+    # CSIT also needs the DPDK artifacts, which are not in build-root.
+    mv -v "dpdk/vpp-dpdk-dkms"*".deb" "build-root"/ || {
+        die "*.deb move failed."
+    }
+
+    echo "*******************************************************************"
+    echo "* VPP ${1-} BUILD SUCCESSFULLY COMPLETED" || {
+        die "Argument not found."
+    }
+    echo "*******************************************************************"
+}
+
+
+function compare_test_results () {
+
+    set -exuo pipefail
+
+    # Variables read:
+    # - VPP_DIR - Path to directory with VPP git repo (at least built parts).
+    # - ARCHIVE_DIR - Path to where robot result files are created in.
+    # - PYTHON_SCRIPTS_DIR - Path to directory holding comparison utility.
+    # Directories recreated:
+    # - csit_parent - Sibling to csit directory, for holding results
+    #   of parent build.
+    # Functions called:
+    # - die - Print to stderr and exit, defined in common.sh
+    # - parse_bmrr_results - See definition in this file.
+    # Exit code:
+    # - 0 - If the comparison utility sees no regression (nor data error).
+    # - 1 - If the comparison utility sees a regression (or data error).
+
+    cd "${VPP_DIR}" || die "Change directory operation failed."
+    rm -rf "csit_parent" || die "Remove operation failed."
+    mkdir -p "csit_parent" || die "Directory creation failed."
+    for filename in "output.xml" "log.html" "report.html"; do
+        mv "${ARCHIVE_DIR}/${filename}" "csit_parent/${filename}" || {
+            die "Attempt to move '${filename}' failed."
+        }
+    done
+    parse_bmrr_results "csit_parent" || {
+        die "The function should have died on error."
+    }
+
+    # Reusing CSIT main virtualenv.
+    pip install -r "${PYTHON_SCRIPTS_DIR}/perpatch_requirements.txt" || {
+        die "Perpatch Python requirements installation failed."
+    }
+    python "${PYTHON_SCRIPTS_DIR}/compare_perpatch.py"
+    # The exit code determines the vote result.
+}
+
+
+function download_builds () {
+
+    set -exuo pipefail
+
+    # This is mostly useful only for Sandbox testing, to avoid recompilation.
+    #
+    # Arguments:
+    # - ${1} - URL to download VPP builds from.
+    # Variables read:
+    # - VPP_DIR - Path to WORKSPACE, parent of created directories.
+    # - DOWNLOAD_DIR - Path to directory pybot takes the build to test from.
+    # Directories created:
+    # - archive - Ends up empty, not to be confused with ${ARCHIVE_DIR}.
+    # - build_new - Holding built artifacts of the patch under test (PUT).
+    # - build_parent - Holding built artifacts of parent of PUT.
+    # - csit_new - (Re)set to a symlink to archive robot results on failure.
+    # Functions called:
+    # - die - Print to stderr and exit, defined in common.sh
+
+    cd "${VPP_DIR}" || die "Change directory operation failed."
+    rm -rf "build-root" "build_parent" "build_new" "archive" "csit_new" || {
+        die "Directory removal failed."
+    }
+    wget -N --progress=dot:giga "${1}" || die "Wget download failed."
+    unzip "archive.zip" || die "Archive extraction failed."
+    mv "archive/build_parent" ./ || die "Move operation failed."
+    mv "archive/build_new" ./ || die "Move operation failed."
+    cp -r "build_new"/*".deb" "${DOWNLOAD_DIR}" || {
+        die "Copy operation failed."
+    }
+    # Create symlinks so that if job fails on robot, results can be archived.
+    ln -s "${ARCHIVE_DIR}" "csit_new" || die "Symbolic link creation failed."
+}
+
+
+function parse_bmrr_results () {
+
+    set -exuo pipefail
+
+    # Currently "parsing" is just two greps.
+    # TODO: Re-use PAL parsing code, make parsing more general and centralized.
+    #
+    # Arguments:
+    # - ${1} - Path to (existing) directory holding robot output.xml result.
+    # Files read:
+    # - output.xml - From argument location.
+    # Files updated:
+    # - results.txt - (Re)created, in argument location.
+    # Functions called:
+    # - die - Print to stderr and exit, defined in common.sh
+
+    rel_dir="$(readlink -e "${1}")" || die "Readlink failed."
+    in_file="${rel_dir}/output.xml"
+    out_file="${rel_dir}/results.txt"
+
+    # TODO: Do we need to check echo exit code explicitly?
+    echo "Parsing ${in_file} putting results into ${out_file}"
+    echo "TODO: Re-use parts of PAL when they support subsample test parsing."
+
+    pattern='Maximum Receive Rate trial results in packets'
+    pattern+=' per second: .*\]'
+    grep -o "${pattern}" "${in_file}" | grep -o '\[.*\]' > "${out_file}" || {
+        die "Some parsing grep command has failed."
+    }
+}
+
+
+function prepare_build_parent () {
+
+    set -exuo pipefail
+
+    # Variables read:
+    # - VPP_DIR - Path to existing directory, parent to accessed directories.
+    # Directories read:
+    # - build-root - Existing directory with built VPP artifacts (also DPDK).
+    # Directories updated:
+    # - ${VPP_DIR} - A local git repository, parent commit gets checked out.
+    # - build_new - Old contents removed, content of build-root copied here.
+    # Functions called:
+    # - die - Print to stderr and exit, defined in common.sh
+
+    cd "${VPP_DIR}" || die "Change directory operation failed."
+    rm -rf "build_new" || die "Remove operation failed."
+    mkdir -p "build_new" || die "Directory creation failed."
+    mv "build-root"/*".deb" "build_new"/ || die "Move operation failed."
+    # The previous build could have left some incompatible leftovers,
+    # e.g. DPDK artifacts of different version.
+    # "make -C dpdk clean" does not actually remove such .deb file.
+    # Also, there usually is a copy of dpdk artifact in build-root.
+    git clean -dffx "dpdk"/ "build-root"/ || die "Git clean operation failed."
+    # Finally, check out the parent commit.
+    git checkout HEAD~ || die "Git checkout operation failed."
+    # Display any other leftovers.
+    git status || die "Git status operation failed."
+}
+
+
+function prepare_test_new () {
+
+    set -exuo pipefail
+
+    # Variables read:
+    # - VPP_DIR - Path to existing directory, parent of accessed directories.
+    # - DOWNLOAD_DIR - Path to directory where Robot takes builds to test from.
+    # - ARCHIVE_DIR - Path to where robot result files are created in.
+    # Directories read:
+    # - build-root - Existing directory with built VPP artifacts (also DPDK).
+    # Directories updated:
+    # - build_parent - Old directory removed, build-root moved to become this.
+    # - ${DOWNLOAD_DIR} - Old content removed, files from build_new copied here.
+    # - csit_new - Currently a symlink, to archive robot results on failure.
+    # Functions called:
+    # - die - Print to stderr and exit, defined in common.sh
+
+    cd "${VPP_DIR}" || die "Change directory operation failed."
+    rm -rf "build_parent" "csit_new" "${DOWNLOAD_DIR}"/* || die "Remove failed."
+    mkdir -p "build_parent" || die "Directory creation operation failed."
+    mv "build-root"/*".deb" "build_parent"/ || die "Move operation failed."
+    cp "build_new"/*".deb" "${DOWNLOAD_DIR}" || die "Copy operation failed."
+    # Create symlinks so that if job fails on robot, results can be archived.
+    ln -s "${ARCHIVE_DIR}" "csit_new" || die "Symbolic link creation failed."
+}
+
+
+function prepare_test_parent () {
+
+    set -exuo pipefail
+
+    # Variables read:
+    # - VPP_DIR - Path to existing directory, parent of accessed directories.
+    # - CSIT_DIR - Path to existing root of local CSIT git repository.
+    # - ARCHIVE_DIR and DOWNLOAD_DIR - Paths to directories to update.
+
+
+function prepare_test_parent () {
+
+    set -exuo pipefail
+
+    # Variables read:
+    # - VPP_DIR - Path to existing directory, parent of accessed directories.
+    # - CSIT_DIR - Path to existing root of local CSIT git repository.
+    # - ARCHIVE_DIR and DOWNLOAD_DIR - Paths to directories to update.
+    # Directories read:
+    # - build_parent - Build artifacts (to test next) are copied from here.
+    # Directories updated:
+    # - csit_new - Deleted, then recreated, and the latest robot results
+    #   moved here.
+    # - ${CSIT_DIR} - Subjected to git reset and git clean.
+    # - ${ARCHIVE_DIR} - Created if not existing (if deleted by git clean).
+    # - ${DOWNLOAD_DIR} - Created after git clean, parent build copied here.
+    # - csit_parent - Recreated as a symlink to ${ARCHIVE_DIR},
+    #   to archive robot results.
+    # Functions called:
+    # - die - Print to stderr and exit, defined in common.sh
+    # - parse_bmrr_results - See definition in this file.
+
+    cd "${VPP_DIR}" || die "Change directory operation failed."
+    rm -rf "csit_new" "csit_parent" || die "Remove operation failed."
+    mkdir -p "csit_new" || die "Create directory operation failed."
+    for filename in "output.xml" "log.html" "report.html"; do
+        mv "${ARCHIVE_DIR}/${filename}" "csit_new/${filename}" || {
+            die "Move operation of '${filename}' failed."
+        }
+    done
+    parse_bmrr_results "csit_new" || {
+        die "The function should have died on error."
+    }
+
+    pushd "${CSIT_DIR}" || die "Change directory operation failed."
+    git reset --hard HEAD || die "Git reset operation failed."
+    git clean -dffx || die "Git clean operation failed."
+    popd || die "Change directory operation failed."
+    mkdir -p "${ARCHIVE_DIR}" "${DOWNLOAD_DIR}" || die "Dir creation failed."
+
+    cp "build_parent"/*".deb" "${DOWNLOAD_DIR}"/ || die "Copy failed."
+    # Create symlinks so that if the job fails on robot, results can be archived.
+    ln -s "${ARCHIVE_DIR}" "csit_parent" || die "Symlink creation failed."
+}
+
+
+function set_perpatch_dut () {
+
+    set -exuo pipefail
+
+    # Variables set:
+    # - DUT - CSIT test/ subdirectory containing suites to execute.
+
+    # TODO: Detect DUT from the job name, once there is more than just
+    # VPP perpatch.
+
+    DUT="vpp"
+}
+
+
+function set_perpatch_vpp_dir () {
+
+    set -exuo pipefail
+
+    # Variables read:
+    # - CSIT_DIR - Path to existing root of local CSIT git repository.
+    # Variables set:
+    # - VPP_DIR - Path to existing root of local VPP git repository.
+    # Functions called:
+    # - die - Print to stderr and exit, defined in common.sh
+
+    # In perpatch, CSIT is cloned inside the VPP clone.
+    VPP_DIR="$(readlink -e "${CSIT_DIR}/..")" || die "Readlink failed."
+}
diff --git a/resources/tools/scripts/compare_perpatch.py b/resources/tools/scripts/compare_perpatch.py
new file mode 100644
index 0000000000..cc9ffd8992
--- /dev/null
+++ b/resources/tools/scripts/compare_perpatch.py
@@ -0,0 +1,85 @@
+# Copyright (c) 2018 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Script for determining whether the per-patch perf test votes -1.
+
+This script assumes there exist two text files with processed BMRR results,
+located at hardcoded relative paths, having several lines
+of json-parseable lists of float values, corresponding to testcase results.
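+The hardcoded locations are csit_parent/results.txt and csit_new/results.txt;
+each line looks like this (hypothetical values, packets per second):
+[4833459.0, 4877757.0, 4865723.0, 4891706.0, 4882463.0]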
+This script then uses the jumpavg library to determine whether there was
+a regression, a progression, or no change for each testcase.
+If the number of tests does not match, or there was a regression,
+this script votes -1 (by exiting with code 1), otherwise it votes +1 (exit 0).
+"""
+
+import json
+import sys
+
+from jumpavg.BitCountingMetadataFactory import BitCountingMetadataFactory
+from jumpavg.BitCountingClassifier import BitCountingClassifier
+
+
+def hack(value_list):
+    """Return the middle two quartiles, hoping to reduce influence of outliers.
+
+    :param value_list: List to pick the subset from.
+    :type value_list: list of float
+    :returns: New list containing the middle values.
+    :rtype: list of float
+    """
+    tmp = sorted(value_list)
+    quarter = len(tmp) / 4
+    # With fewer than 4 samples, quarter is 0 and tmp[0:-0] would be empty,
+    # so fall back to the full list in that case.
+    ret = tmp[quarter:-quarter] if quarter else tmp
+    return ret
+
+
+parent_lines = list()
+new_lines = list()
+with open("csit_parent/results.txt") as parent_file:
+    parent_lines = parent_file.readlines()
+with open("csit_new/results.txt") as new_file:
+    new_lines = new_file.readlines()
+if len(parent_lines) != len(new_lines):
+    print "Number of passed tests does not match!"
+    sys.exit(1)
+classifier = BitCountingClassifier()
+num_tests = len(parent_lines)
+exit_code = 0
+for index in range(num_tests):
+    parent_values = hack(json.loads(parent_lines[index]))
+    new_values = hack(json.loads(new_lines[index]))
+    parent_max = BitCountingMetadataFactory.find_max_value(parent_values)
+    new_max = BitCountingMetadataFactory.find_max_value(new_values)
+    cmax = max(parent_max, new_max)
+    factory = BitCountingMetadataFactory(cmax)
+    parent_stats = factory.from_data(parent_values)
+    factory = BitCountingMetadataFactory(cmax, parent_stats.avg)
+    new_stats = factory.from_data(new_values)
+    print "DEBUG parent: {p}".format(p=parent_stats)
+    print "DEBUG new: {n}".format(n=new_stats)
+    common_max = max(parent_stats.avg, new_stats.avg)
+    difference = (new_stats.avg - parent_stats.avg) / common_max
+    print "DEBUG difference: {d}%".format(d=100 * difference)
+    classified_list = classifier.classify([parent_stats, new_stats])
+    if len(classified_list) < 2:
+        print "Test index {index}: normal (no anomaly)".format(index=index)
+        continue
+    anomaly = classified_list[1].metadata.classification
+    if anomaly == "regression":
+        print "Test index {index}: anomaly regression".format(index=index)
+        exit_code = 1
+        continue
+    print "Test index {index}: anomaly {anomaly}".format(
+        index=index, anomaly=anomaly)
+print "DEBUG exit code {code}".format(code=exit_code)
+sys.exit(exit_code)
diff --git a/resources/tools/scripts/perpatch_requirements.txt b/resources/tools/scripts/perpatch_requirements.txt
new file mode 100644
index 0000000000..225d0d6202
--- /dev/null
+++ b/resources/tools/scripts/perpatch_requirements.txt
@@ -0,0 +1,15 @@
+# Copyright (c) 2018 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# TODO: Convert to use the code from cloned CSIT git, not from pip.
+jumpavg==0.1.3
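
A minimal usage sketch (assuming a workspace where the robot runs already
produced csit_new/results.txt and csit_parent/results.txt, as the functions
above arrange):

    pip install -r resources/tools/scripts/perpatch_requirements.txt
    python resources/tools/scripts/compare_perpatch.py
    # Exit code 0 means vote +1; exit code 1 means vote -1 (regression
    # or data error).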