Fix exit from reservation loop
[csit.git] / resources / libraries / bash / function / common.sh
index 0daa1d3..e89bae9 100644 (file)
@@ -33,6 +33,7 @@ function activate_docker_topology () {
     # - TOPOLOGIES - Available topologies.
     # - NODENESS - Node multiplicity of desired testbed.
     # - FLAVOR - Node flavor string, usually describing the processor.
+    # - IMAGE_VER_FILE - Name of file that contains the image version.
     # Variables set:
     # - WORKING_TOPOLOGY - Path to topology file.
 
@@ -40,7 +41,7 @@ function activate_docker_topology () {
         die "Source failed!"
     }
 
-    device_image="$(< ${CSIT_DIR}/VPP_DEVICE_IMAGE)"
+    device_image="$(< ${CSIT_DIR}/${IMAGE_VER_FILE})"
     case_text="${NODENESS}_${FLAVOR}"
     case "${case_text}" in
         "1n_skx")
@@ -503,29 +504,35 @@ function get_test_tag_string () {
 }
 
 
-function reserve_testbed () {
+function reserve_and_cleanup_testbed () {
 
     set -exuo pipefail
 
     # Reserve physical testbed, perform cleanup, register trap to unreserve.
+    # When cleanup fails, remove from topologies and keep retrying
+    # until all topologies are removed.
     #
     # Variables read:
     # - TOPOLOGIES - Array of paths to topology yaml to attempt reservation on.
     # - PYTHON_SCRIPTS_DIR - Path to directory holding the reservation script.
     # Variables set:
+    # - TOPOLOGIES - Array of paths to topologies, with failed cleanups removed.
     # - WORKING_TOPOLOGY - Path to topology yaml file of the reserved testbed.
     # Functions called:
     # - die - Print to stderr and exit.
     # Traps registered:
     # - EXIT - Calls cancel_all for ${WORKING_TOPOLOGY}.
 
-    while true; do
+    while [[ ${TOPOLOGIES[@]} ]]; do
         for topo in "${TOPOLOGIES[@]}"; do
             set +e
             python "${PYTHON_SCRIPTS_DIR}/topo_reservation.py" -t "${topo}"
             result="$?"
             set -e
             if [[ "${result}" == "0" ]]; then
+                # Register the unreserve trap before the cleanup check,
+                # so that multiple jobs reporting a failed cleanup
+                # make it more likely that humans notice and fix it.
                 WORKING_TOPOLOGY="${topo}"
                 echo "Reserved: ${WORKING_TOPOLOGY}"
                 trap "untrap_and_unreserve_testbed" EXIT || {
@@ -535,9 +542,28 @@ function reserve_testbed () {
                     }
                     die "Trap attempt failed, unreserve succeeded. Aborting."
                 }
-                cleanup_topo || {
-                    die "Testbed cleanup failed."
-                }
+                # Cleanup check.
+                set +e
+                cleanup_topo
+                result="$?"
+                set -e
+                if [[ "${result}" == "0" ]]; then
+                    break
+                fi
+                warn "Testbed cleanup failed: ${topo}"
+                untrap_and_unreserve_testbed "Fail of unreserve after cleanup."
+                # WORKING_TOPOLOGY is now empty again.
+                # Build new topology array.
+                #   TOPOLOGIES=("${TOPOLOGIES[@]/$topo}")
+                # does not really work, see:
+                # https://stackoverflow.com/questions/16860877/remove-an-element-from-a-bash-array
+                new_topologies=()
+                for item in "${TOPOLOGIES[@]}"; do
+                    if [[ "${item}" != "${topo}" ]]; then
+                        new_topologies+=("${item}")
+                    fi
+                done
+                TOPOLOGIES=("${new_topologies[@]}")
                 break
             fi
         done
@@ -554,6 +580,11 @@ function reserve_testbed () {
         echo "Sleeping ${sleep_time}"
         sleep "${sleep_time}" || die "Sleep failed."
     done
+    if [[ ${TOPOLOGIES[@]} ]]; then
+        echo "Reservation and cleanup successful."
+    else
+        die "Run out of operational testbeds!"
+    fi
 }
 
 
@@ -658,10 +689,6 @@ function select_tags () {
 
     # Blacklisting certain tags per topology.
     case "${TEST_CODE}" in
-        *"3n-hsw"*)
-            test_tag_array+=("!drv_avf")
-            test_tag_array+=("!ipsechwNOTnic_intel-xl710")
-            ;;
         *"2n-skx"*)
             test_tag_array+=("!ipsechw")
             ;;
@@ -677,9 +704,21 @@ function select_tags () {
             test_tag_array+=("!vhost")
             test_tag_array+=("!vts")
             ;;
+        *"3n-hsw"*)
+            # TODO: Introduce NOIOMMU version of AVF tests.
+            # TODO: Make (both) AVF tests work on Haswell,
+            # or document why (some of) it is not possible.
+            # https://github.com/FDio/vpp/blob/master/src/plugins/avf/README.md
+            test_tag_array+=("!drv_avf")
+            # All cards have access to QAT, but only one card (xl710)
+            # resides in the same NUMA node as QAT. Other cards must go
+            # over QPI, which is a setup we do not even want to run.
+            test_tag_array+=("!ipsechwNOTnic_intel-xl710")
+            ;;
         *)
             # Default to 3n-hsw due to compatibility.
             test_tag_array+=("!drv_avf")
+            test_tag_array+=("!ipsechwNOTnic_intel-xl710")
             ;;
     esac
 
@@ -691,6 +730,7 @@ function select_tags () {
     # We will prefix with perftest to prevent running other tests
     # (e.g. Functional).
     prefix="perftestAND"
+    set +x
     if [[ "${TEST_CODE}" == "vpp-"* ]]; then
         # Automatic prefixing for VPP jobs to limit the NIC used and
         # traffic evaluation to MRR.
@@ -710,6 +750,7 @@ function select_tags () {
             TAGS+=("${prefix}${tag}")
         fi
     done
+    set -x
 }
 
 
@@ -758,6 +799,36 @@ function select_vpp_device_tags () {
     done
 }
 
+function select_os () {
+
+    set -exuo pipefail
+
+    # Detect the OS from /etc/os-release, set matching names; die if unknown.
+    # Variables set:
+    # - VPP_VER_FILE - Name of file in CSIT dir containing vpp stable version.
+    # - IMAGE_VER_FILE - Name of file in CSIT dir containing the image name.
+    # - PKG_SUFFIX - Suffix of OS package file name, "rpm" or "deb".
+
+    os_id=$(grep '^ID=' /etc/os-release | cut -f2- -d= | sed -e 's/\"//g') || {
+        die "Get OS release failed."
+    }
+    case "${os_id}" in
+        "ubuntu"*)
+            IMAGE_VER_FILE="VPP_DEVICE_IMAGE_UBUNTU"
+            VPP_VER_FILE="VPP_STABLE_VER_UBUNTU_BIONIC"
+            PKG_SUFFIX="deb"
+            ;;
+        "centos"*)
+            IMAGE_VER_FILE="VPP_DEVICE_IMAGE_CENTOS"
+            VPP_VER_FILE="VPP_STABLE_VER_CENTOS"
+            PKG_SUFFIX="rpm"
+            ;;
+        *)
+            die "Unable to identify distro or os from ${os_id}"
+            ;;
+    esac
+}
+
 
 function select_topology () {
 
@@ -776,6 +847,7 @@ function select_topology () {
 
     case_text="${NODENESS}_${FLAVOR}"
     case "${case_text}" in
+        # TODO: Move tags to "# Blacklisting certain tags per topology" section.
         "1n_vbox")
             TOPOLOGIES=( "${TOPOLOGIES_DIR}"/*vpp_device*.template )
             TOPOLOGIES_TAGS="2_node_single_link_topo"