diff options
author | Peter Mikus <pmikus@cisco.com> | 2017-05-19 09:42:39 +0200 |
---|---|---|
committer | Dave Wallace <dwallacelf@gmail.com> | 2017-06-05 20:54:47 +0000 |
commit | e2f9bdd098fb0008cc4fc2f8c8298ebe0d6ef062 (patch) | |
tree | a98e8699c433ec4c2871268ea157ca3cb93d162e /bootstrap.sh | |
parent | 9339a16712a48e85fe97d3e6d9c3ab4a61455d1b (diff) |
CSIT-607 Optimize VIRL job scheduling algorithm
Optimize VIRL job scheduling algorithm based on available VIRL
host capacity. Add IP pool availability pre-checks.
- add quota based on max. allowed IPs and max. allowed VMs per VIRL host
- use one common bootstrap file instead of two separate files (one
for Ubuntu, another for CentOS)
Change-Id: Ic40122a084624ff9c5eafa9f372b0451e857e29a
Signed-off-by: Peter Mikus <pmikus@cisco.com>
Diffstat (limited to 'bootstrap.sh')
-rwxr-xr-x | bootstrap.sh | 436 |
1 files changed, 434 insertions, 2 deletions
# bootstrap.sh -- content of the script from its line 18 onwards, i.e. the
# result of applying the hunk "@@ -18,9 +18,441 @@" of the commit described
# above. Lines 1-17 of the script are outside this view.
#
# Flow:
#   1. detect the distro and install the Python build prerequisites,
#   2. pick the VIRL hosts that are in production and spread the test groups
#      over the least loaded ones,
#   3. download/copy the VPP (and DPDK) packages to the chosen hosts,
#   4. start one VIRL simulation per test group and fetch its topology,
#   5. run the test groups in parallel, merge the results with rebot and
#      exit 0 only when every test set returned 0.
#
# NOTE(review): relative paths (priv_key, *.rpm/*.deb, topologies/, tests/)
# are used next to ${SCRIPT_DIR}-based ones, so the script appears to assume
# it is started from ${SCRIPT_DIR} -- confirm with the job definition.

cat /etc/hostname
cat /etc/hosts

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
export PYTHONPATH=${SCRIPT_DIR}

# Distro specific settings: package format, artifact names and the files the
# stable versions / VIRL topology / VIRL release are read from.
if [ -f "/etc/redhat-release" ]; then
    DISTRO="CENTOS"
    sudo yum install -y python-devel python-virtualenv
    VPP_ARTIFACTS="vpp vpp-debuginfo vpp-devel vpp-lib vpp-plugins"
    DPDK_ARTIFACTS=""
    PACKAGE="rpm"
    VPP_CLASSIFIER=""
    DPDK_STABLE_VER=$(cat "${SCRIPT_DIR}/DPDK_STABLE_VER").x86_64
    VPP_REPO_URL=$(cat "${SCRIPT_DIR}/VPP_REPO_URL_CENTOS")
    VPP_STABLE_VER=$(cat "${SCRIPT_DIR}/VPP_STABLE_VER_CENTOS")
    VIRL_TOPOLOGY=$(cat "${SCRIPT_DIR}/VIRL_TOPOLOGY_CENTOS")
    VIRL_RELEASE=$(cat "${SCRIPT_DIR}/VIRL_RELEASE_CENTOS")
    SHARED_MEMORY_PATH="/dev/shm"
else
    DISTRO="UBUNTU"
    export DEBIAN_FRONTEND=noninteractive
    sudo apt-get -y update
    sudo apt-get -y install libpython2.7-dev python-virtualenv
    VPP_ARTIFACTS="vpp vpp-dbg vpp-dev vpp-lib vpp-plugins"
    DPDK_ARTIFACTS="vpp-dpdk-dkms"
    PACKAGE="deb"
    VPP_CLASSIFIER="-deb"
    DPDK_STABLE_VER=$(cat "${SCRIPT_DIR}/DPDK_STABLE_VER")_amd64
    VPP_REPO_URL=$(cat "${SCRIPT_DIR}/VPP_REPO_URL_UBUNTU")
    VPP_STABLE_VER=$(cat "${SCRIPT_DIR}/VPP_STABLE_VER_UBUNTU")
    VIRL_TOPOLOGY=$(cat "${SCRIPT_DIR}/VIRL_TOPOLOGY_UBUNTU")
    VIRL_RELEASE=$(cat "${SCRIPT_DIR}/VIRL_RELEASE_UBUNTU")
    SHARED_MEMORY_PATH="/run/shm"
fi

# Candidate VIRL hosts and their per-host limits, as "server:limit" entries.
VIRL_SERVERS=("10.30.51.28" "10.30.51.29" "10.30.51.30")
IPS_PER_VIRL=( "10.30.51.28:252"
               "10.30.51.29:74"
               "10.30.51.30:74" )
VMS_PER_VIRL=( "10.30.51.28:36"
               "10.30.51.29:36"
               "10.30.51.30:36" )
IPS_PER_SIMULATION=5

# Print the limit stored for a server in a list of "server:limit" entries,
# or 0 when the server is not listed.
# Arguments: $1 - server address, $2... - "server:limit" entries
function lookup_virl_limit() {
    local server=$1
    shift
    local entry
    for entry in "$@"; do
        if [ "${entry%%:*}" == "${server}" ]; then
            echo "${entry#*:}"
            return 0
        fi
    done
    echo "0"
}

# Print the max. number of IPs of the given VIRL server (0 if unknown).
# Arguments: $1 - server address
function get_max_ip_nr() {
    lookup_virl_limit "$1" "${IPS_PER_VIRL[@]}"
}

# Print the max. number of VMs of the given VIRL server (0 if unknown).
# Arguments: $1 - server address
function get_max_vm_nr() {
    lookup_virl_limit "$1" "${VMS_PER_VIRL[@]}"
}

VIRL_USERNAME=jenkins-in
VIRL_PKEY=priv_key
VIRL_SERVER_STATUS_FILE="status"
VIRL_SERVER_EXPECTED_STATUS="PRODUCTION"

# Kept as a plain string on purpose: it is expanded unquoted so that it splits
# into the individual ssh/scp options.
SSH_OPTIONS="-i ${VIRL_PKEY} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o BatchMode=yes -o LogLevel=error"

# One comma separated list of suites per parallel test set.
TEST_GROUPS=("gre,ipv6,lisp,policer,rpf,softwire" "dhcp,ipsec,l2bd,l2xc,telemetry,vrf,vxlan" "cop,fds,honeycomb,iacl,ipv4,tap,vhost,vlan")
SUITE_PATH="tests.func"
SKIP_PATCH="SKIP_PATCH"

# Create tmp dir; -p so that a directory left over from a previous run in the
# same workspace is not an error.
mkdir -p "${SCRIPT_DIR}/tmp"

# Use tmp dir to store log files
LOG_PATH="${SCRIPT_DIR}/tmp"

# Use tmp dir for tarballs
export TMPDIR="${SCRIPT_DIR}/tmp"

# Print and run an ssh command with the common SSH options.
# Arguments: $@ - ssh destination followed by the remote command
function ssh_do() {
    echo
    echo "### " ssh "$@"
    ssh ${SSH_OPTIONS} "$@"
}

# NOTE(review): the private key is hard-coded in the script; it should come
# from the CI credential store instead. It is kept here because the script
# cannot reach the VIRL hosts without it.
rm -f "${VIRL_PKEY}"
# Create the file under umask 077 so it is never readable by others, not even
# between its creation and the chmod below.
( umask 077 && cat > "${VIRL_PKEY}" <<'EOF'
-----BEGIN RSA PRIVATE KEY-----
MIIEpQIBAAKCAQEA+IHXq87GcqMR1C47rzx6Cbip5Ghq8pKrbqKrP5Nf41HcYrT6
GOXl9nFWKsMOzIlIn+8y7Il27eZh7csQGApbg8QLiHMtcYEmWNzKZpkqg4nuAPxX
VXwlKgnKX902SrET9Gp9TDayiHtCRWVfrlPPPSA0UEXW6BjLN/uHJ+W/Xzrrab+9
asBVa05vT2W6n0KJ66zfCaeDM912mQ6SttscAwFoWDmdHlegiVqrlIG2ABxOvxxz
L3dM3iSmlmQlzv9bThjo+nI4KFYh6m5wrZmAo5r/4q9CIJc21HVnTqkGOWJIZz6J
73lePJVSq5gYqaoGw3swFEA/MDkOx7baWKSoLQIDAQABAoIBAQCNBeolNp+JWJ76
gQ4fwLsknyXSV6sxYyhkDW4PEwwcTU06uqce0AAzXVffxne0fMe48x47+zqBgPbb
4huM+Pu8B9nfojUMr5TaYtl9Zbgpk3F8H7dT7LKOa6XrxvZTZrADSRc30+Z26zPN
e9zTaf42Gvt0/l0Zs1BHwbaOXqO+XuwJ3/F9Sf3PQYWXD3EOWjpHDP/X/1vAs6lV
SLkm6J/9KKE1m6I6LTYjIXuYt4SXybW6N2TSy54hhQtYcDUnIU2hR/PHVWKrGA0J
kELgrtTNTdbML27O5gFWU4PLUEYTZ9fN11D6qUZKxLcPOiPPHXkiILMRCCnG5DYI
ksBAU/YlAoGBAPxZO9VO18TYc8THV1nLKcvT2+1oSs1UcA2wNQMU55t910ZYinRa
MRwUhMOf8Mv5wOeiZaRICQB1PnVWtDVmGECgPpK6jUxqAwn8rgJcnoafLGL5YKMY
RVafTe6N5LXgCaOcJrk21wxs6v7ninEbUxxc575urOvZMBkymDw91dwbAoGBAPwa
YRhKhrzFKZzdK0RadVjnxKvolUllpoqqg3XuvmeAJHAOAnaOgVWq68NAcp5FZJv0
2D2Up7TX8pjf9MofP1SJbcraKBpK4NzfNkA0dSdEi+FhVofAJ9umB2o5LW1n7sab
UIrjsdzSJK/9Zb9yTTHPyibYzNEgaJV1HsbxfEFXAoGAYO2RmvRm0phll18OQVJV
IpKk9kLKAKZ/R/K32hAsikBC8SVPQTPniyaifFWx81diblalff2hX4ipTf7Yx24I
wMIMZuW7Im/R7QMef4+94G3Bad7p7JuE/qnAEHJ2OBnu+eYfxaK35XDsrq6XMazS
NqHE7hOq3giVfgg+C12hCKMCgYEAtu9dbYcG5owbehxzfRI2/OCRsjz/t1bv1seM
xVMND4XI6xb/apBWAZgZpIFrqrWoIBM3ptfsKipZe91ngBPUnL9s0Dolx452RVAj
yctHB8uRxWYgqDkjsxtzXf1HnZBBkBS8CUzYj+hdfuddoeKLaY3invXLCiV+PpXS
U4KAK9kCgYEAtSv0m5+Fg74BbAiFB6kCh11FYkW94YI6B/E2D/uVTD5dJhyEUFgZ
cWsudXjMki8734WSpMBqBp/J8wG3C9ZS6IpQD+U7UXA+roB7Qr+j4TqtWfM+87Rh
maOpG56uAyR0w5Z9BhwzA3VakibVk9KwDgZ29WtKFzuATLFnOtCS46E=
-----END RSA PRIVATE KEY-----
EOF
)
chmod 600 "${VIRL_PKEY}"

#
# A server is kept as a candidate only when it is reachable and its "status"
# file contains "PRODUCTION".
#
# Abort if no server qualifies.
#
VIRL_PROD_SERVERS=()
for virl_host in "${VIRL_SERVERS[@]}"; do
    virl_server_status=$(ssh ${SSH_OPTIONS} "${VIRL_USERNAME}@${virl_host}" cat "${VIRL_SERVER_STATUS_FILE}" 2>&1)
    echo "VIRL HOST ${virl_host} status is \"${virl_server_status}\""
    if [ "${virl_server_status}" == "${VIRL_SERVER_EXPECTED_STATUS}" ]; then
        # Candidate is in good status. Add to array.
        VIRL_PROD_SERVERS+=("${virl_host}")
    fi
done

VIRL_SERVERS=("${VIRL_PROD_SERVERS[@]}")
echo "VIRL servers in production: ${VIRL_SERVERS[*]}"
num_hosts=${#VIRL_SERVERS[@]}
if [ "${num_hosts}" -eq 0 ]; then
    echo "No more VIRL candidate hosts available, failing."
    exit 127
fi

# Get the LOAD of each server based on number of active simulations (testcases)
VIRL_SERVER_LOAD=()
for index in "${!VIRL_SERVERS[@]}"; do
    load=$(ssh ${SSH_OPTIONS} "${VIRL_USERNAME}@${VIRL_SERVERS[${index}]}" "list-testcases | grep session | wc -l")
    load=${load//[[:space:]]/}
    if [[ ! "${load}" =~ ^[0-9]+$ ]]; then
        # Without a number here the comparison below would be meaningless.
        echo "Could not read the load of VIRL host ${VIRL_SERVERS[${index}]}, assuming 0."
        load=0
    fi
    VIRL_SERVER_LOAD[${index}]=${load}
done

# Pick for each TEST_GROUP least loaded server (the first one on a tie)
VIRL_SERVER=()
for index in "${!TEST_GROUPS[@]}"; do
    least_load_server_idx=0
    for candidate in "${!VIRL_SERVER_LOAD[@]}"; do
        if (( VIRL_SERVER_LOAD[candidate] < VIRL_SERVER_LOAD[least_load_server_idx] )); then
            least_load_server_idx=${candidate}
        fi
    done
    VIRL_SERVER+=("${VIRL_SERVERS[${least_load_server_idx}]}")
    # Adjusting load as we are not going run simulation immediately
    VIRL_SERVER_LOAD[${least_load_server_idx}]=$(( VIRL_SERVER_LOAD[least_load_server_idx] + 1 ))
done

echo "Selected VIRL servers: ${VIRL_SERVER[*]}"

# Download the given artifacts of one version from ${VPP_REPO_URL} into the
# current directory; exit the script with wget's status on the first failure.
# Arguments: $1 - version string, $2... - artifact names
function download_artifacts() {
    local version=$1
    shift
    local artifact
    for artifact in "$@"; do
        wget -q "${VPP_REPO_URL}/${artifact}/${version}/${artifact}-${version}${VPP_CLASSIFIER}.${PACKAGE}" || exit
    done
}

# Temporarily download VPP and DPDK packages from nexus.fd.io
if [ "${#}" -ne "0" ]; then
    # Called with arguments: VPP packages are not downloaded (presumably they
    # are already in the working directory -- TODO confirm with the caller).
    echo "${1}"
    SKIP_PATCH="skip_patchORskip_vpp_patch"
    # Download DPDK parts not included in dpdk plugin of vpp build
    download_artifacts "${DPDK_STABLE_VER}" ${DPDK_ARTIFACTS}
else
    rm -f -- *."${PACKAGE}"
    download_artifacts "${DPDK_STABLE_VER}" ${DPDK_ARTIFACTS}
    download_artifacts "${VPP_STABLE_VER}" ${VPP_ARTIFACTS}
fi

VPP_PKGS=(*."${PACKAGE}")
echo "${VPP_PKGS[@]}"
VIRL_DIR_LOC="/tmp"

# Prepend directory location at remote host to package file list
VPP_PKGS_FULL=("${VPP_PKGS[@]/#/${VIRL_DIR_LOC}/}")

echo "Updated file names: " "${VPP_PKGS_FULL[@]}"

# The private key is intentionally NOT printed: it would end up in the
# console log of the job.

# Copy the files to VIRL hosts
DONE=()
for index in "${!VIRL_SERVER[@]}"; do
    # Do not copy files in case they have already been copied to the VIRL host
    copy=1
    for done_host in "${DONE[@]}"; do
        if [ "${done_host}" == "${VIRL_SERVER[${index}]}" ]; then
            copy=0
            break
        fi
    done

    if [ "${copy}" -eq "0" ]; then
        echo "VPP packages have already been copied to the VIRL host ${VIRL_SERVER[${index}]}"
    else
        scp ${SSH_OPTIONS} *."${PACKAGE}" \
            "${VIRL_USERNAME}@${VIRL_SERVER[${index}]}:${VIRL_DIR_LOC}/"

        result=$?
        if [ "${result}" -ne "0" ]; then
            echo "Failed to copy VPP packages to VIRL host ${VIRL_SERVER[${index}]}"
            echo "${result}"
            exit "${result}"
        else
            echo "VPP packages successfully copied to the VIRL host ${VIRL_SERVER[${index}]}"
        fi
        DONE+=("${VIRL_SERVER[${index}]}")
    fi
done

# Start a simulation on VIRL server

# Stop every simulation that has been started so far (used as EXIT trap).
# Globals: VIRL_SERVER, VIRL_SID (read)
function stop_virl_simulation {
    local idx
    for idx in "${!VIRL_SERVER[@]}"; do
        # No session ID means the simulation was never started on this host.
        if [ -z "${VIRL_SID[${idx}]}" ]; then
            continue
        fi
        ssh ${SSH_OPTIONS} "${VIRL_USERNAME}@${VIRL_SERVER[${idx}]}" \
            "stop-testcase ${VIRL_SID[${idx}]}"
    done
}

# Upon script exit, cleanup the simulation execution
trap stop_virl_simulation EXIT

VIRL_SID=()
for index in "${!VIRL_SERVER[@]}"; do
    echo "Starting simulation nr. ${index} on VIRL server ${VIRL_SERVER[${index}]}"
    # Get given VIRL server limits for max. number of VMs and IPs
    max_ips=$(get_max_ip_nr "${VIRL_SERVER[${index}]}")
    max_ips_from_vms=$(( $(get_max_vm_nr "${VIRL_SERVER[${index}]}") * IPS_PER_SIMULATION ))
    # Set quota to lower value
    if (( max_ips <= max_ips_from_vms )); then
        IP_QUOTA=${max_ips}
    else
        IP_QUOTA=${max_ips_from_vms}
    fi
    # Start the simulation
    VIRL_SID[${index}]=$(ssh ${SSH_OPTIONS} \
        "${VIRL_USERNAME}@${VIRL_SERVER[${index}]}" \
        "start-testcase -vv --quota ${IP_QUOTA} --copy ${VIRL_TOPOLOGY} --release ${VIRL_RELEASE} ${VPP_PKGS_FULL[*]}")
    retval=$?
    if [ "${retval}" -ne "0" ]; then
        echo "VIRL simulation start failed on ${VIRL_SERVER[${index}]}"
        exit "${retval}"
    fi
    if [[ ! "${VIRL_SID[${index}]}" =~ session-[a-zA-Z0-9_]{6} ]]; then
        echo "No VIRL session ID reported."
        exit 127
    fi
    echo "VIRL simulation nr. ${index} started on ${VIRL_SERVER[${index}]}"

    ssh_do "${VIRL_USERNAME}@${VIRL_SERVER[${index}]}" \
        cat "/scratch/${VIRL_SID[${index}]}/topology.yaml"

    # Download the topology file from VIRL session and rename it
    scp ${SSH_OPTIONS} \
        "${VIRL_USERNAME}@${VIRL_SERVER[${index}]}:/scratch/${VIRL_SID[${index}]}/topology.yaml" \
        "topologies/enabled/topology${index}.yaml"

    retval=$?
    if [ "${retval}" -ne "0" ]; then
        echo "Failed to copy topology file from VIRL simulation nr. ${index} on VIRL server ${VIRL_SERVER[${index}]}"
        exit "${retval}"
    fi
done

echo "${VIRL_SID[@]}"

virtualenv --system-site-packages env
. env/bin/activate

echo pip install
pip install -r "${SCRIPT_DIR}/requirements.txt"

# Validate the downloaded topologies; a failure is reported but not fatal.
for index in "${!VIRL_SERVER[@]}"; do
    if ! pykwalify -s "${SCRIPT_DIR}/resources/topology_schemas/3_node_topology.sch.yaml" \
                   -s "${SCRIPT_DIR}/resources/topology_schemas/topology.sch.yaml" \
                   -d "${SCRIPT_DIR}/topologies/enabled/topology${index}.yaml" \
                   -vvv; then
        echo "Topology${index} schema validation failed."
        echo "However, the tests will start."
    fi
done

# Run the suites of one test group with pybot against the topology of the
# same number. Meant to be run in the background: all its output is
# redirected to ${LOG_PATH}/test_run<nr>.log and pybot's return code is
# written to ${SHARED_MEMORY_PATH}/rc_test_run<nr>.
# Arguments: $1 - index into TEST_GROUPS (and number of the topology file)
function run_test_set() {
    set +x
    local nr=$1
    local log_file="${LOG_PATH}/test_run${nr}.log"
    rm -f "${log_file}"
    # From here on everything printed by this invocation goes, prefixed with
    # a timestamp, to the log file.
    exec &> >(while IFS= read -r line; do
                  echo "$(date +'%H:%M:%S') $line" >> "${log_file}"
              done)

    local suites=()
    local suite
    IFS="," read -r -a suites <<< "${TEST_GROUPS[${nr}]}"

    # Build the command once so that what is logged is what is executed.
    local pybot_cmd=(pybot -L TRACE -W 136
        -v "TOPOLOGY_PATH:${SCRIPT_DIR}/topologies/enabled/topology${nr}.yaml")
    for suite in "${suites[@]}"; do
        pybot_cmd+=(--suite "${SUITE_PATH}.${suite}")
    done
    pybot_cmd+=(--include vm_envAND3_node_single_link_topo
                --include vm_envAND3_node_double_link_topo
                --exclude PERFTEST
                --exclude "${SKIP_PATCH}"
                --noncritical EXPECTED_FAILING
                --output "${LOG_PATH}/log_test_set_run${nr}"
                tests/)

    echo "PYTHONPATH=$(pwd) ${pybot_cmd[*]}"
    PYTHONPATH=$(pwd) "${pybot_cmd[@]}"

    local_run_rc=$?
    echo "${local_run_rc}" > "${SHARED_MEMORY_PATH}/rc_test_run${nr}"
    set -x
}

set +x
# Send to background an instance of the run_test_set() function for each number,
# record the pid.
pids=()
for index in "${!VIRL_SERVER[@]}"; do
    run_test_set "${index}" &
    pid=$!
    echo "Sent to background: Test_set${index} (pid=$pid)"
    pids[$pid]=$index
done

echo
echo -n "Waiting..."

# Watch the stable of background processes.
# If a pid goes away, remove it from the array.
# Polling (instead of a plain "wait") keeps printing progress to the console.
while [ "${#pids[@]}" -gt 0 ]; do
    for (( tick = 0; tick < 10; tick++ )); do
        sleep 1
        echo -n "."
    done
    for pid in "${!pids[@]}"; do
        if ! ps "$pid" >/dev/null; then
            echo -e "\n"
            echo "Test_set${pids[$pid]} with PID $pid finished."
            unset "pids[$pid]"
        fi
    done
    if [ "${#pids[@]}" -eq 0 ]; then
        break
    fi
    echo -n -e "\nStill waiting for test set(s): ${pids[*]} ..."
done

echo
echo "All test set runs finished."
echo

set -x

# Print the log of every test set and sum up their return codes.
RC=0
for index in "${!VIRL_SERVER[@]}"; do
    echo "Test_set${index} log:"
    cat "${LOG_PATH}/test_run${index}.log"
    rc_file="${SHARED_MEMORY_PATH}/rc_test_run${index}"
    RC_PARTIAL_RUN=$(cat "${rc_file}" 2>/dev/null)
    if [[ ! "${RC_PARTIAL_RUN}" =~ ^[0-9]+$ ]]; then
        # The test set ended before writing its return code; it must not be
        # silently treated as a pass.
        echo "Test_set${index} did not report a return code, counting it as failed."
        RC_PARTIAL_RUN=1
    fi
    RC=$((RC+RC_PARTIAL_RUN))
    rm -f "${rc_file}"
    rm -f "${LOG_PATH}/test_run${index}.log"
    echo
done

# Log the final result
separator="========================================================================================================================================"
if [ "${RC}" -eq "0" ]; then
    set +x
    echo
    echo "${separator}"
    echo "Final result of all test loops: | PASS |"
    echo "All critical tests have passed."
    echo "${separator}"
    echo
    set -x
else
    if [ "${RC}" -eq "1" ]; then
        HLP_STR="test has"
    else
        HLP_STR="tests have"
    fi
    set +x
    echo
    echo "${separator}"
    echo "Final result of all test loops: | FAIL |"
    echo "${RC} critical ${HLP_STR} failed."
    echo "${separator}"
    echo
    set -x
fi

echo Post-processing test data...

partial_logs=()
for index in "${!VIRL_SERVER[@]}"; do
    partial_logs+=("${LOG_PATH}/log_test_set_run${index}.xml")
done

# Rebot output post-processing
rebot --noncritical EXPECTED_FAILING \
      --output output.xml "${partial_logs[@]}"

# Remove unnecessary log files
rm -f "${partial_logs[@]}"

echo Post-processing finished.

if [ "${RC}" -eq 0 ]; then
    RETURN_STATUS=0
else
    RETURN_STATUS=1
fi

exit ${RETURN_STATUS}