Make wait_for use getopt and add walltime support

wait_for currently times out after a fixed number of loop iterations, which
does not account for time spent blocking inside COMMAND. This makes it hard
to calculate how long we will actually spend waiting for something. Modify
wait_for and wait_for_stack_ready to support walltime-based timeouts.

Closes-Bug: #1407132

Change-Id: Icdc626ef8075fbd2f9e7cb7c011a12351c815e09
This commit is contained in:
Gregory Haynes 2015-01-07 11:55:49 -08:00
parent 52739c04a0
commit e5e46a69df
6 changed files with 146 additions and 43 deletions

View File

@ -206,7 +206,7 @@ else
fi fi
echo "Waiting for $IMAGE_NAME VM to boot." echo "Waiting for $IMAGE_NAME VM to boot."
wait_for 100 1 poll_vm wait_for -w 100 --delay 1 -- poll_vm
poll_vm poll_vm
echo echo
@ -215,7 +215,7 @@ echo "Booted. Found IP: $VM_IP."
# hostkeys are generated by cloud-init as part of the boot sequence - can # hostkeys are generated by cloud-init as part of the boot sequence - can
# take a few seconds. # take a few seconds.
echo "Waiting for SSH hostkey." echo "Waiting for SSH hostkey."
wait_for 30 1 "ssh-keyscan $VM_IP 2>&1 | grep \"$VM_IP.*OpenSSH\"" wait_for -w 30 --delay 1 -- "ssh-keyscan $VM_IP 2>&1 | grep \"$VM_IP.*OpenSSH\""
# Remove the hostkey, new instance == new key. # Remove the hostkey, new instance == new key.
ssh-keygen -R $(os-apply-config -m $TE_DATAFILE --key baremetal-network.seed.ip --type netaddress --key-default '192.0.2.1') || true ssh-keygen -R $(os-apply-config -m $TE_DATAFILE --key baremetal-network.seed.ip --type netaddress --key-default '192.0.2.1') || true

View File

@ -289,7 +289,7 @@ fi
## #. Wait for the BM cloud to register BM nodes with the scheduler:: ## #. Wait for the BM cloud to register BM nodes with the scheduler::
expected_nodes=$(( $OVERCLOUD_COMPUTESCALE + $OVERCLOUD_CONTROLSCALE + $OVERCLOUD_BLOCKSTORAGESCALE )) expected_nodes=$(( $OVERCLOUD_COMPUTESCALE + $OVERCLOUD_CONTROLSCALE + $OVERCLOUD_BLOCKSTORAGESCALE ))
wait_for 60 $expected_nodes wait_for_hypervisor_stats $expected_nodes wait_for -w $((60 * $expected_nodes)) --delay 10 -- wait_for_hypervisor_stats $expected_nodes
## #. Set password for Overcloud SNMPd, same password needs to be set in Undercloud Ceilometer ## #. Set password for Overcloud SNMPd, same password needs to be set in Undercloud Ceilometer
@ -495,7 +495,7 @@ fi
## :: ## ::
echo "Waiting for the overcloud stack to be ready" #nodocs echo "Waiting for the overcloud stack to be ready" #nodocs
wait_for_stack_ready $(($OVERCLOUD_STACK_TIMEOUT * 60 / 10)) 10 $STACKNAME wait_for_stack_ready -w $(($OVERCLOUD_STACK_TIMEOUT * 60)) 10 $STACKNAME
OVERCLOUD_ENDPOINT=$(heat output-show $STACKNAME KeystoneURL|sed 's/^"\(.*\)"$/\1/') OVERCLOUD_ENDPOINT=$(heat output-show $STACKNAME KeystoneURL|sed 's/^"\(.*\)"$/\1/')
OVERCLOUD_IP=$(echo $OVERCLOUD_ENDPOINT | awk -F '[/:]' '{print $4}') OVERCLOUD_IP=$(echo $OVERCLOUD_ENDPOINT | awk -F '[/:]' '{print $4}')
### --end ### --end
@ -599,12 +599,12 @@ fi #nodocs
## #. _`Wait for Nova Compute` ## #. _`Wait for Nova Compute`
## :: ## ::
wait_for 30 10 nova service-list --binary nova-compute 2\>/dev/null \| grep 'enabled.*\ up\ ' wait_for -w 300 --delay 10 -- nova service-list --binary nova-compute 2\>/dev/null \| grep 'enabled.*\ up\ '
## #. Wait for L2 Agent On Nova Compute ## #. Wait for L2 Agent On Nova Compute
## :: ## ::
wait_for 30 10 neutron agent-list -f csv -c alive -c agent_type -c host \| grep "\":-).*Open vSwitch agent.*-novacompute\"" #nodocs wait_for -w 300 --delay 10 -- neutron agent-list -f csv -c alive -c agent_type -c host \| grep "\":-).*Open vSwitch agent.*-novacompute\"" #nodocs
## wait_for 30 10 neutron agent-list -f csv -c alive -c agent_type -c host \| grep "\":-).*Open vSwitch agent.*-novacompute\"" ## wait_for 30 10 neutron agent-list -f csv -c alive -c agent_type -c host \| grep "\":-).*Open vSwitch agent.*-novacompute\""
## #. Log in as a user. ## #. Log in as a user.
@ -627,7 +627,7 @@ if [ "stack-create" = "$HEAT_OP" ] ; then #nodocs
## #. Add an external IP for it. ## #. Add an external IP for it.
## :: ## ::
wait_for 10 5 neutron port-list -f csv -c id --quote none \| grep id wait_for -w 50 --delay 5 -- neutron port-list -f csv -c id --quote none \| grep id
PORT=$(neutron port-list -f csv -c id --quote none | tail -n1) PORT=$(neutron port-list -f csv -c id --quote none | tail -n1)
FLOATINGIP=$(neutron floatingip-create ext-net \ FLOATINGIP=$(neutron floatingip-create ext-net \
--port-id "${PORT//[[:space:]]/}" \ --port-id "${PORT//[[:space:]]/}" \
@ -654,7 +654,7 @@ fi
## #. After which, you should be able to ping it ## #. After which, you should be able to ping it
## :: ## ::
wait_for 30 10 ping -c 1 $FLOATINGIP wait_for -w 300 --delay 10 -- ping -c 1 $FLOATINGIP
### --end ### --end

View File

@ -289,7 +289,7 @@ echo "Waiting for seed node to configure br-ctlplane..." #nodocs
timeout 480 sh -c 'printf "HTTP/1.0 200 OK\r\n\r\n\r\n" | nc -l '"$COMP_IP"' '"$SEED_COMP_PORT"' | grep '"$SEED_IMAGE_ID" timeout 480 sh -c 'printf "HTTP/1.0 200 OK\r\n\r\n\r\n" | nc -l '"$COMP_IP"' '"$SEED_COMP_PORT"' | grep '"$SEED_IMAGE_ID"
# Wait for network # Wait for network
wait_for 10 1 ping -c 1 $BM_NETWORK_SEED_IP wait_for -w 10 --delay 1 -- ping -c 1 $BM_NETWORK_SEED_IP
# If ssh-keyscan fails to connect, it returns 0. So grep to see if it succeeded # If ssh-keyscan fails to connect, it returns 0. So grep to see if it succeeded
ssh-keyscan -t rsa $BM_NETWORK_SEED_IP | tee -a ~/.ssh/known_hosts | grep -q "^$BM_NETWORK_SEED_IP ssh-rsa " ssh-keyscan -t rsa $BM_NETWORK_SEED_IP | tee -a ~/.ssh/known_hosts | grep -q "^$BM_NETWORK_SEED_IP ssh-rsa "
@ -302,13 +302,13 @@ keystone role-create --name=swiftoperator
keystone role-create --name=ResellerAdmin keystone role-create --name=ResellerAdmin
echo "Waiting for nova to initialise..." echo "Waiting for nova to initialise..."
wait_for 50 10 nova list wait_for -w 500 --delay 10 -- nova list
user-config user-config
echo "Waiting for Nova Compute to be available" echo "Waiting for Nova Compute to be available"
wait_for 30 10 nova service-list --binary nova-compute 2\>/dev/null \| grep 'enabled.*\ up\ ' wait_for -w 300 --delay 10 -- nova service-list --binary nova-compute 2\>/dev/null \| grep 'enabled.*\ up\ '
echo "Waiting for neutron API and L2 agent to be available" echo "Waiting for neutron API and L2 agent to be available"
wait_for 30 10 neutron agent-list -f csv -c alive -c agent_type -c host \| grep "\":-).*Open vSwitch agent.*\"" #nodocs wait_for -w 300 --delay 10 -- neutron agent-list -f csv -c alive -c agent_type -c host \| grep "\":-).*Open vSwitch agent.*\"" #nodocs
BM_NETWORK_SEED_RANGE_START=$(os-apply-config -m $TE_DATAFILE --key baremetal-network.seed.range-start --type raw --key-default '192.0.2.2') BM_NETWORK_SEED_RANGE_START=$(os-apply-config -m $TE_DATAFILE --key baremetal-network.seed.range-start --type raw --key-default '192.0.2.2')
BM_NETWORK_SEED_RANGE_END=$(os-apply-config -m $TE_DATAFILE --key baremetal-network.seed.range-end --type raw --key-default '192.0.2.20') BM_NETWORK_SEED_RANGE_END=$(os-apply-config -m $TE_DATAFILE --key baremetal-network.seed.range-end --type raw --key-default '192.0.2.20')

View File

@ -178,7 +178,7 @@ POWER_USER=$(os-apply-config -m $TE_DATAFILE --key ssh-user --type raw)
## #. Wait for the BM cloud to register BM nodes with the scheduler:: ## #. Wait for the BM cloud to register BM nodes with the scheduler::
wait_for 60 1 wait_for_hypervisor_stats wait_for -w 60 --delay 1 -- wait_for_hypervisor_stats
## #. We need an environment file to store the parameters we're going to give ## #. We need an environment file to store the parameters we're going to give
@ -327,7 +327,7 @@ heat $HEAT_OP -e $HEAT_ENV \
echo "Waiting for the undercloud stack to be ready" #nodocs echo "Waiting for the undercloud stack to be ready" #nodocs
# Make time out 60 mins as like the Heat stack-create default timeout. # Make time out 60 mins as like the Heat stack-create default timeout.
wait_for_stack_ready $(($UNDERCLOUD_STACK_TIMEOUT * 60 / 10)) 10 undercloud wait_for_stack_ready -w $(($UNDERCLOUD_STACK_TIMEOUT * 60 )) 10 undercloud
UNDERCLOUD_CTL_IP=$(nova list | grep ctlplane | sed -e "s/.*=\\([0-9.]*\\).*/\1/") UNDERCLOUD_CTL_IP=$(nova list | grep ctlplane | sed -e "s/.*=\\([0-9.]*\\).*/\1/")
## #. If we're deploying with a public VLAN we must use it, not the control plane ## #. If we're deploying with a public VLAN we must use it, not the control plane

View File

@ -17,47 +17,113 @@
set -e # exit on the first non-zero status set -e # exit on the first non-zero status
set -u # exit on unset variables set -u # exit on unset variables
set -o pipefail
SCRIPT_NAME=$(basename $0) SCRIPT_NAME=$(basename $0)
function show_options() { function show_options() {
echo "Usage: $SCRIPT_NAME LOOPS_NUMBER SLEEP_TIME ARGS" EXITVAL=${1:-1}
echo "Usage: $SCRIPT_NAME [-h] [-w TIMEOUT] [-l LOOP_COUNT] [-f FAIL_MATCH] [-s SUCCESS_MATCH] --delay SLEEP_TIME -- COMMAND"
echo echo
echo "ARGS are read and concatenated together into a single command." echo "Waits for a command to fail, succeed, or timeout."
echo "Execute the command in a loop until it succeeds or the number"
echo "of attempts exceeds LOOPS_NUMBER value. After each failure"
echo "pause for SLEEP_TIME seconds."
echo echo
echo "An optional FAIL_MATCH_OUTPUT variable may also be set to control " echo "Options:"
echo "if the loop exits early if the commands stdout/stderr matches the " echo " -h,--help -- this help"
echo "supplied regex string." echo " -w,--walltime TIMEOUT -- Timeout after TIMEOUT seconds."
echo " -l,--looptimeout LOOP_COUNT -- Timeout after checking COMMAND LOOP_COUNT times."
echo " -d,--delay SLEEP_TIME -- Seconds to sleep between checks of COMMAND."
echo " -s,--success-match -- Output that indicates a success."
echo " -f,--fail-match -- Output that indicates a short-circuit failure."
echo
echo "Execute the command in a loop until it succeeds, a timeout is reached, or"
echo "a short-circuit failure occurs. Between each check of the command sleep for"
echo "the number of seconds specified by SLEEP_TIME."
echo echo
echo "Examples:" echo "Examples:"
echo " wait_for 30 10 ping -c 1 192.0.2.2" echo " wait_for -w 300 --delay 10 -- ping -c 1 192.0.2.2"
echo " wait_for 10 1 ls file_we_are_waiting_for" echo " wait_for -w 10 --delay 1 -- ls file_we_are_waiting_for"
echo " wait_for 10 3 date \| grep 8" echo " wait_for -w 30 --delay 3 -- date \| grep 8"
echo " FAIL_MATCH_OUTPUT=CREATE_FAILED wait_for 30 10 heat stack-show undercloud" echo " wait_for -w 300 --delay 10 --fail-match CREATE_FAILED -- heat stack-show undercloud"
echo " SUCCESSFUL_MATCH_OUTPUT=CREATE_COMPLETE wait_for 30 10 heat stack-show undercloud" echo " wait_for -w 300 --delay 10 --success-match CREATE_COMPLETE -- heat stack-show undercloud"
exit 1 exit $EXITVAL
} }
USE_WALLTIME=
TIMEOUT=
DELAY=
LOOPS=${1:-""} if [ -n "${SUCCESSFUL_MATCH_OUTPUT:-}" ]; then
SLEEPTIME=${2:-""} echo "DEPRECATION WARNING: Using env vars for specifying SUCCESSFUL_MATCH_OUTPUT is deprecated."
FAIL_MATCH_OUTPUT=${FAIL_MATCH_OUTPUT:-""} fi
SUCCESSFUL_MATCH_OUTPUT=${SUCCESSFUL_MATCH_OUTPUT:-""} SUCCESSFUL_MATCH_OUTPUT=${SUCCESSFUL_MATCH_OUTPUT:-""}
shift 2 || true if [ -n "${FAIL_MATCH_OUTPUT:-}" ]; then
echo "DEPRECATION WARNING: Using env vars for specifying FAIL_MATCH_OUTPUT is deprecated."
fi
FAIL_MATCH_OUTPUT=${FAIL_MATCH_OUTPUT:-""}
USE_ARGPARSE=0
# We have to support positional arguments for backwards compat
if [ -n "$1" -a "${1:0:1}" == "-" ]; then
USE_ARGPARSE=1
else
echo "DEPRECATION WARNING: Using positional arguments for wait_for is deprecated."
fi
if [ $USE_ARGPARSE -eq 1 ]; then
set +e
TEMP=$(getopt -o h,w:,l:,d:,s:,f: -l help,walltime:,looptimeout:,delay:,success-match:,fail-match: -n $SCRIPT_NAME -- "$@")
if [ $? != 0 ] ; then show_options ; fi
set -e
# Note the quotes around `$TEMP': they are essential!
eval set -- "$TEMP"
while true ; do
case "$1" in
-h) show_options 0;;
--help) show_options 0;;
-w|--walltime) [ -n "$USE_WALLTIME" ] && show_options
USE_WALLTIME=1
TIMEOUT="$2"
shift 2
;;
-l|--looptimeout) [ -n "$USE_WALLTIME" ] && show_options
USE_WALLTIME=0
TIMEOUT="$2"
shift 2
;;
-d|--delay) DELAY="$2"; shift 2;;
-s|--success-match) SUCCESSFUL_MATCH_OUTPUT="$2"; shift 2;;
-f|--fail-match) FAIL_MATCH_OUTPUT="$2"; shift 2;;
--) shift ; break ;;
esac
done
else
TIMEOUT=${1:-""}
DELAY=${2:-""}
USE_WALLTIME=0
shift 2 || true
fi
COMMAND="$@" COMMAND="$@"
if [ -z "$LOOPS" -o -z "$SLEEPTIME" -o -z "$COMMAND" ]; then if [ -z "$TIMEOUT" -o -z "$DELAY" -o -z "$COMMAND" ]; then
show_options show_options
fi fi
i=0 ENDTIME=$(($(date +%s) + $TIMEOUT))
while [ $i -lt $LOOPS ]; do TIME_REMAINING=0
i=$((i + 1)) function update_time_remaining() {
CUR_TIME="$(date +%s)"
TIME_REMAINING=$(($ENDTIME - $CUR_TIME))
}
OUTPUT=
function check_cmd() {
STATUS=0 STATUS=0
OUTPUT=$(eval $COMMAND 2>&1) || STATUS=$? OUTPUT=$(eval $COMMAND 2>&1) || STATUS=$?
if [[ -n "$SUCCESSFUL_MATCH_OUTPUT" ]] \ if [[ -n "$SUCCESSFUL_MATCH_OUTPUT" ]] \
@ -72,10 +138,40 @@ while [ $i -lt $LOOPS ]; do
# it's output so we have finished waiting. # it's output so we have finished waiting.
exit 0 exit 0
fi fi
}
sleep $SLEEPTIME i=0
while [ $USE_WALLTIME -eq 1 -o $i -lt $TIMEOUT ]; do
if [ $USE_WALLTIME -eq 1 ]; then
update_time_remaining
if [ $TIME_REMAINING -le 0 ]; then
break
fi
else
i=$((i + 1))
fi
check_cmd
if [ $USE_WALLTIME -eq 1 ]; then
update_time_remaining
if [ $TIME_REMAINING -lt $DELAY ]; then
if [ $TIME_REMAINING -gt 0 ]; then
sleep $TIME_REMAINING
check_cmd
fi
else
sleep $DELAY
fi
else
sleep $DELAY
fi
done done
SECONDS=$((LOOPS * SLEEPTIME)) if [ $USE_WALLTIME -eq 1 ]; then
SECONDS=$TIMEOUT
else
SECONDS=$((TIMEOUT * DELAY))
fi
printf 'Timing out after %d seconds:\nCOMMAND=%s\nOUTPUT=%s\n' \ printf 'Timing out after %d seconds:\nCOMMAND=%s\nOUTPUT=%s\n' \
"$SECONDS" "$COMMAND" "$OUTPUT" "$SECONDS" "$COMMAND" "$OUTPUT"
exit 1 exit 1

View File

@ -19,18 +19,25 @@ set -eu
SCRIPT_NAME=$(basename $0) SCRIPT_NAME=$(basename $0)
USE_WALLTIME="-l"
if [ -n "$1" -a "$1" = "-w" ]; then
USE_WALLTIME="-w"
shift 1
fi
LOOPS=${1:-""} LOOPS=${1:-""}
SLEEPTIME=${2:-""} SLEEPTIME=${2:-""}
STACK_NAME=${3:-""} STACK_NAME=${3:-""}
if [ -z "$LOOPS" -o -z "$SLEEPTIME" -o -z "$STACK_NAME" ]; then if [ -z "$LOOPS" -o -z "$SLEEPTIME" -o -z "$STACK_NAME" ]; then
echo "Usage: $SCRIPT_NAME LOOPS_NUMBER SLEEP_TIME STACK_NAME" echo "Usage: $SCRIPT_NAME [-w] LOOPS_NUMBER SLEEP_TIME STACK_NAME"
exit 1 exit 1
fi fi
SUCCESSFUL_MATCH_OUTPUT="(CREATE|UPDATE)_COMPLETE" SUCCESSFUL_MATCH_OUTPUT="(CREATE|UPDATE)_COMPLETE"
FAIL_MATCH_OUTPUT="(CREATE|UPDATE)_FAILED" FAIL_MATCH_OUTPUT="(CREATE|UPDATE)_FAILED"
SUCCESSFUL_MATCH_OUTPUT=$SUCCESSFUL_MATCH_OUTPUT \
FAIL_MATCH_OUTPUT=$FAIL_MATCH_OUTPUT \ wait_for $USE_WALLTIME $1 --delay $2 \
wait_for $1 $2 \ --success-match $SUCCESSFUL_MATCH_OUTPUT \
--fail-match $FAIL_MATCH_OUTPUT -- \
"heat stack-show $STACK_NAME | awk '/stack_status / { print \$4 }'" "heat stack-show $STACK_NAME | awk '/stack_status / { print \$4 }'"