
After some investigation, it turns out that the MySQL connection is not persistent: it is only established when the Heat API is used by a client. Because the monitor checks whether that TCP connection is currently established, the check fails every time Pacemaker runs it while no such connection exists. If no Heat API requests have been sent for some time, Heat is therefore considered failed and is restarted, which is not clean. This patch disables that check and keeps only the AMQP connection check. Signed-off-by: Emilien Macchi <emilien.macchi@enovance.com>
358 lines
13 KiB
Bash
358 lines
13 KiB
Bash
#!/bin/sh
|
|
#
#
# OpenStack Orchestration Engine Service (heat-engine)
#
# Description: Manages an OpenStack Orchestration Engine Service (heat-engine) process as an HA resource
#
# Authors: Emilien Macchi
#
# Support: openstack@lists.launchpad.net
# License: Apache Software License (ASL) 2.0
#
#
# See usage() function below for more details ...
#
# OCF instance parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_config
# OCF_RESKEY_user
# OCF_RESKEY_pid
# OCF_RESKEY_monitor_binary
# OCF_RESKEY_amqp_server_port
# OCF_RESKEY_zeromq
# OCF_RESKEY_additional_parameters
|
|
#######################################################################
|
|
# Initialization:
|
|
|
|
# Resolve the directory that holds the OCF shell helper library, keeping any
# OCF_FUNCTIONS_DIR the caller already set, and source the library from it.
# The helpers used further down (ocf_log, ocf_run, ocf_is_true, ocf_is_probe,
# check_binary) and the OCF_* return codes are not defined in this file, so
# they presumably come from this library -- confirm against ocf-shellfuncs.
# Both expansions are quoted so a path containing spaces or glob characters
# is still treated as a single word.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
|
|
|
|
#######################################################################
|
|
|
|
# Fill in some defaults if no values are specified
|
|
|
|
# Built-in defaults for the instance parameters. They live in separate
# *_default variables because meta_data() also prints them in the XML.
OCF_RESKEY_binary_default="heat-engine"
OCF_RESKEY_config_default="/etc/heat/heat.conf"
OCF_RESKEY_user_default="heat"
OCF_RESKEY_pid_default="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.pid"
OCF_RESKEY_amqp_server_port_default="5672"
OCF_RESKEY_zeromq_default="false"

# ${var=default} assigns only when the parameter is unset, so a value that
# was explicitly configured (even an empty one) is left untouched. The
# expansions are quoted so the no-op ':' never sees word-split or globbed
# arguments.
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
: "${OCF_RESKEY_config=${OCF_RESKEY_config_default}}"
: "${OCF_RESKEY_user=${OCF_RESKEY_user_default}}"
: "${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}"
: "${OCF_RESKEY_amqp_server_port=${OCF_RESKEY_amqp_server_port_default}}"
: "${OCF_RESKEY_zeromq=${OCF_RESKEY_zeromq_default}}"
|
|
|
|
#######################################################################
|
|
|
|
# Print a short help text to stdout: the supported actions and a one-line
# description of each. $0 is expanded inside the here-document, so the text
# shows the name the agent was invoked with.
usage() {
    cat <<UEND
usage: $0 (start|stop|validate-all|meta-data|status|monitor)

$0 manages an OpenStack Orchestration Engine Service (heat-engine) process as an HA resource

The 'start' operation starts the heat-engine service.
The 'stop' operation stops the heat-engine service.
The 'validate-all' operation reports whether the parameters are valid
The 'meta-data' operation reports this RA's meta-data information
The 'status' operation reports whether the heat-engine service is running
The 'monitor' operation reports whether the heat-engine service seems to be working

UEND
}
|
|
|
|
# Print the resource agent's XML meta-data to stdout: name, version,
# descriptions, the seven instance parameters with their types and defaults,
# and the supported actions with their timeouts. The default="..." attributes
# are filled from the OCF_RESKEY_*_default variables when the here-document
# is expanded, so the advertised defaults follow the values set at the top of
# the script.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="heat-engine">
<version>1.0</version>

<longdesc lang="en">
Resource agent for the OpenStack Orchestration Engine Service (heat-engine)
May manage a heat-engine instance or a clone set that
creates a distributed heat-engine cluster.
</longdesc>
<shortdesc lang="en">Manages the OpenStack Orchestration Engine Service (heat-engine)</shortdesc>
<parameters>

<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack Orchestration Engine server binary (heat-engine)
</longdesc>
<shortdesc lang="en">OpenStack Orchestration Engine server binary (heat-engine)</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>

<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack Orchestration Engine Service (heat-engine) configuration file
</longdesc>
<shortdesc lang="en">OpenStack Orchestration Engine (heat-engine) config file</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>

<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running OpenStack Orchestration Engine Service (heat-engine)
</longdesc>
<shortdesc lang="en">OpenStack Orchestration Engine Service (heat-engine) user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>

<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pid file to use for this OpenStack Orchestration Engine Service (heat-engine) instance
</longdesc>
<shortdesc lang="en">OpenStack Orchestration Engine Service (heat-engine) pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}" />
</parameter>

<parameter name="amqp_server_port" unique="0" required="0">
<longdesc lang="en">
The listening port number of the AMQP server. Use for monitoring purposes
</longdesc>
<shortdesc lang="en">AMQP listening port</shortdesc>
<content type="integer" default="${OCF_RESKEY_amqp_server_port_default}" />
</parameter>

<parameter name="zeromq" unique="0" required="0">
<longdesc lang="en">
If zeromq is used, this will disable the connection test to the AMQP server. Use for monitoring purposes
</longdesc>
<shortdesc lang="en">Zero-MQ usage</shortdesc>
<content type="boolean" default="${OCF_RESKEY_zeromq_default}" />
</parameter>

<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters to pass on to the OpenStack Orchestration Engine Service (heat-engine)
</longdesc>
<shortdesc lang="en">Additional parameters for heat-engine</shortdesc>
<content type="string" />
</parameter>

</parameters>

<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="status" timeout="20" />
<action name="monitor" timeout="30" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
|
|
|
|
#######################################################################
|
|
# Functions invoked by resource manager actions
|
|
|
|
# Validate a TCP port number.
#
# Arguments: $1 - candidate port
# Returns:   0 when $1 consists only of decimal digits and its value lies in
#            1..65535. Otherwise logs an error and EXITS the script with
#            $OCF_ERR_CONFIGURED (it does not return to the caller).
#
# Examples of valid ports:   "5672", "443", "15672", "0080"
# Examples of invalid ports: "1080bad", "0", "0000", "70000", ""
#
# The previous implementation required the string to be exactly four
# characters long, which rejected legitimate ports such as 443 or 15672 and
# accepted "0000".
heat_engine_check_port() {
    local port
    local valid

    port="$1"
    valid=1

    # Digits only: rejects the empty string, signs, ranges and lists.
    case "$port" in
        ''|*[!0-9]*) valid=0 ;;
    esac

    if [ "$valid" -eq 1 ]; then
        # The length guard keeps very long digit strings away from the
        # numeric comparisons below.
        if [ "${#port}" -gt 5 ] || [ "$port" -lt 1 ] || [ "$port" -gt 65535 ]; then
            valid=0
        fi
    fi

    if [ "$valid" -ne 1 ]; then
        ocf_log err "Invalid port number: $1"
        exit $OCF_ERR_CONFIGURED
    fi
}
|
|
|
|
# Check that the agent's prerequisites are in place.
#
# Globals:  OCF_RESKEY_binary, OCF_RESKEY_amqp_server_port,
#           OCF_RESKEY_config, OCF_RESKEY_user (read)
# Returns:  0 when everything checks out,
#           $OCF_ERR_INSTALLED when the config file is missing outside of a
#           probe, or when the configured user is unknown to getent.
# Note:     heat_engine_check_port exits the script on a bad port;
#           check_binary is an external helper and presumably does the same
#           for a missing program -- confirm against ocf-shellfuncs.
heat_engine_validate() {
    check_binary "$OCF_RESKEY_binary"
    check_binary netstat
    heat_engine_check_port "$OCF_RESKEY_amqp_server_port"

    # A config file on shared storage that is not available
    # during probes is OK.
    if [ ! -f "$OCF_RESKEY_config" ]; then
        if ! ocf_is_probe; then
            ocf_log err "Config $OCF_RESKEY_config doesn't exist"
            return $OCF_ERR_INSTALLED
        fi
        # Every other log call in this file is "ocf_log <level> <message>";
        # the former "ocf_log_warn" did not follow that form.
        ocf_log warn "Config $OCF_RESKEY_config not available during a probe"
    fi

    # Quoted on purpose: with an empty user an unquoted expansion would run
    # "getent passwd" without a key, which succeeds and hides the problem.
    if ! getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1; then
        ocf_log err "User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    return 0
}
|
|
|
|
# Report whether the process recorded in the pid file is alive.
#
# Globals:  OCF_RESKEY_pid (read) - path of the pid file
# Returns:  $OCF_SUCCESS when the pid file holds a positive decimal pid and
#           "kill -s 0" on it succeeds; $OCF_NOT_RUNNING when the pid file is
#           missing, its content is not a usable pid, or the signal test
#           fails.
heat_engine_status() {
    local pid

    if [ ! -f "$OCF_RESKEY_pid" ]; then
        ocf_log info "OpenStack Orchestration Engine (heat-engine) is not running"
        return $OCF_NOT_RUNNING
    fi

    pid=$(cat "$OCF_RESKEY_pid" 2>/dev/null)

    # Never hand kill an empty, zero or non-numeric "pid": 0 and negative
    # values address process groups rather than a single process, so the
    # signal test could succeed without heat-engine running at all.
    case "$pid" in
        ''|0|*[!0-9]*)
            ocf_log info "Old PID file found, but OpenStack Orchestration Engine (heat-engine) is not running"
            return $OCF_NOT_RUNNING
            ;;
    esac

    # Signal 0 delivers nothing; it only tests whether the pid can be signalled.
    if ocf_run -warn kill -s 0 "$pid"; then
        return $OCF_SUCCESS
    fi

    ocf_log info "Old PID file found, but OpenStack Orchestration Engine (heat-engine) is not running"
    return $OCF_NOT_RUNNING
}
|
|
|
|
# Health check: the process must be alive and, unless ZeroMQ is in use, must
# hold an established connection on the AMQP port.
#
# Globals:  OCF_RESKEY_zeromq, OCF_RESKEY_pid,
#           OCF_RESKEY_amqp_server_port (read)
# Returns:  whatever heat_engine_status returned when that is not
#           $OCF_SUCCESS; $OCF_NOT_RUNNING when the AMQP connection test
#           fails; $OCF_SUCCESS otherwise.
heat_engine_monitor() {
    local rc
    local pid
    local rc_amqp

    heat_engine_status
    rc=$?

    # If status returned anything but success, return that immediately
    if [ $rc -ne $OCF_SUCCESS ]; then
        return $rc
    fi

    # The zeromq parameter is documented as DISABLING the AMQP connection
    # test, so the test runs only when zeromq is not true. (The condition
    # used to be inverted.)
    if ! ocf_is_true "$OCF_RESKEY_zeromq"; then
        # Filter by pid so only heat-engine's own sockets count, not those of
        # another heat process with the same connection behavior (for
        # example heat-api).
        pid=$(cat "$OCF_RESKEY_pid")

        # ":<port>" followed by whitespace matches an address column, and
        # " <pid>/" matches the PID/Program column; bare substring matches
        # could hit the pid inside a port number or the reverse.
        netstat -punt \
            | grep -E -- ":${OCF_RESKEY_amqp_server_port}[[:space:]]" \
            | grep -F -- " ${pid}/" \
            | grep -q "ESTABLISHED"
        rc_amqp=$?
        if [ $rc_amqp -ne 0 ]; then
            ocf_log err "Heat Engine is not connected to the AMQP server: AMQP connection test returned $rc_amqp"
            return $OCF_NOT_RUNNING
        fi
    fi

    ocf_log debug "OpenStack Orchestration Engine (heat-engine) monitor succeeded"
    return $OCF_SUCCESS
}
|
|
|
|
# Start heat-engine as the configured user and wait until the monitor passes.
#
# Globals:  OCF_RESKEY_user, OCF_RESKEY_binary, OCF_RESKEY_config,
#           OCF_RESKEY_additional_parameters (read);
#           OCF_RESKEY_pid (read; the file it names is overwritten)
# Returns:  $OCF_SUCCESS when the service was already running or came up.
#           EXITS the script with $OCF_ERR_GENERIC when the monitor reports
#           anything other than success or "not running".
heat_engine_start() {
    local rc
    local cmd

    heat_engine_status
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ]; then
        ocf_log info "OpenStack Orchestration Engine (heat-engine) already running"
        return $OCF_SUCCESS
    fi

    # run the actual heat-engine daemon. Don't use ocf_run as we're sending the tool's output
    # straight to /dev/null anyway and using ocf_run would break stdout-redirection here.
    #
    # The command line is expanded here, by this shell. The single-quoted
    # tail is passed on literally so that the shell started by su does the
    # backgrounding and prints the daemon's pid ($!), which is what lands in
    # the pid file.
    cmd="${OCF_RESKEY_binary} --config-file=${OCF_RESKEY_config} ${OCF_RESKEY_additional_parameters}"
    su "${OCF_RESKEY_user}" -s /bin/sh -c "${cmd}"' >> /dev/null 2>&1 & echo $!' > "$OCF_RESKEY_pid"

    # Spin waiting for the server to come up.
    # NOTE(review): this loop has no bound of its own; presumably the cluster
    # manager's start timeout ends it if the daemon never comes up -- confirm.
    while true; do
        heat_engine_monitor
        rc=$?
        [ $rc -eq $OCF_SUCCESS ] && break
        if [ $rc -ne $OCF_NOT_RUNNING ]; then
            ocf_log err "OpenStack Orchestration Engine (heat-engine) start failed"
            exit $OCF_ERR_GENERIC
        fi
        sleep 1
    done

    ocf_log info "OpenStack Orchestration Engine (heat-engine) started"
    return $OCF_SUCCESS
}
|
|
|
|
# Stop heat-engine: SIGTERM first, then SIGKILL if it is still alive after a
# grace period.
#
# Globals:  OCF_RESKEY_pid (read; the file it names is removed),
#           OCF_RESKEY_CRM_meta_timeout (read) - when non-empty, the grace
#           period is that value divided by 1000, minus 5; otherwise 15.
# Returns:  $OCF_SUCCESS when the service was already stopped or after the
#           stop sequence has run. EXITS the script with $OCF_ERR_GENERIC
#           when SIGTERM could not be sent.
# Note:     the result of the final SIGKILL is not checked; the function
#           reports success regardless.
heat_engine_stop() {
    local rc
    local pid
    # These two used to be assigned without "local" and leaked into the
    # script's global scope.
    local shutdown_timeout
    local count

    heat_engine_status
    rc=$?
    if [ $rc -eq $OCF_NOT_RUNNING ]; then
        ocf_log info "OpenStack Orchestration Engine (heat-engine) already stopped"
        return $OCF_SUCCESS
    fi

    # Try SIGTERM
    pid=$(cat "$OCF_RESKEY_pid")
    ocf_run kill -s TERM "$pid"
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "OpenStack Orchestration Engine (heat-engine) couldn't be stopped"
        exit $OCF_ERR_GENERIC
    fi

    # stop waiting
    shutdown_timeout=15
    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        # Keep 5 units in reserve so the SIGKILL fallback below still fits
        # inside the operation timeout.
        shutdown_timeout=$(( ($OCF_RESKEY_CRM_meta_timeout / 1000) - 5 ))
    fi

    count=0
    while [ "$count" -lt "$shutdown_timeout" ]; do
        heat_engine_status
        rc=$?
        if [ $rc -eq $OCF_NOT_RUNNING ]; then
            break
        fi
        count=$((count + 1))
        sleep 1
        ocf_log debug "OpenStack Orchestration Engine (heat-engine) still hasn't stopped yet. Waiting ..."
    done

    heat_engine_status
    rc=$?
    if [ $rc -ne $OCF_NOT_RUNNING ]; then
        # SIGTERM didn't help either, try SIGKILL
        ocf_log info "OpenStack Orchestration Engine (heat-engine) failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL ..."
        ocf_run kill -s KILL "$pid"
    fi

    ocf_log info "OpenStack Orchestration Engine (heat-engine) stopped"

    rm -f "$OCF_RESKEY_pid"

    return $OCF_SUCCESS
}
|
|
|
|
#######################################################################
|
|
|
|
# Entry point: dispatch on the action name given as the first argument.
# meta-data and usage/help are answered first because they must work even
# when the service is not installed or configured.
case "$1" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        usage
        exit $OCF_SUCCESS
        ;;
esac

# Anything except meta-data and help must pass validation
heat_engine_validate || exit $?

# What kind of method was invoked?
# The script's exit status is that of the selected handler; validate-all has
# nothing left to do because validation already ran above.
case "$1" in
    start)
        heat_engine_start
        ;;
    stop)
        heat_engine_stop
        ;;
    status)
        heat_engine_status
        ;;
    monitor)
        heat_engine_monitor
        ;;
    validate-all)
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
|