
Most of the openstack processes are containerized so there is no need for them to be included in the patch restart scripts, or the syslog configuration and log rotation files. Story: 2004764 Task: 30668 Change-Id: Ib342fa7b594cdafa5d7c7575044ea28783daf9d0 Signed-off-by: Al Bailey <Al.Bailey@windriver.com>
556 lines
17 KiB
Bash
Executable File
556 lines
17 KiB
Bash
Executable File
#!/bin/bash
#
# Copyright (c) 2016-18 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
##############################################################################

# Name this script was invoked as, without any leading directory ; shown in
# the usage line printed by print_help.
script=$(basename "$0")

##############################################################################
#
# For Patch Writers
# -----------------
#
# This script supports no-reboot process restart from a command line list.
# Must be run as root or by sudo
# Calling sequence:
#
#    /usr/sbin/process-restart process1 process2 ... processN
#    if [ $? != 0 ] ; then
#        restart action failed
#    fi
#
###############################################################################
#
# For Developers
# --------------
#
# Process restart support can be added to this script by adding your process to the
# command line parser as a new case based on the process's name in the following form.
#
#    "[process-name]")
#        process_list =(${process_list[@]} "[monitor]:[process-name]:[process-alias]:[hosttype]:[pidfile]:[status]")
#        ;;
#
# Field Descriptions: all fields are mandatory and are joined with ':'
#
# monitor      : sm or pmon
# process-name : the name of the process
#                - for sm monitored processes, this must be unique, but does
#                  not need to match the actual binary name
#                - for pmon monitored processes, this must be unique and
#                  must match the actual binary name
# process-alias: the alias name that SM uses instead of the actual process name
#                - valid for sm only
#                - if it is the same as the process name then make it the same
# hosttype     : the supported hosttypes are ; stack with commas
#                - all ...... all nodetypes
#                - controller . controllers
#                - storage .... storage nodes
#                - compute .... compute nodes
# pidfile      : the path to and name of the process's pidfile
# status       : set as 0
#
# Example: based on sysinv-api which is monitored by sm, only runs on the controller
#          and has an sm alias.
#
#    "sysinv-api")
#        process_list =(${process_list[@]} "sm:sysinv-api:sysinv-inv:controller:/var/run/sysinv-api.pid:0")
#        ;;
#
# NOTE: the examples in this header are written with a space before '=(' ;
#       remove that space when copying them into the parser. The -l/--list
#       option scans this file for the parser's assignments, and the space
#       keeps these commented examples out of the printed list.
#

# start with empty process restart control structure
#
# process_list is an indexed array with one colon separated record per
# requested process ; see the *_INDEX values below for the field order.
# It is declared without an initializer on purpose so that it starts with
# no elements at all.
declare -a process_list

# space separated PIDs of the background sleeps started in parallel mode
declare pids=""

# pull in loginfo and nodetype
# NOTE(review): PATCH_STATUS_OK, PATCH_STATUS_FAILED and PATCH_FLAGDIR are
# used by this script but never assigned in it ; presumably they also come
# from one of these two files - confirm.
. /etc/patching/patch-functions
. /etc/platform/platform.conf

#
# Declare an overall script return code ; failure until proven otherwise
#
declare -i GLOBAL_RC=${PATCH_STATUS_FAILED}

# if set with -c or --clean options then the flag files for
# each process are removed at the start.
CLEAN=false

#
# if set with -p or --parallel options then restart each process in parallel
PARALLEL=false

#
# Completion status ; stored in PID index
#
# The status field of a process record holds either the PID that was
# running before the restart request or one of these terminal values.
#
DISABLED="disabled"
NOPID="not-running"
SKIPPED="skipped"
RESTARTED="restarted"

#
# process query and restart executables
#
SM_RESTART_EXEC="sm-restart-safe"
SM_QUERY_EXEC="sm-query"
PMON_RESTART_EXEC="pmon-restart"

#
# sleep delays (seconds)
#
SM_SLEEP=5
PMON_SLEEP=2
MONITOR_SLEEP=2

#
# Struct indexes ; position of each field in a ':' separated process record
#
MONITOR_INDEX=0
PROCESS_INDEX=1
ALIAS_INDEX=2
HOSTTYPE_INDEX=3
PIDFILE_INDEX=4
STATUS_INDEX=5


#
# update_status: update the specified process index's status field
#
# ${1} = process list index
# ${2} = status
#
# The record at the given index is split on ':', and written back with the
# same monitor, process, alias, hosttype and pidfile fields and the new
# status as its last field. Only process_list is modified ; the working
# variables are local so the caller's own loop variables are left alone.
#
function update_status {
    local entry="${process_list[${1}]}"
    local -a fields

    # Split on ':' without word splitting or globbing of the field values.
    IFS=':' read -r -a fields <<< "${entry}"

    process_list[${1}]="${fields[${MONITOR_INDEX}]}:${fields[${PROCESS_INDEX}]}:${fields[${ALIAS_INDEX}]}:${fields[${HOSTTYPE_INDEX}]}:${fields[${PIDFILE_INDEX}]}:${2}"
}


#
# print_list: print the list of processes that this script supports restart of
#
# The list is not maintained separately. It is scraped from this script's
# own source: every line holding one of the command line parser's array
# append assignments is selected and its second ':' separated field, the
# process name, is printed. Lines that contain the word "grep" are dropped
# so that the search command below does not report itself.
#
function print_list {
    local list

    printf "\nThis restart script supports post patching restart the following processes ...\n\n"

    # The pattern is single quoted on purpose: it must be searched for as
    # literal source text and must not be expanded to the current contents
    # of the array, otherwise nothing matches once a process has been parsed.
    # shellcheck disable=SC2016
    list=$(grep -F -- 'process_list=(${process_list[@]}' "${0}" | grep -v grep | cut -f 2 -d ':')

    # Print the scraped text as data, never as a printf format string.
    printf '%s\n\n' "${list}"
}


#
# print_help: print the command and option syntax as well as the list of
#             processes supported by this script
#
# Uses the global 'script' for the command name and finishes by calling
# print_list. Every option accepted by the command line parser is listed.
#
function print_help {
    printf "\nTiS patching process restart script.\n"
    printf "\n%s {-options} [processes ...]\n" "${script}"
    printf "\noptions: -l or --list prints a list of supported processes\n"
    printf "         -h or --help prints this help text\n"
    printf "         -c or --clean removes the 'restarted' flag file of each listed process first\n"
    printf "         -p or --parallel runs the delay that follows each restart request in the background\n"
    print_list
}

#
# patching.log banner for this script
#
loginfo "------------------------------------------"
loginfo "No-Reboot Patching Process Restart Request"

#
# Option and process list parser
# Build the process list.
# All arguments should be a valid process name, not the SM alias.
# See the list below for supported process names.
#
# Each accepted process name appends one record to the process list in the
# form monitor:process:alias:hosttype:pidfile:status.
#
# NOTE: keep the append assignments below in exactly their current textual
#       form, one per line. The -l/--list option finds the supported process
#       names by searching this file for that text, so rewriting them (for
#       example with '+=') would empty the printed list.
#
# An unrecognized argument is reported and logged, then ignored ; parsing
# continues with the next argument.
#
while (( ${#} > 0 )) ; do
    process="${1}"
    case "${process}" in
        -h|--help)
            print_help
            exit 0
            ;;
        -l|--list)
            print_list
            exit 0
            ;;

        -c|--clean)
            CLEAN=true
            ;;

        -p|--parallel)
            PARALLEL=true
            ;;

        # Sysinv processes
        "sysinv-conductor")
            process_list=(${process_list[@]} "sm:sysinv-conductor:sysinv-conductor:controller:/var/run/sysinv-conductor.pid:0")
            ;;
        "sysinv-api")
            process_list=(${process_list[@]} "sm:sysinv-api:sysinv-inv:controller:/var/run/sysinv-api.pid:0")
            ;;
        "sysinv-agent")
            process_list=(${process_list[@]} "pmon:sysinv-agent:sysinv-agent:all:/var/run/sysinv-agent.pid:0")
            ;;

        # Keystone processes
        "keystone")
            process_list=(${process_list[@]} "sm:keystone:keystone:controller:/var/run/openstack-keystone.pid:0")
            ;;

        # Barbican processes
        "barbican-api")
            process_list=(${process_list[@]} "sm:barbican-api:barbican-api:controller:/var/run/barbican/pid:0")
            ;;
        "barbican-keystone-listener")
            process_list=(${process_list[@]} "sm:barbican-keystone-listener:barbican-keystone-listener:controller:/var/run/resource-agents/barbican-keystone-listener.pid:0")
            ;;
        "barbican-worker")
            process_list=(${process_list[@]} "sm:barbican-worker:barbican-worker:controller:/var/run/resource-agents/barbican-worker.pid:0")
            ;;

        # IO-Monitor process
        "io-monitor-manager")
            process_list=(${process_list[@]} "pmon:io-monitor-manager:io-monitor-manager:controller:/var/run/io-monitor/io-monitor-manager.pid:0")
            ;;

        # Vim processes
        "nfv-vim")
            process_list=(${process_list[@]} "sm:nfv-vim:vim:controller:/var/run/nfv-vim.pid:0")
            ;;
        "nfv-vim-api")
            process_list=(${process_list[@]} "sm:nfv-vim-api:vim-api:controller:/var/run/nfv-vim-api.pid:0")
            ;;
        "nfv-vim-webserver")
            process_list=(${process_list[@]} "sm:nfv-vim-webserver:vim-webserver:controller:/var/run/nfv-vim-webserver.pid:0")
            ;;

        # Distributed Cloud processes
        "dcmanager-manager")
            process_list=(${process_list[@]} "sm:dcmanager-manager:dcmanager-manager:controller:/var/run/resource-agents/dcmanager-manager.pid:0")
            ;;
        "dcmanager-api")
            process_list=(${process_list[@]} "sm:dcmanager-api:dcmanager-api:controller:/var/run/resource-agents/dcmanager-api.pid:0")
            ;;
        "dcorch-engine")
            process_list=(${process_list[@]} "sm:dcorch-engine:dcorch-engine:controller:/var/run/resource-agents/dcorch-engine.pid:0")
            ;;
        "dcorch-snmp")
            process_list=(${process_list[@]} "sm:dcorch-snmp:dcorch-snmp:controller:/var/run/resource-agents/dcorch-snmp.pid:0")
            ;;
        "dcorch-sysinv-api-proxy")
            process_list=(${process_list[@]} "sm:dcorch-sysinv-api-proxy:dcorch-sysinv-api-proxy:controller:/var/run/resource-agents/dcorch-sysinv-api-proxy.pid:0")
            ;;
        "dcorch-patch-api-proxy")
            process_list=(${process_list[@]} "sm:dcorch-patch-api-proxy:dcorch-patch-api-proxy:controller:/var/run/resource-agents/dcorch-patch-api-proxy.pid:0")
            ;;

        # Metrics collection processes
        "collectd")
            process_list=(${process_list[@]} "pmon:collectd:collectd:all:/var/run/collectd.pid:0")
            ;;
        "influxdb")
            process_list=(${process_list[@]} "pmon:influxdb:influxdb:all:/var/run/influxdb/influxdb.pid:0")
            ;;

        *)
            echo "Unknown process:${process}"
            loginfo "Unknown process:${process}"
            ;;
    esac
    shift
done

# Assume we are done until we know we are not
__done=true

#
# First pass over the requested processes.
#
# For every record in the process list this pass
#   - removes the process's 'restarted' flag file when the clean option is set
#   - skips the process when its hosttype does not match this host
#   - skips the process when its 'restarted' flag file already exists
#   - marks it not-running when its pidfile is missing or the PID in it is
#     not alive
#   - otherwise creates the flag file and asks the process's monitor (sm or
#     pmon) to restart it, then sleeps for that monitor's delay
#
# The record's status field ends up holding SKIPPED, NOPID, DISABLED or, for
# a process whose restart was requested, the PID it had before the request.
# __done is set to false as soon as one restart has been requested.
#
# The first element is only non-empty once the parser has accepted at least
# one process name.
if [[ -n "${process_list[0]}" ]] ; then

    for (( index = 0; index < ${#process_list[@]}; index++ )) ; do

        DAEMON="${process_list[${index}]}"
        IFS=':' read -r -a info <<< "${DAEMON}"

        monitor="${info[${MONITOR_INDEX}]}"
        pidfile="${info[${PIDFILE_INDEX}]}"
        hosttype="${info[${HOSTTYPE_INDEX}]}"
        process="${info[${PROCESS_INDEX}]}"
        alias="${info[${ALIAS_INDEX}]}"
        stat="${info[${STATUS_INDEX}]}"

        if [[ "${CLEAN}" == true ]] ; then
            rm -f "${PATCH_FLAGDIR}/${process}.restarted"
        fi

        # Default to skipping this process ; it is only handled when its
        # hosttype is 'all' or names a function that this host has.
        skip=true
        if [[ "${hosttype}" == "all" ]] ; then
            skip=false
        elif [[ "${hosttype}" == *"controller"* && "${nodetype}" == *"controller"* ]] ; then
            # controller function
            skip=false
        elif [[ "${hosttype}" == *"compute"* && ( "${nodetype}" == *"compute"* || "${subfunction}" == *"compute"* ) ]] ; then
            # compute as main function or as subfunction
            skip=false
        elif [[ "${hosttype}" == *"storage"* && "${nodetype}" == *"storage"* ]] ; then
            # storage type
            skip=false
        fi

        if [[ "${skip}" == true ]] ; then
            loginfo "${process} skipped for '${nodetype}' nodetype"
            update_status "${index}" "${SKIPPED}"
            continue
        fi

        if [[ -e "${PATCH_FLAGDIR}/${process}.restarted" ]] ; then
            loginfo "${process} restart skipped - already done"
            update_status "${index}" "${SKIPPED}"
            continue
        fi

        if [[ ! -e "${pidfile}" ]] ; then
            loginfo "${process} is not running ; missing pidfile"
            update_status "${index}" "${NOPID}"
            continue
        fi

        # record the existing PID ; it is logged below and later compared
        # against the pidfile content to detect that the restart happened
        stat=$(head -n 1 "${pidfile}" 2>/dev/null)
        stat="${stat//[[:space:]]/}"

        # check if the pid is running
        if [[ -z "${stat}" ]] || ! kill -0 "${stat}" 2>/dev/null ; then
            loginfo "${process} is not running"
            update_status "${index}" "${NOPID}"
            continue
        fi

        #
        # If we get here then we want to restart this process
        # for this node type and the process is running
        #
        # Now manage restart of that process based on what its monitor method is
        #
        if [[ "${monitor}" == "sm" ]] ; then

            # Managed/Monitored by SM
            sm_query_result=$(${SM_QUERY_EXEC} service "${alias}")
            echo "sm_query_result:${sm_query_result} - alias:${alias}"

            if [[ "${sm_query_result}" == *"enabled-active"* ]] ; then

                echo "${SM_RESTART_EXEC} of ${process} [pid:${stat}]"
                loginfo "${SM_RESTART_EXEC} of ${process} [pid:${stat}]"
                touch "${PATCH_FLAGDIR}/${process}.restarted" 2>/dev/null
                ${SM_RESTART_EXEC} service "${alias}"
                __done=false

                # Only the delay is backgrounded in parallel mode ; the
                # restart request above always runs in the foreground.
                if [[ "${PARALLEL}" == true ]] ; then
                    sleep "${SM_SLEEP}" &
                    pids="${pids} $!"
                else
                    sleep "${SM_SLEEP}"
                fi

            elif [[ "${sm_query_result}" == *"is enabling"* ]] ; then
                # log the PID read from the pidfile before the status is
                # replaced by its terminal value
                loginfo "sm-restart ${process} ; [in progress] ; [pid:${stat}]"
                stat="${NOPID}"
            else
                loginfo "${process} is not active"
                stat="${DISABLED}"
            fi

        else

            # Managed/Monitored by PMON
            echo "${PMON_RESTART_EXEC} of ${process} [pid:${stat}]"
            loginfo "${PMON_RESTART_EXEC} of ${process} [pid:${stat}]"
            touch "${PATCH_FLAGDIR}/${process}.restarted" 2>/dev/null
            ${PMON_RESTART_EXEC} "${process}"
            __done=false

            if [[ "${PARALLEL}" == true ]] ; then
                sleep "${PMON_SLEEP}" &
                pids="${pids} $!"
            else
                sleep "${PMON_SLEEP}"
            fi

        fi

        # echo "Monitor:${monitor} Process:${process} Alias:${alias} Node:${hosttype} Pidfile:${pidfile} Status:${stat}"

        # Save the PID or NOPID status to the process line
        update_status "${index}" "${stat}"

    done

    # wait for background sleeps ; pids is a space separated list, so it is
    # deliberately left unquoted to pass each PID as its own argument
    # shellcheck disable=SC2086
    wait ${pids}
fi

#
# If the first pass did not request any restart then there is nothing to
# monitor ; report success and exit now.
#
if [[ "${__done}" == true ]] ; then

    GLOBAL_RC=${PATCH_STATUS_OK}
    loginfo "No-Reboot Patching Process Restart Status: ${GLOBAL_RC} - nothing to do."
    exit ${GLOBAL_RC}
fi

# Monitor the restart of processes
#
# Loop over the process list waiting for every process whose restart was
# requested to come back with a new PID. There is one overall timeout of
# TIMEOUT seconds for all of the processes together.
#
# SECONDS is the shell's own elapsed time counter. Don't want to start from
# the beginning of the shell ; want time zero now plus TIMEOUT seconds.
#
SECONDS=0
TIMEOUT=120
UNTIL=$(( SECONDS + TIMEOUT ))
loginfo "restart timeout is ${TIMEOUT}"

while (( SECONDS <= UNTIL )) ; do

    if [[ "${__done}" != false ]] ; then
        # should not get here but handle anyway
        GLOBAL_RC=${PATCH_STATUS_OK}
        break
    fi

    # Loop over all processes looking for complete restarts.
    # Update process struct PID field as status is learned.
    __not_done=false
    for (( index = 0; index < ${#process_list[@]}; index++ )) ; do

        DAEMON="${process_list[${index}]}"
        IFS=':' read -r -a info <<< "${DAEMON}"

        pidfile="${info[${PIDFILE_INDEX}]}"
        process="${info[${PROCESS_INDEX}]}"
        alias="${info[${ALIAS_INDEX}]}"
        stat="${info[${STATUS_INDEX}]}"

        # Records already in a terminal state need no further checking ;
        # any other status value is the PID from before the restart.
        case "${stat}" in
            "${SKIPPED}"|"${RESTARTED}"|"${DISABLED}"|"${NOPID}")
                continue
                ;;
        esac

        if [[ -e "${pidfile}" ]] ; then

            # Get the new PID
            new_pid=$(head -n 1 "${pidfile}" 2>/dev/null)
            new_pid="${new_pid//[[:space:]]/}"

            # Restarted means: the pidfile names a running process and that
            # PID is different from the one recorded before the restart.
            if [[ -n "${new_pid}" && "${new_pid}" != "${stat}" ]] && kill -0 "${new_pid}" 2>/dev/null ; then
                loginfo "${process} ${RESTARTED} ok [pid:${stat} -> ${new_pid}]"
                update_status "${index}" "${RESTARTED}"
                continue
            fi
        fi

        # still waiting on this one
        __not_done=true
    done

    # Exit if done
    if [[ "${__not_done}" == false ]] ; then
        __done=true
        GLOBAL_RC=${PATCH_STATUS_OK}
        break
    fi

    # Something is still pending ; pause before polling the pidfiles again.
    sleep "${MONITOR_SLEEP}"
done

#
# log_processes_with_status: log the name of every process in the process
#                            list whose status field equals ${1}
#
# ${1} = status value to report ; it is also the word quoted in the heading
#
# The heading line is logged once, before the first match ; nothing at all
# is logged when no process has that status.
#
function log_processes_with_status {
    local wanted="${1}"
    local announced=false
    local entry
    local -a fields

    for entry in "${process_list[@]}" ; do
        IFS=':' read -r -a fields <<< "${entry}"
        if [[ "${fields[${STATUS_INDEX}]}" == "${wanted}" ]] ; then
            if [[ "${announced}" == false ]] ; then
                loginfo "The following processes have been '${wanted}'"
                announced=true
            fi
            loginfo "... process: ${fields[${PROCESS_INDEX}]}"
        fi
    done
}

#
# Final report: restarted processes, skipped processes and, when the
# monitoring loop ended without every restart completing, the processes
# that are still being waited on. Then exit with the overall return code.
#
log_processes_with_status "${RESTARTED}"
log_processes_with_status "${SKIPPED}"

if [[ "${__done}" == false ]] ; then
    loginfo "Process Restart Timeout ; waiting on "
    for DAEMON in "${process_list[@]}" ; do
        IFS=':' read -r -a info <<< "${DAEMON}"
        stat="${info[${STATUS_INDEX}]}"

        case "${stat}" in
            "${SKIPPED}"|"${RESTARTED}"|"${DISABLED}"|"${NOPID}")
                # terminal state ; not being waited on
                ;;
            *)
                # Any other status is the PID recorded before the restart
                # request ; name the process and show that PID.
                loginfo "... process: ${info[${PROCESS_INDEX}]} [pid:${stat}]"
                ;;
        esac
    done
fi

loginfo "No-Reboot Patching Process Restart Status: ${GLOBAL_RC}"

exit ${GLOBAL_RC}