Introduce dccertmon service

This commit introduces dccertmon, a new managed service for DC
certificate auditing and management.

Currently, platform cert management, DC cert management, and subcloud
cert auditing are coupled into a single platform service (certmon). To
meet the requirements of DC scalability and portability, DC specific
functionality must be decoupled. These changes lay the groundwork
for the new service, by:
- Creating the necessary service files.
- Introducing configs for the service.
- Declaring high-level skeleton methods (service lifecycle and manager).

DC-specific functionality will be migrated to this dccertmon service and
optimized in subsequent changes. Non-DC cert management will continue to
be handled by certmon.

Overall, this commit introduces:
- The OCF file necessary for high availability management of the
  dccertmon service by SM.
- Package configurations to build the service (Package: distributedcloud-dccertmon).
- Lifecycle manager for a running DC cert monitor service.
- Skeleton/base service application logic - CertificateMonitorManager.
- RPC notification handlers for subcloud online/managed.
- Configuration for the log folders and log rotation. The logs
  will be available in /var/log/dccertmon/dccertmon.log.

These changes are part of a set of commits to introduce the dccertmon service:
  [1] https://review.opendev.org/c/starlingx/ha/+/941205
  [2] https://review.opendev.org/c/starlingx/stx-puppet/+/941208

Test Plan:
  - PASS: Build dccertmon package
  - PASS: Install and bootstrap system with custom ISO containing the
          newly created dccertmon package
  - PASS: Verify that the dccertmon.service is loaded
  - PASS: Verify that dccertmon log messages are written to the correct
          folder.
  - PASS: Check logged messages and verify execution of
           - Cert Watcher thread
           - Task Executor (Audit thread)
           - Periodic tasks running at expected intervals
  - PASS: Configure and provision the service using SM and verify
          it has correctly started and can be restarted with
          'sm-restart'.
  - PASS: Tox checks running on dccertmon

  Note: This commit has been tested alongside the related changes and
        their respective test plans. [1][2]

Story: 2011311
Task: 51663

Change-Id: Ic23d8d13e4b292cf0508d23eaae99b8e07f36d31
Signed-off-by: Salman Rana <salman.rana@windriver.com>
This commit is contained in:
Salman Rana 2025-02-11 03:06:31 -05:00
parent 99291be851
commit 7d44c38c90
27 changed files with 820 additions and 9 deletions

View File

@ -6,5 +6,6 @@ distributedcloud-dcdbsync
distributedcloud-dcmanager
distributedcloud-dcorch
distributedcloud-dcagent
distributedcloud-dccertmon
python3-redfish

View File

@ -10,6 +10,7 @@ test_command=OS_STDOUT_CAPTURE=${OS_STDOUT_CAPTURE:-1}
${PYTHON} -m subunit.run discover -s dcmanager $LISTOPT $IDOPTION
${PYTHON} -m subunit.run discover -s dcorch $LISTOPT $IDOPTION
${PYTHON} -m subunit.run discover -s dcagent $LISTOPT $IDOPTION
${PYTHON} -m subunit.run discover -s dccertmon $LISTOPT $IDOPTION
test_id_option=--load-list $IDFILE
test_list_option=--list
test_run_concurrency=echo 5

View File

@ -0,0 +1,10 @@
#
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import pbr.version
__version__ = pbr.version.VersionInfo("distributedcloud").version_string()

View File

@ -0,0 +1,54 @@
#
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
"""
DC Certificate Monitor Service
"""
import eventlet
eventlet.monkey_patch()
# pylint: disable=wrong-import-position
from oslo_config import cfg # noqa: E402
from oslo_i18n import _lazy # noqa: E402
from oslo_log import log as logging # noqa: E402
from oslo_service import service # noqa: E402
from dccertmon.common import config # noqa: E402
from dcmanager.common import messaging # noqa: E402
# pylint: enable=wrong-import-position
_lazy.enable_lazy()
LOG = logging.getLogger("dccertmon")
CONF = cfg.CONF
def main():
    """Entry point of the dccertmon console script.

    Regenerates the service configuration file, parses options, configures
    logging and messaging, and then blocks on the launched
    CertificateMonitorService until it exits.
    """
    # Rewrite the service conf file before any option parsing takes place.
    config.generate_config()

    logging.register_options(CONF)
    CONF(project="dccertmon")
    config.register_config_opts()

    logging.set_defaults()
    logging.setup(CONF, "dccertmon")
    messaging.setup()

    # Deferred import: the service module imports the manager, whose
    # periodic-task decorators read CONF.dccertmon while the class body is
    # executed, so the options have to be registered before this point.
    from dccertmon.common.service import CertificateMonitorService

    monitor_service = CertificateMonitorService()
    launcher = service.launch(CONF, monitor_service)

    LOG.info("Starting...")
    LOG.debug("Configuration:")
    CONF.log_opt_values(LOG, logging.DEBUG)

    launcher.wait()
if __name__ == "__main__":
main()

View File

@ -0,0 +1,95 @@
#
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
import eventlet
import greenlet
from oslo_config import cfg
from oslo_log import log
from oslo_service import periodic_task
from dccertmon.common import watcher
LOG = log.getLogger(__name__)
CONF = cfg.CONF
class CertificateMonitorManager(periodic_task.PeriodicTasks):
    """Skeleton manager for the DC certificate monitor.

    Owns two green threads:

    * ``mon_thread`` - the certificate watcher loop, which repeatedly calls
      ``start_watch`` on a ``DC_CertWatcher``;
    * ``worker_thread`` - the task executor loop, which drives the periodic
      tasks declared at the bottom of this class.

    The periodic task spacings are read from ``CONF.dccertmon`` while the
    class body is executed, so the ``dccertmon`` options must already be
    registered when this module is imported.
    """

    def __init__(self):
        super(CertificateMonitorManager, self).__init__(CONF)
        # Green threads; None while the corresponding loop is not running.
        self.mon_thread = None
        self.worker_thread = None

    def on_start(self):
        """Hook called right after the task executor has been spawned."""
        LOG.info("Service Start - prepare for initial audit")

    def start_task_executor(self):
        """Spawn the periodic task loop and run the start hook."""
        self.worker_thread = eventlet.greenthread.spawn(self.worker_task_loop)
        self.on_start()

    def start_cert_watcher(self):
        """Create and initialize the watcher, then spawn its loop.

        Creation/initialization is retried every 5 seconds until it succeeds;
        this call therefore blocks until a watcher is available.
        """
        dc_monitor = None
        while True:
            try:
                dc_monitor = watcher.DC_CertWatcher()
                dc_monitor.initialize()
            except Exception as e:
                LOG.exception(e)
                time.sleep(5)
            else:
                break

        # spawn monitor thread
        self.mon_thread = eventlet.greenthread.spawn(self.monitor_cert_loop, dc_monitor)

    def stop_cert_watcher(self):
        """Kill the watcher green thread, if any, and wait for it to finish."""
        if self.mon_thread:
            self.mon_thread.kill()
            self.mon_thread.wait()
            self.mon_thread = None

    def stop_task_executor(self):
        """Kill the task executor green thread, if any, and wait for it."""
        if self.worker_thread:
            self.worker_thread.kill()
            self.worker_thread.wait()
            self.worker_thread = None

    def worker_task_loop(self):
        """Run the periodic tasks forever, pausing between passes.

        The loop ends cleanly when the green thread is killed
        (``GreenletExit``). Any other exception is logged and the loop still
        pauses before the next pass, so a persistent failure cannot turn into
        a busy loop.
        """
        while True:
            try:
                try:
                    self.run_periodic_tasks(context=None)
                except Exception as e:
                    # GreenletExit is not an Exception subclass, so a kill
                    # request still reaches the outer handler.
                    LOG.exception(e)
                # TODO(srana): Reset sleep after proper implementation
                time.sleep(60)
            except greenlet.GreenletExit:
                break

    def monitor_cert_loop(self, monitor):
        """Keep the watcher running until the green thread is killed.

        :param monitor: watcher object exposing ``start_watch``.
        """
        while True:
            # never exit until exit signal received
            try:
                monitor.start_watch(on_success=None, on_error=None)
            except greenlet.GreenletExit:
                break
            except Exception:
                # It shouldn't fall to here, but log and restart if it did
                LOG.exception("Unexpected exception from start_watch")
                time.sleep(1)

    @periodic_task.periodic_task(spacing=CONF.dccertmon.audit_interval)
    def audit_sc_cert_start(self, context):
        """Placeholder periodic task; currently only logs its invocation."""
        LOG.info("periodic_task: audit_sc_cert_start")

    @periodic_task.periodic_task(spacing=5)
    def audit_sc_cert_task(self, context):
        """Placeholder periodic task; currently only logs its invocation."""
        LOG.info("periodic_task: audit_sc_cert_task")

    @periodic_task.periodic_task(spacing=CONF.dccertmon.retry_interval)
    def retry_monitor_task(self, context):
        """Placeholder periodic task; currently only logs its invocation."""
        LOG.info("periodic_task: retry_monitor_task")

View File

@ -0,0 +1,132 @@
#
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import configparser
import os
import keyring
from oslo_config import cfg
from dccommon import consts as dccommon_consts
from dcmanager.common import utils
CONF = cfg.CONF
# Log line format written into the [DEFAULT] section below.
logging_default_format_string = (
"%(process)d %(levelname)s %(name)s [-] %(instance)s%(message)s"
)
# Section -> option -> value mapping that create_conf_file() writes out as
# /etc/dccertmon/dccertmon.conf. All values are strings. The "None" entries
# for the two passwords, region_name and transport_url are placeholders that
# override_config_values() replaces before the file is written.
config_values = {
"keystone_authtoken": {
"auth_url": "http://controller.internal:5000",
"auth_uri": "http://controller.internal:5000",
"auth_type": "password",
"project_name": "services",
"username": "sysinv",
"password": "None",
"user_domain_name": "Default",
"project_domain_name": "Default",
"interface": "internal",
"region_name": "None",
},
"DEFAULT": {
"syslog_log_facility": "local4",
"use_syslog": "True",
"debug": "False",
"logging_default_format_string": logging_default_format_string,
"logging_debug_format_suffix": "%(pathname)s:%(lineno)d",
"auth_strategy": "keystone",
"transport_url": "None",
},
# NOTE(review): only audit_interval and retry_interval are registered as
# options in dc_cert_mon_opts below; the remaining keys are written to the
# file but have no matching option definition in this module.
"dccertmon": {
"retry_interval": "600",
"max_retry": "14",
"audit_interval": "86400",
"startup_audit_all": "False",
"network_retry_interval": "180",
"network_max_retry": "30",
"audit_batch_size": "40",
"audit_greenpool_size": "20",
"certificate_timeout_secs": "5",
},
"endpoint_cache": {
"auth_plugin": "password",
"username": "dcmanager",
"password": "None",
"project_name": "services",
"user_domain_name": "Default",
"project_domain_name": "Default",
"http_connect_timeout": "15",
"auth_uri": "http://controller.internal:5000/v3",
},
}
# Options registered without a group (see register_config_opts()).
common_opts = [cfg.StrOpt("host", default="localhost", help="hostname of the machine")]
# Options registered under the "dccertmon" group; both are intervals in
# seconds, going by the inline default comments.
dc_cert_mon_opts = [
cfg.IntOpt(
"audit_interval",
default=86400, # 24 hours
help="Interval to run certificate audit",
),
cfg.IntOpt(
"retry_interval",
default=10 * 60, # retry every 10 minutes
help="Interval to reattempt accessing external system if failure occurred",
),
]
def register_config_opts():
    """Register the common options and the ``dccertmon`` group on CONF."""
    registrations = (
        (common_opts, None),
        (dc_cert_mon_opts, "dccertmon"),
    )
    for option_list, group_name in registrations:
        CONF.register_opts(option_list, group_name)
def override_config_values():
    """Replace the placeholder entries of ``config_values`` with live values.

    Passwords are read from the keyring and the region name is looked up
    through ``utils.get_region_name``; the results are stored in the
    module-level ``config_values`` mapping in place.
    """
    services_user = dccommon_consts.SERVICES_USER_NAME
    keystone_section = config_values["keystone_authtoken"]

    amqp_password = keyring.get_password("amqp", "rabbit")

    region_name = utils.get_region_name("http://controller.internal:6385")
    keystone_section["region_name"] = region_name

    dcmanager_password = keyring.get_password("dcmanager", services_user)
    config_values["endpoint_cache"]["password"] = dcmanager_password

    sysinv_password = keyring.get_password("sysinv", services_user)
    keystone_section["password"] = sysinv_password

    transport_url = f"rabbit://guest:{amqp_password}@controller.internal:5672"
    config_values["DEFAULT"]["transport_url"] = transport_url
def create_conf_file():
    """Write ``config_values`` to /etc/dccertmon/dccertmon.conf with mode 0600.

    The file contains service passwords, so it is created owner-only from the
    start rather than being created with the default mode and restricted
    afterwards. The target directory is created if it does not exist.
    """
    output_dir = "/etc/dccertmon"
    output_file = os.path.join(output_dir, "dccertmon.conf")

    os.makedirs(output_dir, exist_ok=True)

    config = configparser.RawConfigParser()

    # Populate the config parser with values
    for section, options in config_values.items():
        config[section] = options

    # O_CREAT with an explicit mode means a newly created file is never
    # readable by group/others, not even briefly.
    fd = os.open(output_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as f:
        # A pre-existing file keeps its old mode across os.open(); tighten it
        # before any secret is written.
        os.fchmod(f.fileno(), 0o600)
        config.write(f)
def generate_config():
    """Resolve the dynamic config values and write the service conf file."""
    steps = (
        override_config_values,  # dynamic values (e.g., passwords, urls, etc)
        create_conf_file,  # service conf file on disk
    )
    for step in steps:
        step()
def list_opts():
    """Yield ``(group, options)`` pairs; group is None for ungrouped options."""
    yield from (
        ("dccertmon", dc_cert_mon_opts),
        (None, common_opts),
    )

View File

@ -0,0 +1,83 @@
#
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from oslo_config import cfg
from oslo_log import log as logging
import oslo_messaging
from oslo_service import service
from dccertmon.common.certificate_monitor_manager import CertificateMonitorManager
from dccertmon.common import utils
from dcmanager.common import consts
from dcmanager.common import messaging as rpc_messaging
CONF = cfg.CONF
LOG = logging.getLogger(__name__)
class CertificateMonitorService(service.Service):
    """Lifecycle manager for a running DC cert monitor service."""

    def __init__(self):
        super(CertificateMonitorService, self).__init__()
        self.rpc_api_version = consts.RPC_API_VERSION
        self.topic = consts.TOPIC_DC_NOTIFICATION
        # TODO(srana): Refactor DC role usage due to deprecation.
        self.dc_role = utils.DC_ROLE_UNDETECTED
        self.manager = CertificateMonitorManager()
        # Created in start() only when running as system controller.
        self._rpc_server = None
        self.target = None

    def start(self):
        """Start the watcher, the task executor and, if applicable, RPC.

        The RPC notification server is only started when the detected role is
        system controller.
        """
        LOG.info("Starting %s", self.__class__.__name__)
        super(CertificateMonitorService, self).start()
        # Record the detected role; without this assignment dc_role would
        # keep its initial "undetected" value and the RPC server below would
        # never be started.
        self.dc_role = self._get_dc_role()

        self.manager.start_cert_watcher()
        self.manager.start_task_executor()

        if self.dc_role == utils.DC_ROLE_SYSTEMCONTROLLER:
            self.target = oslo_messaging.Target(
                version=self.rpc_api_version, server=CONF.host, topic=self.topic
            )
            self._rpc_server = rpc_messaging.get_rpc_server(self.target, self)
            self._rpc_server.start()

    def stop(self):
        """Stop RPC (system controller only), then the manager threads."""
        LOG.info("Stopping %s", self.__class__.__name__)

        if self.dc_role == utils.DC_ROLE_SYSTEMCONTROLLER:
            self._stop_rpc_server()

        self.manager.stop_cert_watcher()
        self.manager.stop_task_executor()
        super(CertificateMonitorService, self).stop()

    def _stop_rpc_server(self):
        """Stop the RPC server if one was started; failures are only logged."""
        if self._rpc_server:
            try:
                self._rpc_server.stop()
                self._rpc_server.wait()
                LOG.info("Engine service stopped successfully")
            except Exception as ex:
                # One record carrying both the message and the traceback.
                LOG.exception("Failed to stop engine service: %s", ex)

    def _get_dc_role(self):
        """Return the DC role of this host (currently hard-coded)."""
        # TODO(srana): Update after migrating from certmon
        return utils.DC_ROLE_SYSTEMCONTROLLER

    def subcloud_online(self, context, subcloud_name=None):
        """RPC handler: a subcloud came online (currently only logged)."""
        # TODO(srana): Trigger a subcloud online audit
        LOG.info("%s is online.", subcloud_name)

    def subcloud_managed(self, context, subcloud_name=None):
        """RPC handler: a subcloud became managed (currently only logged)."""
        # TODO(srana): Trigger a subcloud audit
        LOG.info("%s is managed.", subcloud_name)

    def subcloud_sysinv_endpoint_update(self, ctxt, subcloud_name, endpoint):
        """RPC handler: a subcloud sysinv endpoint changed (only logged)."""
        # TODO(srana): Update sysinv endpoint of dc token cache
        LOG.info("Update subcloud: %s sysinv endpoint", subcloud_name)

View File

@ -0,0 +1,26 @@
#
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# TODO(srana): Refactor DC role usage due to deprecation.
# Possible values of the service's distributed cloud role.
DC_ROLE_UNDETECTED = "unknown"
DC_ROLE_SUBCLOUD = "subcloud"
DC_ROLE_SYSTEMCONTROLLER = "systemcontroller"
# NOTE(review): presumably the overall timeout and the polling delay used
# while detecting the DC role; no consumer is visible here - confirm.
DC_ROLE_TIMEOUT_SECONDS = 180
DC_ROLE_DELAY_SECONDS = 5
# Subcloud deploy states that, going by the constant name, exclude a subcloud
# from auditing.
INVALID_SUBCLOUD_AUDIT_DEPLOY_STATES = [
# Secondary subclouds should not be audited as they are expected
# to be managed by a peer system controller (geo-redundancy feat.)
"create-complete",
"create-failed",
"pre-rehome",
"rehome-failed",
"rehome-pending",
"rehoming",
"secondary",
"secondary-failed",
]

View File

@ -0,0 +1,23 @@
#
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
from oslo_log import log
LOG = log.getLogger(__name__)
class DC_CertWatcher:
    """Placeholder certificate watcher.

    Takes no constructor arguments and keeps no state; both operations only
    log, and ``start_watch`` additionally sleeps for a minute.
    """

    def initialize(self):
        """Log that the watcher is being initialized."""
        LOG.info("initialize DC_CertWatcher")

    def start_watch(self, on_success, on_error):
        """Log the start of a watch cycle and sleep for 60 seconds.

        :param on_success: accepted but not used by this placeholder.
        :param on_error: accepted but not used by this placeholder.
        """
        LOG.info("DC_CertWatcher start_watch")
        time.sleep(60)

View File

@ -0,0 +1,9 @@
[DEFAULT]
output_file = etc/dccertmon/dccertmon.conf.sample
wrap_width = 79
# Must match the oslo.config.opts entry point name registered in setup.cfg
namespace = dccertmon.cmd.cert_mon.config
namespace = oslo.messaging
namespace = oslo.middleware
namespace = oslo.log
namespace = oslo.service.service
namespace = oslo.service.periodic_task

View File

@ -1,5 +1,5 @@
# Copyright (c) 2016 Ericsson AB.
# Copyright (c) 2017-2024 Wind River Systems, Inc.
# Copyright (c) 2017-2025 Wind River Systems, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
@ -23,6 +23,7 @@ TOPIC_DC_MANAGER_STATE = "dcmanager-state"
TOPIC_DC_MANAGER_AUDIT = "dcmanager-audit"
TOPIC_DC_MANAGER_AUDIT_WORKER = "dcmanager-audit-worker"
TOPIC_DC_MANAGER_ORCHESTRATOR = "dcmanager-orchestrator"
TOPIC_DC_NOTIFICATION = "DCMANAGER-NOTIFICATION"
CERTS_VAULT_DIR = "/opt/dc-vault/certs"
PATCH_VAULT_DIR = "/opt/dc-vault/patches"

View File

@ -1,4 +1,4 @@
# Copyright (c) 2017-2024 Wind River Systems, Inc.
# Copyright (c) 2017-2025 Wind River Systems, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
@ -468,12 +468,9 @@ class DCManagerNotifications(RPCClient):
1.0 - Initial version
"""
DCMANAGER_RPC_API_VERSION = "1.0"
TOPIC_DC_NOTIFICIATION = "DCMANAGER-NOTIFICATION"
def __init__(self, timeout=None):
super(DCManagerNotifications, self).__init__(
timeout, self.TOPIC_DC_NOTIFICIATION, self.DCMANAGER_RPC_API_VERSION
timeout, consts.TOPIC_DC_NOTIFICATION, consts.RPC_API_VERSION
)
def subcloud_online(self, ctxt, subcloud_name):
@ -495,5 +492,5 @@ class DCManagerNotifications(RPCClient):
endpoint=endpoint,
),
fanout=True,
version=self.DCMANAGER_RPC_API_VERSION,
version=consts.RPC_API_VERSION,
)

View File

@ -83,6 +83,14 @@ Depends:
Description: Distributed Cloud DCAgent
Distributed Cloud DCAgent
Package: distributedcloud-dccertmon
Architecture: any
Depends:
${misc:Depends},
${python3:Depends}
Description: Distributed Cloud Certificate Monitor
Distributed Cloud Certificate Monitor
Package: distributedcloud-wheels
Architecture: any
Depends:

View File

@ -0,0 +1 @@
var/log/dccertmon

View File

@ -0,0 +1,5 @@
etc/dccertmon/dccertmon.conf
usr/bin/dccertmon
usr/lib/ocf/resource.d/openstack/dccertmon*
usr/lib/python3/dist-packages/dccertmon/*
usr/lib/tmpfiles.d/dccertmon.conf

View File

@ -1,3 +1,4 @@
d /var/log/dccertmon 0755 root root - -
d /var/log/dcagent 0755 root root - -
d /var/log/dcdbsync 0755 root root - -
d /var/log/dcmanager 0755 root root - -

View File

@ -43,6 +43,7 @@ override_dh_install:
install -p -D -m 644 files/dcorch.conf $(TMP_DIR)/dcorch.conf
install -p -D -m 644 files/dcmanager.conf $(TMP_DIR)/dcmanager.conf
install -p -D -m 644 files/dcagent.conf $(TMP_DIR)/dcagent.conf
install -p -D -m 644 files/dccertmon.conf $(TMP_DIR)/dccertmon.conf
# install systemd unit files for optional second instance
install -p -D -m 644 files/dcdbsync-openstack-api.service $(SYSTEMD_DIR)/dcdbsync-openstack-api.service
@ -67,6 +68,7 @@ override_dh_install:
PYTHONPATH=. oslo-config-generator --config-file=./dcorch/config-generator.conf
PYTHONPATH=. oslo-config-generator --config-file=./dcdbsync/config-generator.conf
PYTHONPATH=. oslo-config-generator --config-file=./dcagent/config-generator.conf
PYTHONPATH=. oslo-config-generator --config-file=./dccertmon/config-generator.conf
# install default config files
oslo-config-generator \
@ -89,6 +91,11 @@ override_dh_install:
--output-file ./dcagent/dcagent.conf.sample
install -p -D -m 640 ./dcagent/dcagent.conf.sample $(SYS_CONF_DIR)/dcagent/dcagent.conf
oslo-config-generator \
--config-file ./dccertmon/config-generator.conf \
--output-file ./dccertmon/dccertmon.conf.sample
install -p -D -m 640 ./dccertmon/dccertmon.conf.sample $(SYS_CONF_DIR)/dccertmon/dccertmon.conf
# install rvmc_install.py script
install -d $(ROOT)/usr/local/bin/
install -p -D -m 700 scripts/rvmc_install.py $(ROOT)/usr/local/bin
@ -98,6 +105,7 @@ override_dh_install:
rm -rf $(ROOT)/usr/lib/python3/dist-packages/dcmanager/tests
rm -rf $(ROOT)/usr/lib/python3/dist-packages/dcorch/tests
rm -rf $(ROOT)/usr/lib/python3/dist-packages/dcagent/tests
rm -rf $(ROOT)/usr/lib/python3/dist-packages/dccertmon/tests
dh_install
@ -109,7 +117,8 @@ override_dh_fixperms:
--exclude etc/dcdbsync/dcdbsync.conf \
--exclude etc/dcmanager/dcmanager.conf \
--exclude etc/dcorch/dcorch.conf \
--exclude etc/dcagent/dcagent.conf
--exclude etc/dcagent/dcagent.conf \
--exclude etc/dccertmon/dccertmon.conf
execute_after_dh_fixperms:
# forcing 600 for /var/opt/dc/ansible

View File

@ -0,0 +1,4 @@
To generate the sample dccertmon.conf file, run the following
command from the top level of the dccertmon directory:
tox -egenconfig

View File

@ -0,0 +1,5 @@
{
"context_is_admin": "role:admin",
"admin_or_owner": "is_admin:True or project_id:%(project_id)s",
"default": "rule:admin_or_owner"
}

View File

@ -0,0 +1 @@
d /var/run/dccertmon 0755 root root -

View File

@ -91,6 +91,21 @@
endscript
}
/var/log/dccertmon/*.log
{
nodateext
size 20M
start 1
rotate 20
missingok
notifempty
compress
sharedscripts
postrotate
systemctl reload syslog-ng > /dev/null 2>&1 || true
endscript
}
/var/log/dcmanager/ansible/*.log
{
nodateext

View File

@ -7,6 +7,7 @@ destination d_dcmanager_orch { file("/var/log/dcmanager/orchestrator.log" templa
destination d_dcorch { file("/var/log/dcorch/dcorch.log" template(t_preformatted)); };
destination d_dcdbsync { file("/var/log/dcdbsync/dcdbsync.log" template(t_preformatted)); };
destination d_dcagent { file("/var/log/dcagent/dcagent.log" template(t_preformatted)); };
destination d_dccertmon { file("/var/log/dccertmon/dccertmon.log" template(t_preformatted)); };
# Distributed Cloud Log Filters
filter f_dcmanagermanager { facility(local4) and program(dcmanager-manager); };
@ -22,6 +23,8 @@ filter f_dcdbsyncapi { facility(local4) and program(dcdbsync-api); };
filter f_dcagentapi { facility(local4) and program(dcagent-api); };
filter f_dccertmon { facility(local4) and program(dccertmon); };
# Distributed Cloud Log Path
log {source(s_src); filter(f_dcmanagermanager); destination(d_dcmanager); };
log {source(s_src); filter(f_dcmanageraudit); destination(d_dcmanager_audit); };
@ -32,4 +35,5 @@ log {source(s_src); filter(f_dcorchengine); destination(d_dcorch); };
log {source(s_src); filter(f_dcorchapiproxy); destination(d_dcorch); };
log {source(s_src); filter(f_dcdbsyncapi); destination(d_dcdbsync); };
log {source(s_src); filter(f_dcagentapi); destination(d_dcagent); };
log {source(s_src); filter(f_dccertmon); destination(d_dccertmon); };

View File

@ -0,0 +1,323 @@
#!/bin/sh
# OpenStack DC Certificate Monitor Service (dccertmon)
#
# Description: Manages a DC Certificate Monitor Service
# (dccertmon) process as an HA resource
#
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
# See usage() function below for more details ...
#
# OCF instance parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_config
# OCF_RESKEY_user
# OCF_RESKEY_pid
# OCF_RESKEY_additional_parameters
#######################################################################
# Initialization:
# Load the OCF shell function library; the directory can be overridden through
# OCF_FUNCTIONS_DIR and otherwise defaults to ${OCF_ROOT}/lib/heartbeat.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Fill in some defaults if no values are specified
OCF_RESKEY_binary_default="dccertmon"
OCF_RESKEY_config_default="/etc/dccertmon/dccertmon.conf"
OCF_RESKEY_user_default="root"
OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid"
# ": ${VAR=default}" assigns the default only when VAR is unset, so values
# supplied by the resource manager take precedence.
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
#######################################################################
# Print the supported actions and a one-line description of each to stdout.
usage() {
cat <<UEND
usage: $0 (start|stop|validate-all|meta-data|status|monitor)
$0 manages an OpenStack DC Certificate Monitor Service (dccertmon) process as an HA resource
The 'start' operation starts the dccertmon service.
The 'stop' operation stops the dccertmon service.
The 'validate-all' operation reports whether the parameters are valid
The 'meta-data' operation reports this RA's meta-data information
The 'status' operation reports whether the dccertmon service is running
The 'monitor' operation reports whether the dccertmon service seems to be working
UEND
}
# Print the resource agent meta-data (parameters, defaults and supported
# actions with their timeouts) as XML on stdout.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="dccertmon">
<version>1.0</version>
<longdesc lang="en">
Resource agent for the DC Certificate Monitor Service (dccertmon)
</longdesc>
<shortdesc lang="en">Manages the DC Certificate Monitor
Service (dccertmon)</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the DC Certificate Monitor binary (dccertmon)
</longdesc>
<shortdesc lang="en">DC Certificate Monitor binary (dccertmon)</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Location of the DC Certificate Monitor (dccertmon) configuration file
</longdesc>
<shortdesc lang="en">DC Certificate Monitor (dccertmon) config file</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running DC Certificate Monitor (dccertmon)
</longdesc>
<shortdesc lang="en">DC Certificate Monitor (dccertmon) user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pid file to use for this DC Certificate Monitor (dccertmon) instance
</longdesc>
<shortdesc lang="en">DC Certificate Monitor (dccertmon) pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}" />
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters to pass on to the DC Certificate Monitor (dccertmon)
</longdesc>
<shortdesc lang="en">Additional parameters for dccertmon</shortdesc>
<content type="string" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="status" timeout="20" />
<action name="monitor" timeout="10" interval="5" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#######################################################################
# Functions invoked by resource manager actions
# Validate the environment: required binaries, configuration file and user.
# Returns OCF_ERR_INSTALLED when the config file (outside of a probe) or the
# user is missing, success otherwise.
dccertmon_validate() {
    local rc

    check_binary "$OCF_RESKEY_binary"
    check_binary curl
    check_binary tr
    check_binary grep
    check_binary cut
    check_binary head

    # A config file on shared storage that is not available
    # during probes is OK.
    if [ ! -f "$OCF_RESKEY_config" ]; then
        if ! ocf_is_probe; then
            ocf_log err "Config $OCF_RESKEY_config doesn't exist"
            return $OCF_ERR_INSTALLED
        fi
        # The log level is the first argument of ocf_log.
        ocf_log warn "Config $OCF_RESKEY_config not available during a probe"
    fi

    getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    true
}
# Report whether the process recorded in the pid file is alive.
# Returns OCF_SUCCESS when it is, OCF_NOT_RUNNING otherwise; a stale pid file
# is removed.
dccertmon_status() {
    local pid

    # Without a pid file there is nothing to probe.
    if [ ! -f $OCF_RESKEY_pid ]; then
        ocf_log info "DC Certificate Monitor (dccertmon) is not running"
        return $OCF_NOT_RUNNING
    fi
    pid=$(cat $OCF_RESKEY_pid)

    # Signal 0 only tests whether the process can be signalled.
    if ocf_run -warn kill -s 0 $pid; then
        return $OCF_SUCCESS
    fi

    ocf_log info "Old PID file found, but DC Certificate Monitor (dccertmon) is not running"
    rm -f $OCF_RESKEY_pid
    return $OCF_NOT_RUNNING
}
# Monitor action: currently equivalent to the status check, plus a debug log
# line on success.
dccertmon_monitor() {
    local status_rc

    dccertmon_status
    status_rc=$?

    # Anything but success is handed straight back to the caller.
    [ $status_rc -eq $OCF_SUCCESS ] || return $status_rc

    ocf_log debug "DC Certificate Monitor (dccertmon) monitor succeeded"
    return $OCF_SUCCESS
}
# Start action: launch the daemon in the background as OCF_RESKEY_user, record
# its pid in the pid file and wait until the monitor check succeeds.
# Returns OCF_SUCCESS once running; exits with OCF_ERR_GENERIC if the monitor
# reports anything other than success or not-running.
dccertmon_start() {
local rc
dccertmon_status
rc=$?
if [ $rc -eq $OCF_SUCCESS ]; then
ocf_log info "DC Certificate Monitor (dccertmon) already running"
return $OCF_SUCCESS
fi
# Change the working dir to /, to be sure it's accessible
cd /
# run the actual dccertmon daemon. Don't use ocf_run as we're sending the tool's output
# straight to /dev/null anyway and using ocf_run would break stdout-redirection here.
# The single-quoted tail keeps $! unexpanded for the inner shell, so the pid of
# the backgrounded daemon is what gets echoed into the pid file.
su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \
$OCF_RESKEY_additional_parameters"' >> /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid
# Spin waiting for the server to come up.
# Let the CRM/LRM time us out if required
while true; do
dccertmon_monitor
rc=$?
[ $rc -eq $OCF_SUCCESS ] && break
if [ $rc -ne $OCF_NOT_RUNNING ]; then
ocf_log err "DC Certificate Monitor (dccertmon) start failed"
exit $OCF_ERR_GENERIC
fi
sleep 1
done
ocf_log info "DC Certificate Monitor (dccertmon) started"
return $OCF_SUCCESS
}
# Last-resort cleanup: SIGKILL any python process still running the dccertmon
# binary, regardless of what the pid file says.
dccertmon_confirm_stop() {
    local my_binary
    local my_processes
    local my_pattern

    my_binary=`which ${OCF_RESKEY_binary}`
    # With an empty path the pattern below would match unrelated python
    # processes, so refuse to kill anything in that case.
    if [ -z "${my_binary}" ]; then
        ocf_log warn "Unable to resolve ${OCF_RESKEY_binary}; skipping leftover process cleanup"
        return 0
    fi

    my_pattern="^(python|/usr/bin/python|/usr/bin/python3) ${my_binary}([^\w-]|$)"
    my_processes=`pgrep -l -f "${my_pattern}"`

    if [ -n "${my_processes}" ]
    then
        ocf_log info "About to SIGKILL the following: ${my_processes}"
        pkill -KILL -f "${my_pattern}"
    fi
}
# Stop action: send SIGTERM to the recorded pid, wait up to shutdown_timeout
# seconds for it to exit, escalate to SIGKILL if needed, then sweep leftover
# processes and remove the pid file.
# Returns OCF_SUCCESS; exits with OCF_ERR_GENERIC if SIGTERM cannot be sent.
dccertmon_stop() {
local rc
local pid
dccertmon_status
rc=$?
if [ $rc -eq $OCF_NOT_RUNNING ]; then
ocf_log info "DC Certificate Monitor (dccertmon) already stopped"
dccertmon_confirm_stop
return $OCF_SUCCESS
fi
# Try SIGTERM
pid=`cat $OCF_RESKEY_pid`
ocf_run kill -s TERM $pid
rc=$?
if [ $rc -ne 0 ]; then
ocf_log err "DC Certificate Monitor (dccertmon) couldn't be stopped"
dccertmon_confirm_stop
exit $OCF_ERR_GENERIC
fi
# Wait for the process to stop: 15 seconds by default, or the operation
# timeout (given in milliseconds) minus 5 seconds when one is provided.
shutdown_timeout=15
if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5))
fi
count=0
while [ $count -lt $shutdown_timeout ]; do
dccertmon_status
rc=$?
if [ $rc -eq $OCF_NOT_RUNNING ]; then
break
fi
count=`expr $count + 1`
sleep 1
ocf_log debug "DC Certificate Monitor (dccertmon) still hasn't stopped yet. Waiting ..."
done
dccertmon_status
rc=$?
if [ $rc -ne $OCF_NOT_RUNNING ]; then
# SIGTERM didn't help either, try SIGKILL
ocf_log info "DC Certificate Monitor (dccertmon) failed to stop after ${shutdown_timeout}s \
using SIGTERM. Trying SIGKILL ..."
ocf_run kill -s KILL $pid
fi
# Catch any process the pid file did not account for.
dccertmon_confirm_stop
ocf_log info "DC Certificate Monitor (dccertmon) stopped"
rm -f $OCF_RESKEY_pid
return $OCF_SUCCESS
}
#######################################################################
# meta-data and usage/help are answered before validation so they work even
# when the environment is incomplete.
case "$1" in
meta-data) meta_data
exit $OCF_SUCCESS;;
usage|help) usage
exit $OCF_SUCCESS;;
esac
# Anything except meta-data and help must pass validation
dccertmon_validate || exit $?
# What kind of method was invoked?
# validate-all has nothing left to do here: validation already ran above.
case "$1" in
start) dccertmon_start;;
stop) dccertmon_stop;;
status) dccertmon_status;;
monitor) dccertmon_monitor;;
validate-all) ;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED;;
esac

View File

@ -26,6 +26,7 @@ packages =
dcorch
dcdbsync
dcagent
dccertmon
[entry_points]
console_scripts =
@ -43,8 +44,10 @@ console_scripts =
dcorch-api-proxy = dcorch.cmd.api_proxy:main
dcdbsync-api = dcdbsync.cmd.api:main
dcagent-api = dcagent.cmd.audit:main
dccertmon = dccertmon.cmd.cert_mon:main
oslo.config.opts =
dccertmon.cmd.cert_mon.config = dccertmon.common.config:list_opts
dcagent.common.config = dcagent.common.config:list_opts
dcagent.common.api.api_config = dcagent.api.api_config:list_opts
dcorch.common.config = dcorch.common.config:list_opts

View File

@ -91,7 +91,7 @@ setenv =
{[testenv]setenv}
PYTHONPATH = {toxinidir}
commands =
pylint {posargs} dccommon dcdbsync dcmanager dcorch dcagent --rcfile=./.pylintrc
pylint {posargs} dccommon dcdbsync dcmanager dcorch dcagent dccertmon --rcfile=./.pylintrc
[testenv:black]
# This environment checks and displays the recommended changes by Black for formatting