Introduce dccertmon service
This commit introduces dccertmon, a new managed service for DC certificate auditing and management. Currently, platform cert management, DC cert management, and subcloud cert auditing are coupled into a single platform service (certmon). To meet the requirements of DC scalability and portability, DC specific functionality must be decoupled. These changes lay the groundwork for the new service, by: - Creating the necessary service files. - Introducing configs for the service. - Declaring high level methods (Skeleton - lifecycle and manager) DC-specific functionality will be migrated to this dccertmon service and optimized in subsequent changes. Non-DC cert management will continue to be handled by certmon. Overall, this commit introduces: - The OCF file necessary for high availability management of the dccertmon service by SM. - Package configurations to build the service (Package: distributedcloud-dccertmon). - Lifecycle manager for a running DC cert monitor service. - Skeleton/base service application logic - CertificateMonitorManager. - RPC notification handlers for subcloud online/managed. - Configuration for the log folders and log rotation. The logs will be available in /var/log/dccertmon/dccertmon.log. These changes are part of a set of commits to introduce the dccertmon service: [1] https://review.opendev.org/c/starlingx/ha/+/941205 [2] https://review.opendev.org/c/starlingx/stx-puppet/+/941208 Test Plan: - PASS: Build dccertmon package - PASS: Install and bootstrap system with custom ISO containing the newly created dccertmon package - PASS: Verify that the dccertmon.service is loaded - PASS: Verify dccertmon is being properly logged to the correct folder. - PASS: Check logged messages and verify execution of - Cert Watcher thread - Task Executor (Audit thread) - Periodic tasks running at expected intervals - PASS: Configure and provision the service using SM and verify it has correctly started and can be restarted with 'sm-restart'. 
- PASS: Tox checks running on dccertmon Note: This commit has been tested alongside the related changes and their respective test plans. [1][2] Story: 2011311 Task: 51663 Change-Id: Ic23d8d13e4b292cf0508d23eaae99b8e07f36d31 Signed-off-by: Salman Rana <salman.rana@windriver.com>
This commit is contained in:
parent
99291be851
commit
7d44c38c90
@ -6,5 +6,6 @@ distributedcloud-dcdbsync
|
||||
distributedcloud-dcmanager
|
||||
distributedcloud-dcorch
|
||||
distributedcloud-dcagent
|
||||
distributedcloud-dccertmon
|
||||
|
||||
python3-redfish
|
||||
|
@ -10,6 +10,7 @@ test_command=OS_STDOUT_CAPTURE=${OS_STDOUT_CAPTURE:-1}
|
||||
${PYTHON} -m subunit.run discover -s dcmanager $LISTOPT $IDOPTION
|
||||
${PYTHON} -m subunit.run discover -s dcorch $LISTOPT $IDOPTION
|
||||
${PYTHON} -m subunit.run discover -s dcagent $LISTOPT $IDOPTION
|
||||
${PYTHON} -m subunit.run discover -s dccertmon $LISTOPT $IDOPTION
|
||||
test_id_option=--load-list $IDFILE
|
||||
test_list_option=--list
|
||||
test_run_concurrency=echo 5
|
||||
|
10
distributedcloud/dccertmon/__init__.py
Normal file
10
distributedcloud/dccertmon/__init__.py
Normal file
@ -0,0 +1,10 @@
|
||||
#
|
||||
# Copyright (c) 2025 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
import pbr.version
|
||||
|
||||
|
||||
__version__ = pbr.version.VersionInfo("distributedcloud").version_string()
|
0
distributedcloud/dccertmon/cmd/__init__.py
Normal file
0
distributedcloud/dccertmon/cmd/__init__.py
Normal file
54
distributedcloud/dccertmon/cmd/cert_mon.py
Normal file
54
distributedcloud/dccertmon/cmd/cert_mon.py
Normal file
@ -0,0 +1,54 @@
|
||||
#
|
||||
# Copyright (c) 2025 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
"""
|
||||
DC Certificate Monitor Service
|
||||
"""
|
||||
import eventlet
|
||||
|
||||
eventlet.monkey_patch()
|
||||
|
||||
# pylint: disable=wrong-import-position
|
||||
from oslo_config import cfg # noqa: E402
|
||||
from oslo_i18n import _lazy # noqa: E402
|
||||
from oslo_log import log as logging # noqa: E402
|
||||
from oslo_service import service # noqa: E402
|
||||
|
||||
from dccertmon.common import config # noqa: E402
|
||||
from dcmanager.common import messaging # noqa: E402
|
||||
|
||||
# pylint: enable=wrong-import-position
|
||||
|
||||
_lazy.enable_lazy()
|
||||
|
||||
LOG = logging.getLogger("dccertmon")
|
||||
CONF = cfg.CONF
|
||||
|
||||
|
||||
def main():
    """Entry point of the ``dccertmon`` console script.

    Generates the service configuration file, parses it with oslo.config,
    sets up logging and RPC messaging, then launches the certificate
    monitor service and blocks until the launcher returns.
    """
    # The conf file is (re)written first so the parse below can pick it up.
    config.generate_config()
    logging.register_options(CONF)
    CONF(project="dccertmon")
    config.register_config_opts()

    logging.set_defaults()
    logging.setup(CONF, "dccertmon")
    messaging.setup()

    # Deferred import: the service module pulls in the manager, whose
    # periodic-task decorators read CONF.dccertmon.* at class-definition
    # time, so the options must already be registered.
    from dccertmon.common import service as dc_cert_mon

    monitor_service = dc_cert_mon.CertificateMonitorService()
    service_launcher = service.launch(CONF, monitor_service)

    LOG.info("Starting...")
    LOG.debug("Configuration:")
    CONF.log_opt_values(LOG, logging.DEBUG)

    service_launcher.wait()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
0
distributedcloud/dccertmon/common/__init__.py
Normal file
0
distributedcloud/dccertmon/common/__init__.py
Normal file
@ -0,0 +1,95 @@
|
||||
#
|
||||
# Copyright (c) 2025 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
import time
|
||||
|
||||
import eventlet
|
||||
import greenlet
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log
|
||||
from oslo_service import periodic_task
|
||||
|
||||
from dccertmon.common import watcher
|
||||
|
||||
|
||||
LOG = log.getLogger(__name__)
|
||||
CONF = cfg.CONF
|
||||
|
||||
|
||||
class CertificateMonitorManager(periodic_task.PeriodicTasks):
    """Runs the certificate watcher and the periodic audit tasks.

    Two greenthreads are managed: ``mon_thread`` drives the DC certificate
    watcher and ``worker_thread`` drives the oslo periodic tasks declared
    at the bottom of this class.
    """

    def __init__(self):
        super().__init__(CONF)
        # Greenthread handles; None whenever the matching loop is not running.
        self.mon_thread = None
        self.worker_thread = None

    def on_start(self):
        """Hook called right after the task executor has been spawned."""
        LOG.info("Service Start - prepare for initial audit")

    def start_task_executor(self):
        """Spawn the periodic-task loop, then run the start hook."""
        self.worker_thread = eventlet.greenthread.spawn(self.worker_task_loop)
        self.on_start()

    def start_cert_watcher(self):
        """Create and initialize the watcher, retrying every 5 s on failure.

        Once initialization succeeds the watch loop is spawned in its own
        greenthread.
        """
        dc_monitor = None
        initialized = False
        while not initialized:
            try:
                dc_monitor = watcher.DC_CertWatcher()
                dc_monitor.initialize()
                initialized = True
            except Exception as e:
                LOG.exception(e)
                time.sleep(5)

        # spawn monitor thread
        self.mon_thread = eventlet.greenthread.spawn(self.monitor_cert_loop, dc_monitor)

    def stop_cert_watcher(self):
        """Kill the watcher greenthread, if any, and wait for it to finish."""
        if not self.mon_thread:
            return
        self.mon_thread.kill()
        self.mon_thread.wait()
        self.mon_thread = None

    def stop_task_executor(self):
        """Kill the periodic-task greenthread, if any, and wait for it."""
        if not self.worker_thread:
            return
        self.worker_thread.kill()
        self.worker_thread.wait()
        self.worker_thread = None

    def worker_task_loop(self):
        """Run the periodic tasks in a loop until the greenthread is killed.

        Any other exception is logged and the loop carries on.
        """
        while True:
            try:
                self.run_periodic_tasks(context=None)
                # TODO(srana): Reset sleep after proper implementation
                time.sleep(60)
            except greenlet.GreenletExit:
                # Raised by kill(); this is the normal shutdown path.
                return
            except Exception as e:
                LOG.exception(e)

    def monitor_cert_loop(self, monitor):
        """Keep ``monitor.start_watch`` running until the thread is killed."""
        while True:
            # never exit until exit signal received
            try:
                monitor.start_watch(on_success=None, on_error=None)
            except greenlet.GreenletExit:
                return
            except Exception:
                # It shouldn't fall to here, but log and restart if it did
                LOG.exception("Unexpected exception from start_watch")
                time.sleep(1)

    # NOTE: the spacing arguments below are evaluated when the class body is
    # executed, so the dccertmon options must be registered before import.
    @periodic_task.periodic_task(spacing=CONF.dccertmon.audit_interval)
    def audit_sc_cert_start(self, context):
        """Periodic task placeholder; currently only logs its invocation."""
        LOG.info("periodic_task: audit_sc_cert_start")

    @periodic_task.periodic_task(spacing=5)
    def audit_sc_cert_task(self, context):
        """Periodic task placeholder; currently only logs its invocation."""
        LOG.info("periodic_task: audit_sc_cert_task")

    @periodic_task.periodic_task(spacing=CONF.dccertmon.retry_interval)
    def retry_monitor_task(self, context):
        """Periodic task placeholder; currently only logs its invocation."""
        LOG.info("periodic_task: retry_monitor_task")
|
132
distributedcloud/dccertmon/common/config.py
Normal file
132
distributedcloud/dccertmon/common/config.py
Normal file
@ -0,0 +1,132 @@
|
||||
#
|
||||
# Copyright (c) 2025 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
import configparser
|
||||
import os
|
||||
|
||||
import keyring
|
||||
from oslo_config import cfg
|
||||
|
||||
from dccommon import consts as dccommon_consts
|
||||
from dcmanager.common import utils
|
||||
|
||||
# Global oslo.config registry used by the functions in this module.
CONF = cfg.CONF

# Log record layout written to the [DEFAULT] section of the generated file.
logging_default_format_string = (
    "%(process)d %(levelname)s %(name)s [-] %(instance)s%(message)s"
)

# Section -> {option: value} mapping that create_conf_file() serializes.
# Every value is a string. The "None" placeholders (passwords, region_name,
# transport_url) are replaced in place by override_config_values().
config_values = {
    "keystone_authtoken": {
        "auth_url": "http://controller.internal:5000",
        "auth_uri": "http://controller.internal:5000",
        "auth_type": "password",
        "project_name": "services",
        "username": "sysinv",
        "password": "None",
        "user_domain_name": "Default",
        "project_domain_name": "Default",
        "interface": "internal",
        "region_name": "None",
    },
    "DEFAULT": {
        "syslog_log_facility": "local4",
        "use_syslog": "True",
        "debug": "False",
        "logging_default_format_string": logging_default_format_string,
        "logging_debug_format_suffix": "%(pathname)s:%(lineno)d",
        "auth_strategy": "keystone",
        "transport_url": "None",
    },
    # NOTE(review): only audit_interval and retry_interval are registered in
    # dc_cert_mon_opts below; the other keys are written to the file but no
    # option in this module reads them yet.
    "dccertmon": {
        "retry_interval": "600",
        "max_retry": "14",
        "audit_interval": "86400",
        "startup_audit_all": "False",
        "network_retry_interval": "180",
        "network_max_retry": "30",
        "audit_batch_size": "40",
        "audit_greenpool_size": "20",
        "certificate_timeout_secs": "5",
    },
    "endpoint_cache": {
        "auth_plugin": "password",
        "username": "dcmanager",
        "password": "None",
        "project_name": "services",
        "user_domain_name": "Default",
        "project_domain_name": "Default",
        "http_connect_timeout": "15",
        "auth_uri": "http://controller.internal:5000/v3",
    },
}

# Options registered without a group (i.e. under [DEFAULT]).
common_opts = [cfg.StrOpt("host", default="localhost", help="hostname of the machine")]

# Options registered under the [dccertmon] group.
dc_cert_mon_opts = [
    cfg.IntOpt(
        "audit_interval",
        default=86400,  # 24 hours
        help="Interval to run certificate audit",
    ),
    cfg.IntOpt(
        "retry_interval",
        default=10 * 60,  # retry every 10 minutes
        help="Interval to reattempt accessing external system if failure occurred",
    ),
]
|
||||
|
||||
|
||||
def register_config_opts():
    """Register the common and the ``[dccertmon]`` options on ``CONF``."""
    # (options, group) pairs; a group of None registers under [DEFAULT].
    registrations = (
        (common_opts, None),
        (dc_cert_mon_opts, "dccertmon"),
    )
    for opts, group in registrations:
        CONF.register_opts(opts, group)
|
||||
|
||||
|
||||
def override_config_values():
    """Fill the dynamic entries of ``config_values`` in place.

    Passwords are read from the keyring and the region name is obtained
    through ``utils.get_region_name``; the RabbitMQ transport URL is built
    from the ``amqp``/``rabbit`` keyring password.
    """
    keystone_section = config_values["keystone_authtoken"]
    endpoint_section = config_values["endpoint_cache"]
    services_user = dccommon_consts.SERVICES_USER_NAME

    rabbit_auth_password = keyring.get_password("amqp", "rabbit")

    keystone_section["region_name"] = utils.get_region_name(
        "http://controller.internal:6385"
    )
    endpoint_section["password"] = keyring.get_password("dcmanager", services_user)
    keystone_section["password"] = keyring.get_password("sysinv", services_user)

    transport_url = f"rabbit://guest:{rabbit_auth_password}@controller.internal:5672"
    config_values["DEFAULT"]["transport_url"] = transport_url
|
||||
|
||||
|
||||
def create_conf_file():
    """Write ``config_values`` to ``/etc/dccertmon/dccertmon.conf``.

    The file contains service passwords, so it is created with mode 0600
    and its mode is enforced *before* any content is written. Previously
    the file was opened with the default umask and only restricted after
    the write, leaving a window in which the secrets could be readable by
    other users.

    Raises:
        OSError: if the directory or the file cannot be created or written.
    """
    output_dir = "/etc/dccertmon"
    output_file = os.path.join(output_dir, "dccertmon.conf")

    os.makedirs(output_dir, exist_ok=True)

    config = configparser.RawConfigParser()

    # Populate the config parser with values
    for section, options in config_values.items():
        config[section] = options

    # The mode argument only applies when the file is newly created ...
    fd = os.open(output_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as f:
        # ... so also tighten a pre-existing file before writing secrets.
        os.fchmod(f.fileno(), 0o600)
        config.write(f)
|
||||
|
||||
|
||||
def generate_config():
    """Produce the service configuration file.

    Runs two steps in order: set the dynamic values (e.g., passwords,
    urls, etc) and then create the service conf file.
    """
    steps = (override_config_values, create_conf_file)
    for step in steps:
        step()
|
||||
|
||||
|
||||
def list_opts():
    """Yield ``(group, options)`` pairs for the oslo.config sample generator.

    A group of ``None`` denotes the ``[DEFAULT]`` section.
    """
    yield from (
        ("dccertmon", dc_cert_mon_opts),
        (None, common_opts),
    )
|
83
distributedcloud/dccertmon/common/service.py
Normal file
83
distributedcloud/dccertmon/common/service.py
Normal file
@ -0,0 +1,83 @@
|
||||
#
|
||||
# Copyright (c) 2025 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
import oslo_messaging
|
||||
from oslo_service import service
|
||||
|
||||
from dccertmon.common.certificate_monitor_manager import CertificateMonitorManager
|
||||
from dccertmon.common import utils
|
||||
from dcmanager.common import consts
|
||||
from dcmanager.common import messaging as rpc_messaging
|
||||
|
||||
CONF = cfg.CONF
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CertificateMonitorService(service.Service):
    """Lifecycle manager for a running DC cert monitor service."""

    def __init__(self):
        super(CertificateMonitorService, self).__init__()
        self.rpc_api_version = consts.RPC_API_VERSION
        self.topic = consts.TOPIC_DC_NOTIFICATION
        # TODO(srana): Refactor DC role usage due to deprecation.
        self.dc_role = utils.DC_ROLE_UNDETECTED
        self.manager = CertificateMonitorManager()
        # Both are populated in start() when running on a system controller.
        self._rpc_server = None
        self.target = None

    def start(self):
        """Start the watcher and task executor, plus RPC on a system controller."""
        LOG.info("Starting %s", self.__class__.__name__)
        super(CertificateMonitorService, self).start()
        # _get_dc_role() returns the role instead of storing it. Without this
        # assignment dc_role stayed DC_ROLE_UNDETECTED, so the RPC server
        # below was never started (nor stopped in stop()).
        self.dc_role = self._get_dc_role()

        self.manager.start_cert_watcher()
        self.manager.start_task_executor()

        if self.dc_role == utils.DC_ROLE_SYSTEMCONTROLLER:
            self.target = oslo_messaging.Target(
                version=self.rpc_api_version, server=CONF.host, topic=self.topic
            )
            self._rpc_server = rpc_messaging.get_rpc_server(self.target, self)
            self._rpc_server.start()

    def stop(self):
        """Stop the RPC server (if applicable), the watcher and the executor."""
        LOG.info("Stopping %s", self.__class__.__name__)

        if self.dc_role == utils.DC_ROLE_SYSTEMCONTROLLER:
            self._stop_rpc_server()

        self.manager.stop_cert_watcher()
        self.manager.stop_task_executor()
        super(CertificateMonitorService, self).stop()

    def _stop_rpc_server(self):
        """Stop the RPC server and wait for it; failures are logged, not raised."""
        if self._rpc_server:
            try:
                self._rpc_server.stop()
                self._rpc_server.wait()
                LOG.info("Engine service stopped successfully")
            except Exception as ex:
                LOG.error("Failed to stop engine service: %s", ex)
                LOG.exception(ex)

    def _get_dc_role(self):
        """Return the DC role of this host (currently always system controller)."""
        # TODO(srana): Update after migrating from certmon
        return utils.DC_ROLE_SYSTEMCONTROLLER

    def subcloud_online(self, context, subcloud_name=None):
        """TODO(srana): Trigger a subcloud online audit"""
        LOG.info("%s is online.", subcloud_name)

    def subcloud_managed(self, context, subcloud_name=None):
        """TODO(srana): Trigger a subcloud audit"""
        LOG.info("%s is managed.", subcloud_name)

    def subcloud_sysinv_endpoint_update(self, ctxt, subcloud_name, endpoint):
        """TODO(srana): Update sysinv endpoint of dc token cache"""
        LOG.info("Update subcloud: %s sysinv endpoint", subcloud_name)
|
26
distributedcloud/dccertmon/common/utils.py
Normal file
26
distributedcloud/dccertmon/common/utils.py
Normal file
@ -0,0 +1,26 @@
|
||||
#
|
||||
# Copyright (c) 2025 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# TODO(srana): Refactor DC role usage due to deprecation.
|
||||
# Possible values of the service's DC role attribute.
DC_ROLE_UNDETECTED = "unknown"
DC_ROLE_SUBCLOUD = "subcloud"
DC_ROLE_SYSTEMCONTROLLER = "systemcontroller"

# NOTE(review): presumably the overall timeout and the polling delay used
# while detecting the DC role; nothing visible here consumes them yet.
DC_ROLE_TIMEOUT_SECONDS = 180
DC_ROLE_DELAY_SECONDS = 5

# Subcloud deploy states for which a certificate audit must not be run.
INVALID_SUBCLOUD_AUDIT_DEPLOY_STATES = [
    # Secondary subclouds should not be audited as they are expected
    # to be managed by a peer system controller (geo-redundancy feat.)
    "create-complete",
    "create-failed",
    "pre-rehome",
    "rehome-failed",
    "rehome-pending",
    "rehoming",
    "secondary",
    "secondary-failed",
]
|
23
distributedcloud/dccertmon/common/watcher.py
Normal file
23
distributedcloud/dccertmon/common/watcher.py
Normal file
@ -0,0 +1,23 @@
|
||||
#
|
||||
# Copyright (c) 2025 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
import time
|
||||
|
||||
from oslo_log import log
|
||||
|
||||
LOG = log.getLogger(__name__)
|
||||
|
||||
|
||||
class DC_CertWatcher:
    """Skeleton certificate watcher.

    The methods only log that they were called; ``start_watch`` then
    blocks for 60 seconds.
    """

    def __init__(self):
        """Create a watcher; there is no state to set up yet."""

    def initialize(self):
        """Log that the watcher is being initialized."""
        LOG.info("initialize DC_CertWatcher")

    def start_watch(self, on_success, on_error):
        """Log the call and sleep for 60 seconds; both callbacks are unused."""
        LOG.info("DC_CertWatcher start_watch")
        time.sleep(60)
|
9
distributedcloud/dccertmon/config-generator.conf
Normal file
9
distributedcloud/dccertmon/config-generator.conf
Normal file
@ -0,0 +1,9 @@
|
||||
[DEFAULT]
output_file = etc/dccertmon/dccertmon.conf.sample
wrap_width = 79
# Must match the entry point name registered under "oslo.config.opts" in
# setup.cfg (dccertmon.cmd.cert_mon.config = dccertmon.common.config:list_opts);
# otherwise the generator cannot find the dccertmon options.
namespace = dccertmon.cmd.cert_mon.config
namespace = oslo.messaging
namespace = oslo.middleware
namespace = oslo.log
namespace = oslo.service.service
namespace = oslo.service.periodic_task
|
@ -1,5 +1,5 @@
|
||||
# Copyright (c) 2016 Ericsson AB.
|
||||
# Copyright (c) 2017-2024 Wind River Systems, Inc.
|
||||
# Copyright (c) 2017-2025 Wind River Systems, Inc.
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
@ -23,6 +23,7 @@ TOPIC_DC_MANAGER_STATE = "dcmanager-state"
|
||||
TOPIC_DC_MANAGER_AUDIT = "dcmanager-audit"
|
||||
TOPIC_DC_MANAGER_AUDIT_WORKER = "dcmanager-audit-worker"
|
||||
TOPIC_DC_MANAGER_ORCHESTRATOR = "dcmanager-orchestrator"
|
||||
TOPIC_DC_NOTIFICATION = "DCMANAGER-NOTIFICATION"
|
||||
|
||||
CERTS_VAULT_DIR = "/opt/dc-vault/certs"
|
||||
PATCH_VAULT_DIR = "/opt/dc-vault/patches"
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2017-2024 Wind River Systems, Inc.
|
||||
# Copyright (c) 2017-2025 Wind River Systems, Inc.
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
@ -468,12 +468,9 @@ class DCManagerNotifications(RPCClient):
|
||||
1.0 - Initial version
|
||||
"""
|
||||
|
||||
DCMANAGER_RPC_API_VERSION = "1.0"
|
||||
TOPIC_DC_NOTIFICIATION = "DCMANAGER-NOTIFICATION"
|
||||
|
||||
def __init__(self, timeout=None):
|
||||
super(DCManagerNotifications, self).__init__(
|
||||
timeout, self.TOPIC_DC_NOTIFICIATION, self.DCMANAGER_RPC_API_VERSION
|
||||
timeout, consts.TOPIC_DC_NOTIFICATION, consts.RPC_API_VERSION
|
||||
)
|
||||
|
||||
def subcloud_online(self, ctxt, subcloud_name):
|
||||
@ -495,5 +492,5 @@ class DCManagerNotifications(RPCClient):
|
||||
endpoint=endpoint,
|
||||
),
|
||||
fanout=True,
|
||||
version=self.DCMANAGER_RPC_API_VERSION,
|
||||
version=consts.RPC_API_VERSION,
|
||||
)
|
||||
|
@ -83,6 +83,14 @@ Depends:
|
||||
Description: Distributed Cloud DCAgent
|
||||
Distributed Cloud DCAgent
|
||||
|
||||
Package: distributedcloud-dccertmon
|
||||
Architecture: any
|
||||
Depends:
|
||||
${misc:Depends},
|
||||
${python3:Depends}
|
||||
Description: Distributed Cloud Certificate Monitor
|
||||
Distributed Cloud Certificate Monitor
|
||||
|
||||
Package: distributedcloud-wheels
|
||||
Architecture: any
|
||||
Depends:
|
||||
|
@ -0,0 +1 @@
|
||||
var/log/dccertmon
|
@ -0,0 +1,5 @@
|
||||
etc/dccertmon/dccertmon.conf
|
||||
usr/bin/dccertmon
|
||||
usr/lib/ocf/resource.d/openstack/dccertmon*
|
||||
usr/lib/python3/dist-packages/dccertmon/*
|
||||
usr/lib/tmpfiles.d/dccertmon.conf
|
@ -1,3 +1,4 @@
|
||||
d /var/log/dccertmon 0755 root root - -
|
||||
d /var/log/dcagent 0755 root root - -
|
||||
d /var/log/dcdbsync 0755 root root - -
|
||||
d /var/log/dcmanager 0755 root root - -
|
||||
|
@ -43,6 +43,7 @@ override_dh_install:
|
||||
install -p -D -m 644 files/dcorch.conf $(TMP_DIR)/dcorch.conf
|
||||
install -p -D -m 644 files/dcmanager.conf $(TMP_DIR)/dcmanager.conf
|
||||
install -p -D -m 644 files/dcagent.conf $(TMP_DIR)/dcagent.conf
|
||||
install -p -D -m 644 files/dccertmon.conf $(TMP_DIR)/dccertmon.conf
|
||||
|
||||
# install systemd unit files for optional second instance
|
||||
install -p -D -m 644 files/dcdbsync-openstack-api.service $(SYSTEMD_DIR)/dcdbsync-openstack-api.service
|
||||
@ -67,6 +68,7 @@ override_dh_install:
|
||||
PYTHONPATH=. oslo-config-generator --config-file=./dcorch/config-generator.conf
|
||||
PYTHONPATH=. oslo-config-generator --config-file=./dcdbsync/config-generator.conf
|
||||
PYTHONPATH=. oslo-config-generator --config-file=./dcagent/config-generator.conf
|
||||
PYTHONPATH=. oslo-config-generator --config-file=./dccertmon/config-generator.conf
|
||||
|
||||
# install default config files
|
||||
oslo-config-generator \
|
||||
@ -89,6 +91,11 @@ override_dh_install:
|
||||
--output-file ./dcagent/dcagent.conf.sample
|
||||
install -p -D -m 640 ./dcagent/dcagent.conf.sample $(SYS_CONF_DIR)/dcagent/dcagent.conf
|
||||
|
||||
oslo-config-generator \
|
||||
--config-file ./dccertmon/config-generator.conf \
|
||||
--output-file ./dccertmon/dccertmon.conf.sample
|
||||
install -p -D -m 640 ./dccertmon/dccertmon.conf.sample $(SYS_CONF_DIR)/dccertmon/dccertmon.conf
|
||||
|
||||
# install rvmc_install.py script
|
||||
install -d $(ROOT)/usr/local/bin/
|
||||
install -p -D -m 700 scripts/rvmc_install.py $(ROOT)/usr/local/bin
|
||||
@ -98,6 +105,7 @@ override_dh_install:
|
||||
rm -rf $(ROOT)/usr/lib/python3/dist-packages/dcmanager/tests
|
||||
rm -rf $(ROOT)/usr/lib/python3/dist-packages/dcorch/tests
|
||||
rm -rf $(ROOT)/usr/lib/python3/dist-packages/dcagent/tests
|
||||
rm -rf $(ROOT)/usr/lib/python3/dist-packages/dccertmon/tests
|
||||
|
||||
dh_install
|
||||
|
||||
@ -109,7 +117,8 @@ override_dh_fixperms:
|
||||
--exclude etc/dcdbsync/dcdbsync.conf \
|
||||
--exclude etc/dcmanager/dcmanager.conf \
|
||||
--exclude etc/dcorch/dcorch.conf \
|
||||
--exclude etc/dcagent/dcagent.conf
|
||||
--exclude etc/dcagent/dcagent.conf \
|
||||
--exclude etc/dccertmon/dccertmon.conf
|
||||
|
||||
execute_after_dh_fixperms:
|
||||
# forcing 600 for /var/opt/dc/ansible
|
||||
|
4
distributedcloud/etc/dccertmon/README-dccertmon.conf.txt
Normal file
4
distributedcloud/etc/dccertmon/README-dccertmon.conf.txt
Normal file
@ -0,0 +1,4 @@
|
||||
To generate the sample dccertmon.conf file, run the following
|
||||
command from the top level of the dccertmon directory:
|
||||
|
||||
tox -egenconfig
|
5
distributedcloud/etc/dccertmon/policy.json
Executable file
5
distributedcloud/etc/dccertmon/policy.json
Executable file
@ -0,0 +1,5 @@
|
||||
{
|
||||
"context_is_admin": "role:admin",
|
||||
"admin_or_owner": "is_admin:True or project_id:%(project_id)s",
|
||||
"default": "rule:admin_or_owner"
|
||||
}
|
1
distributedcloud/files/dccertmon.conf
Normal file
1
distributedcloud/files/dccertmon.conf
Normal file
@ -0,0 +1 @@
|
||||
d /var/run/dccertmon 0755 root root -
|
@ -91,6 +91,21 @@
|
||||
endscript
|
||||
}
|
||||
|
||||
/var/log/dccertmon/*.log
|
||||
{
|
||||
nodateext
|
||||
size 20M
|
||||
start 1
|
||||
rotate 20
|
||||
missingok
|
||||
notifempty
|
||||
compress
|
||||
sharedscripts
|
||||
postrotate
|
||||
systemctl reload syslog-ng > /dev/null 2>&1 || true
|
||||
endscript
|
||||
}
|
||||
|
||||
/var/log/dcmanager/ansible/*.log
|
||||
{
|
||||
nodateext
|
||||
|
@ -7,6 +7,7 @@ destination d_dcmanager_orch { file("/var/log/dcmanager/orchestrator.log" templa
|
||||
destination d_dcorch { file("/var/log/dcorch/dcorch.log" template(t_preformatted)); };
|
||||
destination d_dcdbsync { file("/var/log/dcdbsync/dcdbsync.log" template(t_preformatted)); };
|
||||
destination d_dcagent { file("/var/log/dcagent/dcagent.log" template(t_preformatted)); };
|
||||
destination d_dccertmon { file("/var/log/dccertmon/dccertmon.log" template(t_preformatted)); };
|
||||
|
||||
# Distributed Cloud Log Filters
|
||||
filter f_dcmanagermanager { facility(local4) and program(dcmanager-manager); };
|
||||
@ -22,6 +23,8 @@ filter f_dcdbsyncapi { facility(local4) and program(dcdbsync-api); };
|
||||
|
||||
filter f_dcagentapi { facility(local4) and program(dcagent-api); };
|
||||
|
||||
filter f_dccertmon { facility(local4) and program(dccertmon); };
|
||||
|
||||
# Distributed Cloud Log Path
|
||||
log {source(s_src); filter(f_dcmanagermanager); destination(d_dcmanager); };
|
||||
log {source(s_src); filter(f_dcmanageraudit); destination(d_dcmanager_audit); };
|
||||
@ -32,4 +35,5 @@ log {source(s_src); filter(f_dcorchengine); destination(d_dcorch); };
|
||||
log {source(s_src); filter(f_dcorchapiproxy); destination(d_dcorch); };
|
||||
log {source(s_src); filter(f_dcdbsyncapi); destination(d_dcdbsync); };
|
||||
log {source(s_src); filter(f_dcagentapi); destination(d_dcagent); };
|
||||
log {source(s_src); filter(f_dccertmon); destination(d_dccertmon); };
|
||||
|
||||
|
323
distributedcloud/ocf/dccertmon
Normal file
323
distributedcloud/ocf/dccertmon
Normal file
@ -0,0 +1,323 @@
|
||||
#!/bin/sh
|
||||
# OpenStack DC Certificate Monitor Service (dccertmon)
|
||||
#
|
||||
# Description: Manages a DC Certificate Monitor Service
|
||||
# (dccertmon) process as an HA resource
|
||||
#
|
||||
# Copyright (c) 2025 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
#
|
||||
# See usage() function below for more details ...
|
||||
#
|
||||
# OCF instance parameters:
|
||||
# OCF_RESKEY_binary
|
||||
# OCF_RESKEY_config
|
||||
# OCF_RESKEY_user
|
||||
# OCF_RESKEY_pid
|
||||
# OCF_RESKEY_additional_parameters
|
||||
#######################################################################
|
||||
# Initialization:
|
||||
|
||||
# Locate (unless already set by the caller) and source the OCF shell
# helper library, which provides ocf_log, ocf_run, check_binary, etc.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

#######################################################################

# Fill in some defaults if no values are specified

OCF_RESKEY_binary_default="dccertmon"
OCF_RESKEY_config_default="/etc/dccertmon/dccertmon.conf"
OCF_RESKEY_user_default="root"
OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid"

# ": ${VAR=default}" assigns only when VAR is unset, so values supplied by
# the cluster manager take precedence over the defaults above.
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
|
||||
|
||||
#######################################################################
|
||||
|
||||
# Print a short description of the supported actions to stdout.
usage() {
    cat <<UEND
usage: $0 (start|stop|validate-all|meta-data|status|monitor)

$0 manages an OpenStack DC Certificate Monitor Service (dccertmon) process as an HA resource

The 'start' operation starts the dccertmon service.
The 'stop' operation stops the dccertmon service.
The 'validate-all' operation reports whether the parameters are valid
The 'meta-data' operation reports this RA's meta-data information
The 'status' operation reports whether the dccertmon service is running
The 'monitor' operation reports whether the dccertmon service seems to be working

UEND
}
|
||||
|
||||
# Print the resource agent meta-data (parameters and supported actions)
# as XML on stdout. Parameter defaults are expanded from the
# OCF_RESKEY_*_default variables defined at the top of this script.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="dccertmon">
<version>1.0</version>

<longdesc lang="en">
Resource agent for the DC Certificate Monitor Service (dccertmon)
</longdesc>
<shortdesc lang="en">Manages the DC Certificate Monitor
Service (dccertmon)</shortdesc>
<parameters>

<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the DC Certificate Monitor binary (dccertmon)
</longdesc>
<shortdesc lang="en">DC Certificate Monitor binary (dccertmon)</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>

<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Location of the DC Certificate Monitor (dccertmon) configuration file
</longdesc>
<shortdesc lang="en">DC Certificate Monitor (dccertmon) config file</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>

<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running DC Certificate Monitor (dccertmon)
</longdesc>
<shortdesc lang="en">DC Certificate Monitor (dccertmon) user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>

<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pid file to use for this DC Certificate Monitor (dccertmon) instance
</longdesc>
<shortdesc lang="en">DC Certificate Monitor (dccertmon) pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}" />
</parameter>

<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters to pass on to the DC Certificate Monitor (dccertmon)
</longdesc>
<shortdesc lang="en">Additional parameters for dccertmon</shortdesc>
<content type="string" />
</parameter>

</parameters>

<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="status" timeout="20" />
<action name="monitor" timeout="10" interval="5" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
|
||||
|
||||
#######################################################################
|
||||
# Functions invoked by resource manager actions
|
||||
|
||||
# Validate the environment: required binaries, the config file and the
# service user.
# Returns 0 when everything is in place, $OCF_ERR_INSTALLED when the config
# file (outside of a probe) or the user is missing.
dccertmon_validate() {
    local rc

    check_binary $OCF_RESKEY_binary
    check_binary curl
    check_binary tr
    check_binary grep
    check_binary cut
    check_binary head

    # A config file on shared storage that is not available
    # during probes is OK.
    if [ ! -f "$OCF_RESKEY_config" ]; then
        if ! ocf_is_probe; then
            ocf_log err "Config $OCF_RESKEY_config doesn't exist"
            return $OCF_ERR_INSTALLED
        fi
        # "ocf_log warn" is the established way to log a warning; the
        # previous "ocf_log_warn" is not a known ocf-shellfuncs function.
        ocf_log warn "Config $OCF_RESKEY_config not available during a probe"
    fi

    getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    true
}
|
||||
|
||||
# Report whether the process recorded in the pid file is alive.
# Returns $OCF_SUCCESS when it is, $OCF_NOT_RUNNING otherwise. A pid file
# whose process no longer exists is removed.
dccertmon_status() {
    local pid
    local rc

    # No pid file means the service was never started (or was cleaned up).
    if [ ! -f $OCF_RESKEY_pid ]; then
        ocf_log info "DC Certificate Monitor (dccertmon) is not running"
        return $OCF_NOT_RUNNING
    fi
    pid=$(cat $OCF_RESKEY_pid)

    # Signal 0 only checks that the process exists; nothing is delivered.
    ocf_run -warn kill -s 0 $pid
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log info "Old PID file found, but DC Certificate Monitor (dccertmon) is not running"
        rm -f $OCF_RESKEY_pid
        return $OCF_NOT_RUNNING
    fi

    return $OCF_SUCCESS
}
|
||||
|
||||
# Monitor action: currently equivalent to the status check, with a debug
# log line on success. Returns whatever dccertmon_status returned.
dccertmon_monitor() {
    local rc

    dccertmon_status
    rc=$?

    # Anything but success is handed straight back to the caller.
    if [ $rc -eq $OCF_SUCCESS ]; then
        ocf_log debug "DC Certificate Monitor (dccertmon) monitor succeeded"
    fi
    return $rc
}
|
||||
|
||||
# Start action: launch the daemon as $OCF_RESKEY_user, record its pid and
# wait until the monitor reports success.
# Returns $OCF_SUCCESS once running; exits with $OCF_ERR_GENERIC if the
# monitor reports anything other than success or "not running".
dccertmon_start() {
    local rc

    dccertmon_status
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ]; then
        ocf_log info "DC Certificate Monitor (dccertmon) already running"
        return $OCF_SUCCESS
    fi

    # Change the working dir to /, to be sure it's accessible
    cd /

    # Run the actual dccertmon daemon. ocf_run is not used because the tool's
    # output goes straight to /dev/null anyway and ocf_run would break the
    # stdout redirection that captures the background pid.
    su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \
        $OCF_RESKEY_additional_parameters"' >> /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid

    # Spin waiting for the server to come up.
    # Let the CRM/LRM time us out if required
    while true; do
        dccertmon_monitor
        rc=$?
        if [ $rc -eq $OCF_SUCCESS ]; then
            break
        elif [ $rc -ne $OCF_NOT_RUNNING ]; then
            ocf_log err "DC Certificate Monitor (dccertmon) start failed"
            exit $OCF_ERR_GENERIC
        fi
        sleep 1
    done

    ocf_log info "DC Certificate Monitor (dccertmon) started"
    return $OCF_SUCCESS
}
|
||||
|
||||
# Make sure no dccertmon python process survives a stop: any process whose
# command line is a python interpreter running the dccertmon binary is
# sent SIGKILL.
dccertmon_confirm_stop() {
    # my_binary was previously declared as "my_bin", so the variable that
    # is actually assigned leaked into the global scope.
    local my_binary
    local my_pattern
    local my_processes

    my_binary=`which ${OCF_RESKEY_binary}`
    # Single definition of the match pattern shared by pgrep and pkill.
    my_pattern="^(python|/usr/bin/python|/usr/bin/python3) ${my_binary}([^\w-]|$)"
    my_processes=`pgrep -l -f "${my_pattern}"`

    if [ -n "${my_processes}" ]
    then
        ocf_log info "About to SIGKILL the following: ${my_processes}"
        pkill -KILL -f "${my_pattern}"
    fi
}
|
||||
|
||||
# Stop action: send SIGTERM, wait up to the shutdown timeout, escalate to
# SIGKILL if needed, then sweep leftovers and remove the pid file.
# Returns $OCF_SUCCESS when stopped; exits with $OCF_ERR_GENERIC if SIGTERM
# could not be delivered.
dccertmon_stop() {
    local rc
    local pid
    # Kept local like rc/pid so they do not leak into the global scope.
    local shutdown_timeout
    local count

    dccertmon_status
    rc=$?
    if [ $rc -eq $OCF_NOT_RUNNING ]; then
        ocf_log info "DC Certificate Monitor (dccertmon) already stopped"
        dccertmon_confirm_stop
        return $OCF_SUCCESS
    fi

    # Try SIGTERM
    pid=`cat $OCF_RESKEY_pid`
    ocf_run kill -s TERM $pid
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "DC Certificate Monitor (dccertmon) couldn't be stopped"
        dccertmon_confirm_stop
        exit $OCF_ERR_GENERIC
    fi

    # stop waiting
    shutdown_timeout=15
    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        # The meta timeout is in milliseconds; keep 5 s in reserve for the
        # SIGKILL fallback below.
        shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5))
    fi
    count=0
    while [ $count -lt $shutdown_timeout ]; do
        dccertmon_status
        rc=$?
        if [ $rc -eq $OCF_NOT_RUNNING ]; then
            break
        fi
        count=$((count + 1))
        sleep 1
        ocf_log debug "DC Certificate Monitor (dccertmon) still hasn't stopped yet. Waiting ..."
    done

    dccertmon_status
    rc=$?
    if [ $rc -ne $OCF_NOT_RUNNING ]; then
        # SIGTERM didn't help either, try SIGKILL
        ocf_log info "DC Certificate Monitor (dccertmon) failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL ..."
        ocf_run kill -s KILL $pid
    fi
    dccertmon_confirm_stop

    ocf_log info "DC Certificate Monitor (dccertmon) stopped"

    rm -f $OCF_RESKEY_pid

    return $OCF_SUCCESS
}
|
||||
|
||||
#######################################################################
|
||||
|
||||
# Informational actions are answered immediately, before any validation.
case "$1" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        usage
        exit $OCF_SUCCESS
        ;;
esac

# Every remaining action requires dccertmon_validate to succeed first;
# on failure the script exits with the validator's status.
dccertmon_validate || exit $?

# Dispatch the requested action to its handler. validate-all has nothing
# further to do because validation already ran above; any unrecognised
# action prints usage and exits with OCF_ERR_UNIMPLEMENTED.
case "$1" in
    start)
        dccertmon_start
        ;;
    stop)
        dccertmon_stop
        ;;
    status)
        dccertmon_status
        ;;
    monitor)
        dccertmon_monitor
        ;;
    validate-all)
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
|
||||
|
@ -26,6 +26,7 @@ packages =
|
||||
dcorch
|
||||
dcdbsync
|
||||
dcagent
|
||||
dccertmon
|
||||
|
||||
[entry_points]
|
||||
console_scripts =
|
||||
@ -43,8 +44,10 @@ console_scripts =
|
||||
dcorch-api-proxy = dcorch.cmd.api_proxy:main
|
||||
dcdbsync-api = dcdbsync.cmd.api:main
|
||||
dcagent-api = dcagent.cmd.audit:main
|
||||
dccertmon = dccertmon.cmd.cert_mon:main
|
||||
|
||||
oslo.config.opts =
|
||||
dccertmon.cmd.cert_mon.config = dccertmon.common.config:list_opts
|
||||
dcagent.common.config = dcagent.common.config:list_opts
|
||||
dcagent.common.api.api_config = dcagent.api.api_config:list_opts
|
||||
dcorch.common.config = dcorch.common.config:list_opts
|
||||
|
@ -91,7 +91,7 @@ setenv =
|
||||
{[testenv]setenv}
|
||||
PYTHONPATH = {toxinidir}
|
||||
commands =
|
||||
pylint {posargs} dccommon dcdbsync dcmanager dcorch dcagent --rcfile=./.pylintrc
|
||||
pylint {posargs} dccommon dcdbsync dcmanager dcorch dcagent dccertmon --rcfile=./.pylintrc
|
||||
|
||||
[testenv:black]
|
||||
# This environment checks and displays the recommended changes by Black for formatting
|
||||
|
Loading…
x
Reference in New Issue
Block a user