Report Tool: Modularize code
Currently, all of the code for the plugin algorithms is in one file, and
all of the code for the correlator is in one file as well. As the
correlator begins to find more failures/events/state changes and more
plugin algorithms are added to the tool, these files will get very long.
To make the code more modular and easier to extend, this update:

- Splits the plugin algorithms into individual files.
- Adds a --report command line option to collect so that the report
  analysis can be run as part of the collect operation and the results
  included in the collect bundle itself.
- Supports running the tool manually on any system node.
- Organizes the tool files in git so the tool can be run directly from git.
- Removes the unnecessary report 'tool' directory in report.
- Moves the report_analysis output directory to the collect bundle itself,
  or redirects it to /tmp/report_analysis if the collect bundle directory
  is not writable by the current user.
- Adds support for developer plugins in /etc/collect/plugins.
- Removes ^M characters from execution_engine.py.
- Adds debug logging to the report.log file.
- Removes some default-on logging, making report output less verbose.
- Adds a --file option so the tool can be pointed at a bundle .tar file.

Usage:

Run report as part of collect - analyse during collect and package the results

> collect all --report

Note: The analysis is stored as report_analysis.tgz inside the bundle.

Run on system - point to scratch or a tar file in scratch

> /usr/local/bin/report/report.py --directory /scratch
> /usr/local/bin/report/report.py --file /scratch/<bundle.tar>

Run from utilities git

> stx/utilities/collector/debian-scripts/report/report.py -d /path

Note: The analysis is placed with the bundle in the 'report_analysis' dir.

.
├── ALL_NODES_20221211.214001.tar
├── ALL_NODES_20221211.214001
│ ├── controller-0_20221211.214001
│ ├── controller-0_20221211.214001.tgz
│ ├── controller-1_20221211.214001
│ ├── controller-1_20221211.214001.tgz
│ └── report_analysis

Test Plan:

PASS: Verify tool is packaged in Debian
PASS: Verify tool and all files are inserted into 'collect' tarballs
PASS: Verify alarm plugin algorithm is working
PASS: Verify daemon failures plugin algorithm is working
PASS: Verify heartbeat loss plugin algorithm is working
PASS: Verify maintenance errors plugin algorithm is working
PASS: Verify process failures plugin algorithm is working
PASS: Verify puppet errors plugin algorithm is working
PASS: Verify substring plugin algorithm is working
PASS: Verify swact activity plugin algorithm is working
PASS: Verify state changes plugin algorithm is working
PASS: Verify system info plugin algorithm is working
PASS: Verify failures and correct root causes are found by correlator
PASS: Verify significant events are found by correlator
PASS: Verify state changes are found by correlator
PASS: Verify results from plugin algorithms are printed into files
PASS: Verify report tool is not double packaged for subcloud collects
PASS: Verify the new --report option is passed to subcloud collect
PASS: Verify creating a plugin list from both built-in as well as
      local host designer plugins.
PASS: Verify the -d | --directory option handling; success and failure
PASS: Verify the -f | --file option handling; success and failure
PASS: Verify new collect --report option and when used the currently
      collected bundle gets auto analyzed and that analysis is
      included in the bundle.
PASS: Verify report tool can be run on any host in the system.
PASS: Verify report tool can be run directly from its git.
PASS: Verify tool presents a list of collect bundles to analyze when
      pointed to a directory containing more than one bundle. User can
      select one of the presented bundles to analyze.
PASS: Verify tool logging to stdio and log files; also debug logs
PASS: Verify analysis is created in the bundle directory
PASS: Verify analysis output when run using the collect --report option
PASS: Verify pep8 static analysis

Story: 2010166
Task: 47033
Authored-By: Angela Mao <Angela.Mao@windriver.com>
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
Change-Id: Ifa17588de1297d7e199c667a05cb1d21e6ae0382
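For quick reference, a minimal sketch of the new workflow based on the
options described above. The bundle name and /scratch location are
illustrative only; actual bundle names are assigned by collect:

> collect all --report
> /usr/local/bin/report/report.py --file /scratch/ALL_NODES_20221211.214001.tar
> /usr/local/bin/report/report.py --directory /scratch

The first command collects and auto-analyzes in one step, packaging the
analysis as report_analysis.tgz inside the bundle. The other two analyze an
existing bundle manually on any node, writing results to a 'report_analysis'
directory beside the bundle (or to /tmp/report_analysis when that location
is not writable by the current user).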
Parent: 5733f8bac5
Commit: 93a7fb0177
@@ -368,6 +368,10 @@ function print_help()
echo ""
echo "collect [--skip-mask] ... skip masking of collect data"
echo ""
echo "Create a collect report"
echo ""
echo "collect [--report | -r ] ... run the collect report tool on the collected bundle"
echo ""
echo "Examples:"
echo ""
echo "collect ... all logs for current host"
@@ -387,6 +391,7 @@ function print_help()
# command line arguement variables ; defaulted
DEBUG=false
CLEAN=false
REPORT=false
VERBOSE=false
SKIP_MASK=false
INVENTORY=false
@@ -634,6 +639,10 @@ while [[ ${#} -gt 0 ]] ; do
VERBOSE=true
;;

-r| --report)
REPORT=true
;;

--clean)
CLEAN=true
;;
@@ -1915,9 +1924,14 @@ function collect_subcloud_run()
# all hosts in a subcloud are collected
collect_cmd+=("-a")

# pass the report option to the subcloud if specified
if [ "${REPORT}" = true ] ; then
collect_cmd+=("-r")
fi

# all subcloud hosts are collected in parallel unless legacy more is specified
if [ "${PARALLEL_COLLECT_MODE}" = false ] ; then
collect_cmd+=("-in") ;
collect_cmd+=("-in")
fi
if [ "${DEBUG}" = true ] ; then
collect_cmd+=("-d")
@@ -3033,16 +3047,15 @@ collect_subclouds()

function get_report_tool()
{
local local_dest=${1}
local local_dest="${1}"
local local_path="/usr/local/bin/report"

mkdir -p ${local_dest}
cp -r /usr/local/bin/report/tool ${local_dest}
mkdir -p "${local_dest}"
cp -a "${local_path}" "${local_dest}"

local rc=${?}
if [ ${rc} -ne ${PASS} ] ; then
report_error "failed to get report tool from /usr/local/bin" ${rc}
else
ilog "copied report tool from host"
report_error "failed to get report tool from ${local_path}" ${rc}
fi
}

@@ -3058,16 +3071,15 @@ function get_report_tool()

function get_report_plugins()
{
local local_dest=${1}
local local_dest="${1}"
local local_path="/etc/collect/plugins"

mkdir -p ${local_dest}
cp -r /etc/collect/plugins ${local_dest}
mkdir -p "${local_dest}"
cp -a "${local_path}" "${local_dest}"

local rc=${?}
if [ ${rc} -ne ${PASS} ] ; then
report_error "failed to get report plugins from /etc/collect" ${rc}
else
ilog "copied plugins for report tool from host"
report_error "failed to get report plugins from ${local_path}" ${rc}
fi
}

@@ -3157,22 +3169,50 @@ fi
#
############################################################################

echo -n "creating ${COLLECT_TYPE} tarball ${TARBALL_NAME} ... "

remove_file_local ${COLLECT_ERROR_LOG}
remove_file_local ${HOST_COLLECT_ERROR_LOG}
get_report_tool ${COLLECT_DIR}/report
get_report_plugins ${COLLECT_DIR}/report

cd ${COLLECT_DIR}
tar -czf report_tool.tgz report
rc=${?}
if [ ${rc} -ne ${PASS} ] ; then
report_error "failed to tar report tool" ${rc}
else
rm -r report
if [ "${SUBCLOUD_COLLECT}" = false ] ; then

# Copy the Report tool to the collect bundle
get_report_tool ${COLLECT_DIR}

# Copy developer report tool plugins to the collect bundle
get_report_plugins ${COLLECT_DIR}/report

if [ ${?} -eq 0 -a -e ./report ] ; then
if [ "${REPORT}" = true ] ; then
echo ""
echo "------------- Collect Report ------------------"
# run the report tool
${COLLECT_DIR}/report/report.py -d ${COLLECT_DIR}
echo "-----------------------------------------------"
echo ""

# cleanup and tar the report tool and analysis
rm -rf ${COLLECT_DIR}/report/plugin_algs/__pycache__
rm -rf ${COLLECT_DIR}/report/__pycache__

# include the report analysis in the bundle
if [ -d ${COLLECT_DIR}/report_analysis ] ; then
tar -czf report_analysis.tgz report_analysis
fi
fi
fi

# include the report tool in the bundle.
tar -czf report_tool.tgz report

# cleanup after the report tool so that the extracted collect
# tarballs are not included in the bundling below.
for d in `ls` ; do
[ -d ./$d ] && remove_dir_local "./$d"
done
fi

echo -n "creating ${COLLECT_TYPE} tarball ${TARBALL_NAME} ... "
/usr/bin/expect << EOF
log_user ${USER_LOG_MODE}
spawn bash -i
@@ -13,14 +13,7 @@ SELECT_NODES_20220527.193605
│ ├── etc
│ ├── root
│ └── var
├── report
├── plugins (where the plugin files will be placed)
│ ├── alarm
│ ├── substring
│ └── ...
├── tool (where the tool will be placed)
└── output (where the output files will be placed)

└── report_analysis (where the output files will be placed)

> cat plugins/alarm

@@ -36,7 +29,7 @@ hosts=controllers
substring=operation failed
substring=Failed to send message

> report/tool/report.py --start 20220501 --end 20220530
> report/report.py --start 20220501 --end 20220530

Running the command above will populate the report folder with output files.
The tool also provides default values, more details are in 'report.py -h'.
@@ -55,27 +48,48 @@ For more detailed information about an algorithm use 'report.py <algorithm> -h'.

Here is the report directory after running the above command

report
├── output
│ └── SELECT_NODES_20220527.193605 (collect bundle that the report tool was run on)
│ ├── plugins (output files for plugins)
│ │ ├── alarm
│ │ └── ...
│ ├── correlator_failures
│ ├── correlator_events
│ ├── correlator_state_changes
│ ├── report.log (log file for report tool)
│ └── untar.log (log file for untarring collect bundle and host tar files)
├── plugins (where the plugins files are)
└── tool (where the report tool is)
.
├── ALL_NODES_20221211.214001.tar
└── report_analysis
├── correlator_alarms
├── correlator_events
├── correlator_failures
├── correlator_state_changes
├── plugins
│ ├── alarm
│ ├── daemon_failures
│ ├── heartbeat_loss
│ ├── log
│ ├── maintenance_errors
│ ├── process_failures
│ ├── puppet_errors
│ ├── state_changes
│ ├── substring_compute-0
│ ├── substring_compute-1
│ ├── substring_compute-2
│ ├── substring_controller-0
│ ├── substring_controller-1
│ ├── swact_activity
│ └── system_info
├── report.log
└── untar.log

The report tool also allows users to point it at any collect bundle and
have it automatically extract the tarball and tar files for each host
before running.

> report/tool/report.py -d CGTS-19143
The report tool can be run to analyze a collect bundle either on or off
system by specifying the bundle to analyze using the --directory or
-d <directory> command option.

Users may specify if they want the correlator to only find events
and state changes for a specific host.
> report/tool/report.py -d <directory containing collect bundle>

Users may specify if they want the correlator to only find events,
alarm transitions, and state changes for a specific host.

> report/tool/report.py --hostname controller-0

The report tool can also be run automatically during the collect
by using the collect --report or -r option. The report_analysis
will be tared and included in the collect bundle when the --report
option is used.
@@ -10,9 +10,10 @@
# The Correlator class contains algorithms that search for failures.
#
# The Correlator class reads through all the output files created by
# the plugins and detects failures. A summary of the failures and their
# causes are printed to standard output and an output file is created
# in the report directory.
# the plugins and determines failures and their root causes, as well as
# finds significant events, alarms transitions, and state changes.
# A summary of the findings are printed to standard output and output
# files are created in the report directory.
#
# TODO: Modularize code and separate methods into their own files
#
@@ -24,7 +25,6 @@ import logging
import os
import re


logger = logging.getLogger(__name__)
@ -12,62 +12,111 @@
|
||||
# The ExecutionEngine class runs plugins and gathers relevant logs and
|
||||
# information, creating output files in the report directory.
|
||||
#
|
||||
# TODO: Modularize code and separate plugin algorithms into their own
|
||||
# files
|
||||
# Futures:
|
||||
#
|
||||
# 1. Improve how report determines the active controller.
|
||||
# Specifically what controller was active at the time of detected
|
||||
# failure rather than what controller was active when collect ran.
|
||||
#
|
||||
# 2. Consider running plugins in parallel threads
|
||||
#
|
||||
########################################################################
|
||||
|
||||
from datetime import datetime
|
||||
import gzip
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tarfile
|
||||
|
||||
import algorithms
|
||||
from correlator import Correlator
|
||||
from plugin_algs.alarm import alarm
|
||||
from plugin_algs.audit import audit
|
||||
from plugin_algs.daemon_failures import daemon_failures
|
||||
from plugin_algs.heartbeat_loss import heartbeat_loss
|
||||
from plugin_algs.maintenance_errors import maintenance_errors
|
||||
from plugin_algs.process_failures import process_failures
|
||||
from plugin_algs.puppet_errors import puppet_errors
|
||||
from plugin_algs.state_changes import state_changes
|
||||
from plugin_algs.substring import substring
|
||||
from plugin_algs.swact_activity import swact_activity
|
||||
from plugin_algs.system_info import system_info
|
||||
|
||||
# don't generate __pycache__ dir and files
|
||||
sys.dont_write_bytecode = True
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ExecutionEngine:
|
||||
def __init__(self, opts, output_directory):
|
||||
def __init__(self, opts, input_dir, output_dir):
|
||||
"""Constructor for the ExecutionEngine class
|
||||
|
||||
Parameters:
|
||||
opts (dictionary): Options from command line
|
||||
output_dir (string): directory to put output files
|
||||
"""
|
||||
# don't generate __pycache__ dir and files
|
||||
sys.dont_write_bytecode = True
|
||||
self.opts = opts
|
||||
self.hosts = {"controllers": {}, "workers": {}, "storages": {}}
|
||||
self.active_controller_directory = None
|
||||
self.host_dirs = []
|
||||
|
||||
if not os.path.isdir(input_dir):
|
||||
logger.error("Error: Invalid input directory: %s", input_dir)
|
||||
sys.exit("... exiting")
|
||||
self.input_dir = input_dir
|
||||
if not os.path.isdir(output_dir):
|
||||
logger.error("Error: Invalid output directory : %s", output_dir)
|
||||
sys.exit("... exiting")
|
||||
self.output_dir = output_dir
|
||||
|
||||
# Uncompresses host tar files if not already done
|
||||
with open(os.path.join(output_directory, "untar.log"), "a") as logfile:
|
||||
for obj in (os.scandir(self.opts.directory)):
|
||||
with open(os.path.join(output_dir, "untar.log"), "a") as logfile:
|
||||
for obj in (os.scandir(self.input_dir)):
|
||||
info = os.path.splitext(obj.name)
|
||||
logger.debug("Host File Info: %s", info)
|
||||
if (obj.is_file() and obj.name != "report_tool.tgz" and
|
||||
tarfile.is_tarfile(obj.path) and not
|
||||
os.path.isdir(os.path.join(self.opts.directory,
|
||||
info[0]))):
|
||||
os.path.isdir(os.path.join(self.input_dir, info[0]))):
|
||||
try:
|
||||
logger.info("extracting %s", obj.name)
|
||||
subprocess.run(["tar", "xzfC", obj.path,
|
||||
self.opts.directory],
|
||||
self.input_dir],
|
||||
stderr=logfile, check=True)
|
||||
subprocess.run(["echo", "uncompressed", obj.name],
|
||||
check=True)
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.error(e)
|
||||
|
||||
for folder in (f.path for f in os.scandir(self.opts.directory)):
|
||||
database_path = os.path.join(folder, "var", "extra", "database")
|
||||
host_info_path = os.path.join(folder, "var", "extra", "host.info")
|
||||
# TODO: Need a better way to figure out the active controller
|
||||
# Consider getting the system infop from all hosts.
|
||||
#
|
||||
# Determine the active controller and load system info from it.
|
||||
for folder in (f.path for f in os.scandir(input_dir)):
|
||||
logger.debug("folder: %s", os.path.basename(folder))
|
||||
# skip over the tarballs
|
||||
if not os.path.isdir(folder):
|
||||
continue
|
||||
|
||||
if os.path.isdir(database_path) and os.listdir(database_path):
|
||||
host_dir = folder
|
||||
extra_path = os.path.join(host_dir, "var", "extra")
|
||||
database_path = os.path.join(host_dir, extra_path, "database")
|
||||
host_info_path = os.path.join(host_dir, extra_path, "host.info")
|
||||
|
||||
if os.path.isdir(host_dir):
|
||||
# don't analyse a directory that doesn't contain
|
||||
# a 'var/extra' dir.
|
||||
if not os.path.exists(extra_path):
|
||||
continue
|
||||
|
||||
if os.path.exists(database_path):
|
||||
if os.listdir(database_path):
|
||||
self.active_controller_directory = folder
|
||||
|
||||
self.host_dirs.append(host_dir)
|
||||
logger.debug("Host Dirs: %s", self.host_dirs)
|
||||
|
||||
if os.path.exists(host_info_path):
|
||||
hostname, subfunction = self._extract_subfunction(
|
||||
host_info_path)
|
||||
@ -78,29 +127,37 @@ class ExecutionEngine:
|
||||
elif "storage" in subfunction:
|
||||
self.hosts["storages"][hostname] = folder
|
||||
|
||||
self.active_controller_directory = folder
|
||||
if not self.active_controller_directory:
|
||||
raise ValueError("Active controller not found")
|
||||
|
||||
def execute(self, plugins, output_directory):
|
||||
def execute(self, plugins, output_dir):
|
||||
"""Run a list of plugins
|
||||
|
||||
Parameters:
|
||||
plugins (Plugin list): List of plugins to run
|
||||
output_dir (string): directory to put output files
|
||||
|
||||
Errors:
|
||||
FileNotFoundError
|
||||
"""
|
||||
plugin_output_dir = os.path.join(output_directory, "plugins")
|
||||
plugin_output_dir = os.path.join(output_dir, "plugins")
|
||||
os.makedirs(plugin_output_dir, exist_ok=True)
|
||||
|
||||
if self.opts.verbose:
|
||||
logger.info("Output files for plugins can be found at " +
|
||||
os.path.abspath(plugin_output_dir))
|
||||
if self.opts.debug:
|
||||
logger.debug("Processing Plugins for hosts: %s", self.host_dirs)
|
||||
|
||||
for plugin in plugins:
|
||||
processing = "Processing plugin: " + os.path.basename(plugin.file)
|
||||
hosts = {}
|
||||
if (
|
||||
plugin.state["hosts"] and len(plugin.state["hosts"]) >= 1
|
||||
): # if host list is given
|
||||
logger.info(
|
||||
f"Processing plugin: {os.path.basename(plugin.file)}")
|
||||
if self.opts.debug:
|
||||
logger.debug(processing)
|
||||
|
||||
for h in plugin.state["hosts"]:
|
||||
if h == "all":
|
||||
@ -114,7 +171,8 @@ class ExecutionEngine:
|
||||
|
||||
events = []
|
||||
if plugin.state["algorithm"] == algorithms.SUBSTRING:
|
||||
events = self.substring(
|
||||
events = substring(
|
||||
self.opts.start, self.opts.end,
|
||||
plugin.state["substring"],
|
||||
[
|
||||
os.path.join(folderpath, file)
|
||||
@ -128,8 +186,8 @@ class ExecutionEngine:
|
||||
f"substring_{hostname}",
|
||||
)
|
||||
if self.opts.verbose:
|
||||
logger.info("output at "
|
||||
+ os.path.relpath(output_file))
|
||||
logger.info("... output at "
|
||||
+ os.path.abspath(output_file))
|
||||
with open(output_file, "w") as file:
|
||||
file.write(
|
||||
f"Date range: {self.opts.start} until "
|
||||
@ -146,20 +204,19 @@ class ExecutionEngine:
|
||||
file.write(line + "\n")
|
||||
else:
|
||||
if plugin.state["algorithm"] == algorithms.SYSTEM_INFO:
|
||||
info = self.system_info()
|
||||
for host_dir in self.host_dirs:
|
||||
info = system_info(host_dir)
|
||||
system_info_output = os.path.join(plugin_output_dir,
|
||||
"system_info")
|
||||
with open(system_info_output, "w") as file:
|
||||
for i in info:
|
||||
file.write(i + "\n")
|
||||
|
||||
for k, v in self.hosts.items():
|
||||
file.write(f"{k}: {','.join(v.keys())}\n")
|
||||
|
||||
if self.opts.verbose:
|
||||
logger.info(processing + ", output at "
|
||||
+ os.path.relpath(system_info_output))
|
||||
else:
|
||||
logger.info(processing)
|
||||
logger.info(processing + ", output at " +
|
||||
os.path.abspath(system_info_output))
|
||||
|
||||
elif plugin.state["algorithm"] == algorithms.AUDIT:
|
||||
hosts = {}
|
||||
@ -171,7 +228,7 @@ class ExecutionEngine:
|
||||
self._create_output_file(
|
||||
f"{hostname}_audit",
|
||||
plugin_output_dir,
|
||||
self.audit(
|
||||
audit(
|
||||
plugin.state["start"],
|
||||
plugin.state["end"],
|
||||
os.path.join(
|
||||
@ -185,26 +242,39 @@ class ExecutionEngine:
|
||||
elif plugin.state["algorithm"] == algorithms.SWACT_ACTIVITY:
|
||||
self._create_output_file(
|
||||
"swact_activity", plugin_output_dir,
|
||||
self.swact_activity(), processing
|
||||
swact_activity(self.hosts, self.opts.start,
|
||||
self.opts.end),
|
||||
processing
|
||||
)
|
||||
|
||||
elif plugin.state["algorithm"] == algorithms.PUPPET_ERRORS:
|
||||
self._create_output_file(
|
||||
"puppet_errors", plugin_output_dir,
|
||||
self.puppet_errors(), processing
|
||||
puppet_errors(self.hosts, self.opts.start,
|
||||
self.opts.end),
|
||||
processing
|
||||
)
|
||||
|
||||
elif plugin.state["algorithm"] == algorithms.PROCESS_FAILURES:
|
||||
self._create_output_file(
|
||||
"process_failures", plugin_output_dir,
|
||||
self.process_failures(), processing
|
||||
process_failures(self.hosts, self.opts.start,
|
||||
self.opts.end),
|
||||
processing
|
||||
)
|
||||
|
||||
elif plugin.state["algorithm"] == algorithms.ALARM:
|
||||
alarms, logs = self.alarm(
|
||||
for host_dir in self.host_dirs:
|
||||
|
||||
alarms, logs = alarm(
|
||||
host_dir,
|
||||
self.opts.start, self.opts.end,
|
||||
plugin.state["alarm_exclude"],
|
||||
plugin.state["entity_exclude"]
|
||||
)
|
||||
if alarms is None and logs is None:
|
||||
continue
|
||||
|
||||
alarm_output = os.path.join(plugin_output_dir, "alarm")
|
||||
log_output = os.path.join(plugin_output_dir, "log")
|
||||
|
||||
@ -225,474 +295,60 @@ class ExecutionEngine:
|
||||
for date in v["dates"]:
|
||||
file.write(f" {date}\n")
|
||||
if self.opts.verbose:
|
||||
logger.info(processing + ", output at "
|
||||
+ os.path.relpath(alarm_output)
|
||||
+ ", " + os.path.relpath(log_output))
|
||||
else:
|
||||
logger.info(processing)
|
||||
elif self.opts.debug:
|
||||
logger.debug(processing + ", output at " +
|
||||
os.path.abspath(alarm_output) +
|
||||
", " + os.path.abspath(log_output))
|
||||
|
||||
elif plugin.state["algorithm"] == algorithms.HEARTBEAT_LOSS:
|
||||
self._create_output_file(
|
||||
"heartbeat_loss", plugin_output_dir,
|
||||
self.heartbeat_loss(), processing
|
||||
heartbeat_loss(self.hosts, self.opts.start,
|
||||
self.opts.end),
|
||||
processing
|
||||
)
|
||||
elif plugin.state["algorithm"] == algorithms.MAINTENANCE_ERR:
|
||||
self._create_output_file(
|
||||
"maintenance_errors", plugin_output_dir,
|
||||
self.maintenance_errors(), processing
|
||||
maintenance_errors(self.hosts, self.opts.start,
|
||||
self.opts.end),
|
||||
processing
|
||||
)
|
||||
elif plugin.state["algorithm"] == algorithms.DAEMON_FAILURES:
|
||||
self._create_output_file(
|
||||
"daemon_failures", plugin_output_dir,
|
||||
self.daemon_failures(), processing
|
||||
daemon_failures(self.hosts, self.opts.start,
|
||||
self.opts.end),
|
||||
processing
|
||||
)
|
||||
elif plugin.state["algorithm"] == algorithms.STATE_CHANGES:
|
||||
self._create_output_file(
|
||||
"state_changes", plugin_output_dir,
|
||||
self.state_changes(), processing
|
||||
state_changes(self.hosts, self.opts.start,
|
||||
self.opts.end),
|
||||
processing
|
||||
)
|
||||
|
||||
if not self.opts.verbose:
|
||||
logger.info("Output files for plugins can be found at " +
|
||||
os.path.relpath(plugin_output_dir))
|
||||
|
||||
# Running the correlator and printing the output from it
|
||||
self.run_correlator(output_directory, plugin_output_dir)
|
||||
|
||||
# Built-in algorithms ------------------------------
|
||||
def alarm(self, alarm_exclude=[], entity_exclude=[]):
|
||||
"""Alarm algorithm
|
||||
Gathers list of alarms and customer logs
|
||||
|
||||
Parameters:
|
||||
alarm_exclude (string list) : List of alarm id patterns to not
|
||||
search for
|
||||
entity_exclude (string list): List of entity id patterns to not
|
||||
search for
|
||||
"""
|
||||
alarm_data = {}
|
||||
log_data = {}
|
||||
|
||||
with open(
|
||||
os.path.join(
|
||||
self.active_controller_directory,
|
||||
"var", "extra", "database", "fm.db.sql.txt"
|
||||
)
|
||||
) as file:
|
||||
start = False
|
||||
for line in file:
|
||||
# start of event log
|
||||
if re.search(r"COPY (public\.)?event_log", line):
|
||||
start = True
|
||||
elif start and line == "\\.\n":
|
||||
break
|
||||
elif start:
|
||||
entry = re.split(r"\t", line)
|
||||
|
||||
INDEX_ALARM_ID = 5
|
||||
INDEX_ACTION = 6
|
||||
INDEX_ENTITY_ID = 8
|
||||
INDEX_ALARM_DATE = 9
|
||||
INDEX_SEVERITY = 10
|
||||
|
||||
alarm_id = entry[INDEX_ALARM_ID]
|
||||
entity_id = entry[INDEX_ENTITY_ID]
|
||||
action = entry[INDEX_ACTION]
|
||||
severity = entry[INDEX_SEVERITY]
|
||||
alarm_date = entry[INDEX_ALARM_DATE]
|
||||
|
||||
entry_date = alarm_date.replace(
|
||||
" ", "T"
|
||||
) # making time format of alarm the same
|
||||
if (self.opts.start <= entry_date
|
||||
and entry_date <= self.opts.end):
|
||||
cont = True
|
||||
# Checks if the alarm is in the user specified list of
|
||||
# alarm or entity ids
|
||||
for id in alarm_exclude:
|
||||
if id in alarm_id:
|
||||
cont = False
|
||||
break
|
||||
|
||||
for entity in entity_exclude:
|
||||
if entity in entity_id:
|
||||
cont = False
|
||||
break
|
||||
|
||||
if not cont:
|
||||
continue
|
||||
|
||||
try:
|
||||
if action == "log":
|
||||
log_info = log_data[
|
||||
f"{alarm_id} {entity_id} {severity}"
|
||||
]
|
||||
log_info["count"] += 1
|
||||
log_info["dates"].append(alarm_date)
|
||||
else:
|
||||
alarm_info = alarm_data[
|
||||
f"{alarm_id} {entity_id} {severity}"
|
||||
]
|
||||
alarm_info["dates"].append(
|
||||
f"{alarm_date} {action}")
|
||||
except KeyError:
|
||||
if entry[6] != "log":
|
||||
alarm_data[
|
||||
f"{alarm_id} {entity_id} {severity}"
|
||||
] = {
|
||||
"dates": [f"{alarm_date} {action}"],
|
||||
}
|
||||
else:
|
||||
log_data[
|
||||
f"{alarm_id} {entity_id} {severity}"
|
||||
] = {
|
||||
"count": 1,
|
||||
"dates": [alarm_date],
|
||||
}
|
||||
|
||||
for _, v in alarm_data.items():
|
||||
v["dates"] = sorted(v["dates"])
|
||||
temp = []
|
||||
temp.append(v["dates"][0])
|
||||
for i in range(1, len(v["dates"])):
|
||||
if v["dates"][i].split()[2] != v["dates"][i-1].split()[2]:
|
||||
temp.append(v["dates"][i])
|
||||
v["dates"] = temp
|
||||
|
||||
for _, v in log_data.items():
|
||||
v["dates"] = sorted(v["dates"])
|
||||
|
||||
return alarm_data, log_data
|
||||
|
||||
def substring(self, substr, files):
|
||||
"""Substring algorithm
|
||||
Looks for substrings within files
|
||||
|
||||
Parameters:
|
||||
substr (string list): List of substrings to look for
|
||||
files (string list): List of absolute filepaths to search in
|
||||
|
||||
Errors:
|
||||
FileNotFoundError
|
||||
"""
|
||||
# don't analyze older files, continue with current file
|
||||
CONTINUE_CURRENT = 0
|
||||
# analyze older files, continue with current file
|
||||
CONTINUE_CURRENT_OLD = 1
|
||||
|
||||
data = []
|
||||
for file in files:
|
||||
try:
|
||||
if not os.path.exists(file):
|
||||
if (re.search("controller-1_(.+)/var/log/mtcAgent.log",
|
||||
file)):
|
||||
continue
|
||||
raise FileNotFoundError(f"File not found: {file}")
|
||||
cont = True
|
||||
# Searching through file
|
||||
command = (f"""grep -Ea "{'|'.join(s for s in substr)}" """
|
||||
f"""{file} 2>/dev/null""")
|
||||
status = self._continue(file)
|
||||
|
||||
if (status == CONTINUE_CURRENT
|
||||
or status == CONTINUE_CURRENT_OLD):
|
||||
# continue with current file
|
||||
if status == CONTINUE_CURRENT:
|
||||
cont = False
|
||||
self._evaluate_substring(data, command)
|
||||
|
||||
# Searching through rotated log files that aren't compressed
|
||||
n = 1
|
||||
while os.path.exists(f"{file}.{n}") and cont:
|
||||
command = (f"""grep -Ea "{'|'.join(s for s in substr)}" """
|
||||
f"""{file}.{n} 2>/dev/null""")
|
||||
status = self._continue(f"{file}.{n}")
|
||||
|
||||
if (status == CONTINUE_CURRENT
|
||||
or status == CONTINUE_CURRENT_OLD):
|
||||
if status == CONTINUE_CURRENT:
|
||||
cont = False
|
||||
self._evaluate_substring(data, command)
|
||||
|
||||
n += 1
|
||||
|
||||
# Searching through rotated log files
|
||||
while os.path.exists(f"{file}.{n}.gz") and cont:
|
||||
command = (f"""zgrep -E "{'|'.join(s for s in substr)}" """
|
||||
f"""{file}.{n}.gz 2>/dev/null""")
|
||||
status = self._continue(f"{file}.{n}.gz", compressed=True)
|
||||
|
||||
if (status == CONTINUE_CURRENT
|
||||
or status == CONTINUE_CURRENT_OLD):
|
||||
if status == CONTINUE_CURRENT:
|
||||
cont = False
|
||||
self._evaluate_substring(data, command)
|
||||
|
||||
n += 1
|
||||
|
||||
except FileNotFoundError as e:
|
||||
logger.error(e)
|
||||
continue
|
||||
|
||||
return sorted(data)
|
||||
|
||||
def system_info(self):
|
||||
"""System info algorithm
|
||||
Presents basic information about the system
|
||||
"""
|
||||
data = []
|
||||
with open(
|
||||
os.path.join(
|
||||
self.active_controller_directory, "etc", "platform",
|
||||
"platform.conf"
|
||||
)
|
||||
) as file:
|
||||
for line in file:
|
||||
if "system_mode" in line:
|
||||
data.append(
|
||||
f"System Mode: "
|
||||
f"{re.match('^system_mode=(.*)', line).group(1)}"
|
||||
)
|
||||
elif "system_type" in line:
|
||||
data.append(
|
||||
f"System Type: "
|
||||
f"{re.match('^system_type=(.*)', line).group(1)}"
|
||||
)
|
||||
elif "distributed_cloud_role" in line:
|
||||
role = re.match('^distributed_cloud_role=(.*)',
|
||||
line).group(1)
|
||||
data.append(f"Distributed cloud role: {role}")
|
||||
elif "sw_version" in line:
|
||||
data.append(
|
||||
f"SW Version: "
|
||||
f"{re.match('^sw_version=(.*)', line).group(1)}"
|
||||
)
|
||||
with open(
|
||||
os.path.join(self.active_controller_directory, "etc", "build.info")
|
||||
) as file:
|
||||
for line in file:
|
||||
if "BUILD_TYPE" in line:
|
||||
data.append(
|
||||
f"Build Type: "
|
||||
f"{re.match('^BUILD_TYPE=(.*)', line).group(1)}"
|
||||
)
|
||||
elif re.match("^OS=(.*)", line):
|
||||
data.append(f"OS: {re.match('^OS=(.*)', line).group(1)}")
|
||||
|
||||
return data
|
||||
|
||||
def swact_activity(self):
|
||||
"""Swact activity algorithm
|
||||
Presents all swacting activity in the system
|
||||
"""
|
||||
data = []
|
||||
sm_files = []
|
||||
sm_customer_files = []
|
||||
swact_start = None
|
||||
swact_in_progress = False
|
||||
swact_end = None
|
||||
|
||||
for _, folder in self.hosts["controllers"].items():
|
||||
sm_path = os.path.join(folder, "var", "log", "sm.log")
|
||||
sm_files.append(sm_path)
|
||||
sm_customer_path = os.path.join(folder, "var", "log",
|
||||
"sm-customer.log")
|
||||
sm_customer_files.append(sm_customer_path)
|
||||
|
||||
sm_substrings = ["Uncontrolled swact", "Swact has started,",
|
||||
"Neighbor (.+) is now in the down",
|
||||
"Service (.+) has reached max failures",
|
||||
"Swact update"]
|
||||
data = self.substring(sm_substrings, sm_files)
|
||||
|
||||
for i, line in enumerate(data):
|
||||
if "Swact has started," in line and not swact_in_progress:
|
||||
swact_in_progress = True
|
||||
swact_start = datetime.strptime(line[0:19],
|
||||
"%Y-%m-%dT%H:%M:%S")
|
||||
elif "Swact update" in line and swact_in_progress:
|
||||
swact_in_progress = False
|
||||
swact_end = datetime.strptime(line[0:19], "%Y-%m-%dT%H:%M:%S")
|
||||
line += f" SWACT TOOK {swact_end - swact_start} \n"
|
||||
data[i] = line
|
||||
|
||||
sm_customer_substrings = [
|
||||
"swact", "active-failed\\s+\\| disabling-failed\\s+\\|"
|
||||
]
|
||||
data += self.substring(sm_customer_substrings, sm_customer_files)
|
||||
|
||||
return sorted(data)
|
||||
|
||||
def puppet_errors(self):
|
||||
"""Puppet errors algorithm
|
||||
Presents log errors from puppet logs
|
||||
"""
|
||||
data = []
|
||||
for host_type in self.hosts.keys():
|
||||
for _, folder in self.hosts[host_type].items():
|
||||
puppet_folder = os.path.join(folder, "var", "log", "puppet")
|
||||
command = (f"""grep -rh "[m ]Error: " {puppet_folder} """
|
||||
f"""2>/dev/null""")
|
||||
self._evaluate_substring(data, command)
|
||||
return sorted(data)
|
||||
|
||||
def process_failures(self):
|
||||
"""Process failures algorithm
|
||||
Presents log errors from pmond
|
||||
"""
|
||||
data = []
|
||||
files = []
|
||||
for host_type in self.hosts.keys():
|
||||
for _, folder in self.hosts[host_type].items():
|
||||
pmond = os.path.join(folder, "var", "log", "pmond.log")
|
||||
files.append(pmond)
|
||||
|
||||
data = self.substring(["Error :"], files)
|
||||
|
||||
return data
|
||||
|
||||
def heartbeat_loss(self):
|
||||
"""Heartbeat loss algorithm
|
||||
Presents all heartbeat loss error messages in the system
|
||||
"""
|
||||
data = []
|
||||
hb_files = []
|
||||
|
||||
for _, folder in self.hosts["controllers"].items():
|
||||
hb_path = os.path.join(folder, "var", "log", "hbsAgent.log")
|
||||
hb_files.append(hb_path)
|
||||
|
||||
hb_substrings = ["Heartbeat Loss"]
|
||||
data = self.substring(hb_substrings, hb_files)
|
||||
|
||||
return sorted(data)
|
||||
|
||||
def maintenance_errors(self):
|
||||
"""Maintenance errors algorithm
|
||||
Presents maintenance errors and other relevant log messages in system
|
||||
"""
|
||||
data = []
|
||||
mtc_files = []
|
||||
|
||||
for _, folder in self.hosts["controllers"].items():
|
||||
agent = os.path.join(folder, "var", "log", "mtcAgent.log")
|
||||
mtc_files.append(agent)
|
||||
|
||||
for host_type in self.hosts.keys():
|
||||
for _, folder in self.hosts[host_type].items():
|
||||
client = os.path.join(folder, "var", "log", "mtcClient.log")
|
||||
mtc_files.append(client)
|
||||
|
||||
mtc_substrings = ["Error : ", "Configuration failure",
|
||||
"In-Test Failure", "Loss Of Communication",
|
||||
"Graceful Recovery Wait ",
|
||||
"regained MTCALIVE from host that has rebooted",
|
||||
"Connectivity Recovered ; ",
|
||||
"auto recovery disabled", "Graceful Recovery Failed",
|
||||
"MNFA ENTER", "MNFA EXIT", "MNFA POOL"]
|
||||
data = self.substring(mtc_substrings, mtc_files)
|
||||
|
||||
return sorted(data)
|
||||
|
||||
def daemon_failures(self):
|
||||
"""Daemon failures algorithm
|
||||
Presents all failed puppet manifest messages in the system
|
||||
"""
|
||||
data = []
|
||||
daemon_files = []
|
||||
|
||||
for host_type in self.hosts.keys():
|
||||
for _, folder in self.hosts[host_type].items():
|
||||
daemon_path = os.path.join(folder, "var", "log", "daemon.log")
|
||||
daemon_files.append(daemon_path)
|
||||
|
||||
daemon_substrings = ["Failed to run the puppet manifest"]
|
||||
data = self.substring(daemon_substrings, daemon_files)
|
||||
|
||||
return sorted(data)
|
||||
|
||||
def state_changes(self):
|
||||
"""State changes algorithm
|
||||
Presents all messages in the system regarding the state of hosts
|
||||
"""
|
||||
data = []
|
||||
sc_files = []
|
||||
|
||||
for _, folder in self.hosts["controllers"].items():
|
||||
sc_path = os.path.join(folder, "var", "log", "mtcAgent.log")
|
||||
sc_files.append(sc_path)
|
||||
|
||||
sc_substrings = ["is ENABLED", "allStateChange (.+)locked-disabled"]
|
||||
data = self.substring(sc_substrings, sc_files)
|
||||
|
||||
return sorted(data)
|
||||
|
||||
def audit(self, start, end, audit_log_path):
|
||||
"""Counts audit events in dcmanager within a specified date range
|
||||
|
||||
Parameters:
|
||||
start (string) : start date in YYYY-MM-DD HH:MM:SS format
|
||||
end (string) : end date in YYYY-MM-DD HH:MM:SS format
|
||||
audit_log_path (string) : absolute path of augit log file
|
||||
"""
|
||||
if not shutil.which("lnav"):
|
||||
raise ValueError("Lnav program not found")
|
||||
|
||||
SECONDS_PER_HOUR = 3600
|
||||
fmt = "%Y-%m-%d %H:%M:%S"
|
||||
|
||||
d1 = datetime.strptime(start, fmt)
|
||||
d2 = datetime.strptime(end, fmt)
|
||||
seconds = (d2 - d1).total_seconds()
|
||||
|
||||
log_texts = [
|
||||
"Triggered subcloud audit%",
|
||||
"Trigger patch audit%",
|
||||
"Trigger load audit%",
|
||||
"Triggered firmware audit%",
|
||||
"Triggered kubernetes audit%",
|
||||
# Counts sum of audits from all subclouds
|
||||
]
|
||||
INDEX_MIDDLE_WORD = 1
|
||||
data = [("These rates and totals represent the sum of audits from "
|
||||
+ "all subclouds")]
|
||||
|
||||
def command(text):
|
||||
|
||||
return (
|
||||
f'lnav -R -n -c ";SELECT count(log_body) AS '
|
||||
f'{text.split(" ")[INDEX_MIDDLE_WORD]}_total from '
|
||||
f'openstack_log WHERE '
|
||||
f'(log_time > \\"{start}\\" AND not log_time > \\"{end}\\")'
|
||||
f' AND log_body like \\"{text}\\"" "{audit_log_path}"'
|
||||
)
|
||||
|
||||
for text in log_texts:
|
||||
p = subprocess.Popen(command(text), shell=True,
|
||||
stdout=subprocess.PIPE)
|
||||
for line in p.stdout:
|
||||
line = line.decode("utf-8").strip()
|
||||
if line.isnumeric():
|
||||
data.append(
|
||||
f"rate "
|
||||
f"{round((int(line)/seconds * SECONDS_PER_HOUR), 3)} "
|
||||
f"per hour. total: {line}"
|
||||
)
|
||||
else:
|
||||
data.append(line)
|
||||
return data
|
||||
self.run_correlator(output_dir, plugin_output_dir)
|
||||
|
||||
# -----------------------------------
|
||||
|
||||
def run_correlator(self, output_directory, plugin_output_dir):
|
||||
def run_correlator(self, output_dir, plugin_output_dir):
|
||||
"""Runs the correlator and prints the results differently based on if
|
||||
the tool was run with or without the verbose option
|
||||
|
||||
Parameters:
|
||||
output_directory (string) : directory to place output files from
|
||||
output_dir (string) : directory to place output files from
|
||||
correlator
|
||||
plugin_output_dir (string) : directory with output files from
|
||||
plugins
|
||||
"""
|
||||
|
||||
# logger.info("Correlator Output Dir: %s", output_dir)
|
||||
# logger.info("Correlator Plugin Dir: %s", plugin_output_dir)
|
||||
|
||||
correlator = Correlator(plugin_output_dir)
|
||||
failures, events, alarms, state_changes = correlator.run(
|
||||
self.opts.hostname)
|
||||
@ -704,20 +360,23 @@ class ExecutionEngine:
|
||||
state_changes.append("\nTotal state changes found: "
|
||||
+ str(state_changes_len) + "\n")
|
||||
|
||||
logger.info("\nRunning correlator...")
|
||||
self._create_output_file("correlator_failures", output_directory,
|
||||
# TODO: Put at the end of the report
|
||||
logger.info("\nRunning correlator... view report at "
|
||||
+ output_dir)
|
||||
self._create_output_file("correlator_failures", output_dir,
|
||||
failures, "")
|
||||
self._create_output_file("correlator_events", output_directory,
|
||||
self._create_output_file("correlator_events", output_dir,
|
||||
events, "")
|
||||
self._create_output_file("correlator_alarms", output_directory,
|
||||
self._create_output_file("correlator_alarms", output_dir,
|
||||
alarms, "")
|
||||
self._create_output_file("correlator_state_changes", output_directory,
|
||||
self._create_output_file("correlator_state_changes", output_dir,
|
||||
state_changes, "")
|
||||
|
||||
if not self.opts.verbose:
|
||||
logger.info("Output can be found at "
|
||||
+ os.path.relpath(output_directory) + "\n")
|
||||
logger.info("Failures: " + str(failures_len))
|
||||
logger.info("Events : " + str(events_len))
|
||||
logger.info("Alarms : " + str(alarms_len))
|
||||
logger.info("State Changes: " + str(state_changes_len))
|
||||
logger.info("Failures : " + str(failures_len))
|
||||
for f in failures[:-1]:
|
||||
if "Uncontrolled swact" in f:
|
||||
logger.info(f[0:19] + " "
|
||||
@ -730,16 +389,7 @@ class ExecutionEngine:
|
||||
f[43:])[0].lower() + " failure")
|
||||
else:
|
||||
logger.info(f[:-1])
|
||||
if failures_len != 0:
|
||||
logger.info("\nEvents: " + str(events_len))
|
||||
else:
|
||||
logger.info("Events: " + str(events_len))
|
||||
logger.info("Alarms: " + str(alarms_len))
|
||||
logger.info("State Changes: " + str(state_changes_len))
|
||||
else:
|
||||
logger.info("\nFailures: " + str(failures_len))
|
||||
for f in failures[:-1]:
|
||||
logger.info(f[:-1])
|
||||
|
||||
# Dictionary to keep track of number of times events happens on
|
||||
# each host
|
||||
@ -750,20 +400,13 @@ class ExecutionEngine:
|
||||
events_summ[k] = 1
|
||||
else:
|
||||
events_summ[k] += 1
|
||||
|
||||
if failures_len != 0:
|
||||
logger.info("\nEvents: " + str(events_len))
|
||||
else:
|
||||
logger.info("Events: " + str(events_len))
|
||||
for k, v in sorted(events_summ.items()):
|
||||
logger.info(k + ": " + str(v) + " time(s)")
|
||||
|
||||
if events_len != 0:
|
||||
logger.info("\nAlarms: " + str(alarms_len))
|
||||
else:
|
||||
logger.info("Alarms: " + str(alarms_len))
|
||||
logger.info("The full list of alarms can be found at "
|
||||
+ os.path.relpath(output_directory)
|
||||
+ os.path.abspath(output_dir)
|
||||
+ "/correlator_alarms")
|
||||
|
||||
# Dictionary to keep track of number of times state changes
|
||||
@ -778,64 +421,13 @@ class ExecutionEngine:
|
||||
else:
|
||||
state_changes_summ[k] += 1
|
||||
|
||||
if alarms_len != 0:
|
||||
logger.info("\nState Changes: " + str(state_changes_len))
|
||||
else:
|
||||
logger.info("State Changes: " + str(state_changes_len))
|
||||
for k, v in sorted(state_changes_summ.items()):
|
||||
logger.info(k + ": " + str(v) + " time(s)")
|
||||
|
||||
def _continue(self, file, compressed=False):
|
||||
# don't analyze older files, continue with current file
|
||||
CONTINUE_CURRENT = 0
|
||||
# analyze older files, continue with current file
|
||||
CONTINUE_CURRENT_OLD = 1
|
||||
# don't analyze current file, continue to older files
|
||||
CONTINUE_OLD = 2
|
||||
|
||||
# check date of first log event and compare with provided
|
||||
# start, end dates
|
||||
first = ""
|
||||
|
||||
if not compressed:
|
||||
with open(file) as f:
|
||||
line = f.readline()
|
||||
first = line[0:19]
|
||||
else:
|
||||
with gzip.open(file, "rb") as f:
|
||||
line = f.readline().decode("utf-8")
|
||||
first = line[0:19]
|
||||
try:
|
||||
datetime.strptime(line[0:19], "%Y-%m-%dT%H:%M:%S")
|
||||
first = line[0:19]
|
||||
except ValueError:
|
||||
return CONTINUE_CURRENT_OLD
|
||||
|
||||
if first < self.opts.start:
|
||||
return CONTINUE_CURRENT
|
||||
elif first < self.opts.end and first > self.opts.start:
|
||||
return CONTINUE_CURRENT_OLD
|
||||
elif first > self.opts.end:
|
||||
return CONTINUE_OLD
|
||||
|
||||
def _evaluate_substring(self, data, command):
|
||||
p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
|
||||
for line in p.stdout:
|
||||
line = line.decode("utf-8")
|
||||
# different date locations for log events
|
||||
dates = [line[0:19], line[2:21]]
|
||||
for date in dates:
|
||||
try:
|
||||
datetime.strptime(date, "%Y-%m-%dT%H:%M:%S")
|
||||
if date > self.opts.start and date < self.opts.end:
|
||||
if line[0] == "|": # sm-customer.log edge case
|
||||
line = line[1:].strip()
|
||||
line = re.sub("\\s+", " ", line)
|
||||
data.append(line)
|
||||
break
|
||||
except ValueError:
|
||||
if date == dates[-1]:
|
||||
data.append(line)
|
||||
logger.info("\nFailures : " + str(failures_len))
|
||||
for f in failures[:-1]:
|
||||
logger.info(f[:-1])
|
||||
|
||||
def _extract_subfunction(self, host_info_path):
|
||||
GROUP_ONE = 1
|
||||
@ -859,11 +451,9 @@ class ExecutionEngine:
|
||||
else:
|
||||
file.write(i + "\n")
|
||||
if self.opts.verbose:
|
||||
output = ("output at "
|
||||
+ os.path.relpath(os.path.join(directory, filename)))
|
||||
output = ("... output at "
|
||||
+ os.path.abspath(os.path.join(directory, filename)))
|
||||
if processing == "":
|
||||
logger.info(output)
|
||||
else:
|
||||
logger.info(processing + ", " + output)
|
||||
elif processing != "":
|
||||
logger.info(processing)
|
||||
|
@ -14,7 +14,6 @@
|
||||
#
|
||||
########################################################################
|
||||
|
||||
|
||||
from datetime import datetime
|
||||
import json
|
||||
import logging
|
||||
@ -157,19 +156,18 @@ class Plugin:
|
||||
|
||||
try:
|
||||
datetime.strptime(self.state["start"], "%Y-%m-%d %H:%M:%S")
|
||||
except:
|
||||
raise ValueError(
|
||||
f"plugin : {plugin_name} needs a start time in YYYY-MM-DD "
|
||||
f"HH:MM:SS format"
|
||||
)
|
||||
except ValueError as e:
|
||||
logger.error(
|
||||
"plugin : %s needs a valid start time in YYYY-MM-DD \
|
||||
HH:MM:SS format", plugin_name)
|
||||
|
||||
try:
|
||||
datetime.strptime(self.state["end"], "%Y-%m-%d %H:%M:%S")
|
||||
except:
|
||||
raise ValueError(
|
||||
f"plugin : {plugin_name} needs an end time in YYYY-MM-DD "
|
||||
f"HH:MM:SS format"
|
||||
)
|
||||
except ValueError as e:
|
||||
logger.error(
|
||||
"plugin : %s needs a valid end time in YYYY-MM-DD \
|
||||
HH:MM:SS format", plugin_name)
|
||||
|
||||
else:
|
||||
raise ValueError(
|
||||
f"plugin: {plugin_name} unknown algorithm "
|
||||
|
tools/collector/debian-scripts/report/plugin_algs/alarm.py (new file, 129 lines)
@@ -0,0 +1,129 @@
|
||||
########################################################################
|
||||
#
|
||||
# Copyright (c) 2022 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
########################################################################
|
||||
#
|
||||
# This file contains the functions for the alarm plugin algorithm.
|
||||
#
|
||||
# The alarm plugin algorithm gathers and presents a list of all alarms
|
||||
# and customer logs, except those specified.
|
||||
#
|
||||
########################################################################
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
|
||||
def alarm(host_dir, start, end, alarm_exclude=None,
|
||||
entity_exclude=None):
|
||||
"""Alarm algorithm
|
||||
Presents all alarms and customer logs, except those specified
|
||||
|
||||
Parameters:
|
||||
host_dir (string): path to the host directory
|
||||
start (string): Start time for analysis
|
||||
end (string): End time for analysis
|
||||
|
||||
alarm_exclude (string list): List of alarms to ignore
|
||||
entity_exclude (string list): List of entity ids to ignore
|
||||
"""
|
||||
alarm_data = {}
|
||||
log_data = {}
|
||||
|
||||
fm_database = os.path.join(
|
||||
host_dir, "var", "extra", "database", "fm.db.sql.txt")
|
||||
if not os.path.exists(fm_database):
|
||||
return None, None
|
||||
|
||||
if alarm_exclude is None:
|
||||
alarm_exclude = []
|
||||
if entity_exclude is None:
|
||||
entity_exclude = []
|
||||
|
||||
with open(fm_database) as file:
|
||||
alarms_start = False
|
||||
for line in file:
|
||||
# start of event log
|
||||
if re.search(r"COPY (public\.)?event_log", line):
|
||||
alarms_start = True
|
||||
elif alarms_start and line == "\\.\n":
|
||||
break
|
||||
elif alarms_start:
|
||||
entry = re.split(r"\t", line)
|
||||
|
||||
INDEX_ALARM_ID = 5
|
||||
INDEX_ACTION = 6
|
||||
INDEX_ENTITY_ID = 8
|
||||
INDEX_ALARM_DATE = 9
|
||||
INDEX_SEVERITY = 10
|
||||
|
||||
alarm_id = entry[INDEX_ALARM_ID]
|
||||
entity_id = entry[INDEX_ENTITY_ID]
|
||||
action = entry[INDEX_ACTION]
|
||||
severity = entry[INDEX_SEVERITY]
|
||||
alarm_date = entry[INDEX_ALARM_DATE]
|
||||
|
||||
entry_date = alarm_date.replace(
|
||||
" ", "T"
|
||||
) # making time format of alarm the same
|
||||
if start <= entry_date and entry_date <= end:
|
||||
cont = True
|
||||
# Checks if the alarm is in the user specified list of
|
||||
# alarm or entity ids
|
||||
for id in alarm_exclude:
|
||||
if id in alarm_id:
|
||||
cont = False
|
||||
break
|
||||
|
||||
for entity in entity_exclude:
|
||||
if entity in entity_id:
|
||||
cont = False
|
||||
break
|
||||
|
||||
if not cont:
|
||||
continue
|
||||
|
||||
try:
|
||||
if action == "log":
|
||||
log_info = log_data[
|
||||
f"{alarm_id} {entity_id} {severity}"
|
||||
]
|
||||
log_info["count"] += 1
|
||||
log_info["dates"].append(alarm_date)
|
||||
else:
|
||||
alarm_info = alarm_data[
|
||||
f"{alarm_id} {entity_id} {severity}"
|
||||
]
|
||||
alarm_info["dates"].append(
|
||||
f"{alarm_date} {action}")
|
||||
except KeyError:
|
||||
if entry[6] != "log":
|
||||
alarm_data[
|
||||
f"{alarm_id} {entity_id} {severity}"
|
||||
] = {
|
||||
"dates": [f"{alarm_date} {action}"],
|
||||
}
|
||||
else:
|
||||
log_data[
|
||||
f"{alarm_id} {entity_id} {severity}"
|
||||
] = {
|
||||
"count": 1,
|
||||
"dates": [alarm_date],
|
||||
}
|
||||
|
||||
for _, v in alarm_data.items():
|
||||
v["dates"] = sorted(v["dates"])
|
||||
temp = []
|
||||
temp.append(v["dates"][0])
|
||||
for i in range(1, len(v["dates"])):
|
||||
if v["dates"][i].split()[2] != v["dates"][i-1].split()[2]:
|
||||
temp.append(v["dates"][i])
|
||||
v["dates"] = temp
|
||||
|
||||
for _, v in log_data.items():
|
||||
v["dates"] = sorted(v["dates"])
|
||||
|
||||
return alarm_data, log_data
|
tools/collector/debian-scripts/report/plugin_algs/audit.py (new file, 76 lines)
@@ -0,0 +1,76 @@
|
||||
########################################################################
|
||||
#
|
||||
# Copyright (c) 2022 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
########################################################################
|
||||
#
|
||||
# This file contains the functions for the audit plugin algorithm.
|
||||
#
|
||||
# The audit plugin algorithm counts the audit events found in dcmanager
|
||||
# within a specific date range.
|
||||
#
|
||||
########################################################################
|
||||
|
||||
from datetime import datetime
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
|
||||
def audit(start, end, audit_log_path):
|
||||
"""Counts audit events, like "Trigger load audit", in dcmanager within a
|
||||
specified date range
|
||||
|
||||
Parameters:
|
||||
start (string) : start date in YYYY-MM-DD HH:MM:SS format
|
||||
end (string) : end date in YYYY-MM-DD HH:MM:SS format
|
||||
audit_log_path (string) : absolute path of augit log file
|
||||
"""
|
||||
if not shutil.which("lnav"):
|
||||
raise ValueError("Lnav program not found")
|
||||
|
||||
SECONDS_PER_HOUR = 3600
|
||||
fmt = "%Y-%m-%d %H:%M:%S"
|
||||
|
||||
d1 = datetime.strptime(start, fmt)
|
||||
d2 = datetime.strptime(end, fmt)
|
||||
seconds = (d2 - d1).total_seconds()
|
||||
|
||||
log_texts = [
|
||||
"Triggered subcloud audit%",
|
||||
"Trigger patch audit%",
|
||||
"Trigger load audit%",
|
||||
"Triggered firmware audit%",
|
||||
"Triggered kubernetes audit%",
|
||||
# Counts sum of audits from all subclouds
|
||||
]
|
||||
INDEX_MIDDLE_WORD = 1
|
||||
data = [("These rates and totals represent the sum of audits from "
|
||||
+ "all subclouds")]
|
||||
|
||||
def command(text):
|
||||
|
||||
return (
|
||||
f'lnav -R -n -c ";SELECT count(log_body) AS '
|
||||
f'{text.split(" ")[INDEX_MIDDLE_WORD]}_total from '
|
||||
f'openstack_log WHERE '
|
||||
f'(log_time > \\"{start}\\" AND not log_time > \\"{end}\\")'
|
||||
f' AND log_body like \\"{text}\\"" "{audit_log_path}"'
|
||||
)
|
||||
|
||||
for text in log_texts:
|
||||
p = subprocess.Popen(command(text), shell=True,
|
||||
stdout=subprocess.PIPE)
|
||||
for line in p.stdout:
|
||||
line = line.decode("utf-8").strip()
|
||||
if line.isnumeric():
|
||||
data.append(
|
||||
f"rate "
|
||||
f"{round((int(line)/seconds * SECONDS_PER_HOUR), 3)} "
|
||||
f"per hour. total: {line}"
|
||||
)
|
||||
else:
|
||||
data.append(line)
|
||||
|
||||
return data
|
@ -0,0 +1,42 @@
|
||||
########################################################################
|
||||
#
|
||||
# Copyright (c) 2022 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
########################################################################
|
||||
#
|
||||
# This file contains the functions for the daemon failures plugin
|
||||
# algorithm.
|
||||
#
|
||||
# The daemon failures plugin algorithm gathers all failed puppet
|
||||
# manifest messages in the system.
|
||||
#
|
||||
########################################################################
|
||||
|
||||
import os
|
||||
|
||||
from plugin_algs.substring import substring
|
||||
|
||||
|
||||
def daemon_failures(hosts, start, end):
|
||||
"""Daemon failures algorithm
|
||||
Presents all "Failed to run the puppet manifest" log messages in the system
|
||||
|
||||
Parameters:
|
||||
hosts (dictionary): Paths to folders for each host
|
||||
start (string): Start time for analysis
|
||||
end (string): End time for analysis
|
||||
"""
|
||||
data = []
|
||||
daemon_files = []
|
||||
|
||||
for host_type in hosts.keys():
|
||||
for _, folder in hosts[host_type].items():
|
||||
daemon_path = os.path.join(folder, "var", "log", "daemon.log")
|
||||
daemon_files.append(daemon_path)
|
||||
|
||||
daemon_substrings = ["Failed to run the puppet manifest"]
|
||||
data = substring(start, end, daemon_substrings, daemon_files)
|
||||
|
||||
return sorted(data)
|
@ -0,0 +1,41 @@
|
||||
########################################################################
|
||||
#
|
||||
# Copyright (c) 2022 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
########################################################################
|
||||
#
|
||||
# This file contains the functions for the heartbeat loss plugin
|
||||
# algorithm.
|
||||
#
|
||||
# The heartbeat loss plugin algorithm gathers all heartbeat loss error
|
||||
# messages in the system.
|
||||
#
|
||||
########################################################################
|
||||
|
||||
import os
|
||||
|
||||
from plugin_algs.substring import substring
|
||||
|
||||
|
||||
def heartbeat_loss(hosts, start, end):
|
||||
"""Heartbeat loss algorithm
|
||||
Presents all "Heartbeat Loss" error messages in the system
|
||||
|
||||
Parameters:
|
||||
hosts (dictionary): Paths to folders for each host
|
||||
start (string): Start time for analysis
|
||||
end (string): End time for analysis
|
||||
"""
|
||||
data = []
|
||||
hb_files = []
|
||||
|
||||
for _, folder in hosts["controllers"].items():
|
||||
hb_path = os.path.join(folder, "var", "log", "hbsAgent.log")
|
||||
hb_files.append(hb_path)
|
||||
|
||||
hb_substrings = ["Heartbeat Loss"]
|
||||
data = substring(start, end, hb_substrings, hb_files)
|
||||
|
||||
return sorted(data)
|
tools/collector/debian-scripts/report/plugin_algs/maintenance_errors.py (new file)
@@ -0,0 +1,56 @@
########################################################################
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
########################################################################
#
# This file contains the functions for the maintenance errors plugin
# algorithm.
#
# The maintenance errors plugin algorithm gathers all maintenance errors
# in the system, as well as other relevant, significant log messages.
#
########################################################################

import os

from plugin_algs.substring import substring


def maintenance_errors(hosts, start, end):
    """Maintenance errors algorithm
    Presents maintenance errors and other relevant log messages in system,
    such as "Configuration failure"

    Parameters:
        hosts (dictionary): Paths to folders for each host
        start (string): Start time for analysis
        end (string): End time for analysis
    """
    data = []
    mtc_files = []

    for _, folder in hosts["controllers"].items():
        agent = os.path.join(folder, "var", "log", "mtcAgent.log")
        mtc_files.append(agent)

    for host_type in hosts.keys():
        for _, folder in hosts[host_type].items():
            client = os.path.join(folder, "var", "log", "mtcClient.log")
            mtc_files.append(client)

    mtc_substrings = ["Error : ",
                      "Configuration failure",
                      "In-Test Failure",
                      "Loss Of Communication",
                      "Graceful Recovery Wait ",
                      "regained MTCALIVE from host that has rebooted",
                      "Connectivity Recovered ; ",
                      "auto recovery disabled",
                      "Graceful Recovery Failed",
                      "MNFA ENTER", "MNFA EXIT", "MNFA POOL"]
    data = substring(start, end, mtc_substrings, mtc_files)

    return sorted(data)
tools/collector/debian-scripts/report/plugin_algs/process_failures.py (new file)
@@ -0,0 +1,40 @@
########################################################################
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
########################################################################
#
# This file contains the functions for the process failures plugin
# algorithm.
#
# The process failures plugin algorithm searches through pmond.log and
# gathers all log errors.
#
########################################################################

import os

from plugin_algs.substring import substring


def process_failures(hosts, start, end):
    """Process failures algorithm
    Presents all "Error : " log messages from pmond

    Parameters:
        hosts (dictionary): Paths to folders for each host
        start (string): Start time for analysis
        end (string): End time for analysis
    """
    data = []
    files = []
    for host_type in hosts.keys():
        for _, folder in hosts[host_type].items():
            pmond = os.path.join(folder, "var", "log", "pmond.log")
            files.append(pmond)

    data = substring(start, end, ["Error :"], files)

    return sorted(data)
tools/collector/debian-scripts/report/plugin_algs/puppet_errors.py (new file)
@@ -0,0 +1,39 @@
########################################################################
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
########################################################################
#
# This file contains the functions for the puppet errors plugin
# algorithm.
#
# The puppet errors plugin algorithm searches through all the puppet
# logs for any errors.
#
########################################################################

import os

from plugin_algs.substring import _evaluate_substring


def puppet_errors(hosts, start, end):
    """Puppet errors algorithm
    Presents all "Error: " log messages from puppet logs

    Parameters:
        hosts (dictionary): Paths to folders for each host
        start (string): Start time for analysis
        end (string): End time for analysis
    """
    data = []
    for host_type in hosts.keys():
        for _, folder in hosts[host_type].items():
            puppet_folder = os.path.join(folder, "var", "log", "puppet")
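            # Editor's note (assumption, not from the original change): the
            # "[m ]Error: " pattern appears intended to match "Error: "
            # whether it follows a plain space or the trailing 'm' of an
            # ANSI colour escape sequence in puppet log files.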
command = (f"""grep -rh "[m ]Error: " {puppet_folder} """
|
||||
f"""2>/dev/null""")
|
||||
_evaluate_substring(start, end, data, command)
|
||||
|
||||
return sorted(data)
|
tools/collector/debian-scripts/report/plugin_algs/state_changes.py (new file)
@@ -0,0 +1,42 @@
########################################################################
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
########################################################################
#
# This file contains the functions for the state changes plugin
# algorithm.
#
# The state changes plugin algorithm gathers all log messages in the
# system pertaining to the state of hosts.
#
########################################################################

import os

from plugin_algs.substring import substring


def state_changes(hosts, start, end):
    """State changes algorithm
    Presents all messages in the system regarding the state of hosts, such
    as "is ENABLED"

    Parameters:
        hosts (dictionary): Paths to folders for each host
        start (string): Start time for analysis
        end (string): End time for analysis
    """
    data = []
    sc_files = []

    for _, folder in hosts["controllers"].items():
        sc_path = os.path.join(folder, "var", "log", "mtcAgent.log")
        sc_files.append(sc_path)

    sc_substrings = ["is ENABLED", "allStateChange (.+)locked-disabled"]
    data = substring(start, end, sc_substrings, sc_files)

    return sorted(data)
tools/collector/debian-scripts/report/plugin_algs/substring.py (new file, 173 lines)
@@ -0,0 +1,173 @@
########################################################################
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
########################################################################
#
# This file contains the functions for the substring plugin algorithm.
#
# The substring plugin algorithm looks for a set of substrings within
# a list of log files and extracts those log messages.
#
########################################################################

from datetime import datetime
import gzip
import logging
import os
import re
import subprocess


logger = logging.getLogger(__name__)


def substring(start, end, substr, files):
    """Substring algorithm
    Looks for all substrings in substr within files

    Parameters:
        start (string): Start time for analysis
        end (string): End time for analysis
        substr (string list): List of substrings to look for
        files (string list): List of absolute filepaths to search in

    Errors:
        FileNotFoundError
    """
    # don't analyze older files, continue with current file
    CONTINUE_CURRENT = 0
    # analyze older files, continue with current file
    CONTINUE_CURRENT_OLD = 1

    data = []
    for file in files:
        try:
            if not os.path.exists(file):
                if (re.search("controller-1_(.+)/var/log/mtcAgent.log",
                              file)):
                    continue
                raise FileNotFoundError(f"File not found: {file}")
            cont = True
            # Searching through file
            command = (f"""grep -Ea "{'|'.join(s for s in substr)}" """
                       f"""{file} 2>/dev/null""")
            status = _continue(start, end, file)

            if (status == CONTINUE_CURRENT
                    or status == CONTINUE_CURRENT_OLD):
                # continue with current file
                if status == CONTINUE_CURRENT:
                    cont = False
                _evaluate_substring(start, end, data, command)

            # Searching through rotated log files that aren't compressed
            n = 1
            while os.path.exists(f"{file}.{n}") and cont:
                command = (f"""grep -Ea "{'|'.join(s for s in substr)}" """
                           f"""{file}.{n} 2>/dev/null""")
                status = _continue(start, end, f"{file}.{n}")

                if (status == CONTINUE_CURRENT
                        or status == CONTINUE_CURRENT_OLD):
                    if status == CONTINUE_CURRENT:
                        cont = False
                    _evaluate_substring(start, end, data, command)

                n += 1

            # Searching through compressed rotated log files
            while os.path.exists(f"{file}.{n}.gz") and cont:
                command = (f"""zgrep -E "{'|'.join(s for s in substr)}" """
                           f"""{file}.{n}.gz 2>/dev/null""")
                status = _continue(start, end, f"{file}.{n}.gz",
                                   compressed=True)

                if (status == CONTINUE_CURRENT
                        or status == CONTINUE_CURRENT_OLD):
                    if status == CONTINUE_CURRENT:
                        cont = False
                    _evaluate_substring(start, end, data, command)

                n += 1

        except FileNotFoundError as e:
            logger.error(e)
            continue

    return sorted(data)


def _continue(start, end, file, compressed=False):
    """Determines whether the substring algorithm should analyze the current
    file and/or older rotated files, based on whether the timestamp of the
    first log message in the file falls before start, between start and end,
    or after end

    Parameters:
        start (string): Start time for analysis
        end (string): End time for analysis
        file (string): Current file
        compressed (boolean): Indicates if current file is compressed or not
    """
    # don't analyze older files, continue with current file
    CONTINUE_CURRENT = 0
    # analyze older files, continue with current file
    CONTINUE_CURRENT_OLD = 1
    # don't analyze current file, continue to older files
    CONTINUE_OLD = 2

    # check date of first log event and compare with provided
    # start, end dates
    first = ""

    if not compressed:
        with open(file) as f:
            line = f.readline()
            first = line[0:19]
    else:
        with gzip.open(file, "rb") as f:
            line = f.readline().decode("utf-8")
            first = line[0:19]
    try:
        datetime.strptime(line[0:19], "%Y-%m-%dT%H:%M:%S")
        first = line[0:19]
    except ValueError:
        return CONTINUE_CURRENT_OLD

    if first < start:
        return CONTINUE_CURRENT
    elif first < end and first > start:
        return CONTINUE_CURRENT_OLD
    elif first > end:
        return CONTINUE_OLD
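
# Example (illustrative, not part of the original change): with
# start=2022-12-01T00:00:00 and end=2022-12-12T00:00:00, _continue() returns
# CONTINUE_CURRENT for a file whose first log line is stamped 2022-11-20
# (older rotations lie entirely before the window), CONTINUE_CURRENT_OLD for
# a first line stamped 2022-12-05 (older rotations may still hold the start
# of the window), and CONTINUE_OLD for a first line stamped 2022-12-20
# (the whole file is after the window).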


def _evaluate_substring(start, end, data, command):
    """Adds log messages from the output of running command to data if the
    timestamp of the log message is greater than start and less than end

    Parameters:
        start (string): Start time for analysis
        end (string): End time for analysis
        data (string list): Log messages extracted so far
        command (string): Shell command to run
    """
    p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
    for line in p.stdout:
        line = line.decode("utf-8")
        # different date locations for log events
        dates = [line[0:19], line[2:21]]
        for date in dates:
            try:
                datetime.strptime(date, "%Y-%m-%dT%H:%M:%S")
                if date > start and date < end:
                    if line[0] == "|":  # sm-customer.log edge case
                        line = line[1:].strip()
                        line = re.sub("\\s+", " ", line)
                    data.append(line)
                    break
            except ValueError:
                if date == dates[-1]:
                    data.append(line)
tools/collector/debian-scripts/report/plugin_algs/swact_activity.py (new file)
@@ -0,0 +1,70 @@
########################################################################
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
########################################################################
#
# This file contains the functions for the swact activity plugin
# algorithm.
#
# The swact activity plugin algorithm gathers information about all
# swacting activity in the system.
#
########################################################################

from datetime import datetime
import os

from plugin_algs.substring import substring


def swact_activity(hosts, start, end):
    """Swact activity algorithm
    Presents all log messages about swacting activity in the system, such as
    "Uncontrolled swact"

    Parameters:
        hosts (dictionary): Paths to folders for each host
        start (string): Start time for analysis
        end (string): End time for analysis
    """
    data = []
    sm_files = []
    sm_customer_files = []
    swact_start = None
    swact_in_progress = False
    swact_end = None

    for _, folder in hosts["controllers"].items():
        sm_path = os.path.join(folder, "var", "log", "sm.log")
        sm_files.append(sm_path)
        sm_customer_path = os.path.join(folder, "var", "log",
                                        "sm-customer.log")
        sm_customer_files.append(sm_customer_path)

    sm_substrings = ["Uncontrolled swact", "Swact has started,",
                     "Neighbor (.+) is now in the down",
                     "Service (.+) has reached max failures",
                     "Swact update"]
    data = substring(start, end, sm_substrings, sm_files)

    for i, line in enumerate(data):
        if "Swact has started," in line and not swact_in_progress:
            swact_in_progress = True
            swact_start = datetime.strptime(line[0:19],
                                            "%Y-%m-%dT%H:%M:%S")
        elif "Swact update" in line and swact_in_progress:
            swact_in_progress = False
            swact_end = datetime.strptime(line[0:19], "%Y-%m-%dT%H:%M:%S")
            line += f" SWACT TOOK {swact_end - swact_start} \n"
            data[i] = line

    sm_customer_substrings = [
        "swact", "active-failed\\s+\\| disabling-failed\\s+\\|"
    ]
    data += substring(start, end, sm_customer_substrings,
                      sm_customer_files)

    return sorted(data)
tools/collector/debian-scripts/report/plugin_algs/system_info.py (new file)
@@ -0,0 +1,63 @@
########################################################################
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
########################################################################
#
# This file contains the functions for the system info plugin algorithm.
#
# The system info plugin algorithm gathers top level system information,
# such as the build type, sw version, and more.
#
########################################################################

import os
import re


def system_info(host_dir):
    """System info algorithm
    Presents basic information about the system, such as the build type

    Parameters:
        host_dir (string): path to the collect host dir
    """
    data = []
    with open(
        os.path.join(host_dir, "etc", "platform", "platform.conf")
    ) as file:
        for line in file:
            if "system_mode" in line:
                data.append(
                    f"System Mode: "
                    f"{re.match('^system_mode=(.*)', line).group(1)}"
                )
            elif "system_type" in line:
                data.append(
                    f"System Type: "
                    f"{re.match('^system_type=(.*)', line).group(1)}"
                )
            elif "distributed_cloud_role" in line:
                role = re.match('^distributed_cloud_role=(.*)',
                                line).group(1)
                data.append(f"Distributed cloud role: {role}")
            elif "sw_version" in line:
                data.append(
                    f"SW Version: "
                    f"{re.match('^sw_version=(.*)', line).group(1)}"
                )
    with open(
        os.path.join(host_dir, "etc", "build.info")
    ) as file:
        for line in file:
            if "BUILD_TYPE" in line:
                data.append(
                    f"Build Type: "
                    f"{re.match('^BUILD_TYPE=(.*)', line).group(1)}"
                )
            elif re.match("^OS=(.*)", line):
                data.append(f"OS: {re.match('^OS=(.*)', line).group(1)}")

    return data
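For reference, system_info() parses simple key=value entries from the collected
host's platform.conf and build.info. Illustrative examples of the entries it
looks for (the values below are made up, not taken from a real bundle):

    # etc/platform/platform.conf
    system_mode=duplex
    system_type=All-in-one
    sw_version=22.12
    distributed_cloud_role=systemcontroller

    # etc/build.info
    BUILD_TYPE=Formal
    OS=debian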
tools/collector/debian-scripts/report/plugins/substring_hosts (new executable file, 5 lines)
@@ -0,0 +1,5 @@
algorithm=substring
files=var/log/mtcClient.log, var/log/pmond.log, var/log/lmond.log, var/log/hbsClient.log
hosts=all
substring=FAILED
substring=Error:
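Developer plugins dropped into /etc/collect/plugins on the host use the same
label format and are loaded alongside the built-in plugins. A hypothetical
example (the plugin name, log file and substring below are illustrative only,
not part of this change):

    # /etc/collect/plugins/sm_errors  (hypothetical developer plugin)
    algorithm=substring
    files=var/log/sm.log
    hosts=all
    substring=ERROR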
tools/collector/debian-scripts/report/report.py (modified)
@@ -1,110 +1,201 @@
#!/usr/bin/env python3
########################################################################
#
# Copyright (c) 2022 Wind River Systems, Inc.
# Copyright (c) 2022 - 2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
########################################################################
#
# Description: The Report tool is used to gather relevant log events
# and information about the system from a collect bundle.
# Description: The Report tool is used to gather relevant log, events
# and information about the system from a collect bundle
# and present that data for quick / easy issue analysis.
#
# The report tool allows user created plugins which decide relevance
# for log events. Plugins contain an algorithm label which instructs the
# tool what information to search and how to search for it.
# Overview:
#
# The report tool requires the collect bundle and host tarballs to be
# untarred.
# The report tool implements an 'Execution Engine' that runs 'Algorithms'
# against 'Plugins' to gather logs, events & system information that
# the 'Correlator' analyzes to produce a summary of events, alarms,
# state changes and failures found by the plugins.
#
# The report tool reads user plugins from the report directory in the
# top level of the collect bundle, and outputs files containing
# relevant logs to this directory as well.
# Report Tool: report.py
#
# Parses command line arguments, sets up logging, extracts the top
# level collect bundle, initializes the execution engine, loads the
# plugins and invokes the execution engine.
#
# Execution Engine: execution_engine.py
#
# Initialization extracts the bundle's host tarballs, finds the
# active controller and host types from each tarball. When executed
# runs the algorithms specified by each of the loaded plugins and
# then calls the correlator.
#
# Correlator: correlator.py
#
# Analyzes the data and logs gathered by the plugins and produces
# and displays a report_analysis that contains a summary of:
#
# - Alarms ... when and what alarms were found to have occurred
# - Events ... noteworthy events; Graceful Recovery, MNFA
# - Failures ... host or service management failures; swacts
# - State ... summary of host state changes; enable -> disable
#
# Algorithms: algorithms.py
#
# The report tool supports a set of built-in algorithms used to
# gather collect bundle events, logs and data.
#
# The following algorithms in 'plugin_algs' directory are supported:
#
# - audit.py ............. counts dcmanager audit events
# - alarm.py ............. summarizes alarm state transitions and when
# - heartbeat_loss.py ..... gathers maintenance heartbeat failures
# - daemon_failures.py .... gathers various common daemon log errors
# - maintenance_errors.py . gathers maintenance error logs
# - puppet_errors.py ...... gathers puppet failures and logs
# - state_changes.py ...... gathers a summary of host state changes
# - swact_activity.py ..... identifies various swact occurrences
# - process_failures.py ... gathers pmond process failure logs
# - substring.py .......... gathers substring plugin specified info
# - system_info.py ........ gathers system info ; type, mode, etc
#
# Plugins: plugin.py
#
# Plugins are small label based text files that specify an algorithm
# and other applicable labels used to find specific data, logs or
# events for that plugin.
#
# The following default internal plugins are automatically included
# with the report tool stored in the 'plugins' directory.
#
# - alarm ................. specifies alarms to look for
# - audit ................. find dcmanager audit events
# - daemon_failures ....... runs the daemon failure algorithm
# - heartbeat_loss ........ runs the mtce heartbeat loss algorithm
# - maintenance_errors .... find specific maintenance logs
# - process_failures ...... find pmon or sm process failures
# - puppet_errors ......... find configuration failure puppet logs
# - state_changes ......... find host state changes
# - substring ............. find logs containing named substrings
# - swact_activity ........ find swact failure and events
# - system_info ........... gather system information
#
# The report tool will also run additional (optional) user defined
# plugins developed and placed in the localhost's filesystem at
# /etc/collect/plugins.
#
# Typical Usage:
#
# command line                            functionality
# -------------------------------------   ----------------------------------
# > report.py                             - Run all plugins in directory
# > report.py [plugin ...]                - Run only specified plugins
# > report.py <algorithm> [labels]        - Run algorithm with labels
# > report.py --help                      - help message
# > report.py -d <collect bundle dir>     - Run all plugins against bundle
# > report.py -d <dir> [plugin ...]       - Run only specified plugins
# > report.py -d <dir> <algs> [labels]    - Run algorithm with labels
# > report.py <algorithm> --help          - algorithm specific help
#
# See --help output for a complete list of full and abbreviated
# command line options and examples of plugins.
#
# TODO: revise README
# Refer to README file for more usage and output examples
#######################################################################

import argparse
from cmath import log
from datetime import datetime
from datetime import timedelta
from datetime import timezone
import logging
import os
import shutil
import subprocess
import sys
import tarfile
import time

from execution_engine import ExecutionEngine
from plugin import Plugin


now = datetime.now(timezone.utc)
base_dir = os.path.dirname(os.path.realpath(__file__))
parent_dir = os.path.dirname(base_dir)
default_path = os.path.dirname(parent_dir)
report_dir = os.path.dirname(os.path.realpath(__file__))
analysis_folder_name = "report_analysis"
bundle_name = None
plugins = []

clean = True

# TODO: rework this description
parser = argparse.ArgumentParser(
    description="Log Event Reporter",
    epilog="Place plugins in 'plugins' directory found in 'report' directory "
    "at top level of collect bundle.\nOutput files will be placed in 'report' "
    "directory.\nThis tool will create a report.log and untar.log file along "
    "with other output files.",
    epilog="Analyzes data collected by the plugins and produces a "
    "report_analysis stored with the collect bundle. The report tool "
    "can be run either on or off system by specifying the bundle to "
    "analyze using the --directory or -d <directory> command option.",
)

parser.add_argument(
    "-v",
    "--verbose",
    "--debug",
    action="store_true",
    help="Verbose output",
    help="Enable debug logs",
)

parser.add_argument(
    "-s",
    "--start",
    default="20000101",
    help="Specify a start date in YYYYMMDD format for analysis "
    "(default:20000101)",
    "--clean", "-c",
    action="store_true",
    help="Cleanup (remove) existing report data",
)

parser.add_argument(
    "-e",
    "--end",
    "--directory", "-d",
    default="",
    required=False,
    help="Specify the full path to a directory containing a collect "
    "bundle to analyze. This is a required parameter",
)

parser.add_argument(
    "--file", "-f",
    default="",
    required=False,
    help="Specify the path to and filename of the tar bundle to analyze",
)

parser.add_argument(
    "--end", "-e",
    default=datetime.strftime(now + timedelta(days=1), "%Y%m%d"),
    help="Specify an end date in YYYYMMDD format for analysis "
    "(default: current date)",
)
parser.add_argument(
    "-p",
    "--plugin",
    default=None,
    nargs="*",
    help="Specify what plugins to run (default: runs every plugin in plugins "
    "folder)",
)
parser.add_argument(
    "-d",
    "--directory",
    default=default_path,
    help="Specify top level of collect bundle to analyze "
    "(default: two levels above tool directory)",
)

parser.add_argument(
    "--hostname",
    default="all",
    help="Specify host for correlator to find significant events and state "
    "changes for (default: all hosts)",
    help="Specify host for correlator to find significant events and "
    "state changes for (default: all hosts)",
)

parser.add_argument(
    "--plugin", "-p",
    default=None,
    nargs="*",
    help="Specify comma separated list of plugins to run "
    "(default: runs all found plugins)",
)

parser.add_argument(
    "--start", "-s",
    default="20000101",
    help="Specify a start date in YYYYMMDD format for analysis "
    "(default:20000101)",
)

parser.add_argument(
    "--verbose", "-v",
    action="store_true",
    help="Enable verbose output",
)

subparsers = parser.add_subparsers(help="algorithms", dest="algorithm")

# substring algorithm arguments
@@ -267,27 +358,55 @@ args.start = datetime.strptime(args.start, "%Y%m%d").strftime(
    "%Y-%m-%dT%H:%M:%S")
args.end = datetime.strptime(args.end, "%Y%m%d").strftime("%Y-%m-%dT%H:%M:%S")

if args.directory.endswith("/"):
    output_directory = os.path.join(
        default_path, "report", "output",
        os.path.basename(os.path.dirname(args.directory))
    )
else:
    output_directory = os.path.join(
        default_path, "report", "output", os.path.basename(args.directory)
    )
if args.file:
    if not os.path.exists(args.file):
        exit_msg = "Error: Specified file (" + args.file + ") does not exist."
        sys.exit(exit_msg)
    elif os.path.isdir(args.file):
        exit_msg = "Error: Specified file (" + args.file + ") is a directory."
        exit_msg += "\nPlease specify the full path to a tar file when using "
        exit_msg += "the --file option.\nOtherwise, use the --directory option"
        exit_msg += " instead."
        sys.exit(exit_msg)
    elif not tarfile.is_tarfile(args.file):
        exit_msg = "Error: Specified file (" + args.file + ") is not a tar "
        exit_msg += "file.\nPlease specify a tar file using the --file option."
        sys.exit(exit_msg)
    else:
        try:
            input_dir = os.path.splitext(args.file)[0]
            input_file = os.path.dirname(os.path.realpath(args.file))
            output_dir = os.path.join(input_dir, analysis_folder_name)
            # print("input_file : ", input_file)
            subprocess.run(["tar", "xfC", args.file, input_file], check=True)
            # print("extracted ", args.file)
        except subprocess.CalledProcessError as e:
            print(e)

# creating report log
os.makedirs(output_directory, exist_ok=True)
open(os.path.join(output_directory, "report.log"), "w").close()
elif args.directory:
    # Get the bundle input and report output dirs
    output_dir = os.path.join(args.directory, analysis_folder_name)
    input_dir = os.path.join(args.directory)
else:
    exit_msg = "Error: Please use either the --file or --directory option to "
    exit_msg += "specify a\ncollect bundle file or directory containing a "
    exit_msg += "collect bundle file to analyze."
    sys.exit(exit_msg)

# TODO: date current analysis if there rather than remove
if args.clean and not clean:
    clean = True
if clean is True and os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir, exist_ok=True)

# setting up logger
formatter = logging.Formatter("%(message)s")
logger = logging.getLogger()

logging.basicConfig(
    filename=os.path.join(output_directory, "report.log"),
    level=logging.INFO,
    filename=os.path.join(output_dir, "report.log"),
    level=logging.DEBUG,
    format="%(asctime)s %(levelname)s: %(message)s",
    datefmt="%Y-%m-%dT%H:%M:%S",
)
@@ -295,48 +414,166 @@ logging.Formatter.converter = time.gmtime

ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
if args.debug:
    ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)

logger.addHandler(ch)

if not os.path.isdir(args.directory):
    sys.exit("Top level of collect bundle given to analyze is not a directory")
else:
    for obj in (os.scandir(args.directory)):
        info = os.path.splitext(obj.name)
# Command line parsing done. Logging setup

        # TODO: ask user which file to report on if more than one tarball in
        # directory
        # Check if collect tarball is in given directory and extracts it if
        # not already done
        if (obj.is_file() and info[1] == ".tar"):
            try:
                result = subprocess.check_output(["tar", "tf", obj.path],
                                                 encoding="UTF-8")
                result = result.split("\n", 1)
                if not os.path.isdir(os.path.join(args.directory,
                                     os.path.dirname(result[0]))):
                    subprocess.run(["tar", "xfC", obj.path, args.directory],
                                   check=True)
                    subprocess.run(["echo", "extracted", obj.name], check=True)
                args.directory = os.path.join(args.directory,
                                              os.path.dirname(result[0]))
#####################################################################
# Find and extract the bundle to analyze
#####################################################################

# Creating report log
open(os.path.join(output_dir, "report.log"), "w").close()

if args.debug:
    logger.debug("Arguments : %s", args)
    logger.debug("Report Dir: %s", report_dir)
    logger.debug("Input Dir: %s", input_dir)
    logger.debug("Output Dir: %s", output_dir)

if not os.path.isdir(input_dir):
    sys.exit("Error: Specified input directory is not a directory")

# Search 'input_dir' for bundles.
bundle_tar_file_found = False
bundle_name = None
bundle_names = []
bundles = []
ignore_list = [analysis_folder_name]
ignore_list += ["apps", "horizon", "lighttpd", "lost+found", "sysinv-tmpdir"]

with open(os.path.join(output_dir, "untar.log"), "a") as logfile:
    for obj in (os.scandir(input_dir)):
        # Don't display dirs from the ignore list.
        # This makes the bundle selection list cleaner when
        # report is run against /scratch
        ignored = False
        for ignore in ignore_list:
            if obj.name == ignore:
                ignored = True
        if ignored is True:
            continue

        if obj.is_dir(follow_symlinks=False):
            date_time = obj.name[-15:]
            if args.debug:
                logger.debug("Found Dir : %s : %s", obj.name, date_time)
        else:
            if not tarfile.is_tarfile(obj.path):
                continue
            filename = os.path.splitext(obj.name)[0]
            date_time = filename[-15:]
            if args.debug:
                logger.debug("Found File: %s : %s", obj.name, date_time)

        # TODO: Add more filtering above so that directories that are
        # clearly not collect data are not added to the list of
        # options.

        # Add this bundle to the list. Avoid duplicates
        found = False
        name = obj.name
        if obj.name.endswith('.tar'):
            bundle_tar_file_found = True
            name = os.path.splitext(obj.name)[0]
        for bundle in bundles:
            if bundle == name:
                found = True
                break
        if found is False:
            bundles.append(name)
            bundle_names.append(name)
        else:
            logger.debug("Discarding duplicate %s", obj.name)

if args.debug:
    logger.debug("Bundle %d : %s", len(bundles), bundles)
    logger.debug("Bundle Sel: %s", bundle_names)

if bundles:
    if bundle_tar_file_found is False:
        # If a collect bundle .tar file is not found then treat this
        # case as though the input_dir is a hosts tarball directory
        # like would be seen when running report on the system during
        # the collect operation.
        bundle_name = input_dir

    elif len(bundles) > 1:
        retry = True
        while retry is True:
            logger.info("0 - exit")
            index = 1
            # TODO: filter files/dirs with date.time ; 20221102.143258
            for bundle in bundle_names:
                if bundle.endswith(('.tar', '.tgz', '.gz')):
                    logger.info("%d - %s", index, os.path.splitext(bundle)[0])
                else:
                    logger.info("%d - %s", index, bundle)
                index += 1
            try:
                select = int(input('Please select the bundle to analyze: '))
            except ValueError:
                logger.info("Invalid input; integer between 1 "
                            "and %d required", len(bundles))
                continue
            if not select:
                sys.exit()
            if select <= len(bundles):
                index = 0
                for bundle in bundle_names:
                    if index == select-1:
                        logger.info("%s selected", bundle)
                        bundle_name = bundle
                        break
                    else:
                        index += 1
                retry = False
            else:
                logger.info("Invalid selection (%s) index=%d",
                            select, index)
    # single bundle found
    else:
        # logger.info("bundle_names: %s", bundle_names)
        bundle_name = bundle_names[0]

# handle the no bundles found case
if bundle_name is None:
    sys.exit("No valid collect bundles found.")

# extract the bundle if not already extracted
path_file = os.path.join(input_dir, bundle_name)
if not os.path.isdir(path_file):
    try:
        logger.info("extracting %s", path_file)
        subprocess.run(["tar", "xfC", path_file+".tar", input_dir], check=True)
    except subprocess.CalledProcessError as e:
        logger.error(e)

elif args.debug:
    logger.debug("already extracted ...")

# create the output directory ; report_analysis
output_dir = os.path.join(path_file, analysis_folder_name)
if not os.path.exists(output_dir):
    os.makedirs(output_dir, exist_ok=True)

# initialize the execution engine
try:
    engine = ExecutionEngine(args, output_directory)
    engine = ExecutionEngine(args, path_file, output_dir)
except ValueError as e:
    logger.error(str(e))
    sys.exit("Confirm you are running the report tool on a collect bundle")

if args.algorithm:
    plugins.append(Plugin(opts=vars(args)))
else:
    if args.plugin:
elif args.plugin:
    for p in args.plugin:
        path = os.path.join(default_path, "report", "plugins", p)
        path = os.path.join(report_dir, "plugins", p)
        if os.path.exists(path):
            try:
                plugins.append(Plugin(path))
@@ -344,17 +581,23 @@ else:
                logger.error(str(e))

        else:
            logger.warning(f"{p} plugin does not exist")
    else:
        path = os.path.join(default_path, "report", "plugins")
        if not os.path.exists(path):
            os.mkdir(path)
            logger.error("Plugins folder is empty")
        else:
            for file in os.listdir(path):
                try:
                    plugins.append(Plugin(os.path.join(path, file)))
                except Exception as e:
                    logger.error(str(e))
            logger.warning("%s plugin does not exist", p)
else:
    # load builtin plugins
    builtin_plugins = os.path.join(report_dir, "plugins")
    if os.path.exists(builtin_plugins):
        for file in os.listdir(builtin_plugins):
            plugins.append(Plugin(os.path.join(builtin_plugins, file)))
            logger.debug("loading built-in plugin: %s", file)

engine.execute(plugins, output_directory)
    # add localhost plugins
    localhost_plugins = os.path.join("/etc/collect", "plugins")
    if os.path.exists(localhost_plugins):
        for file in os.listdir(localhost_plugins):
            plugins.append(Plugin(os.path.join(localhost_plugins, file)))
            logger.debug("loading localhost plugin: %s", file)

# analyze the collect bundle
engine.execute(plugins, output_dir)

sys.exit()
@@ -13,10 +13,12 @@ override_dh_auto_install:

install -m 755 -d $(SYSCONFDIR)/collect.d
install -m 755 -d $(SYSCONFDIR)/collect
install -m 755 -d $(SYSCONFDIR)/collect/plugins # Report Tool
install -m 755 -d $(SYSCONFDIR)/collect/plugins
install -m 755 -d $(ROOT)/usr/local/sbin
install -m 755 -d $(ROOT)/usr/local/bin
install -m 755 -d $(ROOT)/usr/local/bin/report/tool # Report Tool
install -m 755 -d $(ROOT)/usr/local/bin/report
install -m 755 -d $(ROOT)/usr/local/bin/report/plugins
install -m 755 -d $(ROOT)/usr/local/bin/report/plugin_algs
install -m 755 -d $(SBINDIR)

install -m 755 -p collect $(ROOT)/usr/local/sbin/collect
@@ -29,23 +31,40 @@ override_dh_auto_install:
install -m 755 -p mariadb-cli.sh $(ROOT)/usr/local/sbin/mariadb-cli

# Report Tool
install -m 755 -p report/report.py $(ROOT)/usr/local/bin/report/tool/report.py
install -m 755 -p report/execution_engine.py $(ROOT)/usr/local/bin/report/tool/execution_engine.py
install -m 755 -p report/algorithms.py $(ROOT)/usr/local/bin/report/tool/algorithms.py
install -m 755 -p report/plugin.py $(ROOT)/usr/local/bin/report/tool/plugin.py
install -m 755 -p report/correlator.py $(ROOT)/usr/local/bin/report/tool/correlator.py
install -m 755 -p report/README $(ROOT)/usr/local/bin/report/tool/README
install -m 755 -p report/plugins/alarm $(SYSCONFDIR)/collect/plugins/alarm
install -m 755 -p report/plugins/daemon_failures $(SYSCONFDIR)/collect/plugins/daemon_failures
install -m 755 -p report/plugins/heartbeat_loss $(SYSCONFDIR)/collect/plugins/heartbeat_loss
install -m 755 -p report/plugins/maintenance_errors $(SYSCONFDIR)/collect/plugins/maintenance_errors
install -m 755 -p report/plugins/process_failures $(SYSCONFDIR)/collect/plugins/process_failures
install -m 755 -p report/plugins/puppet_errors $(SYSCONFDIR)/collect/plugins/puppet_errors
install -m 755 -p report/plugins/state_changes $(SYSCONFDIR)/collect/plugins/state_changes
install -m 755 -p report/plugins/substring $(SYSCONFDIR)/collect/plugins/substring
install -m 755 -p report/plugins/swact_activity $(SYSCONFDIR)/collect/plugins/swact_activity
install -m 755 -p report/plugins/system_info $(SYSCONFDIR)/collect/plugins/system_info
install -m 755 -p report/report.py $(ROOT)/usr/local/bin/report/report.py
install -m 755 -p report/execution_engine.py $(ROOT)/usr/local/bin/report/execution_engine.py
install -m 755 -p report/algorithms.py $(ROOT)/usr/local/bin/report/algorithms.py
install -m 755 -p report/plugin.py $(ROOT)/usr/local/bin/report/plugin.py
install -m 755 -p report/correlator.py $(ROOT)/usr/local/bin/report/correlator.py
install -m 755 -p report/README $(ROOT)/usr/local/bin/report/README

# Report Tool Plugin Algorithms
install -m 755 -p report/plugin_algs/alarm.py $(ROOT)/usr/local/bin/report/plugin_algs/alarm.py
install -m 755 -p report/plugin_algs/audit.py $(ROOT)/usr/local/bin/report/plugin_algs/audit.py
install -m 755 -p report/plugin_algs/daemon_failures.py $(ROOT)/usr/local/bin/report/plugin_algs/daemon_failures.py
install -m 755 -p report/plugin_algs/heartbeat_loss.py $(ROOT)/usr/local/bin/report/plugin_algs/heartbeat_loss.py
install -m 755 -p report/plugin_algs/maintenance_errors.py $(ROOT)/usr/local/bin/report/plugin_algs/maintenance_errors.py
install -m 755 -p report/plugin_algs/process_failures.py $(ROOT)/usr/local/bin/report/plugin_algs/process_failures.py
install -m 755 -p report/plugin_algs/puppet_errors.py $(ROOT)/usr/local/bin/report/plugin_algs/puppet_errors.py
install -m 755 -p report/plugin_algs/state_changes.py $(ROOT)/usr/local/bin/report/plugin_algs/state_changes.py
install -m 755 -p report/plugin_algs/substring.py $(ROOT)/usr/local/bin/report/plugin_algs/substring.py
install -m 755 -p report/plugin_algs/swact_activity.py $(ROOT)/usr/local/bin/report/plugin_algs/swact_activity.py
install -m 755 -p report/plugin_algs/system_info.py $(ROOT)/usr/local/bin/report/plugin_algs/system_info.py

# Report Tool Plugins
install -m 755 -p report/plugins/alarm $(ROOT)/usr/local/bin/report/plugins/alarm
install -m 755 -p report/plugins/daemon_failures $(ROOT)/usr/local/bin/report/plugins/daemon_failures
install -m 755 -p report/plugins/heartbeat_loss $(ROOT)/usr/local/bin/report/plugins/heartbeat_loss
install -m 755 -p report/plugins/maintenance_errors $(ROOT)/usr/local/bin/report/plugins/maintenance_errors
install -m 755 -p report/plugins/process_failures $(ROOT)/usr/local/bin/report/plugins/process_failures
install -m 755 -p report/plugins/puppet_errors $(ROOT)/usr/local/bin/report/plugins/puppet_errors
install -m 755 -p report/plugins/state_changes $(ROOT)/usr/local/bin/report/plugins/state_changes
install -m 755 -p report/plugins/substring $(ROOT)/usr/local/bin/report/plugins/substring
install -m 755 -p report/plugins/swact_activity $(ROOT)/usr/local/bin/report/plugins/swact_activity
install -m 755 -p report/plugins/system_info $(ROOT)/usr/local/bin/report/plugins/system_info
install -m 755 -p report/plugins/substring_hosts $(SYSCONFDIR)/collect/plugins/substring_hosts

# Collect Plugins
install -m 755 -p collect_sysinv.sh $(SYSCONFDIR)/collect.d/collect_sysinv
install -m 755 -p collect_psqldb.sh $(SYSCONFDIR)/collect.d/collect_psqldb
install -m 755 -p collect_mariadb.sh $(SYSCONFDIR)/collect.d/collect_mariadb