From 2d449bfade34538abb2e3b7699e282fb7114d980 Mon Sep 17 00:00:00 2001
From: Rodion Promyshlennikov
Date: Fri, 30 Sep 2016 22:23:59 +0300
Subject: [PATCH] Add test that checks http and logs alarms

Added test checks "nova-logs-error" and "nova-api-http-errors" alarms.
Added test checks "neutron-logs-error" and "neutron-api-http-errors" alarms.
Added test checks "glance-logs-error" and "glance-api-http-errors" alarms.
Added test checks "heat-logs-error" and "heat-api-http-errors" alarms.
Added test checks "cinder-logs-error" and "cinder-api-http-errors" alarms.
Small typo fix in docstrings of used types.

Change-Id: I4aa5a5cdd9e69766aa4cdf7b39275ccd45dd86e8
---
 stacklight_tests/helpers/helpers.py       |  40 +++-
 stacklight_tests/helpers/remote_ops.py    |   4 +-
 stacklight_tests/toolchain/test_alarms.py | 200 ++++++++++++++++++++++
 3 files changed, 240 insertions(+), 4 deletions(-)

diff --git a/stacklight_tests/helpers/helpers.py b/stacklight_tests/helpers/helpers.py
index f9f46ae..334441c 100644
--- a/stacklight_tests/helpers/helpers.py
+++ b/stacklight_tests/helpers/helpers.py
@@ -12,6 +12,7 @@
 #    License for the specific language governing permissions and limitations
 #    under the License.
 
+import contextlib
 import os
 import re
 import signal
@@ -119,7 +120,7 @@ class PluginHelper(object):
         :param plugin: name of the plugin.
         :type plugin: str
         :param parameter: name of the parameter.
-        :type name: str
+        :type parameter: str
         :returns: parameter's value
         """
         asserts.assert_true(
@@ -202,7 +203,7 @@ class PluginHelper(object):
         """Get the virtual IP address.
 
         :param vip_name: name of the VIP.
-        :type name: str
+        :type vip_name: str
         :returns: the VIP address in dotted-decimal notation
         :rtype: str
         """
@@ -713,6 +714,41 @@ class PluginHelper(object):
         else:
             return result
 
+    @contextlib.contextmanager
+    def make_logical_db_unavailable(self, db_name, controller):
+        """Context manager that makes a logical database unavailable.
+
+        All tables of the provided database are renamed to UPPERCASE on
+        enter and renamed back to lowercase on exit. The tables are
+        restored even if the managed block raises (e.g. a failed alarm
+        check), so the database is not left broken.
+
+        :param db_name: logical database name
+        :type db_name: str
+        :param controller: controller with MySQL database
+        :type controller: nailgun node
+        :returns: None, works as context manager
+        """
+        cmd = (
+            "mysql -AN -e "
+            "\"select concat("
+            "'rename table {db_name}.', table_name, ' "
+            "to {db_name}.' , {method}(table_name) , ';') "
+            "from information_schema.tables "
+            "where table_schema = '{db_name}';"
+            "\" | mysql")
+
+        with self.fuel_web.get_ssh_for_nailgun_node(controller) as remote:
+            remote.check_call(cmd.format(db_name=db_name, method="upper"))
+
+        try:
+            yield
+        finally:
+            # Always restore the original table names, otherwise a failure
+            # inside the "with" block would leave the database unusable.
+            with self.fuel_web.get_ssh_for_nailgun_node(controller) as remote:
+                remote.check_call(cmd.format(db_name=db_name, method="lower"))
+
 
 def _raise_TimeOut(sig, stack):
     raise TimeoutException()
diff --git a/stacklight_tests/helpers/remote_ops.py b/stacklight_tests/helpers/remote_ops.py
index 90edadd..17c92e1 100644
--- a/stacklight_tests/helpers/remote_ops.py
+++ b/stacklight_tests/helpers/remote_ops.py
@@ -105,9 +105,9 @@ def clear_resource(remote, resource, wait=None):
     :param remote: SSH connection to the node.
     :type remote: SSHClient
     :param resource: resource name.
-    :type name: str
+    :type resource: str
     :param wait: number of seconds to wait for the operation to complete.
-    :type operation: int
+    :type wait: int
     """
     cmd = "pcs resource clear {}".format(resource)
     if wait is not None:
diff --git a/stacklight_tests/toolchain/test_alarms.py b/stacklight_tests/toolchain/test_alarms.py
index 45161ca..b664c60 100644
--- a/stacklight_tests/toolchain/test_alarms.py
+++ b/stacklight_tests/toolchain/test_alarms.py
@@ -94,6 +94,25 @@ class TestToolchainAlarms(api.ToolchainApi):
         self.check_alarms("service", "rabbitmq", "memory",
                           controller["hostname"], OKAY_STATUS)
 
+    def _verify_service_alarms(self, trigger_fn, trigger_count,
+                               metrics, status):
+        """Check services' alarm metrics.
+
+        :param trigger_fn: function that affects an alarm of needed service
+        :type trigger_fn: callable
+        :param trigger_count: how many times call trigger function
+        :type trigger_count: int
+        :param metrics: mapping with needed metrics of alarms to check
+        :type metrics: dict
+        :param status: value of metric to check
+        :type status: int (in most cases)
+        :return: None
+        """
+        for _ in range(trigger_count):
+            trigger_fn()
+        for service, source in metrics.items():
+            self.check_alarms("service", service, source, None, status)
+
     @test(depends_on_groups=["deploy_toolchain"],
           groups=["check_mysql_fs_alarms", "toolchain", "alarms"])
     @log_snapshot_after_test
@@ -301,3 +320,184 @@ class TestToolchainAlarms(api.ToolchainApi):
             self.helpers.cluster_id, ["compute"])[0]
         self._check_filesystem_alarms(compute, "/var/lib/nova", "nova-fs",
                                       "/var/lib/nova/bigfile", "compute")
+
+    @test(depends_on_groups=["deploy_toolchain"],
+          groups=["check_nova_api_logs_errors_alarms",
+                  "http_logs_errors_alarms", "toolchain", "alarms"])
+    @log_snapshot_after_test
+    def check_nova_api_logs_errors_alarms(self):
+        """Check that nova-logs-error and nova-api-http-errors alarms work as
+        expected.
+
+        Scenario:
+            1. Rename all nova tables to UPPERCASE.
+            2. Run some nova list command repeatedly.
+            3. Check the last value of the nova-logs-error alarm in InfluxDB.
+            4. Check the last value of the nova-api-http-errors alarm
+               in InfluxDB.
+            5. Revert all nova tables names to lowercase.
+
+        Duration 10m
+        """
+        def get_servers_list():
+            try:
+                self.helpers.os_conn.get_servers()
+            except Exception:
+                pass
+
+        self.env.revert_snapshot("deploy_toolchain")
+
+        controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+            self.helpers.cluster_id, ["controller"])[0]
+
+        with self.helpers.make_logical_db_unavailable("nova", controller):
+            metrics = {"nova-logs": "error",
+                       "nova-api": "http_errors"}
+            self._verify_service_alarms(
+                get_servers_list, 100, metrics, WARNING_STATUS)
+
+    @test(depends_on_groups=["deploy_toolchain"],
+          groups=["check_neutron_api_logs_errors_alarms",
+                  "http_logs_errors_alarms", "toolchain", "alarms"])
+    @log_snapshot_after_test
+    def check_neutron_api_logs_errors_alarms(self):
+        """Check that neutron-logs-error and neutron-api-http-errors
+        alarms work as expected.
+
+        Scenario:
+            1. Rename all neutron tables to UPPERCASE.
+            2. Run some neutron agents list command repeatedly.
+            3. Check the last value of the neutron-logs-error alarm
+               in InfluxDB.
+            4. Check the last value of the neutron-api-http-errors alarm
+               in InfluxDB.
+            5. Revert all neutron tables names to lowercase.
+
+        Duration 10m
+        """
+        def get_agents_list():
+            try:
+                self.helpers.os_conn.list_agents()
+            except Exception:
+                pass
+
+        self.env.revert_snapshot("deploy_toolchain")
+
+        controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+            self.helpers.cluster_id, ["controller"])[0]
+
+        with self.helpers.make_logical_db_unavailable("neutron", controller):
+            metrics = {"neutron-logs": "error",
+                       "neutron-api": "http_errors"}
+            self._verify_service_alarms(
+                get_agents_list, 100, metrics, WARNING_STATUS)
+
+    @test(depends_on_groups=["deploy_toolchain"],
+          groups=["check_glance_api_logs_errors_alarms",
+                  "http_logs_errors_alarms", "toolchain", "alarms"])
+    @log_snapshot_after_test
+    def check_glance_api_logs_errors_alarms(self):
+        """Check that glance-logs-error and glance-api-http-errors
+        alarms work as expected.
+
+        Scenario:
+            1. Rename all glance tables to UPPERCASE.
+            2. Run some glance image list command repeatedly.
+            3. Check the last value of the glance-logs-error alarm in InfluxDB.
+            4. Check the last value of the glance-api-http-errors alarm
+               in InfluxDB.
+            5. Revert all glance tables names to lowercase.
+
+        Duration 10m
+        """
+        def get_images_list():
+            try:
+                # NOTE(rpromyshlennikov): List is needed here
+                # because glance image list is lazy method
+                return list(self.helpers.os_conn.get_image_list())
+            except Exception:
+                pass
+
+        self.env.revert_snapshot("deploy_toolchain")
+
+        controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+            self.helpers.cluster_id, ["controller"])[0]
+
+        with self.helpers.make_logical_db_unavailable("glance", controller):
+            metrics = {"glance-logs": "error",
+                       "glance-api": "http_errors"}
+            self._verify_service_alarms(
+                get_images_list, 100, metrics, WARNING_STATUS)
+
+    @test(depends_on_groups=["deploy_toolchain"],
+          groups=["check_heat_api_logs_errors_alarms",
+                  "http_logs_errors_alarms", "toolchain", "alarms"])
+    @log_snapshot_after_test
+    def check_heat_api_logs_errors_alarms(self):
+        """Check that heat-logs-error and heat-api-http-errors alarms work as
+        expected.
+
+        Scenario:
+            1. Rename all heat tables to UPPERCASE.
+            2. Run some heat stack list command repeatedly.
+            3. Check the last value of the heat-logs-error alarm in InfluxDB.
+            4. Check the last value of the heat-api-http-errors alarm
+               in InfluxDB.
+            5. Revert all heat tables names to lowercase.
+
+        Duration 10m
+        """
+        def get_stacks_list():
+            try:
+                with self.fuel_web.get_ssh_for_nailgun_node(
+                        controller) as remote:
+                    return remote.execute(
+                        ". openrc && heat stack-list > /dev/null 2>&1")
+            except Exception:
+                pass
+
+        self.env.revert_snapshot("deploy_toolchain")
+
+        controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+            self.helpers.cluster_id, ["controller"])[0]
+
+        with self.helpers.make_logical_db_unavailable("heat", controller):
+            metrics = {"heat-logs": "error",
+                       "heat-api": "http_errors"}
+            self._verify_service_alarms(
+                get_stacks_list, 100, metrics, WARNING_STATUS)
+
+    @test(depends_on_groups=["deploy_toolchain"],
+          groups=["check_cinder_api_logs_errors_alarms",
+                  "http_logs_errors_alarms", "toolchain", "alarms"])
+    @log_snapshot_after_test
+    def check_cinder_api_logs_errors_alarms(self):
+        """Check that cinder-logs-error and cinder-api-http-errors
+        alarms work as expected.
+
+        Scenario:
+            1. Rename all cinder tables to UPPERCASE.
+            2. Run some cinder list command repeatedly.
+            3. Check the last value of the cinder-logs-error alarm in InfluxDB.
+            4. Check the last value of the cinder-api-http-errors alarm
+               in InfluxDB.
+            5. Revert all cinder tables names to lowercase.
+
+        Duration 10m
+        """
+        def get_volumes_list():
+            try:
+                self.helpers.os_conn.cinder.volumes.list()
+            except Exception:
+                pass
+
+        self.env.revert_snapshot("deploy_toolchain")
+
+        controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+            self.helpers.cluster_id, ["controller"])[0]
+
+        with self.helpers.make_logical_db_unavailable("cinder", controller):
+            metrics = {"cinder-logs": "error",
+                       "cinder-api": "http_errors"}
+            self._verify_service_alarms(
+                get_volumes_list, 100, metrics, WARNING_STATUS)