diff --git a/cinder/db/migrations/versions/daa98075b90d_add_resource_indexes.py b/cinder/db/migrations/versions/daa98075b90d_add_resource_indexes.py new file mode 100644 index 00000000000..b1135c6a722 --- /dev/null +++ b/cinder/db/migrations/versions/daa98075b90d_add_resource_indexes.py @@ -0,0 +1,60 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Add resource indexes + +Revision ID: daa98075b90d +Revises: 9c74c1c6971f +Create Date: 2021-11-26 10:26:41.883072 +""" + +from alembic import op +from oslo_db.sqlalchemy import enginefacade +from oslo_db.sqlalchemy import utils +from oslo_log import log as logging + + +LOG = logging.getLogger(__name__) + + +# revision identifiers, used by Alembic. +revision = 'daa98075b90d' +down_revision = 'c92a3e68beed' +branch_labels = None +depends_on = None + +INDEXES = ( + ('groups', 'groups_deleted_project_id_idx', ('deleted', 'project_id')), + + ('group_snapshots', 'group_snapshots_deleted_project_id_idx', + ('deleted', 'project_id')), + + ('volumes', 'volumes_deleted_project_id_idx', ('deleted', 'project_id')), + ('volumes', 'volumes_deleted_host_idx', ('deleted', 'host')), + + ('backups', 'backups_deleted_project_id_idx', ('deleted', 'project_id')), + + ('snapshots', 'snapshots_deleted_project_id_idx', ('deleted', + 'project_id')), +) + + +def upgrade(): + engine = enginefacade.reader.get_engine() + is_mysql = engine.dialect.name == 'mysql' + + for table, idx_name, fields in INDEXES: + # Skip creation in mysql if it already has the index + if is_mysql and utils.index_exists(engine, table, idx_name): + LOG.info('Skipping index %s, already exists', idx_name) + else: + op.create_index(idx_name, table, fields) diff --git a/cinder/db/sqlalchemy/models.py b/cinder/db/sqlalchemy/models.py index b349ede242a..5df343609c2 100644 --- a/cinder/db/sqlalchemy/models.py +++ b/cinder/db/sqlalchemy/models.py @@ -218,6 +218,11 @@ class Group(BASE, CinderBase): """Represents a generic volume group.""" __tablename__ = 'groups' + __table_args__ = ( + # Speed up normal listings + sa.Index('groups_deleted_project_id_idx', 'deleted', 'project_id'), + CinderBase.__table_args__, + ) id = sa.Column(sa.String(36), primary_key=True) @@ -272,6 +277,12 @@ class GroupSnapshot(BASE, CinderBase): """Represents a group snapshot.""" __tablename__ = 'group_snapshots' + __table_args__ = ( + # Speed up normal listings + sa.Index('group_snapshots_deleted_project_id_idx', + 'deleted', 'project_id'), + CinderBase.__table_args__, + ) id = sa.Column(sa.String(36), primary_key=True) @@ -304,6 +315,11 @@ class Volume(BASE, CinderBase): __tablename__ = 'volumes' __table_args__ = ( sa.Index('volumes_service_uuid_idx', 'deleted', 'service_uuid'), + # Speed up normal listings + sa.Index('volumes_deleted_project_id_idx', 'deleted', 'project_id'), + # Speed up service start, create volume from image when using direct + # urls, host REST API, and the cinder-manage update host cmd + sa.Index('volumes_deleted_host_idx', 'deleted', 'host'), CinderBase.__table_args__, ) @@ -894,6 +910,11 @@ class Snapshot(BASE, CinderBase): """Represents a snapshot of volume.""" __tablename__ = 'snapshots' + __table_args__ = ( + # Speed up normal listings + sa.Index('snapshots_deleted_project_id_idx', 'deleted', 'project_id'), + CinderBase.__table_args__, + ) id = sa.Column(sa.String(36), primary_key=True) # TODO: (Y release) Change nullable to False @@ -996,6 +1017,11 @@ class Backup(BASE, CinderBase): """Represents a backup of a volume to Swift.""" __tablename__ = 'backups' + __table_args__ = ( + # Speed up normal listings + sa.Index('backups_deleted_project_id_idx', 'deleted', 'project_id'), + CinderBase.__table_args__, + ) id = sa.Column(sa.String(36), primary_key=True) # Backups don't have use_quota field since we don't have temporary backups diff --git a/cinder/tests/unit/db/test_migrations.py b/cinder/tests/unit/db/test_migrations.py index 612b9caef0c..3c8ed6dbdb7 100644 --- a/cinder/tests/unit/db/test_migrations.py +++ b/cinder/tests/unit/db/test_migrations.py @@ -216,6 +216,17 @@ class MigrationsWalk( # But it's nullable self.assertTrue(table.c.shared_targets.nullable) + def _check_daa98075b90d(self, connection): + """Test resources have indexes.""" + for table in ('groups', 'group_snapshots', 'volumes', 'snapshots', + 'backups'): + db_utils.index_exists(connection, + table, + f'{table}_deleted_project_id_idx') + + db_utils.index_exists(connection, + 'volumes', 'volumes_deleted_host_idx') + class TestMigrationsWalkSQLite( MigrationsWalk, diff --git a/cinder/tests/unit/volume/test_volume_usage_audit.py b/cinder/tests/unit/volume/test_volume_usage_audit.py index bb9270d7000..c2bdb75dd8b 100644 --- a/cinder/tests/unit/volume/test_volume_usage_audit.py +++ b/cinder/tests/unit/volume/test_volume_usage_audit.py @@ -187,9 +187,8 @@ class GetActiveByWindowTestCase(base.BaseVolumeTestCase): datetime.datetime(1, 4, 1, 1, 1, 1), project_id=fake.PROJECT_ID) self.assertEqual(3, len(volumes)) - self.assertEqual(fake.VOLUME2_ID, volumes[0].id) - self.assertEqual(fake.VOLUME3_ID, volumes[1].id) - self.assertEqual(fake.VOLUME4_ID, volumes[2].id) + self.assertEqual({fake.VOLUME2_ID, fake.VOLUME3_ID, fake.VOLUME4_ID}, + {v.id for v in volumes}) def test_snapshot_get_all_active_by_window(self): # Find all all snapshots valid within a timeframe window. @@ -229,12 +228,11 @@ class GetActiveByWindowTestCase(base.BaseVolumeTestCase): datetime.datetime(1, 3, 1, 1, 1, 1), datetime.datetime(1, 4, 1, 1, 1, 1)).objects self.assertEqual(3, len(snapshots)) - self.assertEqual(snap2.id, snapshots[0].id) - self.assertEqual(fake.VOLUME_ID, snapshots[0].volume_id) - self.assertEqual(snap3.id, snapshots[1].id) - self.assertEqual(fake.VOLUME_ID, snapshots[1].volume_id) - self.assertEqual(snap4.id, snapshots[2].id) - self.assertEqual(fake.VOLUME_ID, snapshots[2].volume_id) + + self.assertEqual({snap2.id, snap3.id, snap4.id}, + {s.id for s in snapshots}) + self.assertEqual({fake.VOLUME_ID}, + {s.volume_id for s in snapshots}) def test_backup_get_all_active_by_window(self): # Find all backups valid within a timeframe window. @@ -266,6 +264,5 @@ class GetActiveByWindowTestCase(base.BaseVolumeTestCase): project_id=fake.PROJECT_ID ) self.assertEqual(3, len(backups)) - self.assertEqual(fake.BACKUP2_ID, backups[0].id) - self.assertEqual(fake.BACKUP3_ID, backups[1].id) - self.assertEqual(fake.BACKUP4_ID, backups[2].id) + self.assertEqual({fake.BACKUP2_ID, fake.BACKUP3_ID, fake.BACKUP4_ID}, + {b.id for b in backups}) diff --git a/doc/source/admin/troubleshoot.rst b/doc/source/admin/troubleshoot.rst index c597f3bf420..0446cb4ac3f 100644 --- a/doc/source/admin/troubleshoot.rst +++ b/doc/source/admin/troubleshoot.rst @@ -18,3 +18,4 @@ Storage installation. ts-no-emulator-x86-64.rst ts-non-existent-host.rst ts-non-existent-vlun.rst + ts-db-cpu-spikes.rst diff --git a/doc/source/admin/ts-db-cpu-spikes.rst b/doc/source/admin/ts-db-cpu-spikes.rst new file mode 100644 index 00000000000..ddafd73fb2b --- /dev/null +++ b/doc/source/admin/ts-db-cpu-spikes.rst @@ -0,0 +1,37 @@ +===================================== +Database CPU spikes during operations +===================================== + +Query load upon the database can become a bottleneck that cascades across a +deployment and ultimately degrades not only the Cinder service but also the +whole OpenStack deployment. + +Often, depending on load, query patterns, periodic tasks, and so on and so +forth, additional indexes may be needed to help provide hints to the database +so it can most efficently attempt to reduce the number of rows which need to +be examined in order to return a result set. + +Adding indexes +-------------- + +In older releases, before 2023.1 (Antelope), there were some tables that +performed poorly in the presence of a large number of deleted resources +(volumes, snapshots, backups, etc) which resulted in high CPU loads on the DB +servers not only when listing those resources, but also when doing some +operations on them. This was resolved by adding appropriate indexes to them. + +This example below is specific to MariaDB/MySQL, but the syntax should be easy +to modify for operators using PostgreSQL, and it represents the changes that +older releases could add to resolve these DB server CPU spikes in such a way +that they would not conflict with the ones that Cinder introduced in 2023.1 +(Antelope). + +.. code-block:: sql + + use cinder; + create index groups_deleted_project_id_idx on groups (deleted, project_id); + create index group_snapshots_deleted_project_id_idx on groups (deleted, project_id); + create index volumes_deleted_project_id_idx on volumes (deleted, project_id); + create index volumes_deleted_host_idx on volumes (deleted, host); + create index snapshots_deleted_project_id_idx on snapshots (deleted, project_id); + create index backups_deleted_project_id_idx on backups (deleted, project_id); diff --git a/releasenotes/notes/db-resource-indexes-8010c9a881277503.yaml b/releasenotes/notes/db-resource-indexes-8010c9a881277503.yaml new file mode 100644 index 00000000000..4d062c258ee --- /dev/null +++ b/releasenotes/notes/db-resource-indexes-8010c9a881277503.yaml @@ -0,0 +1,24 @@ +--- +upgrade: + - | + The ``cinder-manage db sync`` command for this verison of cinder will add + additional database indexes. Depending on database size and complexity, + this will take time to complete for every single index to be created. On + MySQL or MariaDB, these indexes will only be created if an index does not + already exist with the same name: + + * ``groups_deleted_project_id_idx`` + * ``group_snapshots_deleted_project_id_idx`` + * ``volumes_deleted_project_id_idx`` + * ``volumes_deleted_host_idx`` + * ``snapshots_deleted_project_id_idx`` + * ``backups_deleted_project_id_idx`` + + An example of the SQL commands to generate these indexes can be found + in the `specific troubleshooting guide + `_. +fixes: + - | + `Bug #1952443 `_: Improve + performance for creating volume from image, listing volumes, snapshots, + backups, groups, and group_snapshots.