From 03cbcd3cd0d7f0f9da748d0f113df7d26741ba21 Mon Sep 17 00:00:00 2001 From: Jan Hartkopf Date: Wed, 22 Sep 2021 16:07:51 +0200 Subject: [PATCH] Ceph: Add option to keep only last n snapshots per backup Add a new config option "backup_ceph_max_snapshots" to only keep the last n snapshots of a volume backup on the source volume storage to save disk space. By default, the current behavior of keeping all snapshots is used, but can now be adjusted as requested. Commit bc9ab142da919087c71525978f960115ff0259b9 changed the previous behavior of keeping only the last snapshot to keeping all snapshots of a backup. However, this can take up much more disk space on the source volume storage. If enabled, the driver checks the number of snapshots of a volume backup after successful creation of a new snapshot and removes older ones, so that only the n most recent snapshots are kept on the source storage. This change also creates a new special case which was not present before: If a user deletes at least n backup snapshots, the next incremental snapshot creation would fail (due to how RBD works). This will be handled by forcing a full backup instead. Therefore, this commit introduces a configurable tradeoff between required space on the source volume storage and a potentially longer backup time. 
Change-Id: Ief71e6e62ccc0654f4e1e71ccc53df66d5ffbfac Related-Bug: #1703011 Signed-off-by: Jan Hartkopf --- cinder/backup/drivers/ceph.py | 125 ++++++++++++++++-- cinder/message/message_field.py | 3 + .../unit/backup/drivers/test_backup_ceph.py | 34 +++++ .../backup/ceph-backup-driver.rst | 21 +++ ...nly-last-n-snapshots-89dc532656f453f4.yaml | 8 ++ 5 files changed, 181 insertions(+), 10 deletions(-) create mode 100644 releasenotes/notes/ceph-add-option-to-keep-only-last-n-snapshots-89dc532656f453f4.yaml diff --git a/cinder/backup/drivers/ceph.py b/cinder/backup/drivers/ceph.py index cbe1eef21d4..f78b5928178 100644 --- a/cinder/backup/drivers/ceph.py +++ b/cinder/backup/drivers/ceph.py @@ -48,6 +48,7 @@ import os import re import subprocess import tempfile +import textwrap import time from typing import Dict, List, Optional, Tuple @@ -62,6 +63,8 @@ from cinder.backup import driver from cinder import exception from cinder.i18n import _ from cinder import interface +from cinder.message import api as message_api +from cinder.message import message_field from cinder import objects from cinder import utils import cinder.volume.drivers.rbd as rbd_driver @@ -95,6 +98,19 @@ service_opts = [ cfg.BoolOpt('backup_ceph_image_journals', default=False, help='If True, apply JOURNALING and EXCLUSIVE_LOCK feature ' 'bits to the backup RBD objects to allow mirroring'), + cfg.IntOpt('backup_ceph_max_snapshots', default=0, + help=textwrap.dedent("""\ + Number of the most recent snapshots to keep. + + 0 indicates to keep an unlimited number of snapshots. + + Configuring this option can save disk space by only keeping + a limited number of snapshots on the source volume storage. + However, if a user deletes all incremental backups which + still have snapshots on the source storage, the next + incremental backup will automatically become a full backup + as no common snapshot exists anymore. 
+ """)), cfg.BoolOpt('restore_discard_excess_bytes', default=True, help='If True, always discard excess bytes when restoring ' 'volumes i.e. pad with zeroes.') @@ -198,6 +214,8 @@ class CephBackupDriver(driver.BackupDriver): self._ceph_backup_pool = CONF.backup_ceph_pool self._ceph_backup_conf = CONF.backup_ceph_conf + self.message_api = message_api.API() + @staticmethod def get_driver_options() -> list: return service_opts @@ -799,24 +817,62 @@ class CephBackupDriver(driver.BackupDriver): rbd_conf = volume_file.rbd_conf source_rbd_image = eventlet.tpool.Proxy(volume_file.rbd_image) volume_id = backup.volume_id - base_name = None + base_name = self._get_backup_base_name(volume_id, backup=backup) + snaps_to_keep = CONF.backup_ceph_max_snapshots # If backup.parent_id is None performs full RBD backup if backup.parent_id is None: - base_name = self._get_backup_base_name(volume_id, backup=backup) from_snap, image_created = self._full_rbd_backup(backup.container, base_name, length) # Otherwise performs incremental rbd backup else: - # Find the base name from the parent backup's service_metadata - base_name = self._get_backup_base_name(volume_id, backup=backup) - rbd_img = source_rbd_image - from_snap, image_created = self._incremental_rbd_backup(backup, - base_name, - length, - rbd_img, - volume_id) + # Check if there is at least one snapshot to base an incremental + # backup on. If not, we cannot perform an incremental backup and + # fall back to full backup. + no_source_snaps = snaps_to_keep > 0 and \ + self._get_backup_snap_name( + source_rbd_image, + base_name, + backup.parent_id) is None + + # If true, force full backup + if no_source_snaps: + # Unset parent so we get a new backup base name + backup.parent = None + # The backup will be a full one, so it has no parent ID. + # This will mark the backup as a full backup in the database. 
+ backup.parent_id = None + backup.save() + + base_name = self.\ + _get_backup_base_name(volume_id, backup=backup) + + LOG.info("Incremental backup was requested, but there are no " + "snapshots present to use as base, " + "forcing full backup.") + self.message_api.create( + context=self.context, + action=message_field.Action.BACKUP_CREATE, + resource_uuid=volume_id, + detail=message_field.Detail. + INCREMENTAL_BACKUP_FORCES_FULL_BACKUP, + level="WARNING" + ) + + from_snap, image_created = self._full_rbd_backup( + backup.container, + base_name, + length) + else: + # Incremental backup + rbd_img = source_rbd_image + from_snap, image_created = \ + self._incremental_rbd_backup(backup, + base_name, + length, + rbd_img, + volume_id) LOG.debug("Using --from-snap '%(snap)s' for incremental backup of " "volume %(volume)s.", @@ -847,6 +903,13 @@ class CephBackupDriver(driver.BackupDriver): LOG.debug("Differential backup transfer completed in %.4fs", (time.time() - before)) + # only keep last n snapshots and delete older ones + if snaps_to_keep > 0: + self._remove_last_snapshots(source_rbd_image, snaps_to_keep) + else: + LOG.debug("Not deleting any snapshots because " + "all should be kept") + except exception.BackupRBDOperationFailed: with excutils.save_and_reraise_exception(): LOG.debug("Differential backup transfer failed") @@ -863,6 +926,48 @@ class CephBackupDriver(driver.BackupDriver): return {'service_metadata': '{"base": "%s"}' % base_name} + def _remove_last_snapshots(self, source_rbd_image, snaps_to_keep: int): + # only keep last n snapshots and delete older ones for the source + # image provided + snap_list = [] + + try: + snap_list = self.get_backup_snaps(source_rbd_image) + except Exception as e: + LOG.debug( + "Failed to get snapshot list for %s: %s", source_rbd_image, e + ) + + remaining_snaps = len(snap_list) + LOG.debug("Snapshot list: %s", snap_list) + + if remaining_snaps > snaps_to_keep: + snaps_to_delete = remaining_snaps - snaps_to_keep + LOG.debug( + 
"There are %s snapshots and %s should be kept, " + "deleting the oldest %s snapshots", + remaining_snaps, + snaps_to_keep, + snaps_to_delete, + ) + + for i in range(snaps_to_delete): + LOG.debug("Deleting snapshot %s", snap_list[i]) + + try: + source_rbd_image.remove_snap(snap_list[i]["name"]) + except Exception as e: + LOG.debug( + "Failed to delete snapshot %s: %s", snap_list[i], e + ) + else: + LOG.debug( + "There are %s snapshots and %s should be kept, " + "not deleting any snapshots", + remaining_snaps, + snaps_to_keep, + ) + @staticmethod def _file_is_rbd(volume_file: linuxrbd.RBDVolumeIOWrapper) -> bool: """Returns True if the volume_file is actually an RBD image.""" diff --git a/cinder/message/message_field.py b/cinder/message/message_field.py index ba145c23be9..8c39915c8a8 100644 --- a/cinder/message/message_field.py +++ b/cinder/message/message_field.py @@ -134,6 +134,8 @@ class Detail(object): '029', _("The image disk format must be the same as the volume format for " "the volume type you are requesting.")) + INCREMENTAL_BACKUP_FORCES_FULL_BACKUP = ( + '030', _("Incremental backup not possible, forcing full backup.")) ALL = (UNKNOWN_ERROR, DRIVER_NOT_INITIALIZED, @@ -164,6 +166,7 @@ class Detail(object): VOLUME_INVALID_STATE, REIMAGE_VOLUME_FAILED, IMAGE_FORMAT_UNACCEPTABLE, + INCREMENTAL_BACKUP_FORCES_FULL_BACKUP, ) # Exception and detail mappings diff --git a/cinder/tests/unit/backup/drivers/test_backup_ceph.py b/cinder/tests/unit/backup/drivers/test_backup_ceph.py index 8d8cc9f0b2a..c541b97c66d 100644 --- a/cinder/tests/unit/backup/drivers/test_backup_ceph.py +++ b/cinder/tests/unit/backup/drivers/test_backup_ceph.py @@ -536,6 +536,40 @@ class BackupCephTestCase(test.TestCase): self.assertEqual(checksum.digest(), self.checksum.digest()) + @common_mocks + def test_backup_snapshot_lifecycle(self): + with mock.patch.object(self.service, '_rbd_diff_transfer'), \ + mock.patch.object(self.service, "get_backup_snaps") \ + as mock_get_backup_snaps: + + 
CONF.set_override('backup_ceph_max_snapshots', 1) + + mocked_snaps = [ + {'name': 'backup.mock.snap.153464362.12'}, + {'name': 'backup.mock.snap.225341241.90'}, + {'name': 'backup.mock.snap.399994362.10'}] + + mock_get_backup_snaps.return_value = mocked_snaps + self.mock_rbd.RBD.remove_snap = mock.Mock() + + image = self.service.rbd.Image() + meta = linuxrbd.RBDImageMetadata(image, + 'pool_foo', + 'user_foo', + 'conf_foo') + rbdio = linuxrbd.RBDVolumeIOWrapper(meta) + rbdio.seek(0) + + self.service._backup_rbd(self.backup, rbdio, + self.volume.name, self.volume.size) + + self.assertEqual(2, self.mock_rbd.Image.return_value. + remove_snap.call_count) + expected_calls = [mock.call('backup.mock.snap.153464362.12'), + mock.call('backup.mock.snap.225341241.90')] + self.mock_rbd.Image.return_value.remove_snap.\ + assert_has_calls(expected_calls) + @common_mocks def test_backup_volume_from_rbd_set_parent_id(self): with mock.patch.object(self.service, '_backup_rbd') as \ diff --git a/doc/source/configuration/block-storage/backup/ceph-backup-driver.rst b/doc/source/configuration/block-storage/backup/ceph-backup-driver.rst index 81de086932d..d71bc40f61f 100644 --- a/doc/source/configuration/block-storage/backup/ceph-backup-driver.rst +++ b/doc/source/configuration/block-storage/backup/ceph-backup-driver.rst @@ -21,6 +21,26 @@ stored as snapshots so that minimal space is consumed in the backup store. It takes far less time to restore a volume than to take a full copy. +By default, all incremental backups are held on the source volume storage, +which can take up much disk space on the usually more expensive +primary storage compared to backup storage. Enabling the option +``backup_ceph_max_snapshots`` can save disk space on the source +volume storage by only keeping a limited number of snapshots per backup volume. 
+After every successful creation of a new incremental backup, the Ceph backup +driver will then ensure that excess snapshots of the corresponding backup +volume are deleted so that only the ``backup_ceph_max_snapshots`` +most recent snapshots are kept on the primary storage. +However, this can cause incremental backups to automatically become full +backups instead if a user has manually deleted at least +``backup_ceph_max_snapshots`` incremental backups. In that case +the next snapshot, being a full backup, will require more disk space on +the backup storage and will take longer to complete than an incremental +backup would have. + +Thus, the option allows you to configure a tradeoff between required space on the +source volume storage and required space on the backup storage as well as a +longer backup process under the above conditions. + .. note:: Block Storage enables you to: @@ -57,3 +77,4 @@ This example shows the default options for the Ceph backup driver. backup_ceph_pool = backups backup_ceph_stripe_unit = 0 backup_ceph_stripe_count = 0 + backup_ceph_max_snapshots = 0 diff --git a/releasenotes/notes/ceph-add-option-to-keep-only-last-n-snapshots-89dc532656f453f4.yaml b/releasenotes/notes/ceph-add-option-to-keep-only-last-n-snapshots-89dc532656f453f4.yaml new file mode 100644 index 00000000000..7ea332ae851 --- /dev/null +++ b/releasenotes/notes/ceph-add-option-to-keep-only-last-n-snapshots-89dc532656f453f4.yaml @@ -0,0 +1,8 @@ +--- +features: + - | + Ceph driver: Add config option to keep only the last n snapshots per backup + to save disk space on the source volume storage. Enabling this option can + cause incremental backups to become full backups instead under special + circumstances. Please take a look at the Ceph backup driver docs for + more information. \ No newline at end of file