Merge "Fix NetApp NFS driver to never spawn a native thread again"

This commit is contained in:
Zuul 2023-03-07 16:58:22 +00:00 committed by Gerrit Code Review
commit 4a1a39ecd7
6 changed files with 51 additions and 76 deletions

View File

@ -16,7 +16,6 @@
"""Unit tests for the NetApp NFS storage driver."""
import copy
import os
import threading
import time
from unittest import mock
@ -620,36 +619,6 @@ class NetAppNfsDriverTestCase(test.TestCase):
os.path.exists.assert_called_once_with(
'dir/' + fake.CLONE_DESTINATION_NAME)
def test__spawn_clean_cache_job_clean_job_setup(self):
self.driver.cleaning = True
mock_debug_log = self.mock_object(nfs_base.LOG, 'debug')
self.mock_object(utils, 'synchronized', return_value=lambda f: f)
retval = self.driver._spawn_clean_cache_job()
self.assertIsNone(retval)
self.assertEqual(1, mock_debug_log.call_count)
def test__spawn_clean_cache_job_new_clean_job(self):
class FakeTimer(object):
def start(self):
pass
fake_timer = FakeTimer()
self.mock_object(utils, 'synchronized', return_value=lambda f: f)
self.mock_object(fake_timer, 'start')
self.mock_object(nfs_base.LOG, 'debug')
self.mock_object(self.driver, '_clean_image_cache')
self.mock_object(threading, 'Timer', return_value=fake_timer)
retval = self.driver._spawn_clean_cache_job()
self.assertIsNone(retval)
threading.Timer.assert_called_once_with(
0, self.driver._clean_image_cache)
fake_timer.start.assert_called_once_with()
def test_cleanup_volume_on_failure(self):
path = '%s/%s' % (fake.NFS_SHARE, fake.NFS_VOLUME['name'])
mock_local_path = self.mock_object(self.driver, 'local_path')
@ -1077,13 +1046,22 @@ class NetAppNfsDriverTestCase(test.TestCase):
self.driver, '_delete_snapshots_marked_for_deletion')
mock_call_ems_logging = self.mock_object(
self.driver, '_handle_ems_logging')
mock_call_clean_image_cache = self.mock_object(
self.driver, '_clean_image_cache')
# image cache cleanup task can be configured with custom timeout
cache_cleanup_interval = loopingcalls.ONE_HOUR
self.driver.configuration.netapp_nfs_image_cache_cleanup_interval = (
cache_cleanup_interval)
self.driver._add_looping_tasks()
mock_add_task.assert_has_calls([
mock.call(mock_call_snap_cleanup, loopingcalls.ONE_MINUTE,
loopingcalls.ONE_MINUTE),
mock.call(mock_call_ems_logging, loopingcalls.ONE_HOUR)])
mock.call(mock_call_ems_logging, loopingcalls.ONE_HOUR),
mock.call(mock_call_clean_image_cache, cache_cleanup_interval)
])
def test__clone_from_cache(self):
image_id = 'fake_image_id'

View File

@ -137,7 +137,6 @@ class NetAppCmodeNfsDriverTestCase(test.TestCase):
mock_debug_log = self.mock_object(nfs_cmode.LOG, 'debug')
self.mock_object(self.driver, 'get_filter_function')
self.mock_object(self.driver, 'get_goodness_function')
self.mock_object(self.driver, '_spawn_clean_cache_job')
self.driver.zapi_client = mock.Mock()
self.mock_object(self.driver, '_get_pool_stats', return_value={})
expected_stats = {
@ -153,7 +152,6 @@ class NetAppCmodeNfsDriverTestCase(test.TestCase):
retval = self.driver._update_volume_stats()
self.assertIsNone(retval)
self.assertTrue(self.driver._spawn_clean_cache_job.called)
self.assertEqual(1, mock_debug_log.call_count)
self.assertEqual(expected_stats, self.driver._stats)

View File

@ -25,7 +25,6 @@ import copy
import math
import os
import re
import threading
import time
from oslo_concurrency import processutils
@ -116,6 +115,13 @@ class NetAppNfsDriver(driver.ManageableVD,
self._handle_ems_logging,
loopingcalls.ONE_HOUR)
# Add the task that periodically cleanup old expired internal
# image caching.
self.loopingcalls.add_task(
self._clean_image_cache,
self.configuration.netapp_nfs_image_cache_cleanup_interval
)
def _delete_snapshots_marked_for_deletion(self):
snapshots = self.zapi_client.get_snapshots_marked_for_deletion()
for snapshot in snapshots:
@ -547,49 +553,32 @@ class NetAppNfsDriver(driver.ManageableVD,
os.utime(src_path, None)
_do_clone()
@utils.synchronized('clean_cache')
def _spawn_clean_cache_job(self):
"""Spawns a clean task if not running."""
if getattr(self, 'cleaning', None):
LOG.debug('Image cache cleaning in progress. Returning... ')
return
else:
# Set cleaning to True
self.cleaning = True
t = threading.Timer(0, self._clean_image_cache)
t.start()
def _clean_image_cache(self):
"""Clean the image cache files in cache of space crunch."""
try:
LOG.debug('Image cache cleaning in progress.')
thres_size_perc_start = (
self.configuration.thres_avl_size_perc_start)
thres_size_perc_stop = self.configuration.thres_avl_size_perc_stop
for share in getattr(self, '_mounted_shares', []):
try:
total_size, total_avl = self._get_capacity_info(share)
avl_percent = int((float(total_avl) / total_size) * 100)
if avl_percent <= thres_size_perc_start:
LOG.info('Cleaning cache for share %s.', share)
eligible_files = self._find_old_cache_files(share)
threshold_size = int(
(thres_size_perc_stop * total_size) / 100)
bytes_to_free = int(threshold_size - total_avl)
LOG.debug('Files to be queued for deletion %s',
eligible_files)
self._delete_files_till_bytes_free(
eligible_files, share, bytes_to_free)
else:
continue
except Exception as e:
LOG.warning('Exception during cache cleaning'
' %(share)s. Message - %(ex)s',
{'share': share, 'ex': e})
LOG.debug('Image cache cleaning in progress.')
thres_size_perc_start = (
self.configuration.thres_avl_size_perc_start)
thres_size_perc_stop = self.configuration.thres_avl_size_perc_stop
for share in self._mounted_shares:
try:
total_size, total_avl = self._get_capacity_info(share)
avl_percent = int((float(total_avl) / total_size) * 100)
if avl_percent <= thres_size_perc_start:
LOG.info('Cleaning cache for share %s.', share)
eligible_files = self._find_old_cache_files(share)
threshold_size = int(
(thres_size_perc_stop * total_size) / 100)
bytes_to_free = int(threshold_size - total_avl)
LOG.debug('Files to be queued for deletion %s',
eligible_files)
self._delete_files_till_bytes_free(
eligible_files, share, bytes_to_free)
else:
continue
finally:
LOG.debug('Image cache cleaning done.')
self.cleaning = False
except Exception as e:
LOG.warning('Exception during cache cleaning'
' %(share)s. Message - %(ex)s',
{'share': share, 'ex': e})
def _shortlist_del_eligible_files(self, share, old_files):
"""Prepares list of eligible files to be deleted from cache."""

View File

@ -337,7 +337,6 @@ class NetAppCmodeNfsDriver(nfs_base.NetAppNfsDriver,
# Used for service state report
data['replication_enabled'] = self.replication_enabled
self._spawn_clean_cache_job()
self._stats = data
def _get_pool_stats(self, filter_function=None, goodness_function=None):

View File

@ -122,6 +122,11 @@ netapp_cluster_opts = [
'provisioning of block storage volumes should occur.')), ]
netapp_img_cache_opts = [
cfg.IntOpt('netapp_nfs_image_cache_cleanup_interval',
default=600,
min=60,
help=('Sets time in seconds between NFS image cache '
'cleanup tasks.')),
cfg.IntOpt('thres_avl_size_perc_start',
default=20,
help=('If the percentage of available space for an NFS share '

View File

@ -0,0 +1,6 @@
---
fixes:
- |
`Bug #2008017 <https://bugs.launchpad.net/cinder/+bug/2008017>`_: Fixed
NetApp NFS driver to never spawn a native thread avoid thread starvation
and other related issues.