@alistairncoles
Created April 12, 2015 13:15
clean up expired .data fragments with no .durable
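
In short: an isolated .durable with no matching .data is now removed as soon as hash_cleanup_listdir() sees it, while .data fragments that never got a .durable are gathered under a 'fragments_without_durable' key and only reclaimed once they are older than reclaim_age. A minimal sketch of the reclaim-age check this relies on, assuming Swift's default reclaim_age of one week (604800 seconds); the helper mirrors the is_reclaimable() added in the diff below, but the crude filename parsing is for illustration only (the patch itself uses parse_on_disk_filename()):

    import time

    DEFAULT_RECLAIM_AGE = 604800  # Swift's default reclaim_age: one week

    def is_reclaimable(filename, reclaim_age=DEFAULT_RECLAIM_AGE):
        # On-disk names lead with the internal timestamp, e.g.
        # '1428842115.00000#2.data' or '1428842115.00000.durable'.
        timestamp = filename.rsplit('.', 1)[0].split('#')[0]
        return (time.time() - float(timestamp)) > reclaim_age

    assert is_reclaimable('0000000007.00000.ts')            # ancient: reclaim
    assert not is_reclaimable('%.5f#2.data' % time.time())  # fresh: keep
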
diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py
index 0f2ede6..92ba19f 100644
--- a/swift/obj/diskfile.py
+++ b/swift/obj/diskfile.py
@@ -2058,10 +2058,16 @@ class ECDiskFileManager(DiskFileManager):
# been found so far as the valid fileset
discard()
set_valid_fileset()
- elif ext == '.data' and have_valid_fileset():
- # valid fileset without a .durable means we must have a newer .ts,
- # so discard the older .data file
- discard()
+ elif ext == '.data':
+ # not yet found a .durable
+ if have_valid_fileset():
+ # valid fileset means we must have a newer
+ # .ts, so discard the older .data file
+ discard()
+ else:
+ # .data newer than a .durable or .ts, don't discard yet
+ context.setdefault('fragments_without_durable', []).append(
+ filename)
elif ext == '.ts':
if have_valid_fileset() or not accept_first():
# newer .data, .durable or .ts already found so discard this
@@ -2075,7 +2081,7 @@ class ECDiskFileManager(DiskFileManager):
# newer .data, .durable or .ts already found so discard this
discard()
else:
- # ignore unexpected files e.g. .data newer than .durable or junk
+ # ignore unexpected files
pass
return have_valid_fileset()
@@ -2184,6 +2190,10 @@ class ECDiskFileManager(DiskFileManager):
key 'obsolete'; a list of files remaining in the directory,
reverse sorted, stored under the key 'files'.
"""
+ def is_reclaimable(filename):
+ timestamp = self.parse_on_disk_filename(filename)['timestamp']
+ return (time.time() - float(timestamp)) > reclaim_age
+
files = listdir(hsh_path)
results = {}
if len(files) == 1:
@@ -2191,20 +2201,16 @@ class ECDiskFileManager(DiskFileManager):
# allow for a single junk file in dir that may violate ondisk
# contract, so cannot call gather_ondisk_files and cannot assume
# that the filename will parse.
- relcaim_rules = {
+ reclaim_rules = {
'.ts': True,
- # TODO: cleanup old data file with no .durable
- # '.data': True,
'.durable': False,
}
- for ext, should_wait in relcaim_rules.items():
+ for ext, should_wait in reclaim_rules.items():
if not files[0].endswith(ext):
continue
if should_wait:
- timestamp = self.parse_on_disk_filename(
- files[0])['timestamp']
- if (time.time() - float(timestamp)) > reclaim_age:
- # place reclaim-able tombstone on obsolete list
+ if is_reclaimable(files[0]):
+ # place reclaim-able file on obsolete list
results['obsolete'] = [files[0]]
else:
results[ext] = files[0]
@@ -2214,6 +2220,13 @@ class ECDiskFileManager(DiskFileManager):
files.sort(reverse=True)
results = self.gather_ondisk_files(files, include_obsolete=True,
frag_index=frag_index)
+ if '.durable' in results and not results.get('fragments'):
+ # a .durable with no .data is deleted as soon as it is found
+ results.setdefault('obsolete', []).append(results.pop('.durable'))
+ for filename in results.get('fragments_without_durable', []):
+ # stray fragments are not deleted until reclaim-age
+ if is_reclaimable(filename):
+ results.setdefault('obsolete', []).append(filename)
for filename in results.get('obsolete', []):
remove_file(join(hsh_path, filename))
files.remove(filename)
diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py
index 2fbf173..e26d9e3 100644
--- a/test/unit/obj/test_diskfile.py
+++ b/test/unit/obj/test_diskfile.py
@@ -526,7 +526,8 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin):
self.fail('%s with files %s' % (str(e), files))
shuffle(files)
- def _test_hash_cleanup_listdir_files(self, scenarios, policy):
+ def _test_hash_cleanup_listdir_files(self, scenarios, policy,
+ reclaim_age=None):
# check that expected files are left in hashdir after cleanup
for test in scenarios:
class_under_test = self.df_router[policy]
@@ -537,10 +538,14 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin):
open(os.path.join(hashdir, fname), 'w')
expected_after_cleanup = set([f[0] for f in test
if f[1] or len(f) > 2 and f[2]])
- with mock.patch('swift.obj.diskfile.time') as mock_time:
- # don't reclaim anything
- mock_time.time.return_value = 0.0
- class_under_test.hash_cleanup_listdir(hashdir)
+ if reclaim_age:
+ class_under_test.hash_cleanup_listdir(
+ hashdir, reclaim_age=reclaim_age)
+ else:
+ with mock.patch('swift.obj.diskfile.time') as mock_time:
+ # don't reclaim anything
+ mock_time.time.return_value = 0.0
+ class_under_test.hash_cleanup_listdir(hashdir)
after_cleanup = set(os.listdir(hashdir))
errmsg = "expected %r, got %r for test %r" % (
sorted(expected_after_cleanup), sorted(after_cleanup), test
@@ -1117,9 +1122,11 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase):
mgr_cls = diskfile.ECDiskFileManager
def test_get_ondisk_files_with_ec_policy(self):
- # Each scenario specifies a list of (filename, extension) tuples. If
- # extension is set then that filename should be returned by the method
- # under test for that extension type.
+ # Each scenario specifies a list of (filename, extension, [survives])
+ # tuples. If extension is set then that filename should be returned by
+ # the method under test for that extension type. If the optional
+ # 'survives' is True, the filename should still be in the dir after
+ # cleanup.
scenarios = [[('0000000007.00000.ts', '.ts')],
[('0000000007.00000.ts', '.ts'),
@@ -1141,7 +1148,7 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase):
# data newer than durable ignored, even if its only data
[('0000000008.00000#1.data', False, True),
- ('0000000007.00000.durable', '.durable')],
+ ('0000000007.00000.durable', False, False)],
# data older than durable is ignored
[('0000000007.00000.durable', '.durable'),
@@ -1150,7 +1157,7 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase):
('0000000004.00000.ts', False)],
# data older than durable ignored, even if its only data
- [('0000000007.00000.durable', '.durable'),
+ [('0000000007.00000.durable', False, False),
('0000000006.00000#1.data', False),
('0000000004.00000.ts', False)],
@@ -1251,6 +1258,66 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase):
# note: not calling self._test_hash_cleanup_listdir_files(scenarios, 0)
# here due to the anomalous scenario as commented above
+ def test_hash_cleanup_listdir_reclaim(self):
+ # Each scenario specifies a list of (filename, extension, [survives])
+ # tuples. If extension is set or 'survives' is True, the filename
+ # should still be in the dir after cleanup.
+ much_older = Timestamp(time() - 2000).internal
+ older = Timestamp(time() - 1001).internal
+ newer = Timestamp(time() - 900).internal
+ scenarios = [[('%s.ts' % older, False, False)],
+
+ # fresh tombstone is preserved
+ [('%s.ts' % newer, '.ts', True)],
+
+ # isolated .durable is cleaned up immediately
+ [('%s.durable' % newer, False, False)],
+
+ # ...even when other older files are in dir
+ [('%s.durable' % older, False, False),
+ ('%s.ts' % much_older, False, False)],
+
+ # isolated .data files are cleaned up when stale
+ [('%s#2.data' % older, False, False),
+ ('%s#4.data' % older, False, False)],
+
+ # ...even when there is an older durable fileset
+ [('%s#2.data' % older, False, False),
+ ('%s#4.data' % older, False, False),
+ ('%s#2.data' % much_older, '.data', True),
+ ('%s#4.data' % much_older, False, True),
+ ('%s.durable' % much_older, '.durable', True)],
+
+ # ... but preserved if still fresh
+ [('%s#2.data' % newer, False, True),
+ ('%s#4.data' % newer, False, True)],
+
+ # ... and we could have a mixture of fresh and stale .data
+ [('%s#2.data' % newer, False, True),
+ ('%s#4.data' % older, False, False)],
+
+ # TODO these remaining scenarios represent some oddities
+ # that should perhaps be addressed but they arise from
+ # being strictly compatible with the legacy replication
+ # DiskFileManager behavior...
+
+ # tombstone reclaim is inhibited by newer non-durable data
+ [('%s#2.data' % newer, False, True),
+ ('%s#4.data' % older, False, False),
+ ('%s.ts' % much_older, '.ts', True)],
+
+ # even older durable inhibits tombstone being reclaimed
+ [('%s.ts' % older, '.ts', True),
+ ('%s.durable' % much_older, False, False)],
+
+ # tombstone reclaim is inhibited by *any* other file
+ [('junk', False, True),
+ ('%s.ts' % much_older, '.ts', True)],
+ ]
+
+ self._test_hash_cleanup_listdir_files(scenarios, POLICIES.default,
+ reclaim_age=1000)
+
def test_get_ondisk_files_with_stray_meta(self):
# get_ondisk_files does not tolerate a stray .meta file
scenarios = [['0000000007.00000.meta'],
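
For reference, the behavior added above can be exercised directly against an object hash directory, along the same lines as the reclaim scenarios in the test. A minimal sketch, assuming mgr is an ECDiskFileManager for an EC policy (e.g. self.df_router[policy] in the test mixin) and hashdir is an existing object hash directory:

    import os
    from time import time
    from swift.common.utils import Timestamp

    stale = Timestamp(time() - 2000).internal  # older than the reclaim_age below
    fresh = Timestamp(time() - 100).internal   # newer than the reclaim_age below

    # two stray fragments, neither of which ever got a .durable
    open(os.path.join(hashdir, '%s#2.data' % stale), 'w').close()
    open(os.path.join(hashdir, '%s#4.data' % fresh), 'w').close()

    mgr.hash_cleanup_listdir(hashdir, reclaim_age=1000)

    # the stale fragment is reclaimed; the fresh one stays until it ages out
    assert os.listdir(hashdir) == ['%s#4.data' % fresh]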