-
-
Save alistairncoles/a118e563495d9bd0903e to your computer and use it in GitHub Desktop.
Clean up expired data fragments with no .durable
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py | |
index 0f2ede6..92ba19f 100644 | |
--- a/swift/obj/diskfile.py | |
+++ b/swift/obj/diskfile.py | |
@@ -2058,10 +2058,16 @@ class ECDiskFileManager(DiskFileManager): | |
# been found so far as the valid fileset | |
discard() | |
set_valid_fileset() | |
- elif ext == '.data' and have_valid_fileset(): | |
- # valid fileset without a .durable means we must have a newer .ts, | |
- # so discard the older .data file | |
- discard() | |
+ elif ext == '.data': | |
+ # not yet found a .durable | |
+ if have_valid_fileset(): | |
+ # valid fileset means we must have a newer | |
+ # .ts, so discard the older .data file | |
+ discard() | |
+ else: | |
+ # .data newer than a .durable or .ts, don't discard yet | |
+ context.setdefault('fragments_without_durable', []).append( | |
+ filename) | |
elif ext == '.ts': | |
if have_valid_fileset() or not accept_first(): | |
# newer .data, .durable or .ts already found so discard this | |
@@ -2075,7 +2081,7 @@ class ECDiskFileManager(DiskFileManager): | |
# newer .data, .durable or .ts already found so discard this | |
discard() | |
else: | |
- # ignore unexpected files e.g. .data newer than .durable or junk | |
+ # ignore unexpected files | |
pass | |
return have_valid_fileset() | |
@@ -2184,6 +2190,10 @@ class ECDiskFileManager(DiskFileManager): | |
key 'obsolete'; a list of files remaining in the directory, | |
reverse sorted, stored under the key 'files'. | |
""" | |
+ def is_reclaimable(filename): | |
+ timestamp = self.parse_on_disk_filename(filename)['timestamp'] | |
+ return (time.time() - float(timestamp)) > reclaim_age | |
+ | |
files = listdir(hsh_path) | |
results = {} | |
if len(files) == 1: | |
@@ -2191,20 +2201,16 @@ class ECDiskFileManager(DiskFileManager): | |
# allow for a single junk file in dir that may violate ondisk | |
# contract, so cannot call gather_ondisk_files and cannot assume | |
# that the filename will parse. | |
- relcaim_rules = { | |
+ reclaim_rules = { | |
'.ts': True, | |
- # TODO: cleanup old data file with no .durable | |
- # '.data': True, | |
'.durable': False, | |
} | |
- for ext, should_wait in relcaim_rules.items(): | |
+ for ext, should_wait in reclaim_rules.items(): | |
if not files[0].endswith(ext): | |
continue | |
if should_wait: | |
- timestamp = self.parse_on_disk_filename( | |
- files[0])['timestamp'] | |
- if (time.time() - float(timestamp)) > reclaim_age: | |
- # place reclaim-able tombstone on obsolete list | |
+ if is_reclaimable(files[0]): | |
+ # place reclaim-able file on obsolete list | |
results['obsolete'] = [files[0]] | |
else: | |
results[ext] = files[0] | |
@@ -2214,6 +2220,13 @@ class ECDiskFileManager(DiskFileManager): | |
files.sort(reverse=True) | |
results = self.gather_ondisk_files(files, include_obsolete=True, | |
frag_index=frag_index) | |
+ if '.durable' in results and not results.get('fragments'): | |
+ # a .durable with no .data is deleted as soon as it is found | |
+ results.setdefault('obsolete', []).append(results.pop('.durable')) | |
+ for filename in results.get('fragments_without_durable', []): | |
+ # stray fragments are not deleted until reclaim-age | |
+ if is_reclaimable(filename): | |
+ results.setdefault('obsolete', []).append(filename) | |
for filename in results.get('obsolete', []): | |
remove_file(join(hsh_path, filename)) | |
files.remove(filename) | |
diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py | |
index 2fbf173..e26d9e3 100644 | |
--- a/test/unit/obj/test_diskfile.py | |
+++ b/test/unit/obj/test_diskfile.py | |
@@ -526,7 +526,8 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin): | |
self.fail('%s with files %s' % (str(e), files)) | |
shuffle(files) | |
- def _test_hash_cleanup_listdir_files(self, scenarios, policy): | |
+ def _test_hash_cleanup_listdir_files(self, scenarios, policy, | |
+ reclaim_age=None): | |
# check that expected files are left in hashdir after cleanup | |
for test in scenarios: | |
class_under_test = self.df_router[policy] | |
@@ -537,10 +538,14 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin): | |
open(os.path.join(hashdir, fname), 'w') | |
expected_after_cleanup = set([f[0] for f in test | |
if f[1] or len(f) > 2 and f[2]]) | |
- with mock.patch('swift.obj.diskfile.time') as mock_time: | |
- # don't reclaim anything | |
- mock_time.time.return_value = 0.0 | |
- class_under_test.hash_cleanup_listdir(hashdir) | |
+ if reclaim_age: | |
+ class_under_test.hash_cleanup_listdir( | |
+ hashdir, reclaim_age=reclaim_age) | |
+ else: | |
+ with mock.patch('swift.obj.diskfile.time') as mock_time: | |
+ # don't reclaim anything | |
+ mock_time.time.return_value = 0.0 | |
+ class_under_test.hash_cleanup_listdir(hashdir) | |
after_cleanup = set(os.listdir(hashdir)) | |
errmsg = "expected %r, got %r for test %r" % ( | |
sorted(expected_after_cleanup), sorted(after_cleanup), test | |
@@ -1117,9 +1122,11 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): | |
mgr_cls = diskfile.ECDiskFileManager | |
def test_get_ondisk_files_with_ec_policy(self): | |
- # Each scenario specifies a list of (filename, extension) tuples. If | |
- # extension is set then that filename should be returned by the method | |
- # under test for that extension type. | |
+ # Each scenario specifies a list of (filename, extension, [survives]) | |
+ # tuples. If extension is set then that filename should be returned by | |
+ # the method under test for that extension type. If the optional | |
+ # 'survives' is True, the filename should still be in the dir after | |
+ # cleanup. | |
scenarios = [[('0000000007.00000.ts', '.ts')], | |
[('0000000007.00000.ts', '.ts'), | |
@@ -1141,7 +1148,7 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): | |
# data newer than durable ignored, even if it's the only data | |
[('0000000008.00000#1.data', False, True), | |
- ('0000000007.00000.durable', '.durable')], | |
+ ('0000000007.00000.durable', False, False)], | |
# data older than durable is ignored | |
[('0000000007.00000.durable', '.durable'), | |
@@ -1150,7 +1157,7 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): | |
('0000000004.00000.ts', False)], | |
# data older than durable ignored, even if it's the only data | |
- [('0000000007.00000.durable', '.durable'), | |
+ [('0000000007.00000.durable', False, False), | |
('0000000006.00000#1.data', False), | |
('0000000004.00000.ts', False)], | |
@@ -1251,6 +1258,66 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): | |
# note: not calling self._test_hash_cleanup_listdir_files(scenarios, 0) | |
# here due to the anomalous scenario as commented above | |
+ def test_hash_cleanup_listdir_reclaim(self): | |
+ # Each scenario specifies a list of (filename, extension, [survives]) | |
+ # tuples. If extension is set or 'survives' is True, the filename | |
+ # should still be in the dir after cleanup. | |
+ much_older = Timestamp(time() - 2000).internal | |
+ older = Timestamp(time() - 1001).internal | |
+ newer = Timestamp(time() - 900).internal | |
+ scenarios = [[('%s.ts' % older, False, False)], | |
+ | |
+ # fresh tombstone is preserved | |
+ [('%s.ts' % newer, '.ts', True)], | |
+ | |
+ # isolated .durable is cleaned up immediately | |
+ [('%s.durable' % newer, False, False)], | |
+ | |
+ # ...even when other older files are in dir | |
+ [('%s.durable' % older, False, False), | |
+ ('%s.ts' % much_older, False, False)], | |
+ | |
+ # isolated .data files are cleaned up when stale | |
+ [('%s#2.data' % older, False, False), | |
+ ('%s#4.data' % older, False, False)], | |
+ | |
+ # ...even when there is an older durable fileset | |
+ [('%s#2.data' % older, False, False), | |
+ ('%s#4.data' % older, False, False), | |
+ ('%s#2.data' % much_older, '.data', True), | |
+ ('%s#4.data' % much_older, False, True), | |
+ ('%s.durable' % much_older, '.durable', True)], | |
+ | |
+ # ... but preserved if still fresh | |
+ [('%s#2.data' % newer, False, True), | |
+ ('%s#4.data' % newer, False, True)], | |
+ | |
+ # ... and we could have a mixture of fresh and stale .data | |
+ [('%s#2.data' % newer, False, True), | |
+ ('%s#4.data' % older, False, False)], | |
+ | |
+ # TODO these remaining scenarios represent some oddities | |
+ # that should perhaps be addressed but they arise from | |
+ # being strictly compatible with the legacy replication | |
+ # DiskFileManager behavior... | |
+ | |
+ # tombstone reclaim is inhibited by newer non-durable data | |
+ [('%s#2.data' % newer, False, True), | |
+ ('%s#4.data' % older, False, False), | |
+ ('%s.ts' % much_older, '.ts', True)], | |
+ | |
+ # even older durable inhibits tombstone being reclaimed | |
+ [('%s.ts' % older, '.ts', True), | |
+ ('%s.durable' % much_older, False, False)], | |
+ | |
+ # tombstone reclaim is inhibited by *any* other file | |
+ [('junk', False, True), | |
+ ('%s.ts' % much_older, '.ts', True)], | |
+ ] | |
+ | |
+ self._test_hash_cleanup_listdir_files(scenarios, POLICIES.default, | |
+ reclaim_age=1000) | |
+ | |
def test_get_ondisk_files_with_stray_meta(self): | |
# get_ondisk_files does not tolerate a stray .meta file | |
scenarios = [['0000000007.00000.meta'], |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment