@alistairncoles
Created January 30, 2017 16:54
diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py
index 64e531c..1b5527a 100644
--- a/swift/obj/diskfile.py
+++ b/swift/obj/diskfile.py
@@ -237,8 +237,9 @@ def read_hashes(partition_dir):
"""
Read the existing hashes.pkl
- :returns: the hashes, or None if hashes.pkl is corrupt, cannot be read or
- does not exist
+ :returns: A dict mapping suffixes to hashes. If hashes.pkl is corrupt,
+ cannot be read or does not exist, then the dict contains a
+ 'created' key.
"""
hashes_file = join(partition_dir, HASH_FILE)
hashes = None
@@ -255,9 +256,11 @@ def read_hashes(partition_dir):
             # given invalid input depending on the way in which the
             # input is invalid.
             pass
-    if hashes is not None:
-        # hashes.pkl w/o updated key is "forever old"
-        hashes.setdefault('updated', -1)
+    if hashes is None:
+        # set the same value for every call that failed to read the hashes.pkl
+        hashes = {'created': -1}
+    # hashes.pkl w/o updated key is "forever old"
+    hashes.setdefault('updated', -1)
     return hashes
 
 
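Aside: a minimal standalone sketch of the contract the new docstring describes; it is not the Swift implementation itself and uses a temporary directory in place of a real partition path. The point is that read_hashes now always hands back a dict, with a missing or corrupt hashes.pkl signalled by a 'created' key rather than by returning None.

    # sketch only: mirrors the read_hashes return contract described above
    import os
    import pickle
    import tempfile

    HASH_FILE = 'hashes.pkl'  # same file name the patch operates on

    def sketch_read_hashes(partition_dir):
        hashes = None
        try:
            with open(os.path.join(partition_dir, HASH_FILE), 'rb') as fp:
                hashes = pickle.loads(fp.read())
        except Exception:
            # missing, unreadable or corrupt hashes.pkl
            pass
        if hashes is None:
            hashes = {'created': -1}
        hashes.setdefault('updated', -1)
        return hashes

    part_dir = tempfile.mkdtemp()
    # no hashes.pkl at all
    assert sketch_read_hashes(part_dir) == {'created': -1, 'updated': -1}
    # a corrupt (here: empty) hashes.pkl is reported the same way
    open(os.path.join(part_dir, HASH_FILE), 'w').close()
    assert sketch_read_hashes(part_dir) == {'created': -1, 'updated': -1}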
@@ -268,7 +271,6 @@ def write_hashes(partition_dir, hashes):
     The updated key is added to hashes before it is written.
     """
     hashes_file = join(partition_dir, HASH_FILE)
-    hashes = hashes or {}
     hashes['updated'] = time.time()
     return write_pickle(hashes, hashes_file, partition_dir, PICKLE_PROTOCOL)
 
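Aside: with the hashes = hashes or {} fallback removed, callers are now expected to pass a dict, and write_hashes stamps it with an 'updated' time before pickling. A simplified stand-in for that behaviour, assuming a plain pickle.dump in place of Swift's write_pickle:

    # sketch only: Swift's real write_hashes delegates to write_pickle(),
    # which writes a temp file and renames it into place; this sketch just
    # dumps the pickle directly
    import os
    import pickle
    import time

    def sketch_write_hashes(partition_dir, hashes):
        hashes['updated'] = time.time()  # stamped on every write
        with open(os.path.join(partition_dir, 'hashes.pkl'), 'wb') as fp:
            pickle.dump(hashes, fp)
        return hashes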
@@ -302,25 +304,26 @@ def consolidate_hashes(partition_dir):
         hashes = read_hashes(partition_dir)
 
         found_invalidation_entry = False
-        try:
-            with open(invalidations_file, 'rb') as inv_fh:
-                for line in inv_fh:
-                    found_invalidation_entry = True
-                    suffix = line.strip()
-                    if hashes is not None and \
-                            hashes.get(suffix, '') is not None:
+        if 'created' in hashes:
+            # replace value with unique timestamp to distinguish this write of
+            # a new hashes.pkl from any other concurrent write
+            hashes['created'] = time.time()
+        else:
+            try:
+                with open(invalidations_file, 'rb') as inv_fh:
+                    for line in inv_fh:
+                        found_invalidation_entry = True
+                        suffix = line.strip()
                         hashes[suffix] = None
-        except (IOError, OSError) as e:
-            if e.errno != errno.ENOENT:
-                raise
+            except (IOError, OSError) as e:
+                if e.errno != errno.ENOENT:
+                    raise
 
-        if found_invalidation_entry:
+        if found_invalidation_entry or 'created' in hashes:
             write_hashes(partition_dir, hashes)
-
-        # Now that all the invalidations are reflected in hashes.pkl, it's
-        # safe to clear out the invalidations file.
-        if found_invalidation_entry:
-            with open(invalidations_file, 'wb') as inv_fh:
+            # Now that all the invalidations are reflected in hashes.pkl, it's
+            # safe to clear out the invalidations file.
+            with open(invalidations_file, 'wb'):
                 pass
 
         return hashes
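Aside: the reason 'created' is replaced with time.time() is race detection. Two processes that both find no hashes.pkl start from the identical {'created': -1} dict returned by read_hashes; stamping a unique timestamp before writing makes each writer's payload distinguishable when hashes.pkl is re-read later. A small sketch of that idea, with hypothetical names:

    # sketch only: illustrates why a brand new hashes.pkl gets a unique
    # 'created' timestamp before it is written
    import time

    def stamp_new_hashes(hashes):
        if 'created' in hashes:
            # this process is creating hashes.pkl for the first time; make
            # its payload distinguishable from any concurrent writer that
            # is doing the same thing
            hashes['created'] = time.time()
        return hashes

    writer_a = stamp_new_hashes({'created': -1, 'updated': -1})
    writer_b = stamp_new_hashes({'created': -1, 'updated': -1})
    # a later re-read-and-compare against what this process wrote reveals
    # whether another writer got in between (their 'created' stamps will
    # almost certainly differ)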
@@ -1073,18 +1076,19 @@ class BaseDiskFileManager(object):
         if orig_hashes is None:
             hashes = {}
             do_listdir = True
-            # We have to read_hashes again because when consolidate_hashes
-            # returns None to trigger do_listdir - it actually has to writeback
-            # an empty hashes dict with an updated key to prevent a race with
-            # another set of invalidations. Or if the exception handling
-            # fired; read_hashes might just return None again.
-            # N.B. orig_hashes is None only effects new parts and error/edge
-            # conditions - so try not to get overly caught up trying to
-            # optimize it out unless you manage to convince yourself there's a
-            # bad behavior.
+            # we have to read_hashes again because of the Exception handling
+            # around consolidate_hashes - it's a long story; this may return
+            # None again - but for now it should be obviously benign and avoids
+            # a recursive tail spin in tests. N.B. orig_hashes is None only
+            # effects new parts and error/edge conditions - so try not to get
+            # overly caught up trying to optimize it out unless you manage to
+            # convince yourself there's a bad behavior
             orig_hashes = read_hashes(partition_path)
         else:
             hashes = copy.deepcopy(orig_hashes)
+        if hashes.pop('created', None):
+            # hashes.pkl did not previously exist
+            do_listdir = True
 
         if do_listdir:
             for suff in os.listdir(partition_path):
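Aside: on the consumer side, popping 'created' out of the consolidated hashes is what forces a full listdir of the partition when hashes.pkl did not previously exist. A sketch of just that decision, using hypothetical names outside the real BaseDiskFileManager:

    # sketch only: consumer-side handling of the 'created' marker
    import copy

    def sketch_prepare_hashes(orig_hashes, do_listdir=False):
        hashes = copy.deepcopy(orig_hashes)
        if hashes.pop('created', None):
            # hashes.pkl did not previously exist, so every suffix dir has
            # to be discovered by listing the partition
            do_listdir = True
        return hashes, do_listdir

    hashes, do_listdir = sketch_prepare_hashes({'created': -1, 'updated': -1})
    assert do_listdir and 'created' not in hashes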
diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py
index 67753b4..2aa0b33 100644
--- a/test/unit/obj/test_diskfile.py
+++ b/test/unit/obj/test_diskfile.py
@@ -6308,6 +6308,28 @@ class TestSuffixHashes(unittest.TestCase):
             self.assertIn(suffix, hashes)
             self.assertEqual([], df_mgr.logger.get_lines_for_level('warning'))
 
+    def test_unpickle_error_and_get_hashes_failure(self):
+        for policy in self.iter_policies():
+            df_mgr = self.df_router[policy]
+            df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
+                                     policy=policy)
+            suffix = os.path.basename(os.path.dirname(df._datadir))
+            df.delete(self.ts())
+            part_path = os.path.join(self.devices, 'sda1',
+                                     diskfile.get_data_dir(policy), '0')
+            hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
+            # write a corrupt hashes.pkl
+            open(hashes_file, 'w')
+            # simulate first call to get_hashes failing after attempting to
+            # consolidate hashes
+            with mock.patch('swift.obj.diskfile.os.listdir',
+                            side_effect=Exception()):
+                self.assertRaises(
+                    Exception, df_mgr.get_hashes, 'sda1', '0', [], policy)
+            # verify subsequent call to get_hashes reaches correct outcome
+            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertIn(suffix, hashes)
+
     def test_invalidate_hash_consolidation(self):
         def assert_consolidation(suffixes):
             # verify that suffixes are invalidated after consolidation
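Aside: assuming a Swift development checkout with pytest available, the new test can be exercised on its own with something like the following (the project's usual test runner works as well; this invocation is only an illustration):

    pytest test/unit/obj/test_diskfile.py::TestSuffixHashes::test_unpickle_error_and_get_hashes_failure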