@alistairncoles
Created January 30, 2017 16:54
diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py
index 64e531c..1b5527a 100644
--- a/swift/obj/diskfile.py
+++ b/swift/obj/diskfile.py
@@ -237,8 +237,9 @@ def read_hashes(partition_dir):
"""
Read the existing hashes.pkl
- :returns: the hashes, or None if hashes.pkl is corrupt, cannot be read or
- does not exist
+ :returns: A dict mapping suffixes to hashes. If hashes.pkl is corrupt,
+ cannot be read or does not exist, then the dict contains a
+ 'created' key.
"""
hashes_file = join(partition_dir, HASH_FILE)
hashes = None
@@ -255,9 +256,11 @@ def read_hashes(partition_dir):
             # given invalid input depending on the way in which the
             # input is invalid.
             pass
-    if hashes is not None:
-        # hashes.pkl w/o updated key is "forever old"
-        hashes.setdefault('updated', -1)
+    if hashes is None:
+        # set the same value for every call that failed to read the hashes.pkl
+        hashes = {'created': -1}
+    # hashes.pkl w/o updated key is "forever old"
+    hashes.setdefault('updated', -1)
     return hashes
 
 
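Aside: a minimal standalone sketch of the contract the new docstring describes; it is not the Swift implementation itself and uses a temporary directory in place of a real partition path. The point is that read_hashes now always hands back a dict, with a missing or corrupt hashes.pkl signalled by a 'created' key rather than by returning None.

    # sketch only: mirrors the read_hashes return contract described above
    import os
    import pickle
    import tempfile

    HASH_FILE = 'hashes.pkl'  # same file name the patch operates on

    def sketch_read_hashes(partition_dir):
        hashes = None
        try:
            with open(os.path.join(partition_dir, HASH_FILE), 'rb') as fp:
                hashes = pickle.loads(fp.read())
        except Exception:
            # missing, unreadable or corrupt hashes.pkl
            pass
        if hashes is None:
            hashes = {'created': -1}
        hashes.setdefault('updated', -1)
        return hashes

    part_dir = tempfile.mkdtemp()
    # no hashes.pkl at all
    assert sketch_read_hashes(part_dir) == {'created': -1, 'updated': -1}
    # a corrupt (here: empty) hashes.pkl is reported the same way
    open(os.path.join(part_dir, HASH_FILE), 'w').close()
    assert sketch_read_hashes(part_dir) == {'created': -1, 'updated': -1}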
@@ -268,7 +271,6 @@ def write_hashes(partition_dir, hashes):
     The updated key is added to hashes before it is written.
     """
     hashes_file = join(partition_dir, HASH_FILE)
-    hashes = hashes or {}
     hashes['updated'] = time.time()
     return write_pickle(hashes, hashes_file, partition_dir, PICKLE_PROTOCOL)
 
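Aside: with the hashes = hashes or {} fallback removed, callers are now expected to pass a dict, and write_hashes stamps it with an 'updated' time before pickling. A simplified stand-in for that behaviour, assuming a plain pickle.dump in place of Swift's write_pickle:

    # sketch only: Swift's real write_hashes delegates to write_pickle(),
    # which writes a temp file and renames it into place; this sketch just
    # dumps the pickle directly
    import os
    import pickle
    import time

    def sketch_write_hashes(partition_dir, hashes):
        hashes['updated'] = time.time()  # stamped on every write
        with open(os.path.join(partition_dir, 'hashes.pkl'), 'wb') as fp:
            pickle.dump(hashes, fp)
        return hashes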
@@ -302,25 +304,26 @@ def consolidate_hashes(partition_dir):
         hashes = read_hashes(partition_dir)
 
         found_invalidation_entry = False
-        try:
-            with open(invalidations_file, 'rb') as inv_fh:
-                for line in inv_fh:
-                    found_invalidation_entry = True
-                    suffix = line.strip()
-                    if hashes is not None and \
-                            hashes.get(suffix, '') is not None:
+        if 'created' in hashes:
+            # replace value with unique timestamp to distinguish this write of
+            # a new hashes.pkl from any other concurrent write
+            hashes['created'] = time.time()
+        else:
+            try:
+                with open(invalidations_file, 'rb') as inv_fh:
+                    for line in inv_fh:
+                        found_invalidation_entry = True
+                        suffix = line.strip()
                         hashes[suffix] = None
-        except (IOError, OSError) as e:
-            if e.errno != errno.ENOENT:
-                raise
+            except (IOError, OSError) as e:
+                if e.errno != errno.ENOENT:
+                    raise
 
-        if found_invalidation_entry:
+        if found_invalidation_entry or 'created' in hashes:
             write_hashes(partition_dir, hashes)
-
-        # Now that all the invalidations are reflected in hashes.pkl, it's
-        # safe to clear out the invalidations file.
-        if found_invalidation_entry:
-            with open(invalidations_file, 'wb') as inv_fh:
+            # Now that all the invalidations are reflected in hashes.pkl, it's
+            # safe to clear out the invalidations file.
+            with open(invalidations_file, 'wb'):
                 pass
 
         return hashes
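Aside: the reason 'created' is replaced with time.time() is race detection. Two processes that both find no hashes.pkl start from the identical {'created': -1} dict returned by read_hashes; stamping a unique timestamp before writing makes each writer's payload distinguishable when hashes.pkl is re-read later. A small sketch of that idea, with hypothetical names:

    # sketch only: illustrates why a brand new hashes.pkl gets a unique
    # 'created' timestamp before it is written
    import time

    def stamp_new_hashes(hashes):
        if 'created' in hashes:
            # this process is creating hashes.pkl for the first time; make
            # its payload distinguishable from any concurrent writer that
            # is doing the same thing
            hashes['created'] = time.time()
        return hashes

    writer_a = stamp_new_hashes({'created': -1, 'updated': -1})
    writer_b = stamp_new_hashes({'created': -1, 'updated': -1})
    # a later re-read-and-compare against what this process wrote reveals
    # whether another writer got in between (their 'created' stamps will
    # almost certainly differ)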
@@ -1073,18 +1076,19 @@ class BaseDiskFileManager(object):
         if orig_hashes is None:
             hashes = {}
             do_listdir = True
-            # We have to read_hashes again because when consolidate_hashes
-            # returns None to trigger do_listdir - it actually has to writeback
-            # an empty hashes dict with an updated key to prevent a race with
-            # another set of invalidations. Or if the exception handling
-            # fired; read_hashes might just return None again.
-            # N.B. orig_hashes is None only effects new parts and error/edge
-            # conditions - so try not to get overly caught up trying to
-            # optimize it out unless you manage to convince yourself there's a
-            # bad behavior.
+            # we have to read_hashes again because of the Exception handling
+            # around consolidate_hashes - it's a long story; this may return
+            # None again - but for now it should be obviously benign and avoids
+            # a recursive tail spin in tests. N.B. orig_hashes is None only
+            # effects new parts and error/edge conditions - so try not to get
+            # overly caught up trying to optimize it out unless you manage to
+            # convince yourself there's a bad behavior
             orig_hashes = read_hashes(partition_path)
         else:
             hashes = copy.deepcopy(orig_hashes)
+        if hashes.pop('created', None):
+            # hashes.pkl did not previously exist
+            do_listdir = True
 
         if do_listdir:
             for suff in os.listdir(partition_path):
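Aside: on the consumer side, popping 'created' out of the consolidated hashes is what forces a full listdir of the partition when hashes.pkl did not previously exist. A sketch of just that decision, using hypothetical names outside the real BaseDiskFileManager:

    # sketch only: consumer-side handling of the 'created' marker
    import copy

    def sketch_prepare_hashes(orig_hashes, do_listdir=False):
        hashes = copy.deepcopy(orig_hashes)
        if hashes.pop('created', None):
            # hashes.pkl did not previously exist, so every suffix dir has
            # to be discovered by listing the partition
            do_listdir = True
        return hashes, do_listdir

    hashes, do_listdir = sketch_prepare_hashes({'created': -1, 'updated': -1})
    assert do_listdir and 'created' not in hashes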
diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py
index 67753b4..2aa0b33 100644
--- a/test/unit/obj/test_diskfile.py
+++ b/test/unit/obj/test_diskfile.py
@@ -6308,6 +6308,28 @@ class TestSuffixHashes(unittest.TestCase):
             self.assertIn(suffix, hashes)
             self.assertEqual([], df_mgr.logger.get_lines_for_level('warning'))
 
+    def test_unpickle_error_and_get_hashes_failure(self):
+        for policy in self.iter_policies():
+            df_mgr = self.df_router[policy]
+            df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
+                                     policy=policy)
+            suffix = os.path.basename(os.path.dirname(df._datadir))
+            df.delete(self.ts())
+            part_path = os.path.join(self.devices, 'sda1',
+                                     diskfile.get_data_dir(policy), '0')
+            hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
+            # write a corrupt hashes.pkl
+            open(hashes_file, 'w')
+            # simulate first call to get_hashes failing after attempting to
+            # consolidate hashes
+            with mock.patch('swift.obj.diskfile.os.listdir',
+                            side_effect=Exception()):
+                self.assertRaises(
+                    Exception, df_mgr.get_hashes, 'sda1', '0', [], policy)
+            # verify subsequent call to get_hashes reaches correct outcome
+            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertIn(suffix, hashes)
+
     def test_invalidate_hash_consolidation(self):
         def assert_consolidation(suffixes):
             # verify that suffixes are invalidated after consolidation
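Aside: assuming a Swift development checkout with pytest available, the new test can be exercised on its own with something like the following (the project's usual test runner works as well; this invocation is only an illustration):

    pytest test/unit/obj/test_diskfile.py::TestSuffixHashes::test_unpickle_error_and_get_hashes_failure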