Skip to content

Instantly share code, notes, and snippets.

@prashanthpai
Last active April 26, 2016 13:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save prashanthpai/e246be62656f25d7e31b to your computer and use it in GitHub Desktop.
Save prashanthpai/e246be62656f25d7e31b to your computer and use it in GitHub Desktop.
Renamer() with and without fsync
#!/usr/bin/env python
import benchmark
import os
import uuid
import errno
import shutil
import random
import hashlib
from swift.common.utils import mkdirs
def fsync_dir(dirpath):
    """
    Open the directory at ``dirpath`` and call fsync() on it.

    Failures are handled on a best-effort basis: any OSError raised while
    opening or syncing the directory is ignored, except ENOTDIR.

    :param dirpath: path of the directory to fsync
    :raises OSError: with errno ENOTDIR when ``dirpath`` is not a directory
    """
    dirfd = None
    try:
        dirfd = os.open(dirpath, os.O_DIRECTORY | os.O_RDONLY)
        os.fsync(dirfd)
    except OSError as err:
        # Being handed a non-directory is a caller error, so surface it.
        # Every other failure is deliberately swallowed.
        if err.errno == errno.ENOTDIR:
            raise
    finally:
        # 0 is a valid file descriptor, so test against None rather than
        # relying on truthiness; otherwise fd 0 would never be closed.
        if dirfd is not None:
            os.close(dirfd)
def renamer(old, new, fsync=False):
    """
    Benchmark stand-in for Swift's current renamer().

    The os.rename() call is commented out, so only the directory creation
    (and the optional fsync of the containing directory) is exercised.

    :param old: path that would be renamed (unused while the rename is
                commented out)
    :param new: destination path; its containing directory is created
    :param fsync: when true, fsync the containing directory of ``new``
    """
    parent = os.path.dirname(new)
    try:
        mkdirs(parent)
        # os.rename(old, new)
    except OSError:
        # Retry once if the first attempt fails with OSError.
        mkdirs(parent)
        # os.rename(old, new)
    if not fsync:
        return
    fsync_dir(parent)
def makedirs_count(path, count=0):
    """
    Same as os.makedirs() except that this method returns the number of
    new directories that had to be created.
    https://hg.python.org/cpython/file/v2.7.3/Lib/os.py#l136

    Also, this does not raise an error if target directory already exists.
    This behaviour is similar to Python 3.x's os.makedirs() called with
    exist_ok=True. Also similar to swift.common.utils.mkdirs()
    https://hg.python.org/cpython/file/v3.4.2/Lib/os.py#l212

    :param path: directory path to create, including missing parents
    :param count: running total of directories created so far; used by the
                  recursion, callers normally leave it at 0
    :returns: ``count`` plus the number of directories this call created
    :raises OSError: if a directory cannot be created for any reason other
                     than it already existing as a directory
    """
    head, tail = os.path.split(path)
    if not tail:
        # path ended with a separator; split again to get the real leaf
        head, tail = os.path.split(head)
    if head and tail and not os.path.exists(head):
        try:
            count = makedirs_count(head, count)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        if tail == os.path.curdir:
            # "xxx/newdir/." -- the parents are all that needed creating.
            # Return the running total (not None) so callers can always
            # treat the result as an integer.
            return count
    try:
        os.mkdir(path)
    except OSError as e:
        # EEXIST is also raised when path exists as a regular file; only
        # an already-existing directory is acceptable.
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise
    else:
        count += 1
    return count
def renamer2(old, new, fsync=True):
    """
    Variant of renamer() that retries directory creation once, to paper
    over races such as empty object directories being removed by backend
    processes during uploads.

    By default the containing directory of ``new`` and the parents of all
    newly created directories are fsync'd. This costs performance; callers
    that do not need the extra durability are expected to pass
    ``fsync=False`` explicitly.

    The os.rename() call itself is commented out for benchmarking.

    :param old: old path to be renamed
    :param new: new path to be renamed to
    :param fsync: fsync on containing directory of new
    """
    leaf = os.path.dirname(new)
    try:
        created = makedirs_count(leaf)
        # os.rename(old, new)
    except OSError:
        created = makedirs_count(leaf)
        # os.rename(old, new)
    if not fsync:
        return
    # created == 0: nothing new was made, but the leaf dir still has to be
    # fsync'd after os.rename().
    # created > 0: walk upwards from the leaf, fsyncing one directory per
    # step, for created + 1 steps in total.
    target = leaf
    for _ in range(0, created + 1):
        fsync_dir(target)
        target = os.path.dirname(target)
def clean():
    """
    Remove the benchmark's object trees under MOUNT_PATH, then flush
    filesystem buffers and drop the kernel caches.

    Removal errors are ignored. Writing to /proc/sys/vm/drop_caches is not
    guarded, so a failure to open that file propagates to the caller.
    """
    for policy_dir in ('objects', 'objects-1', 'objects-2'):
        tree = os.path.join(MOUNT_PATH, policy_dir)
        shutil.rmtree(tree, ignore_errors=True)
    # flush fs buffers
    os.system('sync')
    # free pagecache, dentries and inodes
    with open('/proc/sys/vm/drop_caches', 'w') as drop_caches:
        drop_caches.write('3\n')
# Root directory under which clean() removes, and RenamerFsyncDirs builds,
# the 'objects*' directory trees.
MOUNT_PATH = '/mnt/test'
class RenamerFsyncDirs(benchmark.Benchmark):
    """
    Compare directory creation without fsync, with fsync of the leaf
    directory only, and with fsync of the leaf plus the parents of every
    newly created directory.

    NOTE(review): setUp/eachSetUp/tearDown and the test_* methods are
    presumably hooks discovered by the ``benchmark`` package -- confirm
    against its documentation.
    """

    def setUp(self):
        # Silly way to create object dir path names. Swift object path template:
        # /objects-<sp_index>/<partition-number>/<last-3-characters-of-hash>/<hash>/<timestamp>.data
        self.paths = []
        for sp in ('objects', 'objects-1', 'objects-2'):
            for partnumber in range(0, 100):
                for i in range(0, 200):
                    # hashlib requires bytes on Python 3; encoding the
                    # ASCII uuid string leaves Python 2 behaviour unchanged.
                    md5hash = hashlib.md5(
                        str(uuid.uuid4()).encode('utf-8')).hexdigest()
                    path = '/'.join([MOUNT_PATH, sp, str(partnumber),
                                     md5hash[-3:], md5hash, 'obj.data'])
                    self.paths.append(path)
        # Randomise the order in which the paths are visited.
        random.shuffle(self.paths)
        # Single-argument parenthesised form prints identically on
        # Python 2 and Python 3.
        print("Total object paths = %d" % len(self.paths))

    def eachSetUp(self):
        # Wipe the trees and drop caches (see clean()).
        clean()

    def test_renamer(self):
        # Baseline: directory creation only, no fsync.
        for path in self.paths:
            renamer(None, path, fsync=False)

    def test_renamer_fsync_only_leaf(self):
        # fsync only the containing directory of each path.
        for path in self.paths:
            renamer(None, path, fsync=True)

    def test_renamer_fsync_parent_dirs(self):
        # fsync the containing directory and the parents of new directories.
        for path in self.paths:
            renamer2(None, path, fsync=True)

    def tearDown(self):
        clean()
# Run the benchmarks only when this file is executed as a script.
# NOTE(review): each=3 presumably asks the benchmark package for three runs
# per test method -- confirm against its documentation.
if __name__ == '__main__':
    benchmark.main(each=3)
@prashanthpai
Copy link
Author

Total object paths = 60000

Benchmark Report

RenamerFsyncDirs

name rank runs mean sd timesBaseline
renamer 1 3 4.595 0.02221 1.0
renamer fsync only leaf 2 3 21.82 0.2758 4.75001699024
renamer fsync parent dirs 3 3 23.47 0.2978 5.10740909722

Each of the above 9 runs were run in random, non-consecutive order by
benchmark v0.1.5 (http://jspi.es/benchmark) with Python 2.7.5
Linux-3.16.4-200.fc20.x86_64-x86_64 on 2014-12-23 09:34:12.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment