Last active
April 26, 2016 13:42
-
-
Save prashanthpai/e246be62656f25d7e31b to your computer and use it in GitHub Desktop.
Renamer() with and without fsync
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import benchmark | |
import os | |
import uuid | |
import errno | |
import shutil | |
import random | |
import hashlib | |
from swift.common.utils import mkdirs | |
def fsync_dir(dirpath):
    """
    Sync a directory to disk on a best-effort basis.

    The directory is opened read-only, ``os.fsync()`` is called on the
    resulting descriptor, and the descriptor is always closed again.

    :param dirpath: path of the directory to fsync
    :raises OSError: only when errno is ENOTDIR, i.e. ``dirpath`` is not a
                     directory. Every other OSError raised by open/fsync
                     is deliberately swallowed.
    """
    dirfd = None
    try:
        dirfd = os.open(dirpath, os.O_DIRECTORY | os.O_RDONLY)
        os.fsync(dirfd)
    except OSError as err:
        # Calling this on a non-directory is a caller bug: surface it.
        # Anything else (e.g. the directory vanished) is ignored.
        if err.errno == errno.ENOTDIR:
            raise
    finally:
        # Compare against None rather than truthiness: 0 is a valid file
        # descriptor and would otherwise be leaked.
        if dirfd is not None:
            os.close(dirfd)
def renamer(old, new, fsync=False):
    """
    Benchmark variant of Swift's current renamer().

    The os.rename() call is intentionally left out, so only the cost of
    creating the containing directory of ``new`` (plus the optional fsync
    of that directory) is exercised.

    :param old: old path; unused because the rename itself is disabled
    :param new: destination path whose containing directory is created
    :param fsync: when true, fsync the containing directory of ``new``
    """
    target_dir = os.path.dirname(new)
    try:
        mkdirs(target_dir)
        # os.rename(old, new) would happen here.
    except OSError:
        # Single retry; a second failure propagates to the caller.
        mkdirs(target_dir)
        # os.rename(old, new) would happen here.
    if not fsync:
        return
    fsync_dir(target_dir)
def makedirs_count(path, count=0):
    """
    Same as os.makedirs() except that this method returns the number of
    new directories that had to be created.
    https://hg.python.org/cpython/file/v2.7.3/Lib/os.py#l136

    Also, this does not raise an error if target directory already exists.
    This behaviour is similar to Python 3.x's os.makedirs() called with
    exist_ok=True. Also similar to swift.common.utils.mkdirs()
    https://hg.python.org/cpython/file/v3.4.2/Lib/os.py#l212

    :param path: directory path to create, including missing parents
    :param count: running total of directories created so far; used by the
                  recursion, callers normally leave it at 0
    :returns: ``count`` plus the number of directories created by this call
    :raises OSError: if a directory cannot be created, including when
                     ``path`` already exists but is not a directory
    """
    head, tail = os.path.split(path)
    if not tail:
        # Path ended with a separator: split again to get the real leaf.
        head, tail = os.path.split(head)
    if head and tail and not os.path.exists(head):
        try:
            count = makedirs_count(head, count)
        except OSError as e:
            # Be happy if someone else already created the parent.
            if e.errno != errno.EEXIST:
                raise
        if tail == os.path.curdir:
            # xxx/newdir/. exists once xxx/newdir exists. Return the running
            # total instead of None so callers can always do arithmetic on
            # the result.
            return count
    try:
        os.mkdir(path)
    except OSError as e:
        # EEXIST is also raised when path exists as a file; do not let
        # that case pass silently.
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise
    else:
        count += 1
    return count
def renamer2(old, new, fsync=True):
    """
    Benchmark variant of the proposed renamer().

    Attempts to fix / hide race conditions, such as empty object directories
    being removed by backend processes during uploads, by retrying the
    directory creation once.

    By default the containing directory of ``new`` and the parents of all
    newly created directories are fsync'd. This comes at a performance
    penalty; callers that do not need the extra durability are expected to
    turn it off explicitly. The os.rename() call itself is left out on
    purpose.

    :param old: old path to be renamed (unused, the rename is disabled)
    :param new: new path to be renamed to
    :param fsync: fsync on containing directory of new and on the parents
                  of every directory that had to be created
    """
    current = os.path.dirname(new)
    try:
        created = makedirs_count(current)
        # os.rename(old, new) would happen here.
    except OSError:
        # Single retry; a second failure propagates to the caller.
        created = makedirs_count(current)
        # os.rename(old, new) would happen here.
    if not fsync:
        return
    # With created == 0 only the leaf directory is synced. Otherwise walk
    # upwards from the leaf, syncing one extra level per directory created.
    remaining = created + 1
    while remaining > 0:
        fsync_dir(current)
        current = os.path.dirname(current)
        remaining -= 1
def clean():
    """
    Reset the test mount point to a cold state.

    Removes the three object trees under MOUNT_PATH (errors ignored), runs
    ``sync`` and then writes ``3`` to /proc/sys/vm/drop_caches.
    """
    trees = [os.path.join(MOUNT_PATH, name)
             for name in ('objects', 'objects-1', 'objects-2')]
    for tree in trees:
        shutil.rmtree(tree, ignore_errors=True)
    # Flush filesystem buffers.
    os.system('sync')
    # Free pagecache, dentries and inodes.
    with open('/proc/sys/vm/drop_caches', 'w') as drop_caches:
        drop_caches.write('3\n')
# Root directory under which the benchmark builds its object paths and
# which clean() wipes between runs.
MOUNT_PATH = '/mnt/test'
class RenamerFsyncDirs(benchmark.Benchmark):
    """
    Benchmark comparing directory creation without fsync, with fsync of the
    leaf directory only, and with fsync of all newly created parents.
    """

    def setUp(self):
        """Build the shuffled list of object paths used by every test."""
        # Silly way to create object dir path names. Swift object path
        # template:
        # /objects-<sp_index>/<partition-number>/<last-3-characters-of-hash>/<hash>/<timestamp>.data
        self.paths = []
        for sp in ('objects', 'objects-1', 'objects-2'):
            for partnumber in range(0, 100):
                for i in range(0, 200):
                    # Encode before hashing so this works on Python 2 and 3;
                    # the uuid string is pure ASCII.
                    md5hash = hashlib.md5(
                        str(uuid.uuid4()).encode('utf-8')).hexdigest()
                    path = '/'.join([MOUNT_PATH, sp, str(partnumber),
                                     md5hash[-3:], md5hash, 'obj.data'])
                    self.paths.append(path)
        random.shuffle(self.paths)
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("Total object paths = %d" % len(self.paths))

    def eachSetUp(self):
        """Start from a clean mount point (presumably run before each test)."""
        clean()

    def test_renamer(self):
        """Create the directories without any fsync."""
        for path in self.paths:
            renamer(None, path, fsync=False)

    def test_renamer_fsync_only_leaf(self):
        """Create the directories and fsync only the leaf directory."""
        for path in self.paths:
            renamer(None, path, fsync=True)

    def test_renamer_fsync_parent_dirs(self):
        """Create the directories and fsync the leaf plus new parents."""
        for path in self.paths:
            renamer2(None, path, fsync=True)

    def tearDown(self):
        """Leave the mount point clean once the benchmark is done."""
        clean()
# Script entry point: hand control to the benchmark runner. each=3 is passed
# straight to benchmark.main() (presumably the number of runs per test --
# TODO confirm against the benchmark package).
if __name__ == '__main__':
    benchmark.main(each=3)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Total object paths = 60000
Benchmark Report
RenamerFsyncDirs
Each of the above 9 runs was run in random, non-consecutive order by benchmark v0.1.5 (http://jspi.es/benchmark) with Python 2.7.5 on Linux-3.16.4-200.fc20.x86_64-x86_64 on 2014-12-23 09:34:12.