Skip to content

Instantly share code, notes, and snippets.

@prashanthpai
Last active April 26, 2016 13:37
Show Gist options
  • Save prashanthpai/00476e957b3b8d7d640c to your computer and use it in GitHub Desktop.
Save prashanthpai/00476e957b3b8d7d640c to your computer and use it in GitHub Desktop.
diff --git a/swiftonfile/swift/common/utils.py b/swiftonfile/swift/common/utils.py
index c7ad8bf..5cb30ae 100644
--- a/swiftonfile/swift/common/utils.py
+++ b/swiftonfile/swift/common/utils.py
@@ -292,7 +292,7 @@ def _get_etag(path_or_fd):
return etag
-def get_object_metadata(obj_path_or_fd, stats=None):
+def get_object_metadata(obj_path_or_fd, stats=None, etag=None):
"""
Return metadata of object.
"""
@@ -311,6 +311,10 @@ def get_object_metadata(obj_path_or_fd, stats=None):
metadata = {}
else:
is_dir = stat.S_ISDIR(stats.st_mode)
+ if not is_dir:
+ etag = etag or _get_etag(obj_path_or_fd)
+ else:
+ etag = md5().hexdigest()
metadata = {
X_TYPE: OBJECT,
X_TIMESTAMP: normalize_timestamp(stats.st_ctime),
@@ -318,7 +322,7 @@ def get_object_metadata(obj_path_or_fd, stats=None):
X_OBJECT_TYPE: DIR_NON_OBJECT if is_dir else FILE,
X_CONTENT_LENGTH: 0 if is_dir else stats.st_size,
X_MTIME: 0 if is_dir else normalize_timestamp(stats.st_mtime),
- X_ETAG: md5().hexdigest() if is_dir else _get_etag(obj_path_or_fd)}
+ X_ETAG: etag}
return metadata
@@ -333,11 +337,12 @@ def restore_metadata(path, metadata, meta_orig):
return meta_new
-def create_object_metadata(obj_path_or_fd, stats=None, existing_meta={}):
+def create_object_metadata(obj_path_or_fd, stats=None, etag=None,
+ existing_meta={}):
# We must accept either a path or a file descriptor as an argument to this
# method, as the diskfile modules uses a file descriptior and the DiskDir
# module (for container operations) uses a path.
- metadata_from_stat = get_object_metadata(obj_path_or_fd, stats)
+ metadata_from_stat = get_object_metadata(obj_path_or_fd, stats, etag)
return restore_metadata(obj_path_or_fd, metadata_from_stat, existing_meta)
diff --git a/swiftonfile/swift/obj/diskfile.py b/swiftonfile/swift/obj/diskfile.py
index 4d00ef9..23dc149 100644
--- a/swiftonfile/swift/obj/diskfile.py
+++ b/swiftonfile/swift/obj/diskfile.py
@@ -25,6 +25,7 @@ import logging
import time
from uuid import uuid4
from eventlet import sleep
+from hashlib import md5
from contextlib import contextmanager
from swiftonfile.swift.common.exceptions import AlreadyExistsAsFile, \
AlreadyExistsAsDir
@@ -40,13 +41,14 @@ from swiftonfile.swift.common.fs_utils import do_fstat, do_open, do_close, \
do_fadvise64, do_rename, do_fdatasync, do_lseek, do_mkdir
from swiftonfile.swift.common.utils import read_metadata, write_metadata, \
validate_object, create_object_metadata, rmobjdir, dir_is_object, \
- get_object_metadata, write_pickle
+ get_object_metadata, write_pickle, CHUNK_SIZE
from swiftonfile.swift.common.utils import X_CONTENT_TYPE, \
X_TIMESTAMP, X_TYPE, X_OBJECT_TYPE, FILE, OBJECT, DIR_TYPE, \
FILE_TYPE, DEFAULT_UID, DEFAULT_GID, DIR_NON_OBJECT, DIR_OBJECT, \
X_ETAG, X_CONTENT_LENGTH, X_MTIME
from swift.obj.diskfile import DiskFileManager as SwiftDiskFileManager
from swift.obj.diskfile import get_async_dir
+from six import BytesIO as six_BytesIO
# FIXME: Hopefully we'll be able to move to Python 2.7+ where O_CLOEXEC will
# be back ported. See http://www.python.org/dev/peps/pep-0433/
@@ -547,6 +549,24 @@ class DiskFileReader(object):
do_close(fd)
+class SmallDiskFileReader(object):
+    """
+    Reader for an object whose content is already held in memory.
+
+    Iterating yields the whole buffer as a single chunk and then releases
+    the file descriptor handed over by the caller.
+
+    :param fd: open file descriptor to close once iteration is done,
+               or None when there is nothing to close
+    :param smallfile: in-memory buffer exposing getvalue()
+    """
+    def __init__(self, fd, smallfile):
+        self._smallfile = smallfile
+        self._fd = fd
+
+    def __iter__(self):
+        # The single yield hands out the complete buffer; the finally
+        # clause also runs when the generator is closed before being
+        # exhausted, so the descriptor is released either way.
+        try:
+            yield self._smallfile.getvalue()
+        finally:
+            self.close()
+
+    def close(self):
+        """
+        Close the descriptor at most once; later calls do nothing.
+        """
+        owned_fd = self._fd
+        if owned_fd is None:
+            return
+        # Forget the descriptor before closing it so that it is never
+        # closed a second time.
+        self._fd = None
+        if owned_fd > -1:
+            do_close(owned_fd)
+
+
class DiskFile(object):
"""
Manage object files on disk.
@@ -602,6 +622,10 @@ class DiskFile(object):
self._data_file = os.path.join(self._put_datadir, self._obj)
+ # Small file optimization
+ self._small_file_size = mgr.disk_chunk_size
+ self._small_file = None
+
def open(self):
"""
Open the object.
@@ -636,8 +660,16 @@ class DiskFile(object):
self._metadata = read_metadata(self._fd)
if not validate_object(self._metadata, self._stat):
+ etag = None
+ self._small_file_size
+ if not self._is_dir and \
+ obj_size <= self._small_file_size:
+ chunk = do_read(self._fd, CHUNK_SIZE)
+ etag = md5(chunk).hexdigest()
+ # https://github.com/tornadoweb/tornado/issues/1110
+ self._small_file = six_BytesIO(chunk)
self._metadata = create_object_metadata(self._fd, self._stat,
- self._metadata)
+ etag, self._metadata)
assert self._metadata is not None
self._filter_metadata()
@@ -762,10 +794,13 @@ class DiskFile(object):
"""
if self._metadata is None:
raise DiskFileNotOpen()
- dr = DiskFileReader(
- self._fd, self._threadpool, self._mgr.disk_chunk_size,
- self._obj_size, self._mgr.keep_cache_size,
- iter_hook=iter_hook, keep_cache=keep_cache)
+ if self._small_file:
+ dr = SmallDiskFileReader(self._fd, self._small_file)
+ else:
+ dr = DiskFileReader(
+ self._fd, self._threadpool, self._mgr.disk_chunk_size,
+ self._obj_size, self._mgr.keep_cache_size,
+ iter_hook=iter_hook, keep_cache=keep_cache)
# At this point the reader object is now responsible for closing
# the file pointer.
self._fd = None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment