Last active
January 9, 2016 23:32
-
-
Save jaseg/7b347cfbb8deabc9ea7c to your computer and use it in GitHub Desktop.
little python experiment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pathlib | |
import itertools | |
import lzma | |
import threading | |
import functools | |
class Blobstore:
    """Directory-backed store of LZMA-compressed blobs addressed by integer id.

    On disk the store is a root directory holding 256 bucket directories
    named ``00`` .. ``ff``.  A blob id is rendered as a zero-padded lowercase
    hexadecimal string of ``blobid_len`` digits; its first two digits select
    the bucket directory and the remaining digits are the file name inside it.
    """

    # Digits used to enumerate the two-character bucket directory names.
    _HEX_DIGITS = '0123456789abcdef'

    def __init__(self, path, create_if_missing=False, validate=True, blobid_len=4):
        """Open the blobstore rooted at *path*.

        path              -- root directory of the store (anything
                             ``pathlib.Path`` accepts).
        create_if_missing -- create the directory layout when *path* does not
                             exist yet.
        validate          -- check the directory layout before returning.
        blobid_len        -- number of hexadecimal digits in a formatted id.

        Raises FileNotFoundError or ValueError from ``validate_blobstore``
        when *validate* is true and the layout is missing or incomplete.
        """
        self._path_obj = pathlib.Path(path)
        self._blobid_len = blobid_len
        if create_if_missing and not self._path_obj.exists():
            self.create_blobstore()
        if validate:
            self.validate_blobstore()

    def _fmt_blobid(self, blobid):
        """Return *blobid* as zero-padded lowercase hex of ``blobid_len`` digits."""
        return '{:0{}x}'.format(blobid, self._blobid_len)

    def _bucket_dirs(self):
        """Yield the path of each of the 256 bucket directories."""
        for hi, lo in itertools.product(self._HEX_DIGITS, repeat=2):
            yield self._path_obj / (hi + lo)

    def create_blobstore(self):
        """Create the root directory and all bucket directories.

        Raises FileExistsError if the root path already exists.
        """
        if self._path_obj.exists():
            raise FileExistsError('Blobstore at "{}" already exists'.format(self._path_obj))
        self._path_obj.mkdir()
        for bucket in self._bucket_dirs():
            bucket.mkdir()

    def validate_blobstore(self):
        """Check that the root and every bucket directory exist.

        Raises FileNotFoundError if the root path does not exist and
        ValueError if any bucket directory is missing.
        """
        if not self._path_obj.exists():
            raise FileNotFoundError('Blobstore at "{}" does not exist'.format(self._path_obj))
        if not all(bucket.is_dir() for bucket in self._bucket_dirs()):
            raise ValueError('No valid blobstore found at "{}"'.format(self._path_obj))

    def _blobid_to_file(self, blob_id):
        """Return the path of the file that stores blob *blob_id*."""
        blob_id = self._fmt_blobid(blob_id)
        return self._path_obj / blob_id[:2] / blob_id[2:]

    def blobstore_open(self, blob_id, mode='rt'):
        """Open blob *blob_id* through ``lzma.open`` and return the file object.

        *mode* is passed to ``lzma.open`` unchanged.  Raises ValueError when
        *blob_id* is outside ``0 <= blob_id < 16**blobid_len``.
        """
        # Validate before building the path: a negative or oversized id would
        # otherwise format to a name that falls outside the bucket layout.
        if not 0 <= blob_id < 16 ** self._blobid_len:
            raise ValueError('Invalid blobid "{}"'.format(blob_id))
        blob_file = self._blobid_to_file(blob_id)
        # Sanity check on the store layout, not on the caller's input.
        assert blob_file.parent.exists(), 'missing bucket directory for blob file'
        return lzma.open(str(blob_file), mode)
class Stringstore(Blobstore):
    """Store of single-line strings kept in numbered blocks of a Blobstore.

    Each block is one blob containing one string per line.  A string is
    addressed by ``(block_id, string_id)`` where ``string_id`` is its
    zero-based line index inside the block.  The ``last_block`` symlink in
    the store root points at the blob file of the block that new strings are
    appended to.
    """

    def __init__(self, path, create_if_missing=False, validate=True, block_id_len=4, max_block_size=262144):
        """Open the stringstore rooted at *path*.

        path              -- root directory of the store.
        create_if_missing -- create the store, an empty block 0 and the
                             ``last_block`` symlink when *path* does not exist.
        validate          -- check the blobstore layout and the ``last_block``
                             symlink before returning.
        block_id_len      -- number of hexadecimal digits in a block id.
        max_block_size    -- size threshold (characters plus one per string)
                             above which ``insert_strings`` starts a new block.
        """
        # The base class must neither create nor validate on its own:
        # creation needs the extra stringstore steps below to follow it.
        super().__init__(path, False, False, block_id_len)
        self.max_block_size = max_block_size
        # Per-instance cache of decoded blocks.  Entries are tuples so that
        # callers can never mutate cached data in place.
        self._block_cache = functools.lru_cache(maxsize=32)(self._load_block)
        if create_if_missing and not self._path_obj.exists():
            self.create_blobstore()
            self.blobstore_open(0, 'x').close()
            self._point_last_block_at(0)
        if validate:
            self.validate_blobstore()
            self.validate_stringstore()

    def _last_block_link(self):
        """Return the path of the ``last_block`` symlink."""
        return self._path_obj / 'last_block'

    def _last_block_id(self):
        """Return the block id that the ``last_block`` symlink points at."""
        target = self._last_block_link().resolve()
        # Compare fully resolved paths so that a relative or symlinked store
        # root still matches the resolved link target.
        relative = target.relative_to(self._path_obj.resolve())
        return int(''.join(relative.parts), 16)

    def _point_last_block_at(self, block_id):
        """Make the ``last_block`` symlink point at the blob of *block_id*."""
        link = self._last_block_link()
        if link.is_symlink():
            link.unlink()
        # A symlink target is interpreted relative to the link's directory
        # (the store root), so store it relative to the root.
        link.symlink_to(self._blobid_to_file(block_id).relative_to(self._path_obj))

    @staticmethod
    def _check_storable(value):
        """Raise ValueError if *value* cannot be stored as exactly one line."""
        if '\n' in value or '\r' in value:
            raise ValueError('Strings containing line breaks cannot be stored: {!r}'.format(value))

    def validate_stringstore(self):
        """Check that ``last_block`` is a symlink to an existing file in the store.

        Raises ValueError otherwise.
        """
        link = self._last_block_link()
        valid = link.is_symlink() and link.exists()
        if valid:
            try:
                link.resolve().relative_to(self._path_obj.resolve())
            except ValueError:
                valid = False
        if not valid:
            raise ValueError('No valid stringstore found at "{}"'.format(self._path_obj))

    def _load_block(self, block_id):
        """Read block *block_id* from disk and return its strings as a tuple."""
        with self.blobstore_open(block_id, 'rt') as f:
            return tuple(line.rstrip('\n') for line in f)

    def _read_block(self, block_id):
        """Return the strings of block *block_id* as a fresh, mutable list."""
        return list(self._block_cache(block_id))

    def get_string(self, block_id, string_id):
        """Return the string at index *string_id* of block *block_id*."""
        return self._block_cache(block_id)[string_id]

    def _write_block(self, block_id, lines):
        """Replace the content of block *block_id* with *lines*."""
        self._block_cache.cache_clear()
        with self.blobstore_open(block_id, 'wt') as f:
            # Terminate every line (rather than joining with '\n') so that
            # trailing empty strings survive a write/read round trip.
            f.writelines(line + '\n' for line in lines)

    def replace_string(self, block_id, string_id, value):
        """Overwrite the string at ``(block_id, string_id)`` with *value*.

        Raises ValueError if *value* contains a line break and IndexError if
        *string_id* is out of range for the block.
        """
        self._check_storable(value)
        lines = self._read_block(block_id)
        lines[string_id] = value
        self._write_block(block_id, lines)

    def insert_strings(self, values):
        """Append *values* to the store, yielding each new string's id.

        This is a generator: nothing is stored until it is iterated.  For
        every value it yields the zero-based index of that value inside the
        block it was appended to.  Once the current block exceeds
        ``max_block_size`` a new block is created, ``last_block`` is moved to
        it, and indices restart at 0; the block id itself is not yielded.

        The block being filled is written to disk when the generator
        finishes, is closed, or fails.

        Raises ValueError for a value containing a line break and
        RuntimeError when no further block id is available.
        """
        block_id = self._last_block_id()
        lines = self._read_block(block_id)
        # Count one extra character per string for its newline, matching the
        # accounting used for the strings appended below.
        size = sum(len(line) + 1 for line in lines)
        try:
            for val in values:
                self._check_storable(val)
                if size > self.max_block_size:
                    next_id = block_id + 1
                    if next_id >= 16 ** self._blobid_len:
                        raise RuntimeError('Stringstore at "{}" is full'.format(self._path_obj))
                    self._write_block(block_id, lines)
                    self.blobstore_open(next_id, 'x').close()
                    self._point_last_block_at(next_id)
                    block_id, lines, size = next_id, [], 0
                # Append unconditionally: the value that triggers a rollover
                # becomes the first string of the new block.
                lines.append(val)
                size += len(val) + 1
                yield len(lines) - 1
        finally:
            # Persist every string whose id has already been handed out,
            # even if the consumer stops iterating early.
            self._write_block(block_id, lines)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment