Skip to content

Instantly share code, notes, and snippets.

@jaseg
Last active January 9, 2016 23:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jaseg/7b347cfbb8deabc9ea7c to your computer and use it in GitHub Desktop.
Save jaseg/7b347cfbb8deabc9ea7c to your computer and use it in GitHub Desktop.
little python experiment
import pathlib
import itertools
import lzma
import threading
import functools
class Blobstore:
    """Directory-backed store of LZMA-compressed blobs addressed by integer id.

    A blob id is rendered as a zero-padded, lowercase hexadecimal string of
    ``blobid_len`` digits.  The first two digits select one of 256 bucket
    directories (``00`` .. ``ff``) directly below the store root; the
    remaining digits are the file name inside that bucket.
    """

    _HEX_DIGITS = '0123456789abcdef'

    def __init__(self, path, create_if_missing=False, validate=True, blobid_len=4):
        """Open the blobstore rooted at *path*.

        Args:
            path: Root directory of the store (anything ``pathlib.Path`` accepts).
            create_if_missing: Create the directory layout when *path* does
                not exist yet.
            validate: Check the directory layout and raise if it is invalid.
            blobid_len: Number of hex digits used to render a blob id.

        Raises:
            FileNotFoundError: *validate* is set and *path* does not exist.
            ValueError: *validate* is set and a bucket directory is missing.
        """
        self._path_obj = pathlib.Path(path)
        self._blobid_len = blobid_len
        if create_if_missing and not self._path_obj.exists():
            self.create_blobstore()
        if validate:
            self.validate_blobstore()

    def _fmt_blobid(self, blobid):
        """Return *blobid* as a zero-padded hex string of ``blobid_len`` digits."""
        return '{:0{width}x}'.format(blobid, width=self._blobid_len)

    def _bucket_dirs(self):
        """Yield the path of each of the 256 two-hex-digit bucket directories."""
        for a, b in itertools.product(self._HEX_DIGITS, repeat=2):
            yield self._path_obj / (a + b)

    def create_blobstore(self):
        """Create the store root and its 256 bucket directories.

        Raises:
            FileExistsError: The store root already exists.
        """
        if self._path_obj.exists():
            # The message used to reference an undefined local, which turned
            # this error into a NameError.
            raise FileExistsError('Blobstore at "{}" already exists'.format(self._path_obj))
        self._path_obj.mkdir()
        for bucket in self._bucket_dirs():
            bucket.mkdir()

    def validate_blobstore(self):
        """Check that the store root and all bucket directories exist.

        Raises:
            FileNotFoundError: The store root does not exist.
            ValueError: At least one bucket directory is missing.
        """
        if not self._path_obj.exists():
            raise FileNotFoundError('Blobstore at "{}" does not exist'.format(self._path_obj))
        if not all(bucket.is_dir() for bucket in self._bucket_dirs()):
            raise ValueError('No valid blobstore found at "{}"'.format(self._path_obj))

    def _blobid_to_file(self, blob_id):
        """Map *blob_id* to ``<root>/<first two hex digits>/<remaining digits>``."""
        blob_id = self._fmt_blobid(blob_id)
        return self._path_obj / blob_id[:2] / blob_id[2:]

    def blobstore_open(self, blob_id, mode='rt'):
        """Open the blob *blob_id* through ``lzma.open`` and return the file object.

        Args:
            blob_id: Integer in ``range(16 ** blobid_len)``.
            mode: Mode string passed on to ``lzma.open``.

        Raises:
            ValueError: *blob_id* is outside the valid range.
        """
        # Validate before building the path: the upper bound is exclusive
        # (16**n needs n+1 hex digits) and a negative id would otherwise be
        # formatted with a leading '-' and trip the bucket check below.
        if not 0 <= blob_id < 16 ** self._blobid_len:
            raise ValueError('Invalid blobid "{}"'.format(blob_id))
        bf = self._blobid_to_file(blob_id)
        # Sanity check on the store layout; the bucket must already exist.
        assert bf.parent.exists()
        return lzma.open(str(bf), mode)
class Stringstore(Blobstore):
    """Blobstore whose blobs ("blocks") each hold a list of strings, one per line.

    A ``last_block`` symlink in the store root points at the block that new
    strings are appended to.  Strings are addressed by ``(block_id,
    string_id)`` where ``string_id`` is the zero-based line index inside the
    block.  Strings must not contain newline characters, since the newline is
    the on-disk separator.
    """

    def __init__(self, path, create_if_missing=False, validate=True, block_id_len=4, max_block_size=262144):
        """Open the stringstore rooted at *path*.

        Args:
            path: Root directory of the store.
            create_if_missing: Create the store (bucket directories, an empty
                block 0 and the ``last_block`` symlink) when *path* does not
                exist yet.
            validate: Check both the blobstore layout and the ``last_block``
                symlink and raise if either is invalid.
            block_id_len: Number of hex digits used to render a block id.
            max_block_size: Size threshold (characters, counting one newline
                per string) above which ``insert_strings`` starts a new block.
        """
        # Creation and validation are done here rather than in the base
        # class, so the stringstore-specific parts exist before validation.
        super().__init__(path, False, False, block_id_len)
        self.max_block_size = max_block_size
        if create_if_missing and not self._path_obj.exists():
            self.create_blobstore()
            self.blobstore_open(0, 'x').close()
            self._set_last_block(0)
        if validate:
            self.validate_blobstore()
            self.validate_stringstore()

    def _set_last_block(self, block_id):
        """Point the ``last_block`` symlink at the file of block *block_id*."""
        link = self._path_obj / 'last_block'
        # A symlink target is interpreted relative to the directory holding
        # the link, so store it relative to the store root.  Using the
        # root-prefixed path would dangle whenever the store path is relative.
        target = self._blobid_to_file(block_id).relative_to(self._path_obj)
        if link.is_symlink():
            link.unlink()
        link.symlink_to(target)

    def _last_block_id(self):
        """Return the integer id of the block ``last_block`` points at."""
        # Resolve both sides so relative or symlinked store roots compare equal.
        root = self._path_obj.resolve()
        target = (self._path_obj / 'last_block').resolve()
        return int(''.join(target.relative_to(root).parts), 16)

    def validate_stringstore(self):
        """Check that ``last_block`` is a symlink to an existing file inside the store.

        Raises:
            ValueError: The symlink is missing, dangling, or points outside
                the store root.
        """
        link = self._path_obj / 'last_block'
        # Explicit raises instead of ``assert`` so the checks survive ``python -O``.
        if not link.is_symlink():
            raise ValueError('No valid stringstore found at "{}": last_block is not a symlink'.format(self._path_obj))
        if not link.exists():  # exists() follows the link
            raise ValueError('No valid stringstore found at "{}": last_block is dangling'.format(self._path_obj))
        try:
            link.resolve().relative_to(self._path_obj.resolve())
        except ValueError:
            raise ValueError('No valid stringstore found at "{}": last_block points outside the store'.format(self._path_obj))

    @functools.lru_cache(maxsize=32)
    def _read_block(self, block_id):
        """Return the strings of block *block_id* as a list (cached).

        The returned list is shared with the cache; callers must copy it
        before modifying it.
        """
        with self.blobstore_open(block_id, 'rt') as f:
            return [l.rstrip('\n') for l in f.readlines()]

    def get_string(self, block_id, string_id):
        """Return the string at index *string_id* of block *block_id*."""
        return self._read_block(block_id)[string_id]

    def _write_block(self, block_id, lines):
        """Replace the content of block *block_id* with *lines* and drop the read cache."""
        self._read_block.cache_clear()
        with self.blobstore_open(block_id, 'wt') as f:
            # Terminate every line (instead of joining with '\n') so empty
            # strings at the end of a block survive a write/read round trip.
            # Blocks written without the final newline still read back the same.
            f.writelines(line + '\n' for line in lines)

    def replace_string(self, block_id, string_id, value):
        """Overwrite the string at index *string_id* of block *block_id* with *value*."""
        # Work on a copy: the cached list must not change before the write succeeded.
        blocklines = list(self._read_block(block_id))
        blocklines[string_id] = value
        self._write_block(block_id, blocklines)

    def insert_strings(self, values):
        """Append *values* to the store, yielding each string's index in its block.

        This is a generator: strings are buffered in memory and a block is
        written when it is rolled over and once more after *values* is
        exhausted, so it must be consumed completely for everything to be
        persisted.  Only the in-block index is yielded, not the block id; a new
        block (restarting at index 0) is begun whenever the current block's
        size exceeds ``max_block_size`` before an insertion.

        Raises:
            ValueError: A new block is needed but all block ids are used up.
        """
        block_id = self._last_block_id()
        # Copy so the cached list is not modified before it is written.
        curlines = list(self._read_block(block_id))
        # Count one newline per string, matching the accounting in the loop.
        sz = sum(len(line) + 1 for line in curlines)
        for val in values:
            if sz > self.max_block_size:
                # Persist the full block first so nothing already yielded is lost.
                self._write_block(block_id, curlines)
                block_id, curlines, sz = block_id + 1, [], 0
                if block_id >= 16 ** self._blobid_len:
                    raise ValueError('Stringstore at "{}" is full'.format(self._path_obj))
                self.blobstore_open(block_id, 'x').close()
                self._set_last_block(block_id)
            # The value is stored in every case; it used to be dropped on
            # the iteration that triggered a rollover.
            string_id = len(curlines)
            curlines.append(val)
            sz += len(val) + 1  # +1 for newline
            # Yield the zero-based index that get_string() expects.
            yield string_id
        self._write_block(block_id, curlines)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment