Last active
August 29, 2015 14:12
-
-
Save blink1073/12a701f884618b76ffee to your computer and use it in GitHub Desktop.
BSON Adaptor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bson import BSON, Binary | |
import numpy as np | |
import pickle | |
import struct | |
# BSON binary subtype used to tag pickled NumPy arrays.  0x80 (128) is the
# first value of the user-defined subtype range in the BSON specification.
NUMPY_SUBTYPE = 0x80
def bson_dump(obj):
    """Encode a dict as BSON, storing NumPy arrays as pickled binary blobs.

    Every ``np.ndarray`` value (at any dict nesting depth) is replaced by a
    ``Binary`` holding its pickle, tagged with ``NUMPY_SUBTYPE`` so that the
    loader can recognise it and restore the array.

    The input mapping is not modified; a converted copy is encoded instead.

    Parameters
    ----------
    obj : dict
        Document to serialise.  Values may be nested dicts.

    Returns
    -------
    bytes
        The BSON-encoded document.
    """
    def pack(doc):
        # Build a new dict rather than assigning into ``doc`` so the caller's
        # arrays are not overwritten with Binary objects.
        packed = {}
        for key, value in doc.items():
            if isinstance(value, np.ndarray):
                value = Binary(pickle.dumps(value, protocol=-1),
                               subtype=NUMPY_SUBTYPE)
            elif isinstance(value, dict):
                # Recurse with the *converter*, not the encoder: the sub-dict
                # must stay an embedded document, otherwise it would be
                # stored as opaque bytes and never decoded on load.
                value = pack(value)
            packed[key] = value
        return packed

    # Encode exactly once, at the top level.
    return BSON.encode(pack(obj))
def bson_load(data):
    """Decode a BSON document and restore any pickled NumPy arrays in it.

    Values that are ``Binary`` with subtype ``NUMPY_SUBTYPE`` are replaced by
    the result of unpickling them; nested dicts are processed the same way.

    NOTE: this calls ``pickle.loads`` on data taken from the document, so it
    must only be used on input from a trusted source.
    """
    document = BSON.decode(data)

    # Walk the document with an explicit worklist of dicts still to process.
    pending = [document]
    while pending:
        current = pending.pop()
        for name, item in current.items():
            if isinstance(item, Binary) and item.subtype == NUMPY_SUBTYPE:
                # Only existing keys are reassigned, so the dict size is
                # unchanged and iteration stays valid.
                current[name] = pickle.loads(item)
            elif isinstance(item, dict):
                # Sub-documents are fixed up in place on a later pass.
                pending.append(item)
    return document
def load_from_file(fid, n=0):
    """Load just the nth record from a file of concatenated BSON documents.

    Each BSON document starts with a 4-byte little-endian signed integer
    giving the total document size (including those 4 bytes), so earlier
    records can be skipped by seeking without decoding them.

    Parameters
    ----------
    fid : binary file object
        Open for reading and seekable, positioned at the start of a record.
    n : int, optional
        Zero-based index of the record to load, counted from the current
        file position.

    Returns
    -------
    dict
        The decoded record, as returned by ``bson_load``.

    Raises
    ------
    EOFError
        If the file ends before the requested record is completely read.
    ValueError
        If a length prefix is smaller than the minimum BSON document size.
    """
    # Explicit little-endian int32: the bare 'I' format uses the host's
    # native byte order and would misread lengths on big-endian machines.
    header = struct.Struct('<i')

    def read_header():
        raw = fid.read(header.size)
        # A partial header (1-3 bytes) is also end-of-file, not a struct error.
        if len(raw) < header.size:
            raise EOFError('file ended while reading a record length')
        length = header.unpack(raw)[0]
        # The smallest valid document is 5 bytes (length + terminating NUL).
        # Anything less would make read() be called with a size <= 0.
        if length < 5:
            raise ValueError('invalid BSON document length: %d' % length)
        return raw, length

    # Skip the first n records using only their length prefixes.
    pos = fid.tell()
    for _ in range(n):
        _, length = read_header()
        pos += length
        fid.seek(pos)

    raw, length = read_header()
    body = fid.read(length - header.size)
    if len(body) < length - header.size:
        raise EOFError('file ended in the middle of a record')
    return bson_load(raw + body)
if __name__ == '__main__':
    import time

    # Write a sample file: 1000 records of growing size, one record holding
    # a NumPy array, then 30 small trailing records.
    with open('test.bson', 'wb') as out:
        for width in range(1000):
            out.write(bson_dump({'foo': 1, 'bar': '1' * width}))
        out.write(bson_dump({'spam': np.ones(100), 'eggs': None}))
        for _ in range(30):
            out.write(bson_dump({'baz': 1, 'bar': 200}))

    # Time how long it takes to skip straight to record 1000 (the array one).
    with open('test.bson', 'rb') as src:
        started = time.time()
        print(load_from_file(src, 1000))
        print(time.time() - started)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment