Skip to content

Instantly share code, notes, and snippets.

@blink1073
Last active August 29, 2015 14:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save blink1073/12a701f884618b76ffee to your computer and use it in GitHub Desktop.
Save blink1073/12a701f884618b76ffee to your computer and use it in GitHub Desktop.
BSON Adaptor
from bson import BSON, Binary
import numpy as np
import pickle
import struct
# BSON binary subtype used by bson_dump/bson_load to tag Binary payloads that
# hold a pickled numpy array.
# NOTE(review): 128 is presumably chosen from BSON's user-defined subtype
# range -- confirm against the BSON specification.
NUMPY_SUBTYPE = 128
def _pack_numpy(obj):
    """Return a copy of ``obj`` with numpy arrays made BSON-encodable.

    Every ``np.ndarray`` value is replaced by a ``Binary`` blob holding the
    pickled array and tagged with ``NUMPY_SUBTYPE``.  Nested dicts are
    converted recursively; all other values are passed through unchanged.
    The input mapping is never modified.
    """
    packed = {}
    for key, value in obj.items():
        if isinstance(value, np.ndarray):
            value = Binary(pickle.dumps(value, protocol=-1),
                           subtype=NUMPY_SUBTYPE)
        elif isinstance(value, dict):
            # Recurse on the *dict* form: sub-documents must stay dicts so
            # that they are encoded as embedded documents, not as raw bytes.
            value = _pack_numpy(value)
        packed[key] = value
    return packed


def bson_dump(obj):
    """Encode the mapping ``obj`` as a BSON document.

    numpy arrays found as values (at any dict nesting depth) are pickled and
    stored as ``Binary`` data with subtype ``NUMPY_SUBTYPE``.

    Parameters
    ----------
    obj : dict
        Document to encode.  It is not modified; the conversion works on a
        copy.

    Returns
    -------
    The encoded document as returned by ``BSON.encode``.
    """
    # Convert first, encode once: encoding inside the recursion would embed
    # already-encoded bytes where a sub-document is expected.
    return BSON.encode(_pack_numpy(obj))
def bson_load(data):
    """Decode a BSON document and restore values tagged as numpy payloads.

    ``data`` is decoded with ``BSON.decode``.  Every ``Binary`` value whose
    subtype equals ``NUMPY_SUBTYPE`` is replaced by the result of
    ``pickle.loads`` on it; nested dicts are processed the same way.  The
    decoded document is updated in place and returned.

    NOTE(security): ``pickle.loads`` can execute arbitrary code.  Only call
    this on data that comes from a trusted source.
    """
    def restore(document):
        for name, item in document.items():
            if isinstance(item, dict):
                # Descend into sub-documents so nested payloads are restored.
                document[name] = restore(item)
                continue
            if isinstance(item, Binary) and item.subtype == NUMPY_SUBTYPE:
                document[name] = pickle.loads(item)
        return document

    decoded = BSON.decode(data)
    return restore(decoded)
def _read_length_prefix(fid):
    """Read the 4-byte length prefix of the record at the current position.

    Returns a ``(header, length)`` tuple: the four raw bytes and the length
    decoded from them.

    Raises ``EOFError`` when fewer than four bytes are left in the file.
    """
    header = fid.read(4)
    if len(header) < 4:
        # Covers both a clean end of file (0 bytes) and a truncated header.
        raise EOFError('end of file reached while reading a record length')
    # '<I' pins the decoding to little-endian / 4 bytes regardless of the
    # host platform (a bare 'I' would use the native byte order).
    return header, struct.unpack('<I', header)[0]


def load_from_file(fid, n=0):
    """Load just the nth record from a file.

    The file is expected to hold back-to-back records, each starting with a
    4-byte little-endian length that counts the whole record (prefix
    included).  Records before the requested one are skipped by seeking, so
    their contents are never read.

    Parameters
    ----------
    fid : binary file object
        Must support ``read``, ``seek`` and ``tell``.  Counting starts at
        the current file position.
    n : int, optional
        Zero-based index of the record to load, relative to the current
        position.  Defaults to 0 (the next record).

    Returns
    -------
    The decoded record, as returned by ``bson_load``.

    Raises
    ------
    EOFError
        If the file ends before the length prefix of the requested record
        (or of any skipped record) can be read.
    """
    pos = fid.tell()
    skipped = 0
    while skipped < n:
        _, length = _read_length_prefix(fid)
        # The length includes the prefix itself, so the next record starts
        # exactly ``length`` bytes after the start of this one.
        pos += length
        fid.seek(pos)
        skipped += 1

    header, length = _read_length_prefix(fid)
    # Clamp at zero: a corrupt length below 4 would otherwise turn into a
    # negative read size, which means "read everything up to end of file".
    body = fid.read(max(length - 4, 0))
    return bson_load(header + body)
if __name__ == '__main__':
    import time

    # Write a sample file: 1000 documents with a growing string payload,
    # then one document carrying a numpy array, then 30 small trailers.
    with open('test.bson', 'wb') as out:
        for size in range(1000):
            out.write(bson_dump({'foo': 1, 'bar': '1' * size}))
        out.write(bson_dump({'spam': np.ones(100), 'eggs': None}))
        for _ in range(30):
            out.write(bson_dump({'baz': 1, 'bar': 200}))

    # Fetch record 1000 (the numpy document) and report how long it took.
    with open('test.bson', 'rb') as src:
        started = time.time()
        print(load_from_file(src, 1000))
        print(time.time() - started)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment