Created
December 12, 2012 15:02
-
-
Save miku/4268445 to your computer and use it in GitHub Desktop.
Simple streaming pickle.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Streaming pickle implementation for efficiently serializing and
de-serializing an iterable (e.g., a list).

Originally created on 2010-06-19 by Philip Guo.
Reformatted on 2012-12-12 by M Czygan.

Limitations:
- Only works with ASCII-based pickles (protocol=0), since s_load reads
  the input one line at a time.
""" | |
from __future__ import print_function | |
import os | |
import sys | |
try: | |
from cPickle import dumps, loads | |
except ImportError: | |
from pickle import dumps, loads | |
def s_dump(iterable_to_pickle, file_obj):
    """Serialize every item of an iterable to a file, one record each.

    iterable_to_pickle -- any iterable; it is consumed lazily, one item
                          at a time
    file_obj           -- file object opened for writing

    Each item is handed to s_dump_elt, which writes it as a separate
    record.
    """
    for item in iterable_to_pickle:
        s_dump_elt(item, file_obj)
def s_dump_elt(elt_to_pickle, file_obj):
    """Write one pickled element, followed by a blank-line separator.

    elt_to_pickle -- the object to serialize
    file_obj      -- file object opened for writing, in either text or
                     binary mode

    The element is always pickled with protocol 0. The line-based record
    framing relies on this: the binary protocols (the default on
    Python 3) may contain blank lines inside a pickle, which would be
    mistaken for the record separator.
    """
    # Record separator is a blank line (the pickle itself spans several
    # lines, so a single newline would not be enough).
    record = dumps(elt_to_pickle, 0) + b'\n\n'
    try:
        file_obj.write(record)
    except TypeError:
        # Text-mode handles refuse bytes. Protocol 0 may emit raw Latin-1
        # bytes for characters below U+0100, so decode with latin-1
        # (a lossless byte <-> code point mapping) rather than ascii.
        file_obj.write(record.decode('latin-1'))
def s_load(file_obj):
    """Lazily unpickle the records stored in file_obj.

    file_obj -- an iterable of lines, typically a file opened for
                reading in text or binary mode

    Returns a generator that yields one unpickled element per record.
    A record ends at a line consisting of a single newline, which is the
    separator written by s_dump_elt. Lines that follow the last
    separator are never yielded.

    Any exception raised by loads() for a malformed record propagates to
    the caller.
    """
    pending = []
    for line in file_obj:
        pending.append(line)
        # Pick separator and joiner of the same type as the line so the
        # function works for text and binary handles alike.
        binary = isinstance(line, bytes)
        if line != (b'\n' if binary else u'\n'):
            continue
        payload = (b'' if binary else u'').join(pending)
        pending = []
        if not binary:
            # loads() needs bytes; latin-1 restores exactly the bytes of
            # a protocol-0 pickle that was written through a text handle.
            payload = payload.encode('latin-1')
        # NOTE: unpickling can execute arbitrary code; only feed this
        # function files that come from a trusted source.
        yield loads(payload)
# Demo: on the first run create 'obj.serialized'; on later runs stream it
# back and print every element.
if __name__ == '__main__':
    if os.path.exists('obj.serialized'):
        # load the file 'obj.serialized' from disk and
        # spool through the elements one at a time
        with open('obj.serialized', 'r') as handle:
            for element in s_load(handle):
                print(element)
    else:
        # or create it first, otherwise
        with open('obj.serialized', 'w') as handle:
            # range() exists on both Python 2 and 3; the original xrange()
            # raises NameError on Python 3.
            for i in range(100000):
                s_dump_elt({'i': i}, handle)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment