Skip to content

Instantly share code, notes, and snippets.

@JoshRosen
Created August 18, 2012 03:39
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JoshRosen/3384191 to your computer and use it in GitHub Desktop.
Save JoshRosen/3384191 to your computer and use it in GitHub Desktop.
Flattening data structures that contain Pickled objects
"""
Say that we're storing pickled Python objects in a complicated Java data
structure, which might contain nested scala.Tuple2s and java.util.Lists.
We may be able to create a pickled representation of the Java data structure
based on the pickled objects that it contains, allowing the Python consumer to
deserialize the nested object in a single unpickle call.
This file contains a prototype of this idea. It has only been tested with
Pickle protocol version 2.
"""
from pickle import EMPTY_TUPLE, TUPLE, TUPLE1, TUPLE2, TUPLE3, STOP, \
EMPTY_LIST, MARK, APPENDS, PROTO
try:
import cPickle as pickle
except ImportError:
import pickle
# Pickle protocol version that every blob handled by this module must use.
PICKLE_VER = 2
# Two-byte header that opens every protocol-2 pickle: the PROTO opcode followed
# by the version number as a single byte.  The byte is built via ``bytearray``
# instead of ``chr()`` because ``chr()`` returns text on Python 3, which cannot
# be concatenated with the bytes opcode; on Python 2 the value is unchanged.
PROTO2 = PROTO + bytes(bytearray([PICKLE_VER]))
def strip_pickle(pickled):
    """
    Return the opcode payload of a protocol-2 pickle.

    The two-byte ``PROTO2`` header is removed from the front of ``pickled``
    and the one-byte ``STOP`` opcode from its end; everything in between is
    returned unchanged.

    Raises ``AssertionError`` if ``pickled`` does not start with ``PROTO2``
    or does not end with ``STOP``.  The checks are ``assert`` statements, so
    they are skipped when Python runs with ``-O``.

    >>> pickle.dumps(1, PICKLE_VER) == b'\\x80\\x02K\\x01.'
    True
    >>> strip_pickle(pickle.dumps(1, PICKLE_VER)) == b'K\\x01'
    True
    """
    assert pickled[:2] == PROTO2
    # Compare a one-byte slice rather than ``pickled[-1]``: indexing a bytes
    # object on Python 3 yields an int, which never equals the STOP opcode.
    assert pickled[-1:] == STOP
    return pickled[2:-1]
def flatten_pickle(x):
    """
    Build one complete pickle out of a nested structure of pickles.

    ``x`` is an individual pickled object, or a tuple or list (nested to any
    depth) whose leaves are pickled objects.  The result is ``PROTO2``,
    followed by whatever ``_flatten_pickle(x)`` returns, followed by
    ``STOP``.  As the examples show, unpickling it yields the same nesting
    with every leaf replaced by the object it was pickled from.

    >>> one = pickle.dumps(1, PICKLE_VER)
    >>> tup = pickle.dumps((2, 3), PICKLE_VER)
    >>> pickle.loads(flatten_pickle((one, tup)))
    (1, (2, 3))
    >>> pickle.loads(flatten_pickle([one, one]))
    [1, 1]
    >>> pickle.loads(flatten_pickle([one]))
    [1]
    >>> pickle.loads(flatten_pickle(()))
    ()
    >>> pickle.loads(flatten_pickle((one,)))
    (1,)
    >>> pickle.loads(flatten_pickle((one, one)))
    (1, 1)
    >>> pickle.loads(flatten_pickle((one, one, one)))
    (1, 1, 1)
    >>> pickle.loads(flatten_pickle((one, one, one, one)))
    (1, 1, 1, 1)
    >>> hello = pickle.dumps("hello", PICKLE_VER)
    >>> world = pickle.dumps("world", PICKLE_VER)
    >>> pickle.loads(flatten_pickle((hello, world)))
    ('hello', 'world')
    >>> pickle.loads(flatten_pickle([one, tup, (tup, tup), one]))
    [1, (2, 3), ((2, 3), (2, 3)), 1]
    >>> obj = pickle.dumps(object(), PICKLE_VER)
    >>> [type(o) is object for o in pickle.loads(flatten_pickle([obj, obj]))]
    [True, True]
    """
    # Opcode stream for the whole structure, still missing header and STOP.
    payload = _flatten_pickle(x)
    return PROTO2 + payload + STOP
def _flatten_pickle(x):
if isinstance(x, tuple):
l = len(x)
if l == 0:
return EMPTY_TUPLE
elif l == 1:
return _flatten_pickle(x[0]) + TUPLE1
elif l == 2:
return ''.join(_flatten_pickle(y) for y in x) + TUPLE2
elif l == 3:
return ''.join(_flatten_pickle(y) for y in x) + TUPLE3
else:
ops = []
ops.append(MARK)
ops.extend(_flatten_pickle(y) for y in x)
ops.append(TUPLE)
return ''.join(ops)
elif isinstance(x, list):
ops = []
ops.append(EMPTY_LIST)
ops.append(MARK)
ops.extend(_flatten_pickle(y) for y in x)
ops.append(APPENDS)
return ''.join(ops)
elif isinstance(x, str):
return strip_pickle(x)
else:
raise ValueError(
"_flatten_pickle(): unsupported type %s" % str(type(x)))
def _test():
import doctest
doctest.testmod()
# When this file is executed as a script (not imported), run its doctests.
if __name__ == "__main__":
    _test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment