-
-
Save bencharb/6953d231d18619b9899a to your computer and use it in GitHub Desktop.
flatten()/unflatten()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections
import itertools

# Default separator placed between path components of a flattened key
# (used as the default ``delim`` argument of the functions in this module).
DEFAULT_DELIM = '.'
# Sentinel prefix name; no code visible in this file references it.
NOPREFIX_PREFIX = '__NOPREFIX__'
def is_non_string_sequence(seq):
    """Return True if *seq* is a sequence that is not a text or byte string.

    Lists and tuples qualify; strings (``basestring`` on Python 2,
    ``str``/``bytes`` on Python 3), mappings and scalars do not.
    """
    # The sequence ABC moved to ``collections.abc`` in Python 3.3 and the
    # old alias was removed in 3.10; fall back for Python 2.
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence
    # ``basestring`` only exists on Python 2.
    try:
        string_types = (basestring,)  # noqa: F821
    except NameError:
        string_types = (str, bytes)
    return isinstance(seq, Sequence) and not isinstance(seq, string_types)
def is_dict(d):
    """Return True if *d* should be treated as a mapping.

    An object qualifies when it is a ``Mapping`` instance, or when it
    exposes an ``iteritems`` attribute and is not a string.
    """
    # The mapping ABC moved to ``collections.abc`` in Python 3.3 and the
    # old alias was removed in 3.10; fall back for Python 2.
    try:
        from collections.abc import Mapping
    except ImportError:
        from collections import Mapping
    if isinstance(d, Mapping):
        return True
    # ``basestring`` only exists on Python 2.
    try:
        string_types = (basestring,)  # noqa: F821
    except NameError:
        string_types = (str, bytes)
    # Duck-typed fallback for dict-like objects that are not Mappings.
    return hasattr(d, 'iteritems') and not isinstance(d, string_types)
def _object_to_rows(obj, prefix=None, delim=DEFAULT_DELIM):
    """Walk *obj* recursively and return a list of ``(path, leaf)`` pairs.

    Mapping keys and sequence indices are joined with *delim* to form the
    path. Anything that is neither a mapping nor a non-string sequence is a
    leaf and is emitted unchanged.

    :param obj: the value to walk.
    :param prefix: path accumulated so far; ``None`` marks the root.
    :param delim: separator inserted between path components.
    :returns: list of ``(path, value)`` tuples. A leaf at the root gets the
        path ``''``. Empty mappings and empty sequences contribute no rows.
    """
    # Test against None rather than truthiness: '' is a legitimate mapping
    # key and must still receive a delimiter, otherwise {'': {'a': 1}} and
    # {'a': 1} would collapse onto the same path.
    dot_prefix = '' if prefix is None else prefix + delim
    rows = []
    if is_dict(obj):
        # Python 2 mappings expose iteritems(); Python 3 only has items().
        iter_items = getattr(obj, 'iteritems', None) or obj.items
        for key, item in iter_items():
            # '%s' formatting leaves string keys as they are and stringifies
            # anything else (e.g. integer keys) instead of raising TypeError.
            child_prefix = dot_prefix + '%s' % (key,)
            rows.extend(_object_to_rows(item, prefix=child_prefix, delim=delim))
    elif is_non_string_sequence(obj):
        for i, item in enumerate(obj):
            child_prefix = dot_prefix + str(i)
            rows.extend(_object_to_rows(item, prefix=child_prefix, delim=delim))
    else:
        rows.append(('' if prefix is None else prefix, obj))
    return rows
def flatten(obj, delim=DEFAULT_DELIM):
    """Flatten a nested structure into a single-level dict.

    Each leaf of *obj* is stored under a key built from the mapping keys and
    sequence indices leading to it, joined by *delim*
    (``{'a': {'b': ['x']}}`` becomes ``{'a.b.0': 'x'}``). Leaf values are
    stored as-is.

    :param obj: nested combination of mappings, sequences and leaf values.
    :param delim: separator inserted between path components.
    :returns: dict mapping path strings to leaf values. If two leaves
        produce the same path, the one visited last wins.
    """
    return dict(_object_to_rows(obj, delim=delim))
def _integer_keyed_dicts_to_lists(container):
    """Replace, in place, child dicts whose keys are all digit strings by lists.

    *container* is a dict or a list. Each child dict with only digit keys is
    replaced by a list of its values ordered by the integer value of the
    keys (gaps are closed, no sparse list is created). The walk then
    continues into the child, including into a freshly built list, so that
    lists nested in lists are rebuilt as well.
    """
    if isinstance(container, dict):
        slots = list(container)  # snapshot: values are reassigned below
    else:
        slots = range(len(container))
    for slot in slots:
        child = container[slot]
        if not isinstance(child, dict):
            continue
        if all(k.isdigit() for k in child):
            # Sort on the integer key only, so values are never compared.
            child = [child[k] for k in sorted(child, key=int)]
            container[slot] = child
        _integer_keyed_dicts_to_lists(child)


def unflatten(data, delim=DEFAULT_DELIM):
    """Rebuild a nested structure from a flat ``{path: value}`` mapping.

    Each key is split on *delim*; all components but the last name nested
    dicts, the last one names the leaf. A ``$type`` suffix on the last
    component is stripped and, for ``int``, ``float``, ``bool`` and
    ``none``, used to convert the value; an unrecognised suffix is stripped
    and the value kept unchanged. In a second pass, nested dicts whose keys
    are all digit strings are turned into lists ordered by index.

    :param data: mapping of path strings to values.
    :param delim: separator between path components.
    :returns: a dict. The top-level object is never converted to a list,
        even if all of its keys are digits.
    :raises ValueError: if an ``int``/``float`` suffixed value cannot be
        converted.
    """
    converters = {
        'int': int,
        'float': float,
        'bool': lambda v: v.lower() == 'true',
        'none': lambda v: None,
    }
    obj = {}
    # Python 2 mappings expose iteritems(); Python 3 only has items().
    iter_items = getattr(data, 'iteritems', None) or data.items
    for key, value in iter_items():
        bits = key.split(delim)
        path, lastkey = bits[:-1], bits[-1]
        current = obj
        for bit in path:
            child = current.get(bit)
            if not isinstance(child, dict):
                # Missing, or a scalar stored earlier under the same path:
                # start a fresh dict rather than failing on the scalar.
                child = current[bit] = {}
            current = child
        # Now deal with $type suffixes. Split on the LAST '$' only, so keys
        # that themselves contain '$' do not break the two-way unpacking.
        if '$' in lastkey:
            lastkey, lasttype = lastkey.rsplit('$', 1)
            convert = converters.get(lasttype)
            if convert is not None:
                value = convert(value)
        current[lastkey] = value
    # We handle foo.0.one, foo.1.two syntax in a second pass, by walking the
    # structure looking for dictionaries where all of the keys are
    # stringified integers.
    _integer_keyed_dicts_to_lists(obj)
    return obj
# Round-trip fixtures: each entry is (test_name, unflattened, flattened).
# NOTE(review): the typed entries below ('int', 'none', 'bool_*', 'float',
# 'complex') expect ``$type``-suffixed keys with string values, whereas
# flatten() in this file stores leaf values unchanged under unsuffixed
# keys -- confirm which side is intended before relying on the generated
# flatten tests for these entries.
test_examples = [
    # test_name, unflattened, flattened
    ('simple', {
        'foo': 'bar'
    }, {
        'foo': 'bar'
    }),
    ('nested', {
        'foo': {
            'bar': 'baz'
        }
    }, {
        'foo.bar': 'baz'
    }),
    ('list', {
        'foo': {
            'bar': ['one', 'two']
        }
    }, {
        'foo.bar.0': 'one',
        'foo.bar.1': 'two'
    }),
    ('int', {
        'foo': 5
    }, {
        'foo$int': '5'
    }),
    ('none', {
        'foo': None,
    }, {
        'foo$none': 'None',
    }),
    ('bool_true', {
        'foo': True,
    }, {
        'foo$bool': 'True'
    }),
    ('bool_false', {
        'foo': False
    }, {
        'foo$bool': 'False'
    }),
    ('float', {
        'foo': 2.5
    }, {
        'foo$float': '2.5'
    }),
    ('complex', {
        'this': {
            'is': {
                'nested': [{
                    'nested_dict_one': 10
                }, {
                    'nested_dict_two': 20.5
                }]
            },
            'other_types': {
                'false': False,
                'true': True,
                'none': None
            }
        }
    }, {
        'this.is.nested.0.nested_dict_one$int': '10',
        'this.is.nested.1.nested_dict_two$float': '20.5',
        'this.other_types.true$bool': 'True',
        'this.other_types.false$bool': 'False',
        'this.other_types.none$none': 'None'
    }),
]
# Dynamically construct the TestCase, to ensure each flatten/unflatten
# method has the correct name (so test failures will be displayed nicely)
import unittest


class FlattenUnflattenTests(unittest.TestCase):
    """Test case for flatten()/unflatten().

    Only the hand-written, non-round-tripping test is defined in the class
    body; the per-example methods are attached to the class after it is
    created.
    """

    def test_integers_with_gaps_does_not_create_sparse_array(self):
        # This test doesn't round-trip, so it can't be created using
        # the _make_test_pair function
        self.assertEqual(unflatten({
            'list.10': 'three',
            'list.5': 'two',
            'list.0': 'one',
        }), {
            'list': ['one', 'two', 'three']
        })
def _make_test_pair(test_name, unflattened, flattened):
    """Build the two test methods for one round-trip example.

    :param test_name: example name, used to name the generated functions.
    :param unflattened: the nested structure.
    :param flattened: the flat mapping expected from flattening it.
    :returns: ``(test_flatten, test_unflatten)``, two functions taking
        ``self`` that assert each direction of the conversion.
    """
    def test_flatten(self):
        self.assertEqual(flatten(unflattened), flattened)

    def test_unflatten(self):
        self.assertEqual(unflatten(flattened), unflattened)

    # Name the functions after the example so failures are reported under a
    # meaningful method name.
    test_flatten.__name__ = 'test_flatten_%s' % test_name
    test_unflatten.__name__ = 'test_unflatten_%s' % test_name
    return test_flatten, test_unflatten
def _register_generated_tests(case_cls, examples):
    """Attach a flatten and an unflatten test method per example to *case_cls*.

    Done inside a function so that the loop variables do not remain as
    module-level names: leftover ``test_*`` functions at module level get
    picked up by name-based test collectors and fail for lack of ``self``.
    """
    for name, unflattened, flattened in examples:
        flatten_case, unflatten_case = _make_test_pair(name, unflattened, flattened)
        flatten_case.__name__ = 'test_flatten_%s' % name
        unflatten_case.__name__ = 'test_unflatten_%s' % name
        setattr(case_cls, flatten_case.__name__, flatten_case)
        setattr(case_cls, unflatten_case.__name__, unflatten_case)


_register_generated_tests(FlattenUnflattenTests, test_examples)

if __name__ == '__main__':
    unittest.main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment