# bencharb/flatten.py

## flatten.py
# Default separator placed between the path components of a flattened key;
# used as the default ``delim`` argument of the functions below.
DEFAULT_DELIM = '.'
# NOTE(review): not referenced anywhere in the visible part of this file --
# confirm whether it is still needed.
NOPREFIX_PREFIX = '__NOPREFIX__'

import collections
import itertools

try:  # Python 3.3+: the container ABCs live in collections.abc
    from collections.abc import Mapping, Sequence
except ImportError:  # Python 2 fallback
    from collections import Mapping, Sequence
def is_non_string_sequence(seq):
    """Return True if *seq* is a sequence that is not a text or byte string.

    Lists, tuples and other ``Sequence`` implementations qualify; ``str``,
    ``bytes`` and (on Python 2) ``unicode`` do not, so strings are treated
    as leaf values rather than being split into characters.
    """
    # (str, bytes, type(u'')) is str/unicode on Python 2 (where bytes is str)
    # and str/bytes on Python 3; it replaces the Python 2-only ``basestring``.
    string_types = (str, bytes, type(u''))
    return isinstance(seq, Sequence) and not isinstance(seq, string_types)
def is_dict(d):
    """Return True if *d* should be traversed as a key/value mapping.

    Any ``Mapping`` qualifies. Objects that are not registered mappings but
    expose an ``iteritems`` attribute (Python 2 style dict-likes) are also
    accepted, provided they are not strings.
    """
    if isinstance(d, Mapping):
        return True
    # (str, bytes, type(u'')) stands in for the Python 2-only ``basestring``.
    string_types = (str, bytes, type(u''))
    return hasattr(d, 'iteritems') and not isinstance(d, string_types)


def _object_to_rows(obj, prefix=None, delim=DEFAULT_DELIM):
    """Recursively turn *obj* into a list of ``(flat_key, leaf_value)`` rows.

    Mappings contribute their keys and sequences contribute their stringified
    indices to the key path, joined with *delim*. Anything else is a leaf and
    is emitted unchanged under the accumulated *prefix*. A leaf at the top
    level (``prefix is None``) gets the empty string as its key. Empty
    mappings and sequences produce no rows.

    Mapping keys are concatenated onto the path, so they must be strings.
    """
    # Compare against None explicitly: an empty-string key is a legitimate
    # path component and must still contribute its delimiter, otherwise
    # {'': {'a': 1}} would collapse onto the same flat key as {'a': 1}.
    dot_prefix = '' if prefix is None else prefix + delim
    rows = []
    if is_dict(obj):
        # Python 2 dict-likes expose iteritems(); Python 3 mappings only items().
        pairs = obj.iteritems() if hasattr(obj, 'iteritems') else obj.items()
        for key, item in pairs:
            rows.extend(_object_to_rows(item, prefix=dot_prefix + key, delim=delim))
    elif is_non_string_sequence(obj):
        for i, item in enumerate(obj):
            rows.extend(_object_to_rows(item, prefix=dot_prefix + str(i), delim=delim))
    else:
        rows.append(('' if prefix is None else prefix, obj))
    return rows

def flatten(obj, delim=DEFAULT_DELIM):
    """Flatten a nested structure into a single-level dict.

    Keys of the result are the paths to each leaf joined with *delim*
    (sequence positions appear as their index); values are the leaf objects
    exactly as found -- no type suffix is added and nothing is stringified.
    """
    return dict(_object_to_rows(obj, delim=delim))


def unflatten(data, delim=DEFAULT_DELIM):
    """Rebuild a nested structure from a flat ``{path: value}`` mapping.

    Each key is split on *delim*: every component but the last names a nested
    dict, the last names the leaf. A leaf written as ``name$type`` is stored
    under ``name`` with its value converted according to ``type``:

    * ``int`` / ``float`` -- passed through ``int()`` / ``float()``
    * ``bool`` -- True when the value lower-cases to ``'true'``
    * ``none`` -- always None

    An unrecognised suffix is stripped and the value kept as-is. Only the
    last ``$`` in the leaf name is treated as the type separator.

    In a second pass every dict whose keys are all digit strings is replaced
    by a list ordered by integer key (gaps are closed, not padded), including
    dicts nested inside such lists. The top-level result is always a dict.
    """
    converters = {
        'int': int,
        'float': float,
        'bool': lambda v: v.lower() == 'true',
        'none': lambda v: None,
    }

    def listify(node):
        # Return *node* with integer-keyed dicts replaced by lists, at any depth.
        if not isinstance(node, dict):
            return node
        if all(k.isdigit() for k in node):
            return [listify(node[k]) for k in sorted(node, key=int)]
        # Snapshot the keys so values can be reassigned while looping.
        for k in list(node):
            node[k] = listify(node[k])
        return node

    obj = {}
    # Python 2 dict-likes expose iteritems(); Python 3 mappings only items().
    pairs = data.iteritems() if hasattr(data, 'iteritems') else data.items()
    for key, value in pairs:
        current = obj
        bits = key.split(delim)
        path, lastkey = bits[:-1], bits[-1]
        for bit in path:
            current[bit] = current.get(bit) or {}
            current = current[bit]
        # Now deal with $type suffixes. maxsplit=1 guarantees exactly two
        # parts even when the name itself contains '$'.
        if '$' in lastkey:
            lastkey, lasttype = lastkey.rsplit('$', 1)
            value = converters.get(lasttype, lambda v: v)(value)
        current[lastkey] = value

    # We handle foo.0.one, foo.1.two syntax in a second pass. The root itself
    # is deliberately left as a dict.
    for key in list(obj):
        obj[key] = listify(obj[key])
    return obj


# Round-trip fixtures: each entry is (test_name, unflattened, flattened).
# NOTE(review): the typed rows expect '$type' suffixes and stringified values
# in the flattened form, but flatten() in this file stores leaf values
# unchanged -- confirm which side reflects the intended behaviour.
test_examples = [
    ('simple', {'foo': 'bar'}, {'foo': 'bar'}),
    ('nested', {'foo': {'bar': 'baz'}}, {'foo.bar': 'baz'}),
    (
        'list',
        {'foo': {'bar': ['one', 'two']}},
        {'foo.bar.0': 'one', 'foo.bar.1': 'two'},
    ),
    ('int', {'foo': 5}, {'foo$int': '5'}),
    ('none', {'foo': None}, {'foo$none': 'None'}),
    ('bool_true', {'foo': True}, {'foo$bool': 'True'}),
    ('bool_false', {'foo': False}, {'foo$bool': 'False'}),
    ('float', {'foo': 2.5}, {'foo$float': '2.5'}),
    (
        'complex',
        {
            'this': {
                'is': {
                    'nested': [
                        {'nested_dict_one': 10},
                        {'nested_dict_two': 20.5},
                    ],
                },
                'other_types': {'false': False, 'true': True, 'none': None},
            },
        },
        {
            'this.is.nested.0.nested_dict_one$int': '10',
            'this.is.nested.1.nested_dict_two$float': '20.5',
            'this.other_types.true$bool': 'True',
            'this.other_types.false$bool': 'False',
            'this.other_types.none$none': 'None',
        },
    ),
]

# Dynamically construct the TestCase, to ensure each flatten/unflatten
# method has the correct name (so test failures will be displayed nicely)
import unittest
# Holds the one hand-written test below; the per-example round-trip tests
# are attached to this class further down the module.
class FlattenUnflattenTests(unittest.TestCase):
    def test_integers_with_gaps_does_not_create_sparse_array(self):
        # Indices 0, 5 and 10 must collapse into a dense three-item list.
        # Flattening that list again would yield indices 0, 1, 2, so this
        # case does not round-trip and cannot come from _make_test_pair.
        sparse = {
            'list.10': 'three',
            'list.5': 'two',
            'list.0': 'one',
        }
        expected = {'list': ['one', 'two', 'three']}
        self.assertEqual(unflatten(sparse), expected)

def _make_test_pair(test_name, unflattened, flattened):
    """Build the two test methods for one fixture row.

    Returns ``(test_flatten, test_unflatten)``: the first asserts that
    flattening *unflattened* gives *flattened*, the second asserts the
    reverse. *test_name* is accepted for symmetry with the fixture tuple but
    is not used here. Both functions close over this call's arguments.
    """
    def test_flatten(self):
        produced = flatten(unflattened)
        self.assertEqual(produced, flattened)

    def test_unflatten(self):
        produced = unflatten(flattened)
        self.assertEqual(produced, unflattened)

    return test_flatten, test_unflatten

# Attach one flatten test and one unflatten test per fixture row to the
# TestCase, renaming each so a failure report identifies the fixture.
for test_name, unflattened, flattened in test_examples:
    test_flatten, test_unflatten = _make_test_pair(test_name, unflattened, flattened)

    test_flatten.__name__ = 'test_flatten_' + test_name
    setattr(FlattenUnflattenTests, test_flatten.__name__, test_flatten)

    test_unflatten.__name__ = 'test_unflatten_' + test_name
    setattr(FlattenUnflattenTests, test_unflatten.__name__, test_unflatten)


# Run the unittest suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
	# Default separator placed between the path components of a flattened key;
	# used as the default ``delim`` argument of the functions below.
	DEFAULT_DELIM = '.'
	# NOTE(review): not referenced anywhere in the visible part of this file --
	# confirm whether it is still needed.
	NOPREFIX_PREFIX = '__NOPREFIX__'

	import itertools
	import collections
	def is_non_string_sequence(seq):
		"""Return True if *seq* is a sequence that is not a text or byte string.

		Lists, tuples and other ``Sequence`` implementations qualify; ``str``,
		``bytes`` and (on Python 2) ``unicode`` do not, so strings are treated
		as leaf values rather than being split into characters.
		"""
		# (str, bytes, type(u'')) is str/unicode on Python 2 (where bytes is str)
		# and str/bytes on Python 3; it replaces the Python 2-only ``basestring``.
		string_types = (str, bytes, type(u''))
		return isinstance(seq, Sequence) and not isinstance(seq, string_types)
	def is_dict(d):
		"""Return True if *d* should be traversed as a key/value mapping.

		Any ``Mapping`` qualifies. Objects that are not registered mappings but
		expose an ``iteritems`` attribute (Python 2 style dict-likes) are also
		accepted, provided they are not strings.
		"""
		if isinstance(d, Mapping):
			return True
		# (str, bytes, type(u'')) stands in for the Python 2-only ``basestring``.
		string_types = (str, bytes, type(u''))
		return hasattr(d, 'iteritems') and not isinstance(d, string_types)


	def _object_to_rows(obj, prefix=None, delim=DEFAULT_DELIM):
		"""Recursively turn *obj* into a list of ``(flat_key, leaf_value)`` rows.

		Mappings contribute their keys and sequences contribute their stringified
		indices to the key path, joined with *delim*. Anything else is a leaf and
		is emitted unchanged under the accumulated *prefix*. A leaf at the top
		level (``prefix is None``) gets the empty string as its key. Empty
		mappings and sequences produce no rows.

		Mapping keys are concatenated onto the path, so they must be strings.
		"""
		# Compare against None explicitly: an empty-string key is a legitimate
		# path component and must still contribute its delimiter, otherwise
		# {'': {'a': 1}} would collapse onto the same flat key as {'a': 1}.
		dot_prefix = '' if prefix is None else prefix + delim
		rows = []
		if is_dict(obj):
			# Python 2 dict-likes expose iteritems(); Python 3 mappings only items().
			pairs = obj.iteritems() if hasattr(obj, 'iteritems') else obj.items()
			for key, item in pairs:
				rows.extend(_object_to_rows(item, prefix=dot_prefix + key, delim=delim))
		elif is_non_string_sequence(obj):
			for i, item in enumerate(obj):
				rows.extend(_object_to_rows(item, prefix=dot_prefix + str(i), delim=delim))
		else:
			rows.append(('' if prefix is None else prefix, obj))
		return rows

	def flatten(obj, delim=DEFAULT_DELIM):
		"""Flatten a nested structure into a single-level dict.

		Keys of the result are the paths to each leaf joined with *delim*
		(sequence positions appear as their index); values are the leaf objects
		exactly as found -- no type suffix is added and nothing is stringified.
		"""
		rows = _object_to_rows(obj, delim=delim)
		return dict(rows)


	def unflatten(data, delim=DEFAULT_DELIM):
		"""Rebuild a nested structure from a flat ``{path: value}`` mapping.

		Each key is split on *delim*: every component but the last names a nested
		dict, the last names the leaf. A leaf written as ``name$type`` is stored
		under ``name`` with its value converted according to ``type``:

		* ``int`` / ``float`` -- passed through ``int()`` / ``float()``
		* ``bool`` -- True when the value lower-cases to ``'true'``
		* ``none`` -- always None

		An unrecognised suffix is stripped and the value kept as-is. Only the
		last ``$`` in the leaf name is treated as the type separator.

		In a second pass every dict whose keys are all digit strings is replaced
		by a list ordered by integer key (gaps are closed, not padded), including
		dicts nested inside such lists. The top-level result is always a dict.
		"""
		converters = {
			'int': int,
			'float': float,
			'bool': lambda v: v.lower() == 'true',
			'none': lambda v: None,
		}

		def listify(node):
			# Return *node* with integer-keyed dicts replaced by lists, at any depth.
			if not isinstance(node, dict):
				return node
			if all(k.isdigit() for k in node):
				return [listify(node[k]) for k in sorted(node, key=int)]
			# Snapshot the keys so values can be reassigned while looping.
			for k in list(node):
				node[k] = listify(node[k])
			return node

		obj = {}
		# Python 2 dict-likes expose iteritems(); Python 3 mappings only items().
		pairs = data.iteritems() if hasattr(data, 'iteritems') else data.items()
		for key, value in pairs:
			current = obj
			bits = key.split(delim)
			path, lastkey = bits[:-1], bits[-1]
			for bit in path:
				current[bit] = current.get(bit) or {}
				current = current[bit]
			# Now deal with $type suffixes. maxsplit=1 guarantees exactly two
			# parts even when the name itself contains '$'.
			if '$' in lastkey:
				lastkey, lasttype = lastkey.rsplit('$', 1)
				value = converters.get(lasttype, lambda v: v)(value)
			current[lastkey] = value

		# We handle foo.0.one, foo.1.two syntax in a second pass. The root itself
		# is deliberately left as a dict.
		for key in list(obj):
			obj[key] = listify(obj[key])
		return obj


	# Round-trip fixtures: each entry is (test_name, unflattened, flattened).
	# NOTE(review): the typed rows expect '$type' suffixes and stringified values
	# in the flattened form, but flatten() in this file stores leaf values
	# unchanged -- confirm which side reflects the intended behaviour.
	test_examples = [
		('simple', {'foo': 'bar'}, {'foo': 'bar'}),
		('nested', {'foo': {'bar': 'baz'}}, {'foo.bar': 'baz'}),
		(
			'list',
			{'foo': {'bar': ['one', 'two']}},
			{'foo.bar.0': 'one', 'foo.bar.1': 'two'},
		),
		('int', {'foo': 5}, {'foo$int': '5'}),
		('none', {'foo': None}, {'foo$none': 'None'}),
		('bool_true', {'foo': True}, {'foo$bool': 'True'}),
		('bool_false', {'foo': False}, {'foo$bool': 'False'}),
		('float', {'foo': 2.5}, {'foo$float': '2.5'}),
		(
			'complex',
			{
				'this': {
					'is': {
						'nested': [
							{'nested_dict_one': 10},
							{'nested_dict_two': 20.5},
						],
					},
					'other_types': {'false': False, 'true': True, 'none': None},
				},
			},
			{
				'this.is.nested.0.nested_dict_one$int': '10',
				'this.is.nested.1.nested_dict_two$float': '20.5',
				'this.other_types.true$bool': 'True',
				'this.other_types.false$bool': 'False',
				'this.other_types.none$none': 'None',
			},
		),
	]

	# Dynamically construct the TestCase, to ensure each flatten/unflatten
	# method has the correct name (so test failures will be displayed nicely)
	import unittest
	# Holds the one hand-written test below; the per-example round-trip tests
	# are attached to this class further down the module.
	class FlattenUnflattenTests(unittest.TestCase):
		def test_integers_with_gaps_does_not_create_sparse_array(self):
			# This test doesn't round-trip, so it can't be created using
			# the _make_test_pair function
			self.assertEqual(unflatten({
				'list.10': 'three',
				'list.5': 'two',
				'list.0': 'one',
			}), {
				'list': ['one', 'two', 'three']
			})

	def _make_test_pair(test_name, unflattened, flattened):
		"""Build the two test methods for one fixture row.

		Returns ``(test_flatten, test_unflatten)``: the first asserts that
		flattening *unflattened* gives *flattened*, the second asserts the
		reverse. *test_name* is accepted for symmetry with the fixture tuple but
		is not used here. Both functions close over this call's arguments.
		"""
		def test_flatten(self):
			self.assertEqual(flatten(unflattened), flattened)

		def test_unflatten(self):
			self.assertEqual(unflatten(flattened), unflattened)

		return test_flatten, test_unflatten

	# Attach one flatten test and one unflatten test per fixture row to the
	# TestCase, renaming each so a failure report identifies the fixture.
	for test_name, unflattened, flattened in test_examples:
		test_flatten, test_unflatten = _make_test_pair(test_name, unflattened, flattened)
		test_flatten.__name__ = 'test_flatten_%s' % test_name
		test_unflatten.__name__ = 'test_unflatten_%s' % test_name
		setattr(FlattenUnflattenTests, test_flatten.__name__, test_flatten)
		setattr(FlattenUnflattenTests, test_unflatten.__name__, test_unflatten)


	# Run the unittest suite when this file is executed as a script.
	if __name__ == '__main__':
		unittest.main()