Skip to content

Instantly share code, notes, and snippets.

@Klortho
Created May 7, 2016 05:19
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Klortho/7d83975559bdcc47ac64fd7d877934f6 to your computer and use it in GitHub Desktop.
Save Klortho/7d83975559bdcc47ac64fd7d877934f6 to your computer and use it in GitHub Desktop.
Merge a list of Python dict's (or any mapping), recursively
#!/usr/bin/env python
import itertools
from itertools import chain
import operator
import sys
import pprint
from collections import abc
pp = pprint.PrettyPrinter(indent=4)
# This class builds upon the Chainmap class described here:
# http://code.activestate.com/recipes/305268/.
# It provides an elegant way to merge multiple hierarchical dictionaries or
# other mapping-type objects in Python.
is_mapping = lambda x: isinstance(x, abc.Mapping)
not_mapping = lambda x: not(is_mapping(x))
class ChainMapTree(abc.Mapping):
"""Combine/overlay multiple hierarchical mappings. This efficiently merges
multiple hierarchical (could be several layers deep) dictionaries, producing
a new view into them that acts exactly like a merged dictionary, but without
doing any copying.
Because it doesn't actually copy the data, it is intended to be used only
with immutable mappings. It is safe to change *leaf* data values,
and the results will be reflected here, but changing the structure of any
of the trees will not work.
"""
def __init__(self, *maps):
_maps = list(maps)
# All keys of kids that are mappings
kid_keys = set([key for m in maps
for key in m.keys() if is_mapping(m[key])])
# This will be a dictionary of lists of mappings
kid_maps = {};
for key in kid_keys:
# The list of child mappings for this key
kmaps = [ m[key] for m in maps if key in m ]
# Make sure they are *all* mappings
if any(map(not_mapping, kmaps)): raise KeyError(key)
# Recurse
kid_maps[key] = ChainMapTree(*kmaps)
# If non-empty, prepend it to the existing list
if len(kid_maps.keys()) > 0: _maps.insert(0, kid_maps)
self._maps = _maps
def __getitem__(self, key):
for mapping in self._maps:
try:
return mapping[key]
except KeyError:
pass
raise KeyError(key)
def __iter__(self):
return self._maps.__iter__()
def __len__(self):
return self._maps.__len__()
if __name__ == "__main__":
d1 = {'a':1, 'b':2, 'c': {'c1': 1, 'c2': 2}}
d2 = {'a':3, 'd':4, 'c': {'c2': 4, 'c3': 3}}
cm = ChainMapTree(d1, d2)
assert cm['a'] == 1
assert cm['b'] == 2
assert cm['d'] == 4
try:
print(cm['f'])
except KeyError:
pass
else:
raise Exception('Did not raise KeyError for missing key')
assert cm['c']['c1'] == 1
assert cm['c']['c2'] == 2
assert cm['c']['c3'] == 3
assert 'a' in cm and 'b' in cm and 'd' in cm
assert cm.get('a', 10) == 1
assert cm.get('b', 20) == 2
assert cm.get('d', 30) == 4
assert cm.get('f', 40) == 40
testDicts = [
{
'a': 1001,
'b': {
'ba': {
'baa': 1211
},
'bb': 1022,
},
'c': 1003,
},
{
'a': 2001,
'e': {
'ea': 2051
}
},
{
'a': 3001,
'b': {
'ba': {
'baa': 3211,
'bab': 3212,
},
'bb': 3022
},
'd': {
'da': 3041
}
},
]
cmt = ChainMapTree(*testDicts)
assert cmt['a'] == 1001
assert cmt['b']['ba']['baa'] == 1211
assert cmt['b']['ba']['bab'] == 3212
assert cmt['b']['bb'] == 1022
assert cmt['c'] == 1003
assert cmt['d']['da'] == 3041
assert cmt['e']['ea'] == 2051
print('ok')
@thorwhalen
Copy link

In case it's useful to anyone: A doc test with different data.

>>> base1 = {
...     'a1': 'base1.a1',
...     'a2': 'base1.a2',
...     'a3': {
...         'b1': 'base1.a3.b1',
...         'b2': 'base1.a3.b2',
...     },
... }
>>> base2 = {
...     'a2': 'base2.a2',
...     'a3': {
...         'b2': 'base2.a3.b2',
...         'b4': 'base2.a3.b4',
...     },
...     'a4': 'base2.a4',
... }
>>>
>>> cm = ChainMapTree(base1, base2)
>>> cm['a1']
'base1.a1'
>>> cm['a2']
'base1.a2'
>>> cm['a4']
'base2.a4'
>>> cm['a3']
ChainMapTree({'b1': 'base1.a3.b1', 'b2': 'base1.a3.b2'}, {'b2': 'base2.a3.b2', 'b4': 'base2.a3.b4'})
>>> cm['a3']['b1']
'base1.a3.b1'
>>> cm['a3']['b4']
'base2.a3.b4'
>>> cm = ChainMapTree(base2, base1)
>>> cm['a1']
'base1.a1'
>>> cm['a2']
'base2.a2'
>>> cm['a4']
'base2.a4'
>>> cm['a3']
ChainMapTree({'b2': 'base2.a3.b2', 'b4': 'base2.a3.b4'}, {'b1': 'base1.a3.b1', 'b2': 'base1.a3.b2'})
>>> cm['a3']['b2']
'base2.a3.b2'
>>> cm['a3']['b1']
'base1.a3.b1'
>>>
>>> # Let's do a ChainMapTree with THREE bases now!
>>> base3 = {
...     'a2': 'base3.a2',
...     'a3': {
...         'b2': 'base3.a3.b2',
...         'b4': 'base3.a3.b4',
...     },
...     'a4': 'base3.a4',
... }
>>> cm = ChainMapTree(base3, base2, base1)
>>> cm['a2']  # will get it from base3
'base3.a2'
>>> cm['a3']['b2']  # will get it from base3 (not base2)
'base3.a3.b2'
>>> cm['a3']['b1']  # will get it from base1 (since no one else has it)
'base1.a3.b1'

@thorwhalen
Copy link

I found adding this method was useful:

    def to_dict(self):
        """Convert to dict

        >>> a = {'a': {'x': 1, 'z': 3}, 'foo': "a's foo"}
        >>> b = {'a': {'y': 222, 'z': 333}, 'foo': "b's foo"}
        >>> cm = ChainMapTree(a, b)
        >>> # It acts like a dict when you ask for items, but is not a dict. If you want a dict, do this:
        >>> cm.to_dict()
        {'a': {'x': 1, 'z': 3, 'y': 222}, 'foo': "a's foo"}
        >>> # Compare to normal/flat/not-nested chaining:
        >>> dict(a, **b)   # Note the precedence is the inverse of ChainMapTree!
        {'a': {'y': 222, 'z': 333}, 'foo': "b's foo"}
        >>>
        >>> # See what you get if you specify b before a
        >>> ChainMapTree(b, a).to_dict()
        {'a': {'y': 222, 'z': 333, 'x': 1}, 'foo': "b's foo"}
        >>> # Compare to normal/flat/not-nested chaining:
        >>> dict(b, **a)  # Note the precedence is the inverse of ChainMapTree!
        {'a': {'x': 1, 'z': 3}, 'foo': "a's foo"}
        """
        d = dict()
        for k in self:
            v = self[k]
            if not isinstance(v, ChainMapTree):
                d[k] = v
            else:
                d[k] = v.to_dict()
        return d

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment