Skip to content

Instantly share code, notes, and snippets.

@treyhunner
Last active November 3, 2023 08:13
Show Gist options
  • Save treyhunner/f35292e676efa0be1728 to your computer and use it in GitHub Desktop.
Save treyhunner/f35292e676efa0be1728 to your computer and use it in GitHub Desktop.
Test performance of different dictionary merging functions in Python
"""
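Results (best of 7 runs, each run making 100,000 calls):
multiple_update: 33 ms
copy_and_update: 27 ms
dict_constructor: 29 ms
kwargs_hack: 33 ms
dict_comprehension: 33 ms
concatenate_items: 81 ms
union_items: 81 ms
chain_items: 66 ms
chainmap: 47 ms
dict_from_chainmap: 338 ms
dict_unpacking: 18 ms
dict_unioning: 18 ms

Note: these figures appear to have been recorded while dict_comprehension
shared its body with kwargs_hack and union_items shared its body with
concatenate_items, so those two rows do not measure distinct techniques.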
"""
import timeit
# Source handed to timeit as setup code: it defines every merge strategy under
# test plus the two sample dictionaries that the timed statement merges.
# Each strategy takes (dict_a, dict_b); dict_b is meant to win on shared keys.
setup = """
from collections import ChainMap
from itertools import chain


def multiple_update(dict_a, dict_b):
    merged = {}
    merged.update(dict_a)
    merged.update(dict_b)
    return merged


def copy_and_update(dict_a, dict_b):
    merged = dict_a.copy()
    merged.update(dict_b)
    return merged


def dict_constructor(dict_a, dict_b):
    merged = dict(dict_a)
    merged.update(dict_b)
    return merged


def kwargs_hack(dict_a, dict_b):
    # Only works when every key of dict_b is a string.
    return dict(dict_a, **dict_b)


def dict_comprehension(dict_a, dict_b):
    # A real comprehension; this used to be a copy of kwargs_hack.
    return {k: v for d in (dict_a, dict_b) for k, v in d.items()}


def concatenate_items(dict_a, dict_b):
    return dict(list(dict_a.items()) + list(dict_b.items()))


def union_items(dict_a, dict_b):
    # Set union of the item views; this used to be a copy of
    # concatenate_items. Requires hashable values, and when a key maps to
    # different values in the two dicts the winner is not well defined.
    return dict(dict_a.items() | dict_b.items())


def chain_items(dict_a, dict_b):
    return dict(chain(dict_a.items(), dict_b.items()))


def chainmap(dict_a, dict_b):
    # Returns a ChainMap view over the inputs rather than a new dict.
    return ChainMap({}, dict_b, dict_a)


def dict_from_chainmap(dict_a, dict_b):
    return dict(ChainMap(dict_b, dict_a))


def dict_unpacking(dict_a, dict_b):
    return {**dict_a, **dict_b}


def dict_unioning(dict_a, dict_b):
    return dict_a | dict_b


defaults = {'name': "Anonymous User", 'page_name': "Profile Page"}
user = {'name': "Trey", 'website': "http://treyhunner.com"}
"""
def time_function(func_name, repeat=7, number=100000, setup_code=None):
    """Time one merge function and print its fastest run in milliseconds.

    Args:
        func_name: Name of a function defined by the setup code. The timed
            statement is ``context = <func_name>(defaults, user)``.
        repeat: Number of timing runs; only the fastest run is reported.
        number: Number of times the statement executes within each run.
        setup_code: Source given to ``timeit.Timer`` as setup code. When
            omitted, the module-level ``setup`` string is used.

    Prints ``"<func_name>: <N> ms"``, where N is the fastest run's total
    duration truncated to whole milliseconds.
    """
    if setup_code is None:
        setup_code = setup
    timed_code = "context = {}(defaults, user)".format(func_name).strip()
    # The minimum over the repeats is the run least disturbed by other load.
    best = min(timeit.Timer(timed_code, setup=setup_code).repeat(repeat, number))
    print("{}: {} ms".format(func_name, int(best * 1000)))
# Time every merge strategy, in the same order the results are listed.
for merge_name in (
    "multiple_update",
    "copy_and_update",
    "dict_constructor",
    "kwargs_hack",
    "dict_comprehension",
    "concatenate_items",
    "union_items",
    "chain_items",
    "chainmap",
    "dict_from_chainmap",
    "dict_unpacking",
    "dict_unioning",
):
    time_function(merge_name)
@bnorick
Copy link

bnorick commented Feb 23, 2016

Out of curiosity, I fixed the duplicate functions and timed everything again. You can see the results here.

@aikimark
Copy link

Have you encountered a problem where you need to merge dictionaries in which the value for each key is a list of tuples? What I'd like is for the merge process to append to/extend the list for keys that appear in both dictionaries.

@xzpjerry
Copy link

I poured these methods into perfplot for fun.
performance dict unpacking

It is clear that ChainMap is the best choice if we do not need a new dictionary; otherwise, we could pick any of the remaining methods, since they perform very similarly.

Code to reproduce the plot:

    from collections import ChainMap
    from itertools import chain


    # Every kernel receives the [dict_a, dict_b] pair built by make_dicts and
    # returns the merged mapping; dict_b is meant to win on shared keys.

    def multiple_update(a):
        dict_a, dict_b = a
        merged = {}
        merged.update(dict_a)
        merged.update(dict_b)
        return merged


    def copy_and_update(a):
        dict_a, dict_b = a
        merged = dict_a.copy()
        merged.update(dict_b)
        return merged


    def dict_constructor(a):
        dict_a, dict_b = a
        merged = dict(dict_a)
        merged.update(dict_b)
        return merged


    def kwargs_hack(a):
        dict_a, dict_b = a
        # Only works when every key of dict_b is a string.
        return dict(dict_a, **dict_b)


    def dict_comprehension(a):
        dict_a, dict_b = a
        # A real comprehension; this used to be a copy of kwargs_hack.
        return {k: v for d in (dict_a, dict_b) for k, v in d.items()}


    def concatenate_items(a):
        dict_a, dict_b = a
        return dict(list(dict_a.items()) + list(dict_b.items()))


    def union_items(a):
        dict_a, dict_b = a
        # Set union of the item views; this used to be a copy of
        # concatenate_items. Requires hashable values, and when a key maps to
        # different values in the two dicts the winner is not well defined.
        return dict(dict_a.items() | dict_b.items())


    def chain_items(a):
        dict_a, dict_b = a
        return dict(chain(dict_a.items(), dict_b.items()))


    def chainmap(a):
        dict_a, dict_b = a
        # Returns a ChainMap view over the inputs rather than a new dict.
        return ChainMap({}, dict_b, dict_a)


    def dict_from_chainmap(a):
        dict_a, dict_b = a
        return dict(ChainMap(dict_b, dict_a))


    def dict_unpacking(a):
        dict_a, dict_b = a
        return {**dict_a, **dict_b}


    def make_dicts(n):
        """Build the benchmark input: two dicts of n entries with disjoint keys.

        The first dict maps the strings of the even numbers 0, 2, ... to those
        numbers; the second does the same for the odd numbers 1, 3, ...
        """
        evens = {f"{2 * i}": 2 * i for i in range(n)}
        odds = {f"{2 * i + 1}": 2 * i + 1 for i in range(n)}
        return [evens, odds]


    # Guarded so the kernels above can be imported without plotting.
    if __name__ == "__main__":
        import perfplot

        kernels = [
            multiple_update,
            copy_and_update,
            dict_constructor,
            kwargs_hack,
            dict_comprehension,
            concatenate_items,
            union_items,
            chain_items,
            chainmap,
            dict_from_chainmap,
            dict_unpacking,
        ]
        perfplot.show(
            setup=make_dicts,
            kernels=kernels,
            # Labels mirror the function names, so derive them from the kernels.
            labels=[kernel.__name__ for kernel in kernels],
            n_range=[2 ** k for k in range(15)],
            xlabel="len of dict",
            # chainmap returns a ChainMap, so compare results as plain dicts.
            equality_check=lambda x, y: dict(x) == dict(y),
        )

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment