Last active
November 3, 2023 08:13
-
-
Save treyhunner/f35292e676efa0be1728 to your computer and use it in GitHub Desktop.
Test performance of different dictionary merging functions in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Results: | |
multiple_update: 33 ms | |
copy_and_update: 27 ms | |
dict_constructor: 29 ms | |
kwargs_hack: 33 ms | |
dict_comprehension: 33 ms | |
concatenate_items: 81 ms | |
union_items: 81 ms | |
chain_items: 66 ms | |
chainmap: 47 ms | |
dict_from_chainmap: 338 ms | |
dict_unpacking: 18 ms | |
dict_unioning: 18 ms | |
""" | |
import timeit | |
# Source handed to timeit as its ``setup`` argument: it defines every merge
# strategy under test plus the two sample dicts (``defaults`` and ``user``)
# that the timed statement merges.  Keys from ``dict_b`` are meant to win.
#
# ``dict_comprehension`` and ``union_items`` previously duplicated
# ``kwargs_hack`` and ``concatenate_items``; each now uses the technique its
# name describes.
# NOTE(review): the recorded results for those two entries look like they were
# measured with the duplicated bodies -- re-run the script to refresh them.
#
# ``union_items`` builds a *set* of (key, value) pairs, so when both dicts
# contain the same key with different values, which value survives depends on
# set iteration order.  It is kept only as a benchmark subject.
setup = """
from collections import ChainMap
from itertools import chain

def multiple_update(dict_a, dict_b):
    merged = {}
    merged.update(dict_a)
    merged.update(dict_b)
    return merged

def copy_and_update(dict_a, dict_b):
    merged = dict_a.copy()
    merged.update(dict_b)
    return merged

def dict_constructor(dict_a, dict_b):
    merged = dict(dict_a)
    merged.update(dict_b)
    return merged

def kwargs_hack(dict_a, dict_b):
    return dict(dict_a, **dict_b)

def dict_comprehension(dict_a, dict_b):
    return {k: v for d in [dict_a, dict_b] for k, v in d.items()}

def concatenate_items(dict_a, dict_b):
    return dict(list(dict_a.items()) + list(dict_b.items()))

def union_items(dict_a, dict_b):
    return dict(dict_a.items() | dict_b.items())

def chain_items(dict_a, dict_b):
    return dict(chain(dict_a.items(), dict_b.items()))

def chainmap(dict_a, dict_b):
    return ChainMap({}, dict_b, dict_a)

def dict_from_chainmap(dict_a, dict_b):
    return dict(ChainMap(dict_b, dict_a))

def dict_unpacking(dict_a, dict_b):
    return {**dict_a, **dict_b}

def dict_unioning(dict_a, dict_b):
    return dict_a | dict_b

defaults = {'name': "Anonymous User", 'page_name': "Profile Page"}
user = {'name': "Trey", 'website': "http://treyhunner.com"}
"""
def time_function(func_name, repeat=7, number=100000):
    """Time one merge function and print its best result in milliseconds.

    Args:
        func_name: Name of a function defined by the module-level ``setup``
            snippet; it is timed as ``func_name(defaults, user)``.
        repeat: Number of timing rounds to run.
        number: Number of calls executed in each round.

    Prints ``"<func_name>: <N> ms"``, where ``N`` is the total time of the
    fastest round, truncated to whole milliseconds.
    """
    timed_code = "context = {}(defaults, user)".format(func_name)
    rounds = timeit.Timer(timed_code, setup=setup).repeat(repeat, number)
    # Report the fastest round: slower rounds mostly reflect interference
    # from other processes rather than the code being measured.
    best = min(rounds)
    print("{}: {} ms".format(func_name, int(best * 1000)))
# Benchmark every merge strategy, in the same order as the results table in
# the module docstring.
for _func_name in (
    "multiple_update",
    "copy_and_update",
    "dict_constructor",
    "kwargs_hack",
    "dict_comprehension",
    "concatenate_items",
    "union_items",
    "chain_items",
    "chainmap",
    "dict_from_chainmap",
    "dict_unpacking",
    "dict_unioning",
):
    time_function(_func_name)
Have you encountered a problem where you need to merge dictionaries in which the value for each key is a list of tuples? What I'd like is for the merge to append to (or extend) the existing list when a key appears in both dictionaries, rather than replace it.
I poured these methods into perfplot for fun.
It is clear that ChainMap is the best choice if we do not need a new dictionary; otherwise, we could pick any of the remaining methods, since they are very similar in terms of performance.
Code to reproduce the plot:
...: from collections import ChainMap
...: from itertools import chain
def multiple_update(a):
    """Merge by updating a fresh empty dict with each input in turn.

    ``a`` is a two-item sequence ``(dict_a, dict_b)``.  Returns a new dict in
    which values from ``dict_b`` override those from ``dict_a``.
    """
    dict_a, dict_b = a
    merged = {}
    merged.update(dict_a)
    merged.update(dict_b)
    return merged
def copy_and_update(a):
    """Merge by copying the first dict and updating the copy with the second.

    ``a`` is a two-item sequence ``(dict_a, dict_b)``.  Returns a new dict in
    which values from ``dict_b`` override those from ``dict_a``.
    """
    dict_a, dict_b = a
    merged = dict_a.copy()
    merged.update(dict_b)
    return merged
def dict_constructor(a):
    """Merge by building a dict from the first input, then updating it.

    ``a`` is a two-item sequence ``(dict_a, dict_b)``.  Returns a new dict in
    which values from ``dict_b`` override those from ``dict_a``.
    """
    dict_a, dict_b = a
    merged = dict(dict_a)
    merged.update(dict_b)
    return merged
def kwargs_hack(a):
    """Merge by passing the second dict to ``dict()`` as keyword arguments.

    ``a`` is a two-item sequence ``(dict_a, dict_b)``.  Returns a new dict in
    which values from ``dict_b`` override those from ``dict_a``.  Because
    ``dict_b`` is unpacked with ``**``, all of its keys must be strings.
    """
    dict_a, dict_b = a
    return dict(dict_a, **dict_b)
def dict_comprehension(a):
    """Merge with a dict comprehension over the items of both inputs.

    ``a`` is a two-item sequence ``(dict_a, dict_b)``.  Returns a new dict in
    which values from ``dict_b`` override those from ``dict_a``.

    The body previously repeated ``kwargs_hack``; it now uses an actual
    comprehension, so the benchmark measures the technique it is named after
    (and non-string keys are accepted).
    """
    dict_a, dict_b = a
    return {k: v for d in [dict_a, dict_b] for k, v in d.items()}
def concatenate_items(a):
    """Merge by concatenating both item lists and building a dict from them.

    ``a`` is a two-item sequence ``(dict_a, dict_b)``.  Returns a new dict in
    which values from ``dict_b`` override those from ``dict_a``, because its
    items come later in the concatenated list.
    """
    dict_a, dict_b = a
    return dict(list(dict_a.items()) + list(dict_b.items()))
def union_items(a):
    """Merge by taking the set union of both dicts' item views.

    ``a`` is a two-item sequence ``(dict_a, dict_b)``.  Returns a new dict
    built from the union of the ``(key, value)`` pairs.

    The body previously repeated ``concatenate_items``; it now performs the
    items-view union the name describes.  Caveats of this technique: values
    must be hashable, and when both dicts hold the same key with different
    values the surviving value depends on set iteration order.
    """
    dict_a, dict_b = a
    return dict(dict_a.items() | dict_b.items())
def chain_items(a):
    """Merge by chaining both item views into a single ``dict()`` call.

    ``a`` is a two-item sequence ``(dict_a, dict_b)``.  Returns a new dict in
    which values from ``dict_b`` override those from ``dict_a``, because its
    items are yielded later by the chain.
    """
    dict_a, dict_b = a
    return dict(chain(dict_a.items(), dict_b.items()))
def chainmap(a):
    """Layer the two dicts in a ``ChainMap`` instead of copying them.

    ``a`` is a two-item sequence ``(dict_a, dict_b)``.  Returns a ``ChainMap``
    (not a ``dict``) that looks keys up in ``dict_b`` before ``dict_a``.  The
    leading empty dict is the first mapping, so writes to the result land
    there rather than in either input.
    """
    dict_a, dict_b = a
    return ChainMap({}, dict_b, dict_a)
def dict_from_chainmap(a):
    """Merge by layering the dicts in a ``ChainMap`` and copying it to a dict.

    ``a`` is a two-item sequence ``(dict_a, dict_b)``.  Returns a new dict in
    which values from ``dict_b`` override those from ``dict_a``, because
    ``dict_b`` is searched first.
    """
    dict_a, dict_b = a
    return dict(ChainMap(dict_b, dict_a))
def dict_unpacking(a):
    """Merge by unpacking both dicts into a dict display.

    ``a`` is a two-item sequence ``(dict_a, dict_b)``.  Returns a new dict in
    which values from ``dict_b`` override those from ``dict_a``.
    """
    dict_a, dict_b = a
    return {**dict_a, **dict_b}
# Benchmark every merge kernel over growing dict sizes and plot the timings.
# NOTE(review): assumes `perfplot` was imported earlier in the session -- the
# import is not visible in this snippet.
kernels = [
    multiple_update,
    copy_and_update,
    dict_constructor,
    kwargs_hack,
    dict_comprehension,
    concatenate_items,
    union_items,
    chain_items,
    chainmap,
    dict_from_chainmap,
    dict_unpacking,
]

perfplot.show(
    # Input for size n: two dicts of n entries each with disjoint keys
    # (even numbers in the first, odd numbers in the second).
    setup=lambda n: [
        {f"{2 * i}": 2 * i for i in range(n)},
        {f"{2 * i + 1}": 2 * i + 1 for i in range(n)},
    ],
    kernels=kernels,
    # Labels are the function names, derived so they cannot drift from the
    # kernel list.
    labels=[kernel.__name__ for kernel in kernels],
    n_range=[2 ** k for k in range(15)],
    xlabel="len of dict",
    # `chainmap` returns a ChainMap, so normalise both sides to dict first.
    equality_check=lambda x, y: dict(x) == dict(y),
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Out of curiosity, I fixed the duplicate functions and timed everything again. You can see the results here.