Skip to content

Instantly share code, notes, and snippets.

@tomplex
Created May 12, 2022 20:22
Show Gist options
  • Save tomplex/25f5a3b7623101f91fde051138d85fff to your computer and use it in GitHub Desktop.
Save tomplex/25f5a3b7623101f91fde051138d85fff to your computer and use it in GitHub Desktop.
flatten dict, keeping the deepest or shallowest instance of each key
import json
from typing import Any, Iterator, Tuple, Union
def _deserializable(obj: Any) -> bool:
try:
o = json.loads(obj)
assert isinstance(o, Union[list, dict])
return True
except:
return False
def _iteritems(obj: dict, _level: int = 0) -> Iterator[Tuple[str, str, int]]:
"""
Recursively traverse obj, returning each key/value pair within,
incrementing the _level each time.
will accept arbitrarily nested lists and dicts.
"""
if isinstance(obj, str):
try:
yield from _iteritems(json.loads(obj), _level=_level + 1)
except:
pass
if isinstance(obj, dict):
for key, value in obj.items():
if not isinstance(value, Union[dict, list]) and not _deserializable(value):
yield key, value, _level
# no-op for non-recursable objects
yield from _iteritems(value, _level=_level + 1)
elif isinstance(obj, list):
for item in obj:
yield from _iteritems(item, _level=_level + 1)
def flatten_keeping_shallowest(obj: dict) -> dict:
    """
    Collapse a nested structure into a single-level dict; when a key occurs
    at several depths, the occurrence closest to the top wins.

    Entries are written deepest-first, so shallower entries are written later
    and overwrite deeper ones, because a later assignment to an existing dict
    key replaces its value:
    >>> d = {}
    >>> d[1] = 2
    >>> d[1] = 3
    >>> d
    {1: 3}
    """
    entries = list(_iteritems(obj))
    # Stable sort, deepest level first; same-level entries keep traversal order.
    entries.sort(key=lambda entry: entry[2], reverse=True)
    flattened = {}
    for name, val, _depth in entries:
        flattened[name] = val
    return flattened
def flatten_keeping_deepest(obj: dict) -> dict:
    """
    Collapse a nested structure into a single-level dict; when a key occurs
    at several depths, the most deeply nested occurrence wins.

    Entries are written shallowest-first, so deeper entries are written later
    and overwrite shallower ones, because a later assignment to an existing
    dict key replaces its value:
    >>> d = {}
    >>> d[1] = 2
    >>> d[1] = 3
    >>> d
    {1: 3}
    """
    entries = list(_iteritems(obj))
    # Stable sort, shallowest level first; same-level entries keep traversal order.
    entries.sort(key=lambda entry: entry[2])
    flattened = {}
    for name, val, _depth in entries:
        flattened[name] = val
    return flattened
if __name__ == '__main__':
    # Demo input: nested dicts, a list of dicts, a JSON-encoded string value,
    # and keys ("a", "d") that repeat at different depths.
    sample = {
        "top": {"a": 1, "b": 2, "next": {"f": 23}},
        "another": {"c": 3, "deeper": {"d": 4}},
        "nested": {"array": [{"d": 3}, {"a": "2"}]},
        "more": {"json": '{"e": 6}'},
    }

    # Print the raw (key, value, level) triples, then each flattened view.
    views = (
        lambda data: list(_iteritems(data)),
        flatten_keeping_shallowest,
        flatten_keeping_deepest,
    )
    for view in views:
        print(view(sample))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment