Model: claude-3-5-sonnet-20240620
json_flatten.py
---
"""
flatten() and unflatten()
A pair of functions that can convert an arbitrary JSON object into a
flat name/value pair dictionary and back again, preserving type
information and handling both nested lists and nested dictionaries.
For example:
{
"this": {
"is": {
"nested": [{
"nested_dict_one": 10
}, {
"nested_dict_two": 20.5
}]
},
"other_types": {
"true": True,
"false": False,
"none": None,
}
}
}
Flattens to:
{
"this.is.nested.[0].nested_dict_one$int": "10",
"this.is.nested.[1].nested_dict_two$float": "20.5",
"this.other_types.true$bool": "True",
"this.other_types.false$bool": "False",
"this.other_types.none$none": "None",
}
"""
import re
def _object_to_rows(obj, prefix=None):
    """Recursively turn ``obj`` into a list of ``(key, value)`` string rows.

    Dict keys are joined onto ``prefix`` with ``"."`` and list/tuple items
    are addressed as ``"[i]"`` segments.  Leaves that are not plain strings
    get a ``$type`` suffix on their key (``$none``, ``$bool``, ``$int``,
    ``$float``), and empty containers are recorded as ``$empty`` /
    ``$emptylist`` rows.  Every value is stored as ``str(obj)``.

    Args:
        obj: The value to flatten.
        prefix: Key path accumulated so far, or ``None`` at the root.

    Returns:
        A list of ``(key, value)`` tuples in traversal order.
    """
    label = prefix or ""
    child_prefix = prefix + "." if prefix else ""

    if isinstance(obj, dict):
        if not obj:
            return [(label + "$empty", "{}")]
        rows = []
        for name, child in obj.items():
            rows += _object_to_rows(child, prefix=child_prefix + name)
        return rows

    if isinstance(obj, (list, tuple)):
        if len(obj) == 0:
            return [(label + "$emptylist", "[]")]
        rows = []
        for position, child in enumerate(obj):
            rows += _object_to_rows(
                child, prefix=child_prefix + "[{}]".format(position)
            )
        return rows

    if obj is None:
        return [(label + "$none", "None")]

    # Order matters: bool is a subclass of int, so it must be tested first.
    for kind, suffix in ((bool, "$bool"), (int, "$int"), (float, "$float")):
        if isinstance(obj, kind):
            return [(label + suffix, str(obj))]

    # Anything else (typically a string) is stored without a type suffix.
    return [(prefix, str(obj))]
def flatten(obj):
    """Flatten a nested dict into a single-level dict of string pairs.

    Args:
        obj: The dictionary to flatten.

    Returns:
        A dict built from the ``(key, value)`` rows produced by
        ``_object_to_rows``.

    Raises:
        TypeError: If ``obj`` is not a dict (for example a root-level list).
    """
    if isinstance(obj, dict):
        return {name: text for name, text in _object_to_rows(obj)}
    raise TypeError("Expected dict, got {}".format(type(obj)))
# Matches a final key segment that ends in one of the known "$type" suffixes.
_types_re = re.compile(r".*\$(none|bool|int|float|empty|emptylist)$")
# Matches a list-index key segment such as "[0]"; group 1 holds the digits.
_int_key_re = re.compile(r"\[(\d+)\]")

# Decoders from the stored string back to the typed value, keyed by suffix.
# Built once here rather than on every key processed by unflatten().
_type_converters = {
    "int": int,
    "float": float,
    "empty": lambda v: {},
    "emptylist": lambda v: [],
    "bool": lambda v: v.lower() == "true",
    "none": lambda v: None,
}


def _replace_integer_keyed_dicts_with_lists(obj):
    """Recursively turn dicts whose keys are all ``"[n]"`` into lists.

    Items are ordered by their integer index; gaps in the indices are
    closed up rather than producing a sparse list.
    """
    if isinstance(obj, dict):
        # fullmatch (not match) so a key like "[0]abc" is not mistaken for
        # a list index.
        if obj and all(_int_key_re.fullmatch(k) for k in obj):
            indexed = sorted(
                (
                    (int(_int_key_re.fullmatch(k).group(1)), v)
                    for k, v in obj.items()
                ),
                # Sort on the index only, so values are never compared
                # with each other when two keys map to the same index.
                key=lambda pair: pair[0],
            )
            return [_replace_integer_keyed_dicts_with_lists(v) for _, v in indexed]
        return {
            k: _replace_integer_keyed_dicts_with_lists(v) for k, v in obj.items()
        }
    if isinstance(obj, list):
        return [_replace_integer_keyed_dicts_with_lists(v) for v in obj]
    return obj


def unflatten(data):
    """Rebuild a nested structure from a flat dict produced by ``flatten()``.

    Keys are split on ``"."`` to recover the nesting.  A trailing ``$type``
    suffix on the last segment selects the decoder applied to the string
    value.  In a second pass, dicts whose keys are all ``"[n]"`` segments
    are converted to lists.

    Args:
        data: Mapping of flattened string keys to string values.

    Returns:
        The nested structure.  If the only top-level key is the empty
        string (e.g. ``{"$empty": "{}"}``), the value stored under it is
        returned directly.
    """
    obj = {}
    for key, value in data.items():
        current = obj
        *path, lastkey = key.split(".")
        for bit in path:
            current[bit] = current.get(bit) or {}
            current = current[bit]
        # Now deal with $type suffixes:
        if _types_re.match(lastkey):
            # Split on the LAST "$" only, so keys that themselves contain
            # "$" (e.g. "a$b$int") keep their name intact.
            lastkey, lasttype = lastkey.rsplit("$", 1)
            value = _type_converters.get(lasttype, lambda v: v)(value)
        current[lastkey] = value

    # foo.[0].one / foo.[1].two syntax is handled in a second pass.
    obj = _replace_integer_keyed_dicts_with_lists(obj)

    # Handle root units only, e.g. {'$empty': '{}'}.  The isinstance guard
    # covers the case where the root itself was converted to a list.
    if isinstance(obj, dict) and list(obj) == [""]:
        return obj[""]
    return obj
---
setup.py
---
from setuptools import setup
import os
# Release version of the package; passed to setup() as ``version``.
VERSION = "0.3"
def get_long_description():
    """Return the contents of the README.md that sits next to this file.

    The file is read as UTF-8 text.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    readme_path = os.path.join(here, "README.md")
    with open(readme_path, encoding="utf8") as readme:
        return readme.read()
# Register the distribution metadata with setuptools.
setup(
    name="json-flatten",
    description="Python functions for flattening a JSON object to a single dictionary of pairs, and unflattening that dictionary back to a JSON object",
    # The long description is the README text, declared as Markdown below.
    long_description=get_long_description(),
    long_description_content_type="text/markdown",
    author="Simon Willison",
    url="https://github.com/simonw/json-flatten",
    project_urls={
        "Issues": "https://github.com/simonw/json-flatten/issues",
        "CI": "https://github.com/simonw/json-flatten/actions",
        "Changelog": "https://github.com/simonw/json-flatten/releases",
    },
    license="Apache License, Version 2.0",
    version=VERSION,
    # The distribution ships a single top-level module rather than a package.
    py_modules=["json_flatten"],
    # Optional "test" extra listing the tools used for testing and formatting.
    extras_require={"test": ["pytest", "black"]},
    # Test requirements point back at this distribution's own "test" extra.
    tests_require=["json-flatten[test]"],
)
---
test_json_flatten.py
---
from json_flatten import flatten, unflatten
import pytest
# Round-trip cases: (test_name, unflattened, flattened)
_ROUNDTRIP_CASES = [
    ("simple", {"foo": "bar"}, {"foo": "bar"}),
    ("nested", {"foo": {"bar": "baz"}}, {"foo.bar": "baz"}),
    ("list_with_one_item", {"foo": ["item"]}, {"foo.[0]": "item"}),
    ("nested_lists", {"foo": [["item"]]}, {"foo.[0].[0]": "item"}),
    (
        "list",
        {"foo": {"bar": ["one", "two"]}},
        {"foo.bar.[0]": "one", "foo.bar.[1]": "two"},
    ),
    ("int", {"foo": 5}, {"foo$int": "5"}),
    ("none", {"foo": None}, {"foo$none": "None"}),
    ("bool_true", {"foo": True}, {"foo$bool": "True"}),
    ("bool_false", {"foo": False}, {"foo$bool": "False"}),
    ("float", {"foo": 2.5}, {"foo$float": "2.5"}),
    (
        "complex",
        {
            "this": {
                "is": {
                    "nested": [
                        {"nested_dict_one": 10},
                        {"nested_dict_two": 20.5},
                    ]
                },
                "other_types": {"false": False, "true": True, "none": None},
            }
        },
        {
            "this.is.nested.[0].nested_dict_one$int": "10",
            "this.is.nested.[1].nested_dict_two$float": "20.5",
            "this.other_types.true$bool": "True",
            "this.other_types.false$bool": "False",
            "this.other_types.none$none": "None",
        },
    ),
    (
        "dollar_signs_that_are_not_type_indicators",
        {
            "foo": [
                {
                    "emails": ["bar@example.com"],
                    "phones": {"_$!<home>!$_": "555-555-5555"},
                }
            ]
        },
        {
            "foo.[0].emails.[0]": "bar@example.com",
            "foo.[0].phones._$!<home>!$_": "555-555-5555",
        },
    ),
    ("empty_object", {}, {"$empty": "{}"}),
    (
        "nested_empty_objects",
        {"nested": {"foo": {}, "bar": {}}},
        {"nested.foo$empty": "{}", "nested.bar$empty": "{}"},
    ),
    ("empty_nested_list", {"empty": []}, {"empty$emptylist": "[]"}),
    (
        "empty_nested_list_complex",
        {"foo": {"bar": []}, "nested": [[], []]},
        {
            "foo.bar$emptylist": "[]",
            "nested.[0]$emptylist": "[]",
            "nested.[1]$emptylist": "[]",
        },
    ),
    ("dict_with_numeric_key", {"bob": {"24": 4}}, {"bob.24$int": "4"}),
]


@pytest.mark.parametrize("test_name,unflattened,flattened", _ROUNDTRIP_CASES)
def test_flatten_unflatten(test_name, unflattened, flattened):
    """Check that flatten() gives the expected pairs and unflatten() reverses it."""
    flat = flatten(unflattened)
    assert flat == flattened
    # Round-trip: feeding the flattened form back must rebuild the input.
    assert unflatten(flat) == unflattened
def test_integers_with_gaps_does_not_create_sparse_array():
    """List indices with gaps are compacted in index order, not padded."""
    gapped = {"list.[10]": "three", "list.[5]": "two", "list.[0]": "one"}
    expected = {"list": ["one", "two", "three"]}
    assert unflatten(gapped) == expected
def test_list_as_base_level_object_rejected_with_error():
    """flatten() only accepts a dict at the root; a list raises TypeError."""
    root_list = [{"name": "john"}]
    with pytest.raises(TypeError):
        flatten(root_list)
---
write detailed documentation in markdown describing the format used to represent JSON and nested JSON as key/value pairs, include a table as well
Here's detailed documentation in markdown describing the format used to represent JSON and nested JSON as key/value pairs:
This document describes the format used to represent JSON and nested JSON structures as flattened key/value pairs.
- Keys are constructed using dot notation to represent nesting.
- Type information is preserved using `$type` suffixes.
- List indices are represented using `[index]` notation.
- Empty objects and lists have special representations.
For nested objects, keys are constructed by joining the nested keys with dots.
Example:
{
"user": {
"name": "John",
"age": 30
}
}
Flattened:
user.name: John
user.age$int: 30
List items are represented using `[index]` notation.
Example:
{
"fruits": ["apple", "banana", "cherry"]
}
Flattened:
fruits.[0]: apple
fruits.[1]: banana
fruits.[2]: cherry
For nested lists, the index notation is repeated.
Example:
{
"matrix": [[1, 2], [3, 4]]
}
Flattened:
matrix.[0].[0]$int: 1
matrix.[0].[1]$int: 2
matrix.[1].[0]$int: 3
matrix.[1].[1]$int: 4
Types are preserved using `$type` suffixes:
| Type | Suffix | Example |
|---|---|---|
| Integer | `$int` | `age$int: 30` |
| Float | `$float` | `price$float: 19.99` |
| Boolean | `$bool` | `active$bool: True` |
| Null | `$none` | `data$none: None` |
| Empty Object | `$empty` | `obj$empty: {}` |
| Empty List | `$emptylist` | `list$emptylist: []` |
String values do not require a type suffix.
An empty object at the root level is represented as:
$empty: {}
An empty list is represented as:
key$emptylist: []
If the root JSON element is not an object (e.g., a list), it will be rejected with a TypeError.
JSON:
{
"user": {
"name": "Alice",
"age": 28,
"hobbies": ["reading", "swimming"],
"address": {
"street": "123 Main St",
"city": "Anytown"
},
"active": true,
"salary": 50000.50,
"spouse": null
}
}
Flattened:
user.name: Alice
user.age$int: 28
user.hobbies.[0]: reading
user.hobbies.[1]: swimming
user.address.street: 123 Main St
user.address.city: Anytown
user.active$bool: True
user.salary$float: 50000.5
user.spouse$none: None
This format allows for lossless conversion between nested JSON structures and flat key/value pairs, preserving all type information and structural relationships.
Rewrite that document to use the Python cog library to generate the examples
Certainly! Here's the rewritten documentation using Python's cog library to generate the examples:
# JSON Flattening Format
This document describes the format used to represent JSON and nested JSON structures as flattened key/value pairs.
## Basic Principles
1. Keys are constructed using dot notation to represent nesting.
2. Type information is preserved using `$type` suffixes.
3. List indices are represented using `[index]` notation.
4. Empty objects and lists have special representations.
## Key Construction
### Nested Objects
For nested objects, keys are constructed by joining the nested keys with dots.
Example:
[[[cog
import json

import cog
from json_flatten import flatten

# Example input: a dict nested one level deep.
example = {
    "user": {
        "name": "John",
        "age": 30
    }
}
# Serialize with json.dumps so the ```json fence holds real JSON
# (double quotes, true/false/null) rather than a Python repr.
cog.out("```json\n")
cog.out(json.dumps(example, indent=2))
cog.out("\n```\n")
# One "key: value" line per flattened pair.
cog.out("Flattened:\n```\n")
for key, value in flatten(example).items():
    cog.out(f"{key}: {value}\n")
cog.out("```\n")
]]]
[[[end]]]
### Lists
List items are represented using `[index]` notation.
Example:
[[[cog
import json

# Example input: a dict holding a flat list of strings.
example = {
    "fruits": ["apple", "banana", "cherry"]
}
# Serialize with json.dumps so the ```json fence holds real JSON
# rather than a Python repr.
cog.out("```json\n")
cog.out(json.dumps(example, indent=2))
cog.out("\n```\n")
# One "key: value" line per flattened pair.
cog.out("Flattened:\n```\n")
for key, value in flatten(example).items():
    cog.out(f"{key}: {value}\n")
cog.out("```\n")
]]]
[[[end]]]
### Nested Lists
For nested lists, the index notation is repeated.
Example:
[[[cog
import json

# Example input: a list of lists of integers.
example = {
    "matrix": [[1, 2], [3, 4]]
}
# Serialize with json.dumps so the ```json fence holds real JSON
# rather than a Python repr.
cog.out("```json\n")
cog.out(json.dumps(example))
cog.out("\n```\n")
# One "key: value" line per flattened pair.
cog.out("Flattened:\n```\n")
for key, value in flatten(example).items():
    cog.out(f"{key}: {value}\n")
cog.out("```\n")
]]]
[[[end]]]
## Type Preservation
Types are preserved using `$type` suffixes:
| Type | Suffix | Example |
|------|--------|---------|
| Integer | `$int` | `age$int: 30` |
| Float | `$float` | `price$float: 19.99` |
| Boolean | `$bool` | `active$bool: True` |
| Null | `$none` | `data$none: None` |
| Empty Object | `$empty` | `obj$empty: {}` |
| Empty List | `$emptylist` | `list$emptylist: []` |
String values do not require a type suffix.
## Special Cases
### Empty Objects
An empty object at the root level is represented as:
[[[cog
# Example input: an empty root object.
example = {}
pairs = flatten(example)
# Emit the flattened pairs as "key: value" lines inside a fenced block.
cog.out("```\n")
cog.out("".join(f"{key}: {value}\n" for key, value in pairs.items()))
cog.out("```\n")
]]]
[[[end]]]
### Empty Lists
An empty list is represented as:
[[[cog
# Example input: a key whose value is an empty list.
example = {"list": []}
pairs = flatten(example)
# Emit the flattened pairs as "key: value" lines inside a fenced block.
cog.out("```\n")
cog.out("".join(f"{key}: {value}\n" for key, value in pairs.items()))
cog.out("```\n")
]]]
[[[end]]]
### Root-Level Non-Object
If the root JSON element is not an object (e.g., a list), it will be rejected with a TypeError.
## Examples
### Complex Nested Structure
JSON:
[[[cog
import json

# Example input mixing strings, numbers, booleans, null, a list and a
# nested object.
example = {
    "user": {
        "name": "Alice",
        "age": 28,
        "hobbies": ["reading", "swimming"],
        "address": {
            "street": "123 Main St",
            "city": "Anytown"
        },
        "active": True,
        "salary": 50000.50,
        "spouse": None
    }
}
# Serialize with json.dumps so True/None appear as JSON true/null and
# strings are double-quoted inside the ```json fence.
cog.out("```json\n")
cog.out(json.dumps(example, indent=2))
cog.out("\n```\n")
# One "key: value" line per flattened pair.
cog.out("\nFlattened:\n```\n")
for key, value in flatten(example).items():
    cog.out(f"{key}: {value}\n")
cog.out("```\n")
]]]
[[[end]]]
This format allows for lossless conversion between nested JSON structures and flat key/value pairs, preserving all type information and structural relationships.