Created
October 26, 2023 10:49
-
-
Save jacksmith15/482c2109dc83f156ff2a3246b82f20a2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import json | |
import sys | |
import typing | |
from pathlib import Path | |
from types import NoneType | |
def main():
    """Infer a single JSON Schema from all ``*.json`` files under a directory.

    The directory is taken from the first command-line argument. Every JSON
    file found beneath it (recursively) is parsed as UTF-8 and merged into one
    schema, which is printed to stdout as indented JSON with sorted keys.

    Raises:
        SystemExit: if no directory argument was given or it is not a directory.
    """
    if len(sys.argv) < 2:
        raise SystemExit(f"usage: {sys.argv[0]} DIRECTORY")
    directory = Path(sys.argv[1])
    if not directory.is_dir():
        raise SystemExit(f"error: {directory} is not a directory")

    schema: dict = {}
    # rglob() already searches recursively, so the pattern needs no "**/" prefix.
    # Sorting makes the merge order (and therefore the order of "anyOf"
    # branches) reproducible instead of filesystem-dependent.
    for file in sorted(directory.rglob("*.json")):
        if not file.is_file():
            # A directory whose name ends in ".json" cannot be read as text.
            continue
        _infer_schema(schema, json.loads(file.read_text(encoding="utf-8")))
    print(json.dumps(schema, indent=2, sort_keys=True))
def _infer_schema(schema: dict, document: typing.Union[dict, list, str, float, int, bool, None]) -> None: | |
document_type = { | |
str: "string", | |
float: "number", | |
int: "integer", | |
bool: "boolean", | |
NoneType: "null", | |
dict: "object", | |
list: "array", | |
}[type(document)] | |
any_of = schema.get("anyOf") | |
if any_of: | |
_infer_schema(_extract_matching_any_of(any_of, document_type), document) | |
return | |
current_type = schema.get("type") | |
if not current_type: | |
schema["type"] = document_type | |
elif current_type != document_type: | |
sub_schema: dict = {} | |
_infer_schema({}, document) | |
_replace_dict_inplace(schema, {"anyOf": [{**schema}, sub_schema]}) | |
return | |
if isinstance(document, dict): | |
for key, value in document.items(): | |
_infer_schema(schema.setdefault("properties", {}).setdefault(key, {}), value) | |
if isinstance(document, list): | |
for item in document: | |
_infer_schema(schema.setdefault("items", {}), item) | |
def _extract_matching_any_of(any_of: typing.List[dict], document_type: str) -> dict:
    """Return the ``anyOf`` branch whose ``"type"`` equals ``document_type``.

    The first matching branch is returned as-is (same object, not a copy).
    When no branch matches, a new empty dict is appended to ``any_of`` and
    returned so the caller can populate it.
    """
    match = next(
        (branch for branch in any_of if branch.get("type") == document_type),
        None,
    )
    if match is not None:
        return match

    # No branch for this type yet: register an empty one for the caller to fill.
    fresh: dict = {}
    any_of.append(fresh)
    return fresh
def _replace_dict_inplace(dictionary: dict, replacement: dict) -> None:
    """Make ``dictionary`` hold exactly the items of ``replacement``, in place.

    The dict object itself is kept (only its contents change), so any other
    reference to ``dictionary`` observes the new contents.

    Args:
        dictionary: The dict to overwrite.
        replacement: The items ``dictionary`` should contain afterwards.
    """
    # Snapshot first so the call is still correct when both arguments are the
    # same object.
    new_items = dict(replacement)
    # clear() + update() instead of deleting keys inside a loop over the dict:
    # removing entries while iterating raises
    # "RuntimeError: dictionary changed size during iteration".
    dictionary.clear()
    dictionary.update(new_items)
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment