Created
November 14, 2018 21:05
-
-
Save pythononwheels/fd5b6e066329fdc71721bfe46a6a1994 to your computer and use it in GitHub Desktop.
Convert given JSON input (file.json) to a basic cerberus schema
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# Convert JSON data to a Cerberus schema.
# For the Cerberus types, see: http://docs.python-cerberus.org/en/stable/validation-rules.html#type
#
# Sample data: https://www.json-generator.com/
#
# This uses the first data element in a given JSON file to create
# a model (Cerberus) schema from it, trying to guess the right types (without too much effort).
#
import simplejson as json | |
import re | |
from dateutil.parser import parse | |
import sys | |
import click | |
from collections import OrderedDict | |
# Case-insensitive (re.I), unanchored pattern for the textual UUID layout:
# 8-4-4-4-12 hexadecimal digits separated by hyphens.
# NOTE(review): not referenced anywhere else in the visible part of this file.
uuid = re.compile('[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}', re.I)
# Width, in characters, of the "-" separator lines printed around the report.
line_lenght=90
def is_date(string):
    """Return True if *string* can be parsed as a date/time by dateutil.

    Args:
        string: The value to probe (normally a str taken from the JSON data).

    Returns:
        True when ``dateutil.parser.parse`` accepts the value, False otherwise.
    """
    try:
        parse(string)
    except (ValueError, OverflowError, TypeError):
        # ValueError: the text is not a recognisable date.
        # OverflowError: a numeric field is too large for the platform,
        #   e.g. a long run of digits such as an id or a phone number.
        # TypeError: the value is not a string / character stream at all.
        # None of these should abort schema generation; it is simply "not a date".
        return False
    return True
def _guess_cerberus_type(value):
    """Map one decoded JSON value to a Cerberus type name, or None if unrecognised."""
    # bool has to be tested before int: bool is a subclass of int in Python,
    # so True/False would otherwise be reported as "integer".
    if isinstance(value, bool):
        return "boolean"
    if isinstance(value, int):
        return "integer"
    if isinstance(value, float):
        return "float"
    if isinstance(value, list):
        return "list"
    if isinstance(value, dict):
        # The Cerberus type name for mappings is "dict" (not "dictionary").
        return "dict"
    if isinstance(value, str):
        # todo: distinguish "date" (datetime without h:m:s.xx) from "datetime"
        return "datetime" if is_date(value) else "string"
    if isinstance(value, (bytes, bytearray)):
        return "binary"
    return None


# NOTE: click uses the function docstring as the --help text, so the docstring
# below is written for the command line user; developer notes live in comments.
#   infile        -- path of the JSON file to read
#   start_element -- index of the element to inspect when the file holds a list
@click.command()
@click.option('--infile', required=True, help='json file to read')
@click.option('--start_element', default=0, help="Element to process, if json file contains a list. Default=0")
def json_to_cerberus(infile, start_element):
    """
    Convert json data (infile) to a (simple) cerberus schema.

    If the json contains a list of elements you can specify the
    element to inspect. Default is the first element (0).
    If the json is a single object, that object is inspected directly.

    This is meant to be a convenience utility to take away all the initial
    typing when dealing with longer json formats...
    You can then easily add real constraints to the schema afterwards.
    """
    from pprint import PrettyPrinter

    print("opening json data file: {}".format(infile))
    # The context manager guarantees the file is closed, even if parsing fails.
    with open(infile, "r", encoding="utf-8") as handle:
        data = json.load(handle)

    # Pick the object to inspect: a top-level object is used as is,
    # a top-level list is indexed with start_element.
    if isinstance(data, dict):
        mydata = data
    elif isinstance(data, list):
        try:
            mydata = data[start_element]
        except IndexError:
            raise click.ClickException(
                "start_element {} is out of range: the json list has {} element(s)".format(
                    start_element, len(data)
                )
            ) from None
    else:
        raise click.ClickException("the json file must contain an object or a list of objects")
    if not isinstance(mydata, dict):
        raise click.ClickException("the selected json element is not an object, cannot build a schema from it")

    # inspect the json.
    # sample output schema format:
    #   schema = {'name': {'type': 'string'} }
    cerberus_schema = {}
    for key, value in mydata.items():
        guessed = _guess_cerberus_type(value)
        if guessed is None:
            # e.g. json null: there is no value to infer a type from.
            print("type unknown, setting string.")
            guessed = "string"
        cerberus_schema[key] = {"type": guessed}

    separator = line_lenght * "-"
    print(separator)
    print("| json data: {}".format(infile))
    print(separator)
    PrettyPrinter(indent=4).pprint(mydata)
    print(separator)
    print("| Model schema for: {}".format(infile))
    print(separator)
    print("schema = { ")
    # Keys are printed in sorted order; the ":" and the trailing "," make the
    # printed block a valid Python dict literal, so it really can be pasted.
    for key in sorted(cerberus_schema):
        print("    {:20} : {},".format('"' + key + '"', cerberus_schema[key]))
    print("}")
    print(separator)
    print("| you can copy&paste this right into your model for example.")
    print(separator)
# Run the click command only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    json_to_cerberus()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Sample data:
Sample output: