Skip to content

Instantly share code, notes, and snippets.

@pythononwheels
Created November 14, 2018 21:05
Show Gist options
  • Save pythononwheels/fd5b6e066329fdc71721bfe46a6a1994 to your computer and use it in GitHub Desktop.
Save pythononwheels/fd5b6e066329fdc71721bfe46a6a1994 to your computer and use it in GitHub Desktop.
Convert given JSON input (file.json) to a basic cerberus schema
#
# convert json data to a cerberus schema.
# Cerberus types see here: http://docs.python-cerberus.org/en/stable/validation-rules.html#type
#
# sampledata: https://www.json-generator.com/
#
# this uses one data element of a given json file (the first one by default) to create
# a cerberus model schema from it, trying to guess the right types (without too much effort)
#
import simplejson as json
import re
from dateutil.parser import parse
import sys
import click
from collections import OrderedDict
# Case-insensitive pattern for the canonical 8-4-4-4-12 hexadecimal UUID layout.
# Not referenced by the rest of the code visible in this file.
uuid = re.compile(
    r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}',
    re.IGNORECASE,
)

# Width (in characters) of the horizontal separator rules printed in the report.
line_lenght = 90
def is_date(string):
    """Return True if *string* can be parsed as a date/time by dateutil.

    The value is handed to ``dateutil.parser.parse``; any parse failure is
    reported as ``False`` instead of propagating.

    :param string: candidate value, normally a ``str`` taken from the json data.
    :returns: ``True`` when dateutil accepts the value, ``False`` otherwise.
    """
    try:
        parse(string)
    except (ValueError, OverflowError, TypeError):
        # ValueError: not a recognisable date format.
        # OverflowError: numeric parts too large for a date (e.g. long digit runs).
        # TypeError: input is not a string / character stream.
        return False
    return True
def _guess_cerberus_type(value):
    """Return the cerberus type name guessed for *value*, or None if unknown."""
    # bool must be tested before int: bool is a subclass of int in Python.
    if isinstance(value, bool):
        return "boolean"
    if isinstance(value, int):
        return "integer"
    if isinstance(value, float):
        return "float"
    if isinstance(value, list):
        return "list"
    if isinstance(value, dict):
        # cerberus names this type "dict" ("dictionary" is not a valid type name).
        return "dict"
    if isinstance(value, str):
        # TODO: distinguish "date" (datetime without h:m:s.xx) from "datetime".
        return "datetime" if is_date(value) else "string"
    if isinstance(value, (bytes, bytearray)):
        return "binary"
    return None


@click.command()
@click.option('--infile', required=True, help='json file to read')
@click.option('--start_element', default=0, help="Element to process, if json file contains a list. Default=0")
def json_to_cerberus(infile, start_element):
    """
    Convert json data (infile)
    to a (simple) cerberus schema.

    If the json contains a list of elements you can specify the
    element to inspect. Default is the first element (0).
    If the json is a single object, that object is inspected directly.

    This is meant to be a convenience utility to take away all the initial typing
    when dealing with longer json formats...
    You can then easily add real constraints to the schema afterwards.

    infile: path of the json file to read (decoded as UTF-8).

    start_element: index of the list element to inspect.
    """
    from pprint import PrettyPrinter

    print("opening json data file: {}".format(infile))
    # The context manager guarantees the file is closed even if parsing fails;
    # json decoding errors propagate unchanged to the caller.
    with open(infile, "r", encoding="utf-8") as handle:
        data = json.load(handle)

    if isinstance(data, list):
        try:
            mydata = data[start_element]
        except IndexError:
            raise click.BadParameter(
                "json file contains only {} element(s)".format(len(data)),
                param_hint="--start_element",
            )
    else:
        # A top-level json object has no elements to choose from; use it as is.
        mydata = data
    if not isinstance(mydata, dict):
        raise click.ClickException(
            "the selected json element is not an object, cannot derive a schema from it"
        )

    # sample output schema format:
    #   schema = {'name': {'type': 'string'} }
    cerberus_schema = {}
    for elem, value in mydata.items():
        guessed = _guess_cerberus_type(value)
        if guessed is None:
            # e.g. json null: nothing to infer the type from.
            guessed = "string"
            print("type unknown, setting string.")
        cerberus_schema[elem] = {"type": guessed}

    separator = line_lenght * "-"
    print(separator)
    print("| json data: {}".format(infile))
    print(separator)
    PrettyPrinter(indent=4).pprint(mydata)
    print(separator)
    print("| Model schema for: {}".format(infile))
    print(separator)
    print("schema = { ")
    # Keys are listed alphabetically so the output is stable between runs.
    for elem in sorted(cerberus_schema):
        print(" {:20} {}".format('"' + elem + '"', cerberus_schema[elem]))
    print("}")
    print(separator)
    print("| you can copy&paste this right into your model for example.")
    print(separator)
# Script entry point: run the command only when this file is executed directly,
# not when it is imported. The call takes no arguments here; the click
# decorators on json_to_cerberus supply them.
if __name__ == "__main__":
    json_to_cerberus()
@pythononwheels
Copy link
Author

Sample data:

[
  {
    "_id": "5be471ceeb564e9699c6111d",
    "index": 0,
    "guid": "2418fd81-a66e-42e3-828d-3ab24f0b9847",
    "isActive": false,
    "balance": "$2,208.41",
    "picture": "http://placehold.it/32x32",
    "age": 20,
    "eyeColor": "brown",
    "name": "Compton Thornton",
    "gender": "male",
    "company": "TROPOLIS",
    "email": "comptonthornton@tropolis.com",
    "phone": "+1 (901) 437-3586",
    "address": "385 Gaylord Drive, Lund, Idaho, 3189",
    "about": "Duis labore exercitation et culpa qui mollit qui id id. Ut consequat qui est laboris quis sint aute fugiat id mollit. Voluptate nostrud eiusmod occaecat minim nulla. Labore occaecat ipsum est tempor reprehenderit et non. Non officia aliquip irure ex excepteur. Enim ea est reprehenderit ea nisi elit nisi do qui labore occaecat veniam Lorem. Irure elit minim fugiat ex.\r\n",
    "registered": "2017-05-21T01:27:35 -02:00",
    "latitude": -23.525615,
    "longitude": 65.972042,
    "tags": [
      "quis",
      "aute",
      "fugiat",
      "commodo",
      "duis",
      "fugiat",
      "laboris"
    ],
    "friends": [
      {
        "id": 0,
        "name": "Nannie Dean"
      },
      {
        "id": 1,
        "name": "Joanne Wooten"
      },
      {
        "id": 2,
        "name": "Michael Mason"
      }
    ],
    "greeting": "Hello, Compton Thornton! You have 4 unread messages.",
    "favoriteFruit": "banana"
  }]

Sample output:

opening json data file: sample.json
------------------------------------------------------------------------------------------
|  json data: sample.json
------------------------------------------------------------------------------------------
{   '_id': '5be471ceeb564e9699c6111d',
    'about': 'Duis labore exercitation et culpa qui mollit qui id id. Ut '
             'consequat qui est laboris quis sint aute fugiat id mollit. '
             'Voluptate nostrud eiusmod occaecat minim nulla. Labore occaecat '
             'ipsum est tempor reprehenderit et non. Non officia aliquip irure '
             'ex excepteur. Enim ea est reprehenderit ea nisi elit nisi do qui '
             'labore occaecat veniam Lorem. Irure elit minim fugiat ex.\r\n',
    'address': '385 Gaylord Drive, Lund, Idaho, 3189',
    'age': 20,
    'balance': '$2,208.41',
    'company': 'TROPOLIS',
    'email': 'comptonthornton@tropolis.com',
    'eyeColor': 'brown',
    'favoriteFruit': 'banana',
    'friends': [   {'id': 0, 'name': 'Nannie Dean'},
                   {'id': 1, 'name': 'Joanne Wooten'},
                   {'id': 2, 'name': 'Michael Mason'}],
    'gender': 'male',
    'greeting': 'Hello, Compton Thornton! You have 4 unread messages.',
    'guid': '2418fd81-a66e-42e3-828d-3ab24f0b9847',
    'index': 0,
    'isActive': False,
    'latitude': -23.525615,
    'longitude': 65.972042,
    'name': 'Compton Thornton',
    'phone': '+1 (901) 437-3586',
    'picture': 'http://placehold.it/32x32',
    'registered': '2017-05-21T01:27:35 -02:00',
    'tags': ['quis', 'aute', 'fugiat', 'commodo', 'duis', 'fugiat', 'laboris']}
------------------------------------------------------------------------------------------
|  Model schema for: sample.json
------------------------------------------------------------------------------------------
schema = {
    "_id"                {'type': 'string'}
    "about"              {'type': 'string'}
    "address"            {'type': 'string'}
    "age"                {'type': 'integer'}
    "balance"            {'type': 'string'}
    "company"            {'type': 'string'}
    "email"              {'type': 'string'}
    "eyeColor"           {'type': 'string'}
    "favoriteFruit"      {'type': 'string'}
    "friends"            {'type': 'list'}
    "gender"             {'type': 'string'}
    "greeting"           {'type': 'string'}
    "guid"               {'type': 'string'}
    "index"              {'type': 'integer'}
    "isActive"           {'type': 'boolean'}
    "latitude"           {'type': 'float'}
    "longitude"          {'type': 'float'}
    "name"               {'type': 'string'}
    "phone"              {'type': 'string'}
    "picture"            {'type': 'string'}
    "registered"         {'type': 'datetime'}
    "tags"               {'type': 'list'}
}
------------------------------------------------------------------------------------------
|   you can copy&paste this right into any  model schema
------------------------------------------------------------------------------------------

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment