Skip to content

Instantly share code, notes, and snippets.

@moonhouse
Last active August 28, 2019 13:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save moonhouse/f4fb44babafb7aa7574eaea58fccf065 to your computer and use it in GitHub Desktop.
Save moonhouse/f4fb44babafb7aa7574eaea58fccf065 to your computer and use it in GitHub Desktop.
Convert JSON schema to BigQuery schema
import json
import sys
# Path of the JSON Schema file to convert, taken from the first CLI argument.
if len(sys.argv) < 2:
    # Exit with a usage message instead of a bare IndexError traceback when
    # the script is started without an input file.
    sys.exit('usage: {} <json-schema-file>'.format(
        sys.argv[0] if sys.argv else 'script'))
f = sys.argv[1]
# Converted BigQuery schema; reassigned once the input file has been parsed.
bq_schema = []
def _resolve_type(declared):
    """Collapse a JSON Schema ``type`` value into ``(type_name, allows_null)``.

    ``declared`` is either a single type name or a list of type names.  A
    list may hold at most one non-null type; anything else is rejected.
    """
    if not isinstance(declared, list):
        return declared, declared == 'null'
    # Work on a copy so the caller's schema document is never mutated.
    remaining = list(declared)
    allows_null = 'null' in remaining
    if allows_null:
        remaining.remove('null')
    if len(remaining) != 1:
        raise Exception('Unhandled schema error', 'Multiple types')
    return remaining[0], allows_null


def tree_to_schema(tree):
    """Convert a JSON Schema object node into a list of BigQuery fields.

    Args:
        tree: dict describing a JSON Schema object.  It must contain a
            ``properties`` mapping and may contain a ``required`` list.

    Returns:
        A list with one dict per property, holding ``name``, ``type`` and
        ``mode`` (``required``, ``nullable`` or ``repeated``).  Object
        properties, including objects used as array items, get type
        ``record`` and a nested ``fields`` list built recursively.

    Raises:
        KeyError: if ``tree`` has no ``properties``, a property has no
            ``type``, or an array property has no ``items``/``type``.
        Exception: if a property lists more than one non-null type.
    """
    # ``required`` is declared per object, so it is read from the node being
    # converted (not from the document root) and defaults to "none required".
    required = tree.get('required', [])
    schema = []
    for name, vals in tree['properties'].items():
        row = {'name': name}
        data_type, allows_null = _resolve_type(vals['type'])
        # ``node`` is the schema that describes the stored value: the
        # property itself, or the item schema for arrays.
        node = vals
        if data_type == 'array':
            mode = 'repeated'
            node = vals['items']
            data_type, _ = _resolve_type(node['type'])
        elif name in required and not allows_null:
            mode = 'required'
        else:
            # Either optional, or required-but-nullable: a BigQuery REQUIRED
            # column cannot hold NULL, so both cases map to nullable.
            mode = 'nullable'
        if data_type == 'object':
            data_type = 'record'
            row['fields'] = tree_to_schema(node)
        elif data_type == 'null':
            # Choose another data type for a field that is always null
            data_type = 'string'
        row['type'] = data_type
        row['mode'] = mode
        schema.append(row)
    return schema
# Parse the schema file named on the command line, convert it, and print the
# resulting BigQuery schema as JSON on stdout.
# The file is opened in binary mode so json.load detects the Unicode encoding
# itself instead of relying on the platform's default text encoding.
with open(f, 'rb') as json_file:
    data = json.load(json_file)
# Conversion and printing do not need the file, so it is already closed here.
bq_schema = tree_to_schema(data)
print(json.dumps(bq_schema))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment