Last active
August 28, 2019 13:23
-
-
Save moonhouse/f4fb44babafb7aa7574eaea58fccf065 to your computer and use it in GitHub Desktop.
Convert JSON schema to BigQuery schema
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import sys | |
# Path of the JSON Schema file to convert, taken from the first CLI argument.
# Exit with a usage message instead of a bare IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit('usage: %s <json-schema-file>' % sys.argv[0])
f = sys.argv[1]
# Placeholder; rebound to the converted schema once the input has been read.
bq_schema = []
def _single_type(type_spec):
    """Collapse a JSON Schema ``type`` value into a single type name.

    ``type`` may be a plain string or a list such as ``['string', 'null']``.
    ``'null'`` entries are ignored; a list holding nothing else yields
    ``'null'``. The caller's list is never modified.

    Raises:
        ValueError: if more than one non-null type remains.
    """
    if not isinstance(type_spec, list):
        return type_spec
    non_null = [t for t in type_spec if t != 'null']
    if not non_null:
        return 'null'
    if len(non_null) > 1:
        raise ValueError('Unhandled schema error', 'Multiple types')
    return non_null[0]


def tree_to_schema(tree):
    """Convert a JSON Schema object node into a list of BigQuery field dicts.

    Args:
        tree: dict with a ``properties`` mapping and, optionally, a
            ``required`` list naming the mandatory properties of *this* node.

    Returns:
        A list with one dict per property, holding ``name``, ``type`` and
        ``mode`` (``required``, ``nullable`` or ``repeated``). Object
        properties, and arrays of objects, also get a ``fields`` list built
        recursively.

    Raises:
        KeyError: if ``properties``, a property's ``type`` or an array's
            ``items`` is missing.
        ValueError: if a property lists several non-null types.
    """
    # "required" belongs to the node being converted, so nested records use
    # their own list; a missing list simply means nothing is mandatory.
    required = set(tree.get('required', ()))
    schema = []
    for name, vals in tree['properties'].items():
        row = {'name': name}
        node = vals
        data_type = _single_type(vals['type'])
        if data_type == 'array':
            # A repeated field is described by its item schema.
            mode = 'repeated'
            node = vals['items']
            data_type = _single_type(node['type'])
        elif name in required:
            mode = 'required'
        else:
            mode = 'nullable'
        if data_type == 'object':
            data_type = 'record'
            row['fields'] = tree_to_schema(node)
        elif data_type == 'number':
            # BigQuery has no "number" type; JSON numbers map to FLOAT.
            data_type = 'float'
        elif data_type == 'null':
            # Choose another data type for a field that is always null
            data_type = 'string'
        row['type'] = data_type
        row['mode'] = mode
        schema.append(row)
    return schema
# Read the JSON Schema document named on the command line. `data` stays a
# module-level name because other code in this file may refer to it.
with open(f) as schema_file:
    data = json.load(schema_file)

# Convert the parsed schema and print the BigQuery schema as one JSON line.
bq_schema = tree_to_schema(data)
print(json.dumps(bq_schema))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment