Last active
March 3, 2016 09:49
-
-
Save nagachika/cb0dd03013a33d12fe71 to your computer and use it in GitHub Desktop.
BigQuery Schema extract from JSON data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "json" | |
require "yaml" | |
# Maps a decoded value to the name of the BigQuery column type used for it.
#
# Arrays are typed by their first element (recursively), so an array of
# integers yields "INTEGER" and an array of hashes yields "RECORD".
#
# @param value [Object] a value taken from the parsed input document
# @param key [Object] the field name; only used to build the error message
# @return [String] "FLOAT", "INTEGER", "STRING", "BOOLEAN" or "RECORD"
# @raise [RuntimeError] if the value (or the first element of an array) is of
#   any other class, e.g. nil or an empty array
def type(value, key)
  case value
  when Float
    "FLOAT"
  when Integer
    "INTEGER"
  when String
    "STRING"
  when true, false
    "BOOLEAN"
  when Hash
    "RECORD"
  when Array
    type(value.first, key)
  else
    # Report the offending value itself; the message previously referenced an
    # undefined local and therefore failed with NameError instead.
    raise "Unknown type for #{key}: #{value.inspect}, #{value.class}"
  end
end
# Chooses the schema mode for a value: arrays become "REPEATED", every other
# value becomes "NULLABLE".
#
# @param e [Object] a value taken from the parsed input document
# @return [String] "REPEATED" or "NULLABLE"
def mode(e)
  e.is_a?(Array) ? "REPEATED" : "NULLABLE"
end
# Folds the structure of +data+ into +target+, an array of schema entries.
#
# For each key/value pair of +data+ the entry of +target+ whose "name" equals
# the key is reused, or a new entry is appended when none exists. Its "name",
# "type" and "mode" are then (re)assigned from the value. Entries whose type
# is "RECORD" receive a "fields" array that is filled by recursing into the
# value; for an array of records only the first element is inspected.
#
# @param target [Array<Hash>] schema entries; mutated in place
# @param data [Hash] one decoded object
# @return [Hash] +data+ (the result of Hash#each)
# @raise [RuntimeError] if +data+ is not a Hash
def traverse(target, data)
  raise "Hash is required but #{data.inspect}" unless data.is_a?(Hash)

  data.each do |field, value|
    desc = target.find { |e| e["name"] == field }
    if desc.nil?
      desc = {}
      target << desc
    end

    desc["name"] = field
    desc["type"] = type(value, field)
    desc["mode"] = mode(value)
    next unless desc["type"] == "RECORD"

    # Keep any sub-fields collected earlier so repeated calls accumulate.
    desc["fields"] ||= []
    # A repeated record is described by its first element only.
    record = value.is_a?(Array) ? value.first : value
    traverse(desc["fields"], record)
  end
end
# Script entry: read the file named by the first command-line argument and
# fold its structure into `fields`, the schema array printed further down.
fields = []
file = ARGV[0]
# Without this guard a missing argument only surfaces as a TypeError raised
# by File.read(nil).
abort "Usage: #{$PROGRAM_NAME} FILE" if file.nil?
data = File.read(file)
# NOTE(review): the input is parsed with the YAML loader. If the file can come
# from an untrusted source, confirm the installed Psych version's YAML.load is
# safe or switch to YAML.safe_load / JSON.parse.
traverse(fields, YAML.load(data))
# Walks a schema array and repairs entries that lack a usable "name" or
# "type": a warning is written to STDERR and the entry is forced to a
# NULLABLE STRING column. Nested "fields" arrays are checked recursively.
#
# @param target [Array<Hash>] schema entries; mutated in place
# @return [Array<Hash>] +target+ (the result of Array#each)
def check(target)
  target.each do |field|
    # An entry is complete only when both keys exist and hold non-nil values.
    complete = %w[name type].all? do |attr|
      field.key?(attr) && !field[attr].nil?
    end

    unless complete
      STDERR.puts "Warning: #{field} has an unknown type."
      field["type"] = "STRING"
      field["mode"] = "NULLABLE"
    end

    check(field["fields"]) if field["fields"]
  end
end
# Repair incomplete entries, then print the schema as JSON on stdout.
check(fields)
# The stdlib generator documents no :pretty option for JSON.generate;
# JSON.pretty_generate is the call that emits indented, multi-line output.
puts JSON.pretty_generate(fields)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment