Skip to content

Instantly share code, notes, and snippets.

@nagachika
Last active March 3, 2016 09:49
Show Gist options
  • Save nagachika/cb0dd03013a33d12fe71 to your computer and use it in GitHub Desktop.
Save nagachika/cb0dd03013a33d12fe71 to your computer and use it in GitHub Desktop.
Extract a BigQuery schema from JSON data
require "json"
require "yaml"
# Maps a parsed value to the schema type name used in the generated output.
#
# Arrays are classified by their first element only, so an empty array falls
# through to the error branch (its first element is nil).
#
# @param value [Object] the value to classify
# @param key [Object] the field name; used only to build the error message
# @return [String] "FLOAT", "INTEGER", "STRING", "BOOLEAN" or "RECORD"
# @raise [RuntimeError] when the value (or the first element of an array)
#   is not one of the supported kinds
def type(value, key)
  case value
  when Float
    "FLOAT"
  when Integer
    "INTEGER"
  when String
    "STRING"
  when true, false
    "BOOLEAN"
  when Hash
    "RECORD"
  when Array
    type(value.first, key)
  else
    # Report the offending value itself; referencing an undefined local here
    # would surface as a NameError and hide the real problem.
    raise "Unknown type for #{key}: #{value}, #{value.class}"
  end
end
# Picks the schema mode for a value: arrays become repeated fields,
# everything else is treated as nullable.
#
# @param e [Object] the value whose mode is wanted
# @return [String] "REPEATED" for an Array, otherwise "NULLABLE"
def mode(e)
  e.is_a?(Array) ? "REPEATED" : "NULLABLE"
end
# Walks a Hash and records one field descriptor per key in +target+.
#
# A descriptor already present in +target+ under the same name is reused and
# overwritten; otherwise a new one is appended. RECORD fields are descended
# into recursively, using only the first element when the value is an Array.
#
# @param target [Array<Hash>] descriptor list, mutated in place
# @param data [Hash] the document (or nested record) to inspect
# @return [Hash] +data+, as returned by the iteration
# @raise [RuntimeError] when +data+ is not a Hash
def traverse(target, data)
  raise "Hash is requred but #{data.inspect}" unless data.is_a?(Hash)

  data.each_pair do |field, value|
    entry = target.find { |candidate| candidate["name"] == field }
    unless entry
      # Register the descriptor before filling it in, as the caller's list is
      # the only place the result is kept.
      entry = {}
      target.push(entry)
    end

    entry["name"] = field
    entry["type"] = type(value, field)
    entry["mode"] = mode(value)
    next unless entry["type"] == "RECORD"

    entry["fields"] ||= []
    nested = value.is_a?(Array) ? value.first : value
    traverse(entry["fields"], nested)
  end
end
# Script input stage: read the file named by the first command-line argument
# and collect its field descriptors into `fields`.
fields = []
file = ARGV[0]
# Exit with a usage message instead of letting File.read(nil) raise TypeError.
abort "Usage: #{$0} FILE" if file.nil?
data = File.read(file)
# NOTE(review): the input is parsed with YAML.load (presumably because a YAML
# parser also accepts most JSON). On older Psych versions YAML.load can
# instantiate arbitrary objects, so only run this on trusted files; consider
# YAML.safe_load.
traverse(fields, YAML.load(data))
# Recursively repairs descriptors that lack a name or a type.
#
# Any descriptor whose "name" or "type" is missing or nil gets a warning on
# STDERR and is forced to type "STRING" with mode "NULLABLE". Nested
# descriptors under "fields" are processed the same way.
#
# @param target [Array<Hash>] descriptor list, mutated in place
# @return [Array<Hash>] +target+
def check(target)
  target.each do |field|
    incomplete = field["name"].nil? || field["type"].nil?
    if incomplete
      STDERR.puts "Warning: #{field} has an unknown type."
      field["type"] = "STRING"
      field["mode"] = "NULLABLE"
    end

    children = field["fields"]
    check(children) if children
  end
end
# Script output stage: repair incomplete descriptors, then print the result.
check(fields)
# JSON.generate has no :pretty option, so the original call did not produce
# indented output; JSON.pretty_generate does.
puts JSON.pretty_generate(fields)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment