Skip to content

Instantly share code, notes, and snippets.

@reizist
Created February 1, 2021 13:15
Show Gist options
  • Save reizist/346ec9b37e347c602ba1fcb89dc94f74 to your computer and use it in GitHub Desktop.
Save reizist/346ec9b37e347c602ba1fcb89dc94f74 to your computer and use it in GitHub Desktop.
Output bigquery schema from TD
# TODO:
# 1. gem install td-client
# 2. Set TD_API_KEY, TD_DATABASE
# 3. Prepare table_list.txt including table name list
# 4. Execute like this: ruby generate-schema.rb table_list.txt
require 'td-client'
DB = ENV["TD_DATABASE"]
TYPE_TABLE = {
"boolean" => "BOOLEAN",
"tinyint" => "INTEGER",
"integer" => "INTEGER",
"bigint" => "INTEGER",
"long" => "INTEGER",
"double" => "FLOAT",
"string" => "STRING",
"timestamp" => "STRING"
}
def get_bq_type(type)
TYPE_TABLE[type] || raise("detect unknown column type: #{type}. Add type to TYPE_TABLE.")
end
def generate_string_from_columns(columns)
JSON.pretty_generate(columns)
end
def execute
apikey = ENV['TD_API_KEY']
cli = TreasureData::Client.new(apikey)
columns = []
begin
table_list = File.open(ARGV[0]).readlines.map(&:chomp)
rescue => e
puts "Just specify valid table list file.\n\n #{e}"
end
table_list.each do |table|
puts "print table: #{table}\n\n"
if table.include? "_masked"
puts "Skip #{table} because it has sensitive data.\n\n"
next
end
begin
schema = cli.table(DB, table).schema
rescue => e
puts "Encountered error while loadign the schema.\n#{e}\n\n"
next
end
columns = []
schema.fields.each do |field|
columns << { "name" => field.name, "type" => get_bq_type(field.type) }
end
if ENV['FILE']
table_name = ENV['TRIM'] ? table.gsub(ENV['TRIM'], "") : table
File.open("#{table_name}.json", mode: "w") do |f|
f.puts generate_string_from_columns(columns)
end
else
puts generate_string_from_columns(columns)
end
end
end
execute
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment