Skip to content

Instantly share code, notes, and snippets.

@pcreux
Created May 3, 2021 16:15
Show Gist options
  • Star 19 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save pcreux/b2e4a288b272fb17a36d319734fbb8ee to your computer and use it in GitHub Desktop.
Save pcreux/b2e4a288b272fb17a36d319734fbb8ee to your computer and use it in GitHub Desktop.
Generate an ERD via dbdiagram.io from a dbt project.
#!/usr/bin/env ruby
# Generate a dbdiagram for dbdiagram.io from a dbt project.
#
# Usage:
# 1. Run `dbt docs generate` first.
# 2. Run `dbt_to_dbdiagram.rb`
# 3. Paste the output in https://dbdiagram.io/
require 'yaml'
require 'json'
# Feel free to change this path to point to the schema you're interested in.
SCHEMA = YAML.load(File.read("models/analytics/schema.yml"))
CATALOG = JSON.parse(File.read("target/catalog.json"))
# Feel free to update this too with your own mappings.
TYPES = {
"text" => "text",
"date" => "date",
"character varying" => "text",
"timestamp without time zone" => "timestamp",
"integer" => "int",
"bigint" => "int",
"numeric" => "int",
"double precision" => "float",
"character varying(7)" => "text",
"boolean" => "boolean",
"text[]" => "text[]",
"character varying[]" => "text[]",
"shared_extensions.citext" => "text"
}
Table = Struct.new(:name, :columns) do
def self.build(data)
return unless data.fetch("metadata").fetch("schema") == "analytics"
table_name = data.fetch("metadata").fetch("name")
new(
table_name,
data.fetch("columns").map { |name, details| Column.build(table_name, name, details) }
)
rescue
pp data
raise
end
end
Column = Struct.new(:table_name, :name, :type, :comment, :ref) do
def self.build(table_name, name, details)
models = SCHEMA.fetch("models") + SCHEMA.fetch("seeds")
relationship = models.find { |model| model.fetch("name") == table_name }
.fetch("columns").find { |column| column.fetch("name") == name }
&.fetch("tests", [])&.find { |test| test["relationships"] }&.fetch("relationships")
ref = if relationship
# {"to"=>"ref('group_messages')", "field"=>"id"}}
[ relationship.fetch("to").split("'")[1], relationship.fetch("field") ]
end
new(
table_name,
name,
details.fetch("type"),
details.fetch("comment"),
ref
)
rescue
p table_name
p name
pp details
raise
end
end
# Here is the kind of output we want for dbdiagram.io
#
# Table order_items {
# order_id int [ref: > orders.id] // inline relationship (many-to-one)
# product_id int
# quantity int [default: 1] // default value
# }
def out(table)
out = "TABLE #{table.name} {"
columns = table.columns.map do |column|
details = [ column.name ]
details << TYPES.fetch(column.type, column.type)
if (target_table, target_column = column.ref)
details << "[ref: > #{target_table}.#{target_column}]"
end
details.join(" ")
end
columns.each { |c| out << "\n #{c}" }
out << "\n}"
end
tables = CATALOG.fetch("nodes").map { |_key, data| Table.build(data) }.compact
tables.each do |table|
puts out(table)
puts ""
end
@felipefrancisco
Copy link

Nice one mate 👍

@ciejer
Copy link

ciejer commented Jun 23, 2021

Hi @pcreux - I have an interactive catalog for dbt under development, and one of my upcoming features is in this space - were there any learnings from this that you could offer to help guide ERD design?
ciejer/tangata_local#6

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment