-
-
Save boxysean/c1e0cb6735f6bbbb422cb06a14c3cd92 to your computer and use it in GitHub Desktop.
Imitating Zero-Copy Clones in Redshift using dbt commands
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{% macro create_proxy_views(models) %} | |
{% set prod_schema = "analytics" %} -- This is the schema name of our production schema | |
{% if prod_schema == target.schema %} | |
{% do log("This macro shouldn't be run on the production target. Exiting without actions.", info=True) %} | |
{% else %} | |
{% for model in models %} | |
{% set relation_identifier = model["alias"] %} | |
{% set relation_sub_schema = model["config"]["schema"] %} | |
{% set relation_schema = target.schema + "_" + relation_sub_schema if relation_sub_schema else target.schema %} | |
{% set relation_prod_schema = prod_schema + "_" + relation_sub_schema if relation_sub_schema else prod_schema %} | |
{# checks if the relation already exists and drops it #} | |
{% set existing_relation = adapter.get_relation( | |
database=target.dbname, | |
schema=relation_schema, | |
identifier=relation_identifier) %} | |
{% if existing_relation %} | |
{% do log("Dropping " ~ existing_relation, info=True) %} | |
{% do adapter.drop_relation(existing_relation) %} | |
{% endif %} | |
{# creates the schema and proxy view #} | |
{% set proxy_view = api.Relation.create( | |
database=target.dbname, | |
schema=relation_schema, | |
identifier=relation_identifier) %} | |
{% set prod_relation = adapter.get_relation( | |
database=target.dbname, | |
schema=relation_prod_schema, | |
identifier=relation_identifier) %} | |
{% if prod_relation %} | |
{% do adapter.create_schema(proxy_view) %} | |
{% set sql %} | |
create view {{ proxy_view }} as select * from {{ prod_relation }}; | |
-- with no schema binding; | |
{% endset %} | |
{% do log("Creating proxy view " ~ proxy_view ~ " pointing to " ~ prod_relation, info=True) %} | |
{% do run_query(sql) %} | |
{% else %} | |
{% do log("There's no equivalent model to " ~ proxy_view ~ " found in production.", info=True) %} | |
{% do log("Maybe it's ephemeral or haven't been run yet. You could try to create with a `dbt run`", info=True) %} | |
{% endif %} | |
{% endfor %} | |
{% do log("The proxy views were created", info=True) %} | |
{% endif %} | |
{% endmacro %} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
version: 2 | |
macros: | |
- name: create_proxy_views | |
description: '{{ doc("create_proxy_views") }}' | |
arguments: | |
- name: models | |
type: json string | |
description: the json string produced from `dbt ls --output json` and filtered by `jq` | |
- name: production_schema | |
type: string | |
description: >- | |
"The name of the production schema used. | |
`WARNING`: this macro should never run in production!" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{# | |
Build a list of upstream model names that can be used in Stephan | |
Durry's create_proxy_views() macro for Fake Zero-Copy Clones in | |
Redshift. | |
TODO: Turn this into a proper run-operation macro. | |
#} | |
{# first build the graph #} | |
{% set depends_on = {} %} | |
{% for node_name, node_value in graph.nodes.items() -%} | |
{% do depends_on.update({node_name: node_value['depends_on']['nodes']}) %} | |
{%- endfor %} | |
{# then breadth first search to add all upstream nodes #} | |
{% set start_node = 'model.tpch.fct_orders' %} | |
{% macro bfs(start_node, depends_on) -%} | |
{% set queue = [start_node] %} | |
{% set upstream_nodes = [] %} | |
{% for _ in range(1, 10000) %} {# Emulate a `while True` loop #} | |
{% if queue|length == 0 %} | |
{{ return(upstream_nodes) }} | |
{% endif %} | |
{% set node_name = queue.pop() %} | |
{% for upstream_node_name in depends_on.get(node_name, []) %} | |
{% if upstream_node_name.startswith('model.') and upstream_node_name not in upstream_nodes and upstream_node_name not in queue %} | |
{% do upstream_nodes.append(upstream_node_name) %} | |
{% do queue.append(upstream_node_name) %} | |
{% endif %} | |
{% endfor %} | |
{% endfor %} | |
{%- endmacro %} | |
{% set upstream_nodes = bfs(start_node, depends_on) %} | |
{# then spit out the dictionary format needed for create_proxy_views() from Stephan #} | |
{% set input_list = [] %} | |
{% for node_name in upstream_nodes -%} | |
{% do input_list.append( | |
{ | |
'config': graph.nodes[node_name]['config'], | |
'alias': graph.nodes[node_name]['alias'] | |
} | |
) %} | |
{%- endfor %} | |
{# Ready to use in create_proxy_views() #} | |
{{ input_list | tojson }} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment