Skip to content

Instantly share code, notes, and snippets.

@boxysean
Created October 17, 2022 11:15
Show Gist options
  • Save boxysean/c1e0cb6735f6bbbb422cb06a14c3cd92 to your computer and use it in GitHub Desktop.
Save boxysean/c1e0cb6735f6bbbb422cb06a14c3cd92 to your computer and use it in GitHub Desktop.
Imitating Zero-Copy Clones in Redshift using dbt commands
{% macro create_proxy_views(models) %}
{% set prod_schema = "analytics" %} -- This is the schema name of our production schema
{% if prod_schema == target.schema %}
{% do log("This macro shouldn't be run on the production target. Exiting without actions.", info=True) %}
{% else %}
{% for model in models %}
{% set relation_identifier = model["alias"] %}
{% set relation_sub_schema = model["config"]["schema"] %}
{% set relation_schema = target.schema + "_" + relation_sub_schema if relation_sub_schema else target.schema %}
{% set relation_prod_schema = prod_schema + "_" + relation_sub_schema if relation_sub_schema else prod_schema %}
{# checks if the relation already exists and drops it #}
{% set existing_relation = adapter.get_relation(
database=target.dbname,
schema=relation_schema,
identifier=relation_identifier) %}
{% if existing_relation %}
{% do log("Dropping " ~ existing_relation, info=True) %}
{% do adapter.drop_relation(existing_relation) %}
{% endif %}
{# creates the schema and proxy view #}
{% set proxy_view = api.Relation.create(
database=target.dbname,
schema=relation_schema,
identifier=relation_identifier) %}
{% set prod_relation = adapter.get_relation(
database=target.dbname,
schema=relation_prod_schema,
identifier=relation_identifier) %}
{% if prod_relation %}
{% do adapter.create_schema(proxy_view) %}
{% set sql %}
create view {{ proxy_view }} as select * from {{ prod_relation }};
-- with no schema binding;
{% endset %}
{% do log("Creating proxy view " ~ proxy_view ~ " pointing to " ~ prod_relation, info=True) %}
{% do run_query(sql) %}
{% else %}
{% do log("There's no equivalent model to " ~ proxy_view ~ " found in production.", info=True) %}
{% do log("Maybe it's ephemeral or haven't been run yet. You could try to create with a `dbt run`", info=True) %}
{% endif %}
{% endfor %}
{% do log("The proxy views were created", info=True) %}
{% endif %}
{% endmacro %}
version: 2
macros:
- name: create_proxy_views
description: '{{ doc("create_proxy_views") }}'
arguments:
- name: models
type: json string
description: the json string produced from `dbt ls --output json` and filtered by `jq`
- name: production_schema
type: string
description: >-
"The name of the production schema used.
`WARNING`: this macro should never run in production!"
{#
Build a list of upstream model names that can be used in Stephan
Durry's create_proxy_views() macro for Fake Zero-Copy Clones in
Redshift.
TODO: Turn this into a proper run-operation macro.
#}
{# first build the graph #}
{% set depends_on = {} %}
{% for node_name, node_value in graph.nodes.items() -%}
{% do depends_on.update({node_name: node_value['depends_on']['nodes']}) %}
{%- endfor %}
{# then breadth first search to add all upstream nodes #}
{% set start_node = 'model.tpch.fct_orders' %}
{% macro bfs(start_node, depends_on) -%}
{% set queue = [start_node] %}
{% set upstream_nodes = [] %}
{% for _ in range(1, 10000) %} {# Emulate a `while True` loop #}
{% if queue|length == 0 %}
{{ return(upstream_nodes) }}
{% endif %}
{% set node_name = queue.pop() %}
{% for upstream_node_name in depends_on.get(node_name, []) %}
{% if upstream_node_name.startswith('model.') and upstream_node_name not in upstream_nodes and upstream_node_name not in queue %}
{% do upstream_nodes.append(upstream_node_name) %}
{% do queue.append(upstream_node_name) %}
{% endif %}
{% endfor %}
{% endfor %}
{%- endmacro %}
{% set upstream_nodes = bfs(start_node, depends_on) %}
{# then spit out the dictionary format needed for create_proxy_views() from Stephan #}
{% set input_list = [] %}
{% for node_name in upstream_nodes -%}
{% do input_list.append(
{
'config': graph.nodes[node_name]['config'],
'alias': graph.nodes[node_name]['alias']
}
) %}
{%- endfor %}
{# Ready to use in create_proxy_views() #}
{{ input_list | tojson }}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment