Skip to content

Instantly share code, notes, and snippets.

@gareginordyan
Last active October 11, 2021 22:05
Show Gist options
  • Save gareginordyan/6d3bffb869e8d107b5af197a60019609 to your computer and use it in GitHub Desktop.
Save gareginordyan/6d3bffb869e8d107b5af197a60019609 to your computer and use it in GitHub Desktop.
Identify unused dbt models (materialized only) that haven't been referenced in the last 30 days
-- find all jobs with prod target name by dbt
with jobs_with_dbt_prod_target as (
select distinct
job_id
from `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
where query like '%"target_name": "prod"%'
--find all tables/views that were created (or refreshed) in the last 5 days
), recent_tables_in_prod_by_dbt_transform_layer as (
select distinct
concat(destination_table.project_id, '.', destination_table.dataset_id, '.', destination_table.table_id) as pkey,
statement_type,
destination_table.project_id,
destination_table.dataset_id,
destination_table.table_id
from `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
where (
job_id in (
select
job_id
from jobs_with_dbt_prod_target
) or parent_job_id in (
select
job_id
from jobs_with_dbt_prod_target
)
) and date(creation_time) > date_sub(current_date, interval 5 day) /* tables that were made in the last 5 days*/
and substr(destination_table.dataset_id, 0, 1) <> '_'
), recently_referenced_tables as (
select
concat(referenced_tables_unnested.project_id, '.', referenced_tables_unnested.dataset_id, '.', referenced_tables_unnested.table_id) as pkey,
count(*) as count_references
from `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT, unnest(referenced_tables) as referenced_tables_unnested
where date(creation_time) > date_sub(current_date, interval 30 day) /* tables that were referenced in the last 5 days*/
group by 1
)
select
*
from recent_tables_in_prod_by_dbt_transform_layer
left join recently_referenced_tables using (pkey)
where statement_type not in ('CREATE_VIEW', 'DROP_TABLE')
and recent_tables_in_prod_by_dbt_transform_layer.table_id not like '%__dbt_tmp'
order by count_references, table_id desc
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment