Last active
October 11, 2021 22:05
-
-
Save gareginordyan/6d3bffb869e8d107b5af197a60019609 to your computer and use it in GitHub Desktop.
Identify unused dbt models (materialized only) that haven't been referenced in the last 30 days
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- find all jobs with prod target name by dbt | |
with jobs_with_dbt_prod_target as ( | |
select distinct | |
job_id | |
from `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT | |
where query like '%"target_name": "prod"%' | |
--find all tables/views that were created (or refreshed) in the last 5 days | |
), recent_tables_in_prod_by_dbt_transform_layer as ( | |
select distinct | |
concat(destination_table.project_id, '.', destination_table.dataset_id, '.', destination_table.table_id) as pkey, | |
statement_type, | |
destination_table.project_id, | |
destination_table.dataset_id, | |
destination_table.table_id | |
from `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT | |
where ( | |
job_id in ( | |
select | |
job_id | |
from jobs_with_dbt_prod_target | |
) or parent_job_id in ( | |
select | |
job_id | |
from jobs_with_dbt_prod_target | |
) | |
) and date(creation_time) > date_sub(current_date, interval 5 day) /* tables that were made in the last 5 days*/ | |
and substr(destination_table.dataset_id, 0, 1) <> '_' | |
), recently_referenced_tables as ( | |
select | |
concat(referenced_tables_unnested.project_id, '.', referenced_tables_unnested.dataset_id, '.', referenced_tables_unnested.table_id) as pkey, | |
count(*) as count_references | |
from `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT, unnest(referenced_tables) as referenced_tables_unnested | |
where date(creation_time) > date_sub(current_date, interval 30 day) /* tables that were referenced in the last 5 days*/ | |
group by 1 | |
) | |
select | |
* | |
from recent_tables_in_prod_by_dbt_transform_layer | |
left join recently_referenced_tables using (pkey) | |
where statement_type not in ('CREATE_VIEW', 'DROP_TABLE') | |
and recent_tables_in_prod_by_dbt_transform_layer.table_id not like '%__dbt_tmp' | |
order by count_references, table_id desc |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment