Skip to content

Instantly share code, notes, and snippets.

@jtalmi
Last active October 5, 2022 16:55
Show Gist options
  • Save jtalmi/d72973461bd8d37d32bcdc9dcc865d8d to your computer and use it in GitHub Desktop.
Save jtalmi/d72973461bd8d37d32bcdc9dcc865d8d to your computer and use it in GitHub Desktop.
A script that lists the dbt models depending on macros that have changed relative to a git branch
#!/usr/bin/env python3
'''
Script to detect models downstream of changed macros, relative to a git branch.
Usage:
$ python3 models_using_changed_macros.py --branch master --children --manifest_path /path/to/manifest.json
'''
import os
import sys
import json
import argparse
import subprocess
from typing import List, Dict
def _find_changed_macros(branch: str) -> List[str]:
    '''Return the names of macro files that changed relative to a git branch.

    Runs ``git diff --name-only`` against ``branch`` and keeps the paths that
    start with ``macros/`` and end with ``.sql``. Paths that no longer exist
    under the current working directory are dropped. Each remaining path is
    reduced to its file name without the ``.sql`` extension.

    Args:
        branch: Git revision to diff the working tree against.

    Returns:
        File base names (no directory, no extension) of the changed macro
        files, in the order git reported them.

    Raises:
        subprocess.CalledProcessError: if the git command exits non-zero.
    '''
    # -z makes git emit NUL-terminated, unquoted paths, so file names that
    # contain spaces or non-ASCII characters are not split or escaped.
    # The trailing "--" marks `branch` as a revision, never a path.
    raw_output = subprocess.check_output(
        ["git", "diff", "--name-only", "-z", branch, "--"]
    )
    changed_files = [path for path in raw_output.decode('utf-8').split('\0') if path]

    changed_macros = []
    for path in changed_files:
        if not (path.startswith('macros/') and path.endswith('.sql')):
            continue
        # The diff also lists paths that are gone from the working tree; skip them.
        if not os.path.exists(os.path.join(os.getcwd(), path)):
            continue
        changed_macros.append(os.path.splitext(os.path.basename(path))[0])
    return changed_macros
def _fetch_manifest(manifest_path: str) -> Dict:
    '''Load a JSON manifest file and return its parsed content.

    Args:
        manifest_path: Path to the manifest JSON file.

    Returns:
        The decoded JSON document.

    Raises:
        Exception: if the file cannot be opened or read. The underlying
            I/O error is attached as ``__cause__``.
    '''
    try:
        with open(manifest_path, encoding='utf-8') as f:
            return json.load(f)
    except IOError as err:
        # Exception() does not apply %-formatting to its arguments, so the
        # message has to be built before it is raised.
        raise Exception(
            "Could not find manifest file in {}".format(manifest_path)
        ) from err
def _fetch_macro_child_map(manifest: Dict) -> Dict[str, List[str]]:
    '''Map each macro name to the ids of the models that depend on it.

    Only entries of ``manifest['nodes']`` whose ``resource_type`` is
    ``'model'`` are considered. A macro is keyed by the last dot-separated
    segment of its identifier, so macros that share a name but differ in
    their earlier segments end up under the same key.

    Args:
        manifest: Parsed manifest; must contain a ``'nodes'`` mapping.

    Returns:
        Dict from macro name to the list of node keys that list that macro
        under ``depends_on.macros``.

    Raises:
        KeyError: if ``manifest`` has no ``'nodes'`` entry.
    '''
    macro_child_map = {}
    for resource, node in manifest['nodes'].items():
        if node.get('resource_type') != 'model':
            continue
        # Tolerate a missing or null depends_on / macros entry.
        macros = (node.get('depends_on') or {}).get('macros') or []
        for macro in macros:
            macro_name = macro.split(".")[-1]
            macro_child_map.setdefault(macro_name, []).append(resource)
    return macro_child_map
def main(branch: str, children: bool, manifest_path: str) -> str:
    '''Return the models that depend on macros changed relative to a branch.

    Args:
        branch: Git revision to compare against.
        children: When true, append ``+`` to every model name.
        manifest_path: Path to the manifest JSON file.

    Returns:
        Space-separated model names in sorted order, or an empty string when
        no macro changed or no model depends on a changed macro. The manifest
        is only read when at least one macro changed.
    '''
    changed_macros = _find_changed_macros(branch)
    if not changed_macros:
        return ""

    manifest = _fetch_manifest(manifest_path)
    macro_child_map = _fetch_macro_child_map(manifest)

    affected_ids = set()
    for macro in changed_macros:
        affected_ids.update(macro_child_map.get(macro, []))

    # The model name is taken as the third dot-separated segment of the node
    # key (assumes keys shaped like "model.<package>.<name>" -- TODO confirm).
    # Sorting keeps the output stable from run to run.
    model_names = sorted({unique_id.split('.')[2] for unique_id in affected_ids})
    if children:
        model_names = [name + '+' for name in model_names]
    return " ".join(model_names)
if __name__ == "__main__":
    # Command-line entry point: parse the options, compute the model list and
    # print it on stdout.
    parser = argparse.ArgumentParser(
        description="List dbt models that depend on macros changed relative to a git branch."
    )
    parser.add_argument("--branch", default='origin/master', type=str,
                        help="git revision to diff against (default: origin/master)")
    parser.add_argument("--children", dest="children", default=True, action="store_true",
                        help="append '+' to each model name (default)")
    # --children defaults to True, so a separate switch is needed to turn it off.
    parser.add_argument("--no-children", dest="children", action="store_false",
                        help="do not append '+' to the model names")
    parser.add_argument("--manifest_path", default='./target/manifest.json', type=str,
                        help="path to the manifest JSON file (default: ./target/manifest.json)")
    parsed_args = parser.parse_args()
    try:
        models = main(parsed_args.branch, parsed_args.children, parsed_args.manifest_path)
    except Exception as e:
        # Exception() does not interpolate %s itself; format the message and
        # keep the original error chained for the traceback.
        raise Exception('Changed macro script failed: {}'.format(e)) from e
    print(models)
@jtalmi
Copy link
Author

jtalmi commented Mar 16, 2021

Example usage in CI:

- MODELS_USING_MODIFIED_MACROS=$(python scripts/models_using_changed_macros.py 2>&1)
- dbt run -m state:modified+ $MODELS_USING_MODIFIED_MACROS --state tmp/ --defer
- dbt test -m state:modified+ $MODELS_USING_MODIFIED_MACROS --state tmp/ --defer

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment