# Last active January 28, 2021 09:15
# Example of dbt scheduling with Airflow
# How to schedule dbt runs with Airflow using gocardless/airflow-dbt
# Folder structure is as such:
# /app
#   /airflow
#     /dags
#       /
#   /dbt
#     /config
#       /profiles.yml
#     /models
#       /web
#         /
import os
from datetime import timedelta

from airflow import DAG
from airflow_dbt.operators.dbt_operator import DbtRunOperator
# Root of the dbt project, read from the environment (e.g. /app/dbt).
# It is resolved before `default_args` because that dict references it.
# A missing DBT_PATH raises KeyError as soon as this DAG file is parsed.
dbt_path = os.environ['DBT_PATH']

# Default arguments handed to the DAG and inherited by its tasks.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': '2020-01-01',
    'email': [''],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    # NOTE(review): airflow-dbt documents the project-directory argument
    # as `dir`; confirm that `dbt_dir` is honoured by the installed version.
    'dbt_dir': dbt_path,
}

# Use a cron schedule in `schedule_interval`, here it will run every 20 minutes
dag = DAG(
    dag_id='update_models',
    default_args=default_args,
    schedule_interval='*/20 * * * *',
    catchup=False,
)

# DbtRunOperator from gocardless/airflow-dbt
# This will run the /app/dbt/models/web/ dbt model
# NOTE(review): `task_id`, `dag` and `models` were missing from the truncated
# call; `models='web'` is inferred from the comment above -- confirm.
dbt_run = DbtRunOperator(
    task_id='dbt_run',
    dag=dag,
    models='web',
    # The folder where profiles.yml lives (/app/dbt/config)
    profiles_dir=os.path.join(dbt_path, 'config'),
)

# Airflow dependency graph (with a single task here)
dbt_run
