# achad4/simple_ml_dag.py

## simple_ml_dag.py
from datetime import datetime, timedelta
from typing import Optional

from airflow import DAG
from airflow.models import Variable
from airflow.operators.python_operator import PythonOperator


def train_model(lookback_days: int = 30):
    """Train a model on recent data and persist it to a model store.

    The body is a placeholder (``...``) in this demo; the description
    states the intended behaviour, not an implementation.

    Args:
        lookback_days: Number of past days of data to train on.
    """
    ...

def score_customers(
    model_version: Optional[str] = None,
    lookback_days: int = 30,
):
    """Score recent production data with a stored model version.

    The body is a placeholder (``...``) in this demo; the description
    states the intended behaviour, not an implementation.

    Args:
        model_version: Identifier of the model to pull from the model
            store. Defaults to ``None``.
        lookback_days: Number of past days of production data to score.
    """
    ...

# This dynamic configuration mechanism is not ideal but makes for an easy demo:
# the Variables are read at module level, i.e. every time this file is parsed.
model_version = Variable.get("model_version")
# Variable.get returns the stored value as a string, while the task callables
# declare ``lookback_days`` as an int, so convert before passing it along.
lookback_days_train = int(Variable.get("lookback_days_train"))
lookback_days_score = int(Variable.get("lookback_days_score"))

# Daily pipeline: train a model, then score customers with it.
with DAG(
    "unscalable_model_pipeline",
    default_args={
        'retries': 1,
        'retry_delay': timedelta(minutes=5),
    },
    description='An example of a DAG that will be difficult to scale',
    schedule_interval=timedelta(days=1),
    start_date=datetime(2022, 4, 7),
    catchup=False,
) as dag:

    # Operator variables carry a ``_task`` suffix so they do not rebind the
    # ``train_model`` / ``score_customers`` functions they wrap.
    train_model_task = PythonOperator(
        task_id="train_model",
        python_callable=train_model,
        op_kwargs={"lookback_days": lookback_days_train},
        dag=dag,
    )

    score_customers_task = PythonOperator(
        task_id="score_customers",
        # The scoring task must call score_customers; train_model does not
        # accept a ``model_version`` keyword.
        python_callable=score_customers,
        op_kwargs={
            "model_version": model_version,
            "lookback_days": lookback_days_score,
        },
        dag=dag,
    )

    # Scoring runs only after training has finished.
    train_model_task >> score_customers_task
	from airflow.models import Variable
	from airflow import DAG
	from airflow.operators.python_operator import PythonOperator


	def train_model(lookback_days: int = 30):
	    """Train a model on recent data and persist it to a model store.

	    The body is a placeholder (``...``) in this demo; the description
	    states the intended behaviour, not an implementation.

	    Args:
	        lookback_days: Number of past days of data to train on.
	    """
	    ...

	def score_customers(
	    model_version: Optional[str] = None,
	    lookback_days: int = 30,
	):
	    """Score recent production data with a stored model version.

	    The body is a placeholder (``...``) in this demo; the description
	    states the intended behaviour, not an implementation.

	    Args:
	        model_version: Identifier of the model to pull from the model
	            store. Defaults to ``None``.
	        lookback_days: Number of past days of production data to score.
	    """
	    ...

	# This dynamic configuration mechanism is not ideal but makes for an easy demo:
	# the Variables are read at module level, i.e. every time this file is parsed.
	model_version = Variable.get("model_version")
	# Variable.get returns the stored value as a string, while the task callables
	# declare ``lookback_days`` as an int, so convert before passing it along.
	lookback_days_train = int(Variable.get("lookback_days_train"))
	lookback_days_score = int(Variable.get("lookback_days_score"))

	# Daily pipeline: train a model, then score customers with it.
	with DAG(
	    "unscalable_model_pipeline",
	    default_args={
	        'retries': 1,
	        'retry_delay': timedelta(minutes=5),
	    },
	    description='An example of a DAG that will be difficult to scale',
	    schedule_interval=timedelta(days=1),
	    start_date=datetime(2022, 4, 7),
	    catchup=False,
	) as dag:

	    # Operator variables carry a ``_task`` suffix so they do not rebind the
	    # ``train_model`` / ``score_customers`` functions they wrap.
	    train_model_task = PythonOperator(
	        task_id="train_model",
	        python_callable=train_model,
	        op_kwargs={"lookback_days": lookback_days_train},
	        dag=dag,
	    )

	    score_customers_task = PythonOperator(
	        task_id="score_customers",
	        # The scoring task must call score_customers; train_model does not
	        # accept a ``model_version`` keyword.
	        python_callable=score_customers,
	        op_kwargs={
	            "model_version": model_version,
	            "lookback_days": lookback_days_score,
	        },
	        dag=dag,
	    )

	    # Scoring runs only after training has finished.
	    train_model_task >> score_customers_task