Skip to content

Instantly share code, notes, and snippets.

@kaxil
Created October 23, 2019 22:43
Show Gist options
  • Save kaxil/dc0ab939b5eae92a128e6150609d1ac9 to your computer and use it in GitHub Desktop.
Save kaxil/dc0ab939b5eae92a128e6150609d1ac9 to your computer and use it in GitHub Desktop.
Generates an Airflow DAG with 2047 tasks and 699050 dependencies between those tasks
# Author: Bas Harenslak
import datetime as dt
import airflow.utils.dates
from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.utils.helpers import cross_downstream
# Build a layered stress-test DAG: layer n holds 2**n tasks, and each layer is
# wired to the next one with cross_downstream. The total number of tasks and
# dependency edges is printed once the DAG has been built.
dag = DAG(
    dag_id="megadag",
    schedule_interval=dt.timedelta(hours=4),
    start_date=airflow.utils.dates.days_ago(1),
)

layers = 10  # index of the last layer; layers 0..layers inclusive are created
ntasks = 0  # running total of tasks created
ndeps = 0  # running total of dependency edges created
previous_layer = None

for layer_n in range(layers + 1):
    # Keep the per-layer size in its own variable. It used to be assigned to
    # `ntasks`, which wiped the running total on every iteration and made the
    # script report 2048 tasks instead of the 2047 that are actually created.
    layer_size = 2 ** layer_n

    # NOTE(review): layer and task index are concatenated without a separator.
    # The ids are unique for layers = 10, but larger values can collide (e.g.
    # layer 4 / task 10 and layer 41 / task 0 would both be "task410").
    current_layer = [
        DummyOperator(task_id=f"task{layer_n}{task_n}", dag=dag)
        for task_n in range(layer_size)
    ]
    ntasks += len(current_layer)

    # The first layer has no predecessor, so there is nothing to connect.
    if previous_layer:
        cross_downstream(previous_layer, current_layer)
        # One edge per (previous task, current task) pair.
        ndeps += len(previous_layer) * len(current_layer)

    previous_layer = current_layer

print(ntasks)
print(ndeps)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment