Skip to content

Instantly share code, notes, and snippets.

@seahrh
Created March 23, 2018 08:31
Show Gist options
  • Save seahrh/57be255c1e6da30e9c726886e87cc4b2 to your computer and use it in GitHub Desktop.
Save seahrh/57be255c1e6da30e9c726886e87cc4b2 to your computer and use it in GitHub Desktop.
from datetime import datetime, timedelta
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
# Task-level defaults; handed to the DAG constructor as `default_args`.
default_args = {
    # Ownership and scheduling baseline.
    'owner': 'myowner',
    'depends_on_past': False,
    'start_date': datetime(2017, 10, 18),  # midnight, naive datetime
    # E-mail notifications: on failure only, not on retry.
    'email': ['me@example.com'],
    'email_on_failure': True,
    'email_on_retry': False,
    # Retry policy: retries are disabled (0); a 10-minute delay is
    # configured alongside it.
    'retries': 0,
    'retry_delay': timedelta(minutes=10),
    # Upper bound on a single task run.
    'execution_timeout': timedelta(hours=2),
}
# Values passed to each BashOperator as `params`; `params['script']` is also
# used as the operator's bash command.
# NOTE(review): the remaining keys look like spark-submit settings consumed
# by run.sh -- confirm against that script.
params = {
    # Launcher script and application to run.
    'script': './run.sh',
    'jar': '/path/to/my.jar',
    'class': 'com.example.MyApp',
    # Resource sizing.
    'memory_overhead': '1024',
    'executor_cores': '4',
    'num_executors': '4',
    'driver_memory': '2g',
    'executor_memory': '15g',
    # Cluster / submission settings.
    'master': 'yarn',
    'deploy_mode': 'client',
    'queue': 'myqueue',
    # Environment.
    'log_config_file': '/path/to/log4j.properties',
    'spark_home': '/path/to/spark2-client/bin',
    'proxy_user': 'myproxyuser',
    'spark_port_max_retries': 64,  # the only non-string value
}
# Cron expression: minute 10 of hour 0, every day.
# NOTE(review): the original author describes this as "Daily at 0010Hrs SGT";
# whether the cron is evaluated in SGT depends on the scheduler's timezone
# configuration -- confirm.
schedule_interval = '10 0 * * * '
dag_id = 'my_dag_id'

# Build the DAG with the shared task defaults and the schedule above.
dag = DAG(
    dag_id=dag_id,
    default_args=default_args,
    schedule_interval=schedule_interval,
)

# Also bind the DAG to a module-level name equal to its dag_id
# (presumably for DAG discovery; `dag` is already a module global).
globals()[dag_id] = dag
# Ventures to fan out over: one BashOperator task is created per entry.
# The single empty string is a placeholder; it yields the task id
# '<dag_id>__task'.
ventures = ['']
for v in ventures:
    # Give every task its own copy of the parameters. Writing the venture
    # into the shared module-level `params` dict and passing that same
    # object to each operator would leave all tasks pointing at one dict,
    # so every task would see whichever venture was written last.
    task_params = dict(params, venture=v)
    # NOTE(review): the command ends in '.sh'; presumably run.sh is meant to
    # be rendered as a template that reads `params` -- confirm.
    task = BashOperator(
        task_id=dag_id + '_' + v + '_task',
        bash_command=task_params['script'],
        params=task_params,
        dag=dag
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment