from airflow import DAG
from airflow.operators import BashOperator
from airflow.operators.sensors import TimeSensor
from datetime import datetime, timedelta, time
# Default arguments applied to every task of the DAG defined below.
default_args = {
    'owner': 'cedricm',
    'depends_on_past': True,
    'start_date': datetime(2016, 7, 30),
    # NOTE(review): the only recipient is an empty string while
    # 'email_on_failure' is enabled -- confirm the intended address.
    'email': [''],
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    # 'queue': 'bash_queue',
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

# DAG : Airflow
# Issue: when a daily DAG is scheduled at a specific time of day, the first
# day (start_date) runs twice: once at 00:00 and once at the specific hour.
# Workaround: schedule the DAG with "@daily" and put a TimeSensor upstream of
# the task that has to run at the specific hour.
dag = DAG(
    'daily_firstday_twice_workaround',
    default_args=default_args,
    max_active_runs=1,
    concurrency=1,
    schedule_interval="@daily")

# Gate for the downstream tasks: they only start once the sensor succeeds.
# NOTE(review): the original call was left unterminated and its target time
# was not recoverable; 06:00 is a placeholder -- set the intended hour.
taskWaitHour = TimeSensor(
    task_id='wait_specific_hour',
    target_time=time(6, 0),
    dag=dag)

# NOTE(review): the task_id values below were missing from the original
# (unterminated calls); 'task1' / 'task2' mirror the variable names -- confirm.
task1 = BashOperator(
    task_id='task1',
    bash_command='echo daily_firstday_twice_workaround_task1 ! Hello World',
    dag=dag)

task2 = BashOperator(
    task_id='task2',
    bash_command='echo daily_firstday_twice_workaround_task2 ! Good Morning World !!!',
    dag=dag)

# Execution order: wait for the target hour, then task1, then task2.
taskWaitHour >> task1 >> task2
