Skip to content

Instantly share code, notes, and snippets.

@maxious
Last active May 22, 2019 04:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save maxious/a7ee3bc03b0e193af858749860c5455e to your computer and use it in GitHub Desktop.
Save maxious/a7ee3bc03b0e193af858749860c5455e to your computer and use it in GitHub Desktop.
Data Plumbing
# Placeholder identifier: replace with your own name (all lower case, one
# word). It is interpolated into the CSV, R script and PNG file names used by
# the RUN_R_SCRIPT shell command further down this file.
NAME = "yourname"
import datetime
import pandas
from airflow import models
from airflow.operators.python_operator import PythonOperator
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
# Local data directory path; not referenced elsewhere in the visible part of
# this file.
DATA_DIR = '/home/airflow/gcs/data/'
# Cloud Storage bucket that the shell command below reads from and writes to.
GCS_BUCKET = 'us-east1-dta-airflow-b3415db4-bucket'
# Container image named by the (currently commented-out) KubernetesPodOperator.
DOCKER_IMAGE = 'gcr.io/dta-ga-bigquery/galileo'

# Single-element argument list holding one shell command line. The steps are
# chained with `&&`, so a failing step stops the ones after it:
#   1. copy {NAME}.csv from the bucket's data/ folder into the working dir,
#   2. copy {NAME}.R from the bucket's dags/ folder,
#   3. run the R script,
#   4. upload the resulting {NAME}.png back to the bucket's data/ folder.
RUN_R_SCRIPT = [
    ' && '.join([
        'gsutil cp gs://{GCS_BUCKET}/data/{NAME}.csv .',
        'gsutil cp gs://{GCS_BUCKET}/dags/{NAME}.R .',
        'R -f {NAME}.R',
        'gsutil cp {NAME}.png gs://{GCS_BUCKET}/data/',
    ]).format(GCS_BUCKET=GCS_BUCKET, NAME=NAME)
]
def fix_my_data() -> None:
    """Placeholder data-cleaning step used as the PythonOperator callable.

    Currently it only prints a TODO reminder to standard output; replace the
    body with the real data clean-up logic.
    """
    print("# TODO fix your data up here")
# Define the DAG. Its id is taken from NAME so that changing the placeholder
# at the top of the file renames the DAG as well as the files it processes.
# Per the Airflow documentation, schedule_interval=None means the DAG only
# runs when triggered, and catchup=False disables backfilling of past
# intervals since start_date.
with models.DAG(
        NAME,
        catchup=False,
        schedule_interval=None,
        default_args={'start_date': datetime.datetime(2019, 5, 13)},
) as dag:
    # NOTE: this rebinds the module-level name `fix_my_data` from the function
    # to the operator. The right-hand side is evaluated first, so the function
    # is still what gets passed as python_callable.
    fix_my_data = PythonOperator(task_id='fix-my-data', python_callable=fix_my_data)
    # Optional second task (disabled): runs RUN_R_SCRIPT via `bash -c` inside
    # the DOCKER_IMAGE container. Uncomment to enable it.
    # graph_my_data = KubernetesPodOperator(task_id='graph-my-data',name='graph-my-data',namespace='default', image=DOCKER_IMAGE, cmds=['bash', '-c'], arguments=RUN_R_SCRIPT)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment