Skip to content

Instantly share code, notes, and snippets.

@cimentadaj
Created November 18, 2018 19:15
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save cimentadaj/6d82e27afe8b843817b0a65021a7b91f to your computer and use it in GitHub Desktop.
Save cimentadaj/6d82e27afe8b843817b0a65021a7b91f to your computer and use it in GitHub Desktop.
https://airflow.readthedocs.io/en/latest/start.html
# Install pip for python 3 and the PostgreSQL server
sudo apt-get install python3-pip
sudo apt-get install postgresql postgresql-contrib
# Create the PostgreSQL role that airflow will use.
# --pwprompt also asks for a password: the role needs one because airflow
# later connects over TCP with the URL
# postgresql+psycopg2://airflow:airflow@localhost:5432/airflow
sudo -u postgres createuser --interactive --pwprompt
# Answers to the prompts:
#   name: airflow
#   password: airflow   (must match the password in sql_alchemy_conn)
#   superuser: yes
# Create an Ubuntu (OS) user called airflow: to enter postgres with
# `sudo -u airflow`, the OS user needs to have the same name as the database role
sudo adduser airflow
# Type everything as airflow
sudo -u airflow createdb airflow
# Open a psql session on the new database. If it enters, then everything is ok
sudo -u airflow psql -d airflow
# Typed inside psql: list the databases. Should show the database airflow
\l
# Still inside the psql session:
# create the schema and assign all privileges on it to the airflow role
CREATE SCHEMA airflow;
GRANT CONNECT ON DATABASE airflow TO airflow;
GRANT USAGE ON SCHEMA airflow TO airflow;
# Privileges on the tables and sequences that already exist in the schema
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA airflow TO airflow;
GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA airflow TO airflow;
# Same privileges for tables and sequences created in the schema later on
ALTER DEFAULT PRIVILEGES IN SCHEMA airflow
GRANT ALL PRIVILEGES ON TABLES TO airflow;
ALTER DEFAULT PRIVILEGES IN SCHEMA airflow
GRANT ALL PRIVILEGES ON SEQUENCES TO airflow;
# Show the details of the current connection (must be run before quitting psql)
\conninfo
## End postgres
# Quit
\q
# Create python environment
# python3-pip provides pip3 (a plain `pip` may not exist), so call it explicitly
pip3 install --user virtualenv
# Make sure the airflow folder exists, then build the environment directly
# under its final name. A virtualenv must not be renamed with mv afterwards:
# bin/activate and the installed scripts hard-code the path it was created in.
mkdir -p ~/airflow
python3 -m virtualenv ~/airflow/airflow_env
# Activate environment
source ~/airflow/airflow_env/bin/activate
# install airflow
## Assign folder for airflow
export AIRFLOW_HOME=~/airflow
# Airflow 1.10 refuses to install until the unidecode licensing choice is made.
# AIRFLOW_GPL_UNIDECODE=yes accepts the GPL-licensed unidecode dependency;
# to exclude the GPL dependency instead, set SLUGIFY_USES_TEXT_UNIDECODE=yes.
export AIRFLOW_GPL_UNIDECODE=yes
# Quote the extras so the shell does not treat [...] as a glob pattern.
# No --user here: pip rejects --user installs inside an activated virtualenv.
pip3 install 'apache-airflow[postgres,s3,celery,rabbitmq]'
pip3 install cryptography
# Load airflow database
airflow initdb
# The previous command created a database in SQLite, but we want to use postgres.
# To switch, we need to change the file airflow.cfg in the airflow home folder
nano airflow/airflow.cfg
# Search for the keyword sql_alchemy_conn and replace its value with
postgresql+psycopg2://airflow:airflow@localhost:5432/airflow
# This will allow psycopg2 (python library) to access the database to create the tables.
# In the URL, airflow:airflow are the user and password, localhost:5432 is the
# server, and the trailing /airflow is the database name
# Run airflow initdb again so the tables get created in postgres
airflow initdb
# Check that the tables were indeed created (opens a psql session)
sudo -u airflow psql -d airflow
# Typed inside psql: list the tables
\dt
# All tables should be there
# Install backend for celery executor
sudo apt-get -y install redis-server
# Let redis accept connections on every interface and run under systemd.
# WARNING: with `bind` commented out and protected-mode off, redis is reachable
# from the network without authentication -- firewall port 6379 or set
# `requirepass` on anything that is not a private test machine.
# Patterns are anchored with ^ so that commented-out example lines are left
# alone and running the commands twice does not stack up `#` characters.
sudo sed -i "s|^bind |#bind |" /etc/redis/redis.conf
sudo sed -i "s|^protected-mode yes|protected-mode no|" /etc/redis/redis.conf
sudo sed -i "s|^supervised no|supervised systemd|" /etc/redis/redis.conf
# Restarting a system service needs root; redis-server is the name of the
# service installed by the Ubuntu package
sudo service redis-server restart
# Install python libraries
# (no --user: pip rejects --user installs inside an activated virtualenv)
pip3 install celery redis
# Absolute path to the airflow config file, so that the commands using it
# work from any directory. Falls back to ~/airflow when AIRFLOW_HOME is unset.
export AIRFLOW_CFG="${AIRFLOW_HOME:-$HOME/airflow}/airflow.cfg"
# Print the source address found in `ip route get` output read from stdin.
# The address is the field that follows the word "src". Taking the last field
# of the line is not reliable: current iproute2 appends "uid <n>" after the
# address, so the last field would be the uid. Prints nothing if there is no
# "src" field.
_route_src_addr() {
  awk '{ for (i = 1; i < NF; i++) if ($i == "src") { print $(i + 1); exit } }'
}
# IPv4 address this machine uses to reach the outside world
ip4addr="$(ip route get 8.8.8.8 | _route_src_addr)"
# Change some additional parameters from the airflow.cfg file.
# No sudo: the file belongs to the current user, and `sudo sed -i` would
# replace it with a file owned by root.
# Patterns are anchored with ^ so that only the setting itself is rewritten.
sed -i "s|^broker_url = .*|broker_url = redis://localhost:6379/0|" "$AIRFLOW_CFG"
# The key is called celery_result_backend before Airflow 1.10 and
# result_backend from 1.10 on; keep whichever name the file already uses
sed -i -E "s|^(celery_)?result_backend = .*|\1result_backend = redis://localhost:6379/0|" "$AIRFLOW_CFG"
sed -i "s|^executor = .*|executor = CeleryExecutor|" "$AIRFLOW_CFG"
sed -i "s|^load_examples = .*|load_examples = False|" "$AIRFLOW_CFG"
if [[ -n "$ip4addr" ]]; then
  sed -i "s|^web_server_host = .*|web_server_host = $ip4addr|" "$AIRFLOW_CFG"
  sed -i "s|^flower_host = .*|flower_host = $ip4addr|" "$AIRFLOW_CFG"
  # Replaces the first "localhost" on every line of the file with the machine
  # address, including the redis URLs written above.
  # NOTE(review): this also rewrites sql_alchemy_conn, so postgres presumably
  # has to accept connections on that address -- confirm
  sed -i "s|localhost|$ip4addr|" "$AIRFLOW_CFG"
else
  # An empty address would blank out the host settings, so leave them alone
  printf 'ip4addr is empty: host settings in %s were not changed\n' "$AIRFLOW_CFG" >&2
fi
# Create three different screens to launch the three airflow processes.
# Each `screen -S <name>` opens a new named session; the airflow command on
# the following line is typed inside that session (detach with Ctrl-a d
# before opening the next one).
screen -S airflow_scheduler
airflow scheduler
screen -S airflow_worker
airflow worker
screen -S airflow_webserver
# Give the webserver a temporary folder inside the current directory.
# -p: do not fail when tmp already exists (e.g. when relaunching)
mkdir -p tmp
export TMPDIR="$PWD/tmp"
airflow webserver
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment