Airflow Celery Install
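Two companion scripts. The first sets up the Airflow master node: Anaconda, Airflow (<1.9) with the CeleryExecutor, a PostgreSQL metadata database, RabbitMQ as the Celery broker, Redis as the result backend, and an NFS export for DAGs and plugins. The second, AirflowWorker.sh, sets up a Celery worker that points at the master and mounts its NFS share.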
#!/bin/bash
USUARIO_SO="$(whoami)"
ANACONDA_URL="https://repo.anaconda.com/archive/Anaconda3-5.2.0-Linux-x86_64.sh"
_DB_PASSWORD="the password"  # default; override with -p
_IP=$(hostname -I | cut -d' ' -f1)
while getopts "a:p:h" opt; do
  case $opt in
    a) ANACONDA_URL="$OPTARG";;
    p) _DB_PASSWORD="$OPTARG";;
    h) cat <<EOF
All arguments are optional
-a anaconda url
-p password for airflow postgres user
-h this help
EOF
      exit 0
      ;;
    \?) echo "Invalid option -$OPTARG" >&2
      ;;
  esac
done
echo "Installation will be performed as $USUARIO_SO"
if [[ $(id -u) -eq 0 ]] ; then echo "This script must not be executed as root or with sudo (although the user must be a sudoer; the password will be asked for in some steps)" ; exit 1 ; fi
#Prerequisites installation:
while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do
  echo "Waiting for another package manager process to finish (dpkg lock is held)"
  sleep 1
done
sudo apt update && sudo apt upgrade -y
sudo apt install -y openssh-server git wget htop postgresql postgresql-client postgresql-contrib redis-server rabbitmq-server
if ! hash conda &> /dev/null; then
  mkdir -p ~/instaladores && wget -c -P "$HOME/instaladores" "$ANACONDA_URL"
  bash "$HOME/instaladores/${ANACONDA_URL##*/}" -b -p "$HOME/anaconda3"
  export PATH="$HOME/anaconda3/bin:$PATH"
  echo 'export PATH="$HOME/anaconda3/bin:$PATH"' >> "$HOME/.bashrc"
fi
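# Python dependencies: psycopg2 and the Redis client, then Airflow (<1.9), Celery (<4) and Flower from conda-forge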
conda install -y psycopg2 redis-py
conda install -y -c conda-forge "airflow<1.9" "celery<4" flower
if [[ -z "${AIRFLOW_HOME}" ]]; then
  export AIRFLOW_HOME="$HOME/airflow"
  echo "export AIRFLOW_HOME='$HOME/airflow'" >> "$HOME/.bashrc"
fi
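# First initdb creates a default airflow.cfg (SQLite backend); it is rewritten below and initdb is re-run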
airflow initdb
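# Create the PostgreSQL database and role for the Airflow metadata DB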
sudo -u postgres createdb airflow
sudo -u postgres createuser airflow
sudo -u postgres psql airflow -c "alter user airflow with encrypted password '$_DB_PASSWORD';"
sudo -u postgres psql airflow -c "grant all privileges on database airflow to airflow;"
# Configure PostgreSQL to accept remote connections
_HBA=$(sudo -u postgres psql -t -P format=unaligned -c 'show hba_file')
_CONFIG=$(sudo -u postgres psql -t -P format=unaligned -c 'show config_file')
mkdir -p "$HOME/pg_backup"
sudo cp "$_HBA" "$HOME/pg_backup"
cp "$_CONFIG" "$HOME/pg_backup"
sudo su -c "echo 'host all all 0.0.0.0/0 md5' >>$_HBA"
sudo sed -i "s/#listen_addresses = 'localhost'/listen_addresses = '*'/" "$_CONFIG"
sudo systemctl restart postgresql.service
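# Rewrite airflow.cfg: PostgreSQL metadata DB, CeleryExecutor, RabbitMQ broker, Redis result backend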
sed -i "s%sql_alchemy_conn.*%sql_alchemy_conn = postgresql+psycopg2://airflow:$_DB_PASSWORD@$_IP:5432/airflow%" "$AIRFLOW_HOME/airflow.cfg"
sed -i "s%executor =.*%executor = CeleryExecutor%" "$AIRFLOW_HOME/airflow.cfg"
sed -i "s%broker_url =.*%broker_url = amqp://airflow:airflow@$_IP/airflow%" "$AIRFLOW_HOME/airflow.cfg"
sed -i "s%celery_result_backend =.*%celery_result_backend = redis://$_IP:6379/0%" "$AIRFLOW_HOME/airflow.cfg"
sudo sed -i "s%bind .*%bind $_IP%" "/etc/redis/redis.conf"
sudo systemctl restart redis-server
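# Create the RabbitMQ user and vhost used by Celery, and enable the management plugin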
sudo rabbitmqctl add_user airflow airflow
sudo rabbitmqctl add_vhost airflow
sudo rabbitmqctl set_permissions -p airflow airflow ".*" ".*" ".*"
sudo rabbitmq-plugins enable rabbitmq_management
sudo rabbitmqctl set_user_tags airflow airflow_tag administrator
sudo service rabbitmq-server restart
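# Export dags/ and plugins/ read-only over NFS so workers share the same DAG files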
sudo apt-get install -y nfs-kernel-server
sudo mkdir -p /nfs-share/{dags,plugins}
sudo chown -R "$USUARIO_SO" /nfs-share
sudo sh -c "echo '/nfs-share *(ro,sync,no_subtree_check)'>> /etc/exports"
sudo service nfs-kernel-server restart
ln -s /nfs-share/dags "$AIRFLOW_HOME/dags"
ln -s /nfs-share/plugins "$AIRFLOW_HOME/plugins"
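# Drop a minimal example DAG into the shared dags folder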
cat <<EOF >"$AIRFLOW_HOME/dags/dummy.py"
import airflow
from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator
from datetime import timedelta
args = {
    'owner': 'airflow',
    'start_date': airflow.utils.dates.days_ago(2)
}
dag = DAG(
    dag_id='example_dummy', default_args=args,
    schedule_interval=None,
    dagrun_timeout=timedelta(minutes=1))
run_this_last = DummyOperator(task_id='DOES_NOTHING', dag=dag)
EOF
echo "You must open the appropriated ports to allow workers to connect to postgres, redis, rabbit and nfs"
airflow initdb
airflow scheduler -D
airflow webserver -p 8080 -D
airflow worker -D
echo "you should do a source .bashrc to reload the environment variables"
#!/bin/bash
USUARIO_SO="$(whoami)"
ANACONDA_URL="https://repo.anaconda.com/archive/Anaconda3-5.2.0-Linux-x86_64.sh"
_DB_PASSWORD="the password"  # default; override with -p
while getopts "a:p:h" opt; do
  case $opt in
    a) ANACONDA_URL="$OPTARG";;
    p) _DB_PASSWORD="$OPTARG";;
    h) cat <<EOF
Usage: AirflowWorker.sh [options] master_address
master_address is required; all options are optional
-a anaconda url
-p password for airflow postgres user
-h this help
EOF
      exit 0
      ;;
    \?) echo "Invalid option -$OPTARG" >&2
      ;;
  esac
done
_IP=${@:$OPTIND:1}
if [ -z "$_IP" ]; then
  echo "You must provide the master address"
  exit 1
fi
echo "Installation will be performed as $USUARIO_SO"
if [[ $(id -u) -eq 0 ]] ; then echo "This script must not be executed as root or with sudo (although the user must be a sudoer; the password will be asked for in some steps)" ; exit 1 ; fi
#Prerequisites installation:
while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do
  echo "Waiting for another package manager process to finish (dpkg lock is held)"
  sleep 1
done
sudo apt update && sudo apt upgrade -y
sudo apt install -y openssh-server git wget htop
if ! hash conda &> /dev/null; then
  mkdir -p ~/instaladores && wget -c -P "$HOME/instaladores" "$ANACONDA_URL"
  bash "$HOME/instaladores/${ANACONDA_URL##*/}" -b -p "$HOME/anaconda3"
  export PATH="$HOME/anaconda3/bin:$PATH"
  echo 'export PATH="$HOME/anaconda3/bin:$PATH"' >> "$HOME/.bashrc"
fi
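# Same Airflow (<1.9) and Celery (<4) versions as the master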
conda install -y psycopg2 redis-py
conda install -y -c conda-forge "airflow<1.9" "celery<4"
if [[ -z "${AIRFLOW_HOME}" ]]; then
  export AIRFLOW_HOME="$HOME/airflow"
  echo "export AIRFLOW_HOME='$HOME/airflow'" >> "$HOME/.bashrc"
fi
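# initdb creates a default airflow.cfg; the settings below are rewritten to point at the master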
airflow initdb
# Point Airflow at the PostgreSQL, RabbitMQ and Redis services on the master node
sed -i "s%sql_alchemy_conn.*%sql_alchemy_conn = postgresql+psycopg2://airflow:$_DB_PASSWORD@$_IP:5432/airflow%" "$AIRFLOW_HOME/airflow.cfg"
sed -i "s%executor =.*%executor = CeleryExecutor%" "$AIRFLOW_HOME/airflow.cfg"
sed -i "s%broker_url =.*%broker_url = amqp://airflow:airflow@$_IP/airflow%" "$AIRFLOW_HOME/airflow.cfg"
sed -i "s%celery_result_backend =.*%celery_result_backend = redis://$_IP:6379/0%" "$AIRFLOW_HOME/airflow.cfg"
sudo apt-get install -y nfs-common
sudo mkdir -p /nfs-share/
sudo chown -R "$USUARIO_SO" /nfs-share
sudo sh -c "echo '$_IP:/nfs-share /nfs-share nfs auto,user,exec,ro,async,atime 0 0'>> /etc/fstab"
sudo mount -a
ln -s /nfs-share/dags "$AIRFLOW_HOME/dags"
ln -s /nfs-share/plugins "$AIRFLOW_HOME/plugins"
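# Start the Celery worker daemon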
airflow worker -D
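Example invocation, with a hypothetical master address: bash AirflowWorker.sh -p 'the password' 192.168.0.10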