Skip to content

Instantly share code, notes, and snippets.

@qi-qi
Last active May 26, 2021 10:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save qi-qi/a64393bf82d9578e8d65729f1f77fddd to your computer and use it in GitHub Desktop.
Save qi-qi/a64393bf82d9578e8d65729f1f77fddd to your computer and use it in GitHub Desktop.
Airflow-ec2
# Provision an Ubuntu host for Airflow 1.10.2.
# Order matters: the OS build dependencies and pip itself are installed first,
# because pip3 is only guaranteed to exist after get-pip.py has run.
sudo apt-get update && sudo apt-get -y upgrade
sudo apt-get install -y curl build-essential autoconf libtool pkg-config git libxml2-dev libxslt1-dev libkrb5-dev libsasl2-dev libssl-dev libffi-dev locales netcat apt-utils libblas-dev libmysqlclient-dev libpq-dev liblapack-dev supervisor net-tools
sudo apt-get install -y python3.6-dev python3-distutils python3-kerberos python3-jenkins
# -f: fail on an HTTP error instead of saving the error page as get-pip.py.
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py
sudo python3 get-pip.py
sudo pip3 install --upgrade pip
# Make plain "python" resolve to python3.
sudo update-alternatives --install /usr/bin/python python /usr/bin/python3 10
sudo pip3 install --upgrade protobuf pyopenssl setuptools kerberos Flask-Login urllib3 requests pymysql psycopg2-binary requests-oauthlib google-cloud-core
# Quoted so the shell never treats [all] as a glob pattern.
sudo pip3 install --upgrade 'apache-airflow[all]==1.10.2'
sudo locale-gen && sudo update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8
=== psql
createdb
qi=# ALTER DATABASE airflow SET search_path = airflow,public;
qi=# ALTER ROLE username SET search_path = airflow,public;
===
## sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 10
## curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
## sudo python3 get-pip.py
## sudo apt install python3-testresources
==========
# ==> sudo chown airflow:acast /airflow
# Airflow settings for the current shell session: assign first, export once.
AIRFLOW_HOME=~/airflow
AIRFLOW_CONFIG="${AIRFLOW_HOME}/airflow.cfg"
SLUGIFY_USES_TEXT_UNIDECODE=yes
AIRFLOW_GPL_UNIDECODE=yes
export AIRFLOW_HOME AIRFLOW_CONFIG SLUGIFY_USES_TEXT_UNIDECODE AIRFLOW_GPL_UNIDECODE
system env => sudo nano /etc/environment
SLUGIFY_USES_TEXT_UNIDECODE=yes
AIRFLOW_GPL_UNIDECODE=yes
AIRFLOW_HOME=/data/airflow
AIRFLOW_CONFIG=/data/airflow.cfg
========== Performance: https://github.com/puckel/docker-airflow/issues/233 =====
AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL=60 # Prevents Airflow from reloading the DAGs all the time. This is the main setting that reduces CPU load in the scheduler.
AIRFLOW__SCHEDULER__SCHEDULER_MAX_THREADS=1 # This should be set to (CPU Cores - 1)
===========
# Connect to the Airflow metadata database (host is an RDS endpoint).
psql -U master -d airflow -h airflow-db.c4632pmsmr18.eu-west-1.rds.amazonaws.com
# -y keeps the install from stopping at the confirmation prompt.
sudo apt-get install -y libmysqlclient-dev libssl-dev libkrb5-dev libsasl2-dev
# Install Airflow from the v1-10-stable branch. GitHub no longer serves the
# unauthenticated git:// protocol, so fetch over https. The URL is quoted so
# the '#egg=...[all]' fragment reaches pip untouched by the shell.
sudo pip install 'git+https://github.com/apache/incubator-airflow.git@v1-10-stable#egg=apache-airflow[all]'
## sudo -H nano /etc/environment
AIRFLOW_CONFIG=$AIRFLOW_HOME/airflow.cfg
====== ubuntu =======
usermod -a -G examplegroup exampleusername
===== swap ====
https://www.digitalocean.com/community/tutorials/how-to-add-swap-space-on-ubuntu-18-04
# Create and enable a 20G swap file, then make it survive reboots.
sudo swapon --show   # list active swap before the change
free -h
sudo fallocate -l 20G /swapfile
ls -lh /swapfile
sudo chmod 600 /swapfile   # restrict the swap file to owner-only access
sudo mkswap /swapfile
sudo swapon /swapfile
sudo swapon --show   # confirm the new swap file is active
free -h
sudo cp /etc/fstab /etc/fstab.bak   # keep a backup before editing fstab
echo '/swapfile none swap sw 0 0' | sudo tee -a /etc/fstab
sudo sysctl vm.swappiness=10   # changes the running kernel only
# Persist the setting. A bare "vm.swappiness=10" line is not a valid shell
# command, so it is appended to /etc/sysctl.conf instead.
echo 'vm.swappiness=10' | sudo tee -a /etc/sysctl.conf
===== Swap (Script) =====
#!/bin/bash
# Unattended setup: create a 40G swap file, persist the swap entry and VM
# tuning, then install NTP and turn on time synchronisation.
#
# The steps are deliberately NOT chained with '&&'. Under 'set -e', a failure
# inside an '&&' list (anywhere but its last command) does not abort the
# script, so a chain here would let execution continue to the apt-get steps
# after a failed swap step. As separate commands, each failure stops the run.
set -ex
sudo fallocate -l 40G /swapfile
sudo chmod 600 /swapfile
sudo mkswap /swapfile
sudo swapon /swapfile
sudo cp /etc/fstab /etc/fstab.bak
echo '/swapfile none swap sw 0 0' | sudo tee -a /etc/fstab
echo 'vm.swappiness=15' | sudo tee -a /etc/sysctl.conf
echo 'vm.vfs_cache_pressure=50' | sudo tee -a /etc/sysctl.conf
sudo apt-get update
# -y: without it apt-get stops at the confirmation prompt when run unattended.
sudo apt-get install -y ntp
sudo timedatectl set-ntp on
===== EBS =====
https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-using-volumes.html
# Quoted so the shell cannot glob-expand the [extras] list before pip sees it.
pip install 'apache-airflow[all_dbs,async,celery,cloudant,crypto,devel,devel_hadoop,druid,gcp_api,hdfs,hive,jdbc,ldap,mssql,mysql,password,postgres,qds,rabbitmq,redis,s3,samba,slack,ssh,vertica]==1.10.0'
===== mysql 8 =====
https://www.tecmint.com/install-mysql-8-in-ubuntu/
===== dask =====
# Upgrade dask and distributed, then bokeh (system-wide via pip3).
pip install --upgrade dask distributed
sudo pip3 install --upgrade bokeh
====== ubuntu time sync =====
https://www.digitalocean.com/community/tutorials/how-to-set-up-time-synchronization-on-ubuntu-16-04
===== ubuntu add sudo user =====
https://www.digitalocean.com/community/tutorials/how-to-create-a-sudo-user-on-ubuntu-quickstart
# Create the airflow account and the acast group, then append both the
# airflow and ubuntu users to that group.
sudo adduser airflow
sudo addgroup acast
for account in airflow ubuntu; do
  sudo usermod --append --groups acast "$account"
done
===== ownership =====
# Hand /data to airflow:acast and grant owner and group read/write access.
sudo chown --recursive airflow:acast /data
sudo chmod --recursive u+rw,g+rw /data
====== Link: https://github.com/apache/airflow/tree/master/scripts/systemd =====
# Register Airflow's webserver and scheduler as systemd services.
# /bin and /run are root-owned, so these steps use sudo like the rest.
sudo ln -s /usr/local/bin/airflow /bin/airflow
sudo mkdir -p /run/airflow   # -p: no error if the directory already exists
sudo chmod 0775 /run/airflow/
sudo chown airflow:airflow /run/airflow/
# Create/edit the tmpfiles rule, the service environment file and both units.
sudo nano /etc/tmpfiles.d/airflow.conf
sudo nano /etc/default/airflow
sudo nano /etc/systemd/system/airflow-webserver.service
sudo nano /etc/systemd/system/airflow-scheduler.service
# Start both services automatically at boot.
sudo systemctl enable airflow-webserver.service
sudo systemctl enable airflow-scheduler.service
======= If using ELB ======
edit airflow.cfg
# Enable werkzeug `ProxyFix` middleware
enable_proxy_fix = True
# Provision an Ubuntu host for Airflow 1.10.2.
# Order matters: the OS build dependencies and pip itself are installed first,
# because pip3 is only guaranteed to exist after get-pip.py has run.
sudo apt-get update && sudo apt-get -y upgrade
sudo apt-get install -y curl build-essential autoconf libtool pkg-config git libxml2-dev libxslt1-dev libkrb5-dev libsasl2-dev libssl-dev libffi-dev locales netcat apt-utils libblas-dev libmysqlclient-dev libpq-dev liblapack-dev supervisor net-tools
sudo apt-get install -y python3.6-dev python3-distutils python3-kerberos python3-jenkins
# -f: fail on an HTTP error instead of saving the error page as get-pip.py.
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py
sudo python3 get-pip.py
sudo pip3 install --upgrade pip
# Make plain "python" resolve to python3.
sudo update-alternatives --install /usr/bin/python python /usr/bin/python3 10
sudo pip3 install --upgrade protobuf pyopenssl setuptools kerberos Flask-Login urllib3 requests pymysql psycopg2-binary requests-oauthlib google-cloud-core
# Quoted so the shell never treats [all] as a glob pattern.
sudo pip3 install --upgrade 'apache-airflow[all]==1.10.2'
sudo locale-gen && sudo update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8
=== psql
createdb
qi=# ALTER DATABASE airflow SET search_path = airflow,public;
qi=# ALTER ROLE username SET search_path = airflow,public;
===
## sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 10
## curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
## sudo python3 get-pip.py
## sudo apt install python3-testresources
==========
# ==> sudo chown airflow:acast /airflow
# Airflow settings for the current shell session: assign first, export once.
AIRFLOW_HOME=~/airflow
AIRFLOW_CONFIG="${AIRFLOW_HOME}/airflow.cfg"
SLUGIFY_USES_TEXT_UNIDECODE=yes
AIRFLOW_GPL_UNIDECODE=yes
export AIRFLOW_HOME AIRFLOW_CONFIG SLUGIFY_USES_TEXT_UNIDECODE AIRFLOW_GPL_UNIDECODE
system env => sudo nano /etc/environment
SLUGIFY_USES_TEXT_UNIDECODE=yes
AIRFLOW_GPL_UNIDECODE=yes
AIRFLOW_HOME=/data/airflow
AIRFLOW_CONFIG=/data/airflow/airflow.cfg
========== Performance: https://github.com/puckel/docker-airflow/issues/233 =====
AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL=60 # Prevents Airflow from reloading the DAGs all the time. This is the main setting that reduces CPU load in the scheduler.
AIRFLOW__SCHEDULER__SCHEDULER_MAX_THREADS=1 # This should be set to (CPU Cores - 1)
===========
# Connect to the Airflow metadata database (host is an RDS endpoint).
psql -U master -d airflow -h airflow-db.c4632pmsmr18.eu-west-1.rds.amazonaws.com
# -y keeps the install from stopping at the confirmation prompt.
sudo apt-get install -y libmysqlclient-dev libssl-dev libkrb5-dev libsasl2-dev
# Install Airflow from the v1-10-stable branch. GitHub no longer serves the
# unauthenticated git:// protocol, so fetch over https. The URL is quoted so
# the '#egg=...[all]' fragment reaches pip untouched by the shell.
sudo pip install 'git+https://github.com/apache/incubator-airflow.git@v1-10-stable#egg=apache-airflow[all]'
## sudo -H nano /etc/environment
AIRFLOW_CONFIG=$AIRFLOW_HOME/airflow.cfg
====== ubuntu =======
usermod -a -G examplegroup exampleusername
===== swap ====
https://www.digitalocean.com/community/tutorials/how-to-add-swap-space-on-ubuntu-18-04
# Create and enable a 20G swap file, then make it survive reboots.
sudo swapon --show   # list active swap before the change
free -h
sudo fallocate -l 20G /swapfile
ls -lh /swapfile
sudo chmod 600 /swapfile   # restrict the swap file to owner-only access
sudo mkswap /swapfile
sudo swapon /swapfile
sudo swapon --show   # confirm the new swap file is active
free -h
sudo cp /etc/fstab /etc/fstab.bak   # keep a backup before editing fstab
echo '/swapfile none swap sw 0 0' | sudo tee -a /etc/fstab
sudo sysctl vm.swappiness=10   # changes the running kernel only
# Persist the setting. A bare "vm.swappiness=10" line is not a valid shell
# command, so it is appended to /etc/sysctl.conf instead.
echo 'vm.swappiness=10' | sudo tee -a /etc/sysctl.conf
===== EBS =====
https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-using-volumes.html
# Quoted so the shell cannot glob-expand the [extras] list before pip sees it.
pip install 'apache-airflow[all_dbs,async,celery,cloudant,crypto,devel,devel_hadoop,druid,gcp_api,hdfs,hive,jdbc,ldap,mssql,mysql,password,postgres,qds,rabbitmq,redis,s3,samba,slack,ssh,vertica]==1.10.0'
===== mysql 8 =====
https://www.tecmint.com/install-mysql-8-in-ubuntu/
===== dask =====
# Upgrade dask and distributed, then bokeh (system-wide via pip3).
pip install --upgrade dask distributed
sudo pip3 install --upgrade bokeh
====== ubuntu time sync =====
https://www.digitalocean.com/community/tutorials/how-to-set-up-time-synchronization-on-ubuntu-16-04
===== ubuntu add sudo user =====
https://www.digitalocean.com/community/tutorials/how-to-create-a-sudo-user-on-ubuntu-quickstart
# Create the airflow account and the acast group, then append both the
# airflow and ubuntu users to that group.
sudo adduser airflow
sudo addgroup acast
for account in airflow ubuntu; do
  sudo usermod --append --groups acast "$account"
done
===== ownership =====
# Hand /data to airflow:acast and grant owner and group read/write access.
sudo chown --recursive airflow:acast /data
sudo chmod --recursive u+rw,g+rw /data
====== Link: https://github.com/apache/airflow/tree/master/scripts/systemd =====
# Register Airflow's webserver and scheduler as systemd services.
# /bin and /run are root-owned, so these steps use sudo like the rest.
sudo ln -s /usr/local/bin/airflow /bin/airflow
sudo mkdir -p /run/airflow   # -p: no error if the directory already exists
sudo chmod 0775 /run/airflow/
sudo chown airflow:airflow /run/airflow/
# Create/edit the tmpfiles rule, the service environment file and both units.
sudo nano /etc/tmpfiles.d/airflow.conf
sudo nano /etc/default/airflow
sudo nano /etc/systemd/system/airflow-webserver.service
sudo nano /etc/systemd/system/airflow-scheduler.service
# Start both services automatically at boot.
sudo systemctl enable airflow-webserver.service
sudo systemctl enable airflow-scheduler.service
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment