Last active
May 26, 2021 10:06
-
-
Save qi-qi/a64393bf82d9578e8d65729f1f77fddd to your computer and use it in GitHub Desktop.
Airflow-ec2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sudo apt-get update && sudo apt-get -y upgrade | |
sudo pip3 install --upgrade apache-airflow[all]==1.10.2 | |
sudo apt-get install -y curl build-essential autoconf libtool pkg-config git libxml2-dev libxslt1-dev libkrb5-dev libsasl2-dev libssl-dev libffi-dev locales netcat apt-utils libblas-dev libmysqlclient-dev libpq-dev liblapack-dev supervisor net-tools | |
sudo apt-get install -y python3.6-dev python3-distutils python3-kerberos python3-jenkins | |
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py | |
sudo python3 get-pip.py | |
sudo pip3 install --upgrade pip | |
sudo update-alternatives --install /usr/bin/python python /usr/bin/python3 10 | |
sudo pip3 install --upgrade protobuf pyopenssl setuptools kerberos Flask-Login urllib3 requests pymysql psycopg2-binary requests-oauthlib google-cloud-core | |
sudo locale-gen && sudo update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 | |
=== psql | |
createdb | |
qi=# ALTER DATABASE airflow SET search_path = airflow,public; | |
qi=# ALTER ROLE username SET search_path = airflow,public; | |
=== | |
## sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 10 | |
## curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py | |
## sudo python3 get-pip.py | |
## sudo apt install python3-testresources | |
========== | |
# ==> sudo chown airflow:acast /airflow | |
export AIRFLOW_HOME=~/airflow | |
export SLUGIFY_USES_TEXT_UNIDECODE=yes | |
export AIRFLOW_GPL_UNIDECODE=yes | |
export AIRFLOW_CONFIG=$AIRFLOW_HOME/airflow.cfg | |
system env => sudo nano /etc/environment | |
SLUGIFY_USES_TEXT_UNIDECODE=yes | |
AIRFLOW_GPL_UNIDECODE=yes | |
AIRFLOW_HOME=/data/airflow | |
AIRFLOW_CONFIG=/data/airflow.cfg | |
========== Performance: https://github.com/puckel/docker-airflow/issues/233 ===== | |
AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL=60 # Prevent Airflow from reloading the DAGs all the time. This is the main setting that reduces CPU load in the scheduler | |
AIRFLOW__SCHEDULER__SCHEDULER_MAX_THREADS=1 # This should be set to (CPU Cores - 1) | |
=========== | |
psql -U master -d airflow -h airflow-db.c4632pmsmr18.eu-west-1.rds.amazonaws.com | |
sudo apt-get install libmysqlclient-dev libssl-dev libkrb5-dev libsasl2-dev | |
sudo pip install git+https://github.com/apache/incubator-airflow.git@v1-10-stable#egg=apache-airflow[all] | |
## sudo -H nano /etc/environment | |
AIRFLOW_CONFIG=$AIRFLOW_HOME/airflow.cfg | |
====== ubuntu ======= | |
usermod -a -G examplegroup exampleusername | |
===== swap ==== | |
https://www.digitalocean.com/community/tutorials/how-to-add-swap-space-on-ubuntu-18-04 | |
sudo swapon --show | |
free -h | |
sudo fallocate -l 20G /swapfile | |
ls -lh /swapfile | |
sudo chmod 600 /swapfile | |
sudo mkswap /swapfile | |
sudo swapon /swapfile | |
sudo swapon --show | |
free -h | |
sudo cp /etc/fstab /etc/fstab.bak | |
echo '/swapfile none swap sw 0 0' | sudo tee -a /etc/fstab | |
sudo sysctl vm.swappiness=10 | |
vm.swappiness=10   # add this line to /etc/sysctl.conf to persist the setting across reboots | |
===== Swap (Script) ===== | |
#!/bin/bash | |
set -ex | |
sudo fallocate -l 40G /swapfile && | |
sudo chmod 600 /swapfile && | |
sudo mkswap /swapfile && | |
sudo swapon /swapfile && | |
sudo cp /etc/fstab /etc/fstab.bak && | |
echo '/swapfile none swap sw 0 0' | sudo tee -a /etc/fstab && | |
echo 'vm.swappiness=15' | sudo tee -a /etc/sysctl.conf && | |
echo 'vm.vfs_cache_pressure=50' | sudo tee -a /etc/sysctl.conf | |
sudo apt-get update | |
sudo apt-get install -y ntp | |
sudo timedatectl set-ntp on | |
===== EBS ===== | |
https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-using-volumes.html | |
pip install apache-airflow[all_dbs,async,celery,cloudant,crypto,devel,devel_hadoop,druid,gcp_api,hdfs,hive,jdbc,ldap,mssql,mysql,password,postgres,qds,rabbitmq,redis,s3,samba,slack,ssh,vertica]==1.10.0 | |
===== mysql 8 ===== | |
https://www.tecmint.com/install-mysql-8-in-ubuntu/ | |
===== dask ===== | |
pip install dask distributed --upgrade | |
sudo pip3 install bokeh --upgrade | |
====== ubuntu time sync ===== | |
https://www.digitalocean.com/community/tutorials/how-to-set-up-time-synchronization-on-ubuntu-16-04 | |
===== ubuntu add sudo user ===== | |
https://www.digitalocean.com/community/tutorials/how-to-create-a-sudo-user-on-ubuntu-quickstart | |
sudo adduser airflow | |
sudo addgroup acast | |
sudo usermod -G acast -a airflow | |
sudo usermod -G acast -a ubuntu | |
===== ownership ===== | |
sudo chown -R airflow:acast /data | |
sudo chmod -R ug+rw /data | |
====== Link: https://github.com/apache/airflow/tree/master/scripts/systemd ===== | |
ln -s /usr/local/bin/airflow /bin/airflow | |
mkdir /run/airflow | |
chmod 0775 /run/airflow/ | |
chown airflow:airflow /run/airflow/ | |
sudo nano /etc/tmpfiles.d/airflow.conf | |
sudo nano /etc/default/airflow | |
sudo nano /etc/systemd/system/airflow-webserver.service | |
sudo nano /etc/systemd/system/airflow-scheduler.service | |
sudo systemctl enable airflow-webserver.service | |
sudo systemctl enable airflow-scheduler.service | |
======= If using ELB ====== | |
edit airflow.cfg | |
# Enable werkzeug `ProxyFix` middleware | |
enable_proxy_fix = True | |
sudo apt-get update && sudo apt-get -y upgrade | |
sudo pip3 install --upgrade apache-airflow[all]==1.10.2 | |
sudo apt-get install -y curl build-essential autoconf libtool pkg-config git libxml2-dev libxslt1-dev libkrb5-dev libsasl2-dev libssl-dev libffi-dev locales netcat apt-utils libblas-dev libmysqlclient-dev libpq-dev liblapack-dev supervisor net-tools | |
sudo apt-get install -y python3.6-dev python3-distutils python3-kerberos python3-jenkins | |
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py | |
sudo python3 get-pip.py | |
sudo pip3 install --upgrade pip | |
sudo update-alternatives --install /usr/bin/python python /usr/bin/python3 10 | |
sudo pip3 install --upgrade protobuf pyopenssl setuptools kerberos Flask-Login urllib3 requests pymysql psycopg2-binary requests-oauthlib google-cloud-core | |
sudo locale-gen && sudo update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 | |
=== psql | |
createdb | |
qi=# ALTER DATABASE airflow SET search_path = airflow,public; | |
qi=# ALTER ROLE username SET search_path = airflow,public; | |
=== | |
## sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 10 | |
## curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py | |
## sudo python3 get-pip.py | |
## sudo apt install python3-testresources | |
========== | |
# ==> sudo chown airflow:acast /airflow | |
export AIRFLOW_HOME=~/airflow | |
export SLUGIFY_USES_TEXT_UNIDECODE=yes | |
export AIRFLOW_GPL_UNIDECODE=yes | |
export AIRFLOW_CONFIG=$AIRFLOW_HOME/airflow.cfg | |
system env => sudo nano /etc/environment | |
SLUGIFY_USES_TEXT_UNIDECODE=yes | |
AIRFLOW_GPL_UNIDECODE=yes | |
AIRFLOW_HOME=/data/airflow | |
AIRFLOW_CONFIG=$AIRFLOW_HOME/airflow.cfg | |
========== Performance: https://github.com/puckel/docker-airflow/issues/233 ===== | |
AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL=60 # Prevent Airflow from reloading the DAGs all the time. This is the main setting that reduces CPU load in the scheduler | |
AIRFLOW__SCHEDULER__SCHEDULER_MAX_THREADS=1 # This should be set to (CPU Cores - 1) | |
=========== | |
psql -U master -d airflow -h airflow-db.c4632pmsmr18.eu-west-1.rds.amazonaws.com | |
sudo apt-get install libmysqlclient-dev libssl-dev libkrb5-dev libsasl2-dev | |
sudo pip install git+https://github.com/apache/incubator-airflow.git@v1-10-stable#egg=apache-airflow[all] | |
## sudo -H nano /etc/environment | |
AIRFLOW_CONFIG=$AIRFLOW_HOME/airflow.cfg | |
====== ubuntu ======= | |
usermod -a -G examplegroup exampleusername | |
===== swap ==== | |
https://www.digitalocean.com/community/tutorials/how-to-add-swap-space-on-ubuntu-18-04 | |
sudo swapon --show | |
free -h | |
sudo fallocate -l 20G /swapfile | |
ls -lh /swapfile | |
sudo chmod 600 /swapfile | |
sudo mkswap /swapfile | |
sudo swapon /swapfile | |
sudo swapon --show | |
free -h | |
sudo cp /etc/fstab /etc/fstab.bak | |
echo '/swapfile none swap sw 0 0' | sudo tee -a /etc/fstab | |
sudo sysctl vm.swappiness=10 | |
vm.swappiness=10   # add this line to /etc/sysctl.conf to persist the setting across reboots | |
===== EBS ===== | |
https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-using-volumes.html | |
pip install apache-airflow[all_dbs,async,celery,cloudant,crypto,devel,devel_hadoop,druid,gcp_api,hdfs,hive,jdbc,ldap,mssql,mysql,password,postgres,qds,rabbitmq,redis,s3,samba,slack,ssh,vertica]==1.10.0 | |
===== mysql 8 ===== | |
https://www.tecmint.com/install-mysql-8-in-ubuntu/ | |
===== dask ===== | |
pip install dask distributed --upgrade | |
sudo pip3 install bokeh --upgrade | |
====== ubuntu time sync ===== | |
https://www.digitalocean.com/community/tutorials/how-to-set-up-time-synchronization-on-ubuntu-16-04 | |
===== ubuntu add sudo user ===== | |
https://www.digitalocean.com/community/tutorials/how-to-create-a-sudo-user-on-ubuntu-quickstart | |
sudo adduser airflow | |
sudo addgroup acast | |
sudo usermod -G acast -a airflow | |
sudo usermod -G acast -a ubuntu | |
===== ownership ===== | |
sudo chown -R airflow:acast /data | |
sudo chmod -R ug+rw /data | |
====== Link: https://github.com/apache/airflow/tree/master/scripts/systemd ===== | |
ln -s /usr/local/bin/airflow /bin/airflow | |
mkdir /run/airflow | |
chmod 0775 /run/airflow/ | |
chown airflow:airflow /run/airflow/ | |
sudo nano /etc/tmpfiles.d/airflow.conf | |
sudo nano /etc/default/airflow | |
sudo nano /etc/systemd/system/airflow-webserver.service | |
sudo nano /etc/systemd/system/airflow-scheduler.service | |
sudo systemctl enable airflow-webserver.service | |
sudo systemctl enable airflow-scheduler.service |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment