Skip to content

Instantly share code, notes, and snippets.

@vak
Created August 20, 2018 14:56
Show Gist options
  • Save vak/766b3a7ad236fb0c819898718bfe522d to your computer and use it in GitHub Desktop.
Save vak/766b3a7ad236fb0c819898718bfe522d to your computer and use it in GitHub Desktop.
Custom bootstrap script to install Zeppelin 0.8 on AWS EMR (tested on EMR 5.16.0)
#!/bin/bash
# Downloads the Zeppelin 0.8.0 "bin-all" bundle, appends settings to its
# conf/zeppelin-env.sh, installs it under /usr/lib and starts the daemon.
#
# ATTENTION:
#
# 1. Ensure you have about 1 GB free on the storage backing /usr/lib/ for the
#    huge Zeppelin bundle chosen by default below, or choose a smaller bundle
#    from the Zeppelin web site.
#
# 2. Adjust the values of ZEPPELIN_NOTEBOOK_S3_BUCKET and
#    ZEPPELIN_NOTEBOOK_S3_USER if you need your Zeppelin notebooks persisted
#    to your S3 bucket; otherwise just remove the last three export lines,
#    the ones starting with 'export ZEPPELIN_NOTEBOOK_S'.

# Options are set here rather than on the shebang line so they still apply
# when the script is started as `bash script.sh`:
#   -e  abort on the first failing command
#   -x  trace every command as it runs
set -ex

# The bundle is downloaded to and unpacked in /tmp.
cd /tmp
# -O pins the output name: without it wget saves to "...tgz.1" when a tarball
# from an earlier run is still present, and tar below would unpack the stale
# file instead of the fresh download.
wget -O /tmp/zeppelin-0.8.0-bin-all.tgz 'http://apache.mirror.digionline.de/zeppelin/zeppelin-0.8.0/zeppelin-0.8.0-bin-all.tgz'
tar xf /tmp/zeppelin-0.8.0-bin-all.tgz
# Append Zeppelin's environment settings to the bundle's conf/zeppelin-env.sh.
#
# The heredoc delimiter is quoted ('EOF') so the text is written verbatim.
# With an unquoted delimiter this script's shell would expand
# $ZEPPELIN_PID_DIR and $SPARK_SUBMIT_OPTIONS while generating the file; if
# they are unset at that point the result is "ZEPPELIN_PID=/zeppelin.pid".
# Written literally, both are resolved when zeppelin-env.sh itself is sourced,
# after ZEPPELIN_PID_DIR has been exported two lines earlier.
#
# Set ZEPPELIN_NOTEBOOK_S3_BUCKET / ZEPPELIN_NOTEBOOK_S3_USER to your own
# values, or delete the three ZEPPELIN_NOTEBOOK_S* exports if notebooks should
# not be stored in S3.
cat <<'EOF' >>/tmp/zeppelin-0.8.0-bin-all/conf/zeppelin-env.sh
export ZEPPELIN_PORT=8890
export ZEPPELIN_CONF_DIR=/etc/zeppelin/conf
export ZEPPELIN_LOG_DIR=/var/log/zeppelin
export ZEPPELIN_PID_DIR=/var/run/zeppelin
export ZEPPELIN_PID=$ZEPPELIN_PID_DIR/zeppelin.pid
export ZEPPELIN_WAR_TEMPDIR=/var/run/zeppelin/webapps
export ZEPPELIN_NOTEBOOK_DIR=/var/lib/zeppelin/notebook
export MASTER=yarn-client
export SPARK_HOME=/usr/lib/spark
export HADOOP_CONF_DIR=/etc/hadoop/conf
export CLASSPATH=":/usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/usr/share/aws/sagemaker-spark-sdk/lib/sagemaker-spark-sdk.jar"
export SPARK_SUBMIT_OPTIONS="$SPARK_SUBMIT_OPTIONS --conf 'spark.executorEnv.PYTHONPATH=/usr/lib/spark/python/lib/py4j-src.zip:/usr/lib/spark/python/:<CPS>{{PWD}}/pyspark.zip<CPS>{{PWD}}/py4j-src.zip' --conf spark.yarn.isPython=true"
export ZEPPELIN_NOTEBOOK_S3_BUCKET=my_bucket_here
export ZEPPELIN_NOTEBOOK_S3_USER=my_zeppelin_notebook_user_here
export ZEPPELIN_NOTEBOOK_STORAGE=org.apache.zeppelin.notebook.repo.S3NotebookRepo
EOF
# Leave /tmp (cd with no argument goes to $HOME) before relocating the bundle.
cd
# Install the unpacked bundle under /usr/lib and expose its conf directory as
# /etc/zeppelin/conf, the ZEPPELIN_CONF_DIR written to zeppelin-env.sh.
sudo mv /tmp/zeppelin-0.8.0-bin-all /usr/lib
# -p: with the script's -e option a plain mkdir would abort the whole
# bootstrap if the directory is already present.
sudo mkdir -p /etc/zeppelin/
# Deliberately not forced: an existing /etc/zeppelin/conf still fails loudly
# instead of being silently replaced.
sudo ln -s /usr/lib/zeppelin-0.8.0-bin-all/conf /etc/zeppelin/
# PID and log directories named in zeppelin-env.sh, handed to the hadoop user
# (presumably the account this script and the daemon run as on EMR — confirm).
sudo mkdir -p /var/{run,log}/zeppelin/
sudo chown hadoop:hadoop /var/{run,log}/zeppelin/
# Started without sudo, i.e. as the user executing this script.
/usr/lib/zeppelin-0.8.0-bin-all/bin/zeppelin-daemon.sh start
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment