Skip to content

Instantly share code, notes, and snippets.

@dyno
Last active October 22, 2018 04:16
Show Gist options
  • Save dyno/b3b04eb634af3dec37084e358f6c365e to your computer and use it in GitHub Desktop.
Save dyno/b3b04eb634af3dec37084e358f6c365e to your computer and use it in GitHub Desktop.
local big data env
#!/usr/bin/env bash
# Setup environment variables required to execute hadoop/spark/zeppelin on Mac.
#
# Usage: source this file (it is not meant to be executed). Because it runs in
# the caller's shell it never calls `exit` and does not change shell options.
#
# Exports:  HADOOP_HOME, HADOOP_CONF_DIR, SPARK_HOME, SPARK_CONF_DIR,
#           SPARK_DIST_CLASSPATH, ZEPPELIN_HOME, ZEPPELIN_CONF_DIR
# Sets:     HADOOP_VERSION, SPARK_VERSION, ZEPPELIN_VERSION (not exported)
# Requires: Homebrew, curl, tar (jq is installed through Homebrew if missing).

#######################################
# Install a Homebrew formula unless it is already installed.
# Arguments: $1 - formula name
# Returns:   status of `brew install` when an install was needed, else 0
#######################################
_bde_brew_ensure() {
  # Query the exact formula instead of grepping the full `brew list` output,
  # which would also match unrelated formulae containing the same substring.
  brew list "$1" &>/dev/null || brew install "$1"
}

#######################################
# Print the version of the currently linked keg of a Homebrew formula.
# Arguments: $1 - formula name
# Outputs:   version string on stdout, diagnostics on stderr
# Returns:   non-zero when the formula has no linked keg
#######################################
_bde_brew_linked_version() {
  local version
  # `// empty` turns a JSON null (formula not linked) into an empty string
  # instead of the literal text "null".
  version=$(brew info "$1" --json=v1 | jq -r '.[0].linked_keg // empty') || return
  if [[ -z "$version" ]]; then
    printf 'error: no linked keg found for brew formula %s\n' "$1" >&2
    return 1
  fi
  printf '%s\n' "$version"
}

#######################################
# Make hadoop add the hadoop-aws tools to its classpath via ~/.hadooprc.
# The line is appended only when it is not already present, so sourcing this
# file repeatedly does not keep growing ~/.hadooprc.
# https://hadoop.apache.org/docs/current/hadoop-aws/tools/hadoop-aws/index.html
#######################################
_bde_enable_hadoop_aws() {
  local rc_file="${HOME}/.hadooprc"
  local rc_line='hadoop_add_to_classpath_tools hadoop-aws'
  if ! grep -qxF -- "$rc_line" "$rc_file" 2>/dev/null; then
    printf '%s\n' "$rc_line" >>"$rc_file"
  fi
}

#######################################
# Install the tools through Homebrew and export the environment variables
# listed in the file header.
# Returns: non-zero when a required tool or formula version cannot be resolved
#######################################
_bde_setup() {
  local cellar spark_tgz

  # jq is needed to parse the JSON printed by `brew info`.
  command -v jq >/dev/null || brew install jq || return

  # Ask Homebrew for its Cellar; fall back to the historical default location.
  cellar=$(brew --cellar) || cellar=/usr/local/Cellar

  # yarn and hadoop cannot be both installed.
  # Failure is ignored on purpose: yarn may simply not be installed.
  brew unlink yarn &>/dev/null || true

  _bde_brew_ensure hadoop
  HADOOP_VERSION=$(_bde_brew_linked_version hadoop) || return
  export HADOOP_HOME="${cellar}/hadoop/${HADOOP_VERSION}/libexec"
  export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"

  _bde_brew_ensure apache-spark
  SPARK_VERSION=$(_bde_brew_linked_version apache-spark) || return
  #export SPARK_HOME=${cellar}/apache-spark/${SPARK_VERSION}/libexec
  # http://spark.apache.org/docs/latest/hadoop-provided.html
  # https://issues.apache.org/jira/browse/HADOOP-12537, S3A to support Amazon STS temporary credentials
  # we need spark without hadoop because bundled hadoop 2.7.3 does not support STS.
  export SPARK_HOME="${HOME}/local/spark-${SPARK_VERSION}-bin-without-hadoop"
  if [[ ! -e "$SPARK_HOME" ]]; then
    spark_tgz="spark-${SPARK_VERSION}-bin-without-hadoop.tgz"
    # -f: fail on HTTP errors instead of saving the error page as the tarball.
    # The download is best-effort: on failure warn and keep setting variables.
    (mkdir -p "${HOME}/local" \
      && cd "${HOME}/local" \
      && curl -fLO "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${spark_tgz}" \
      && tar zxvf "${spark_tgz}") \
      || printf 'warning: could not download/extract %s\n' "$spark_tgz" >&2
  fi
  export SPARK_CONF_DIR="${SPARK_HOME}/conf"

  # Assign first and export afterwards so the command's status is not masked.
  SPARK_DIST_CLASSPATH=$(hadoop --config "$HADOOP_CONF_DIR" classpath)
  if [[ "$SPARK_DIST_CLASSPATH" != *aws* ]]; then
    _bde_enable_hadoop_aws
    SPARK_DIST_CLASSPATH=$(hadoop --config "$HADOOP_CONF_DIR" classpath)
  fi
  export SPARK_DIST_CLASSPATH

  _bde_brew_ensure apache-zeppelin
  ZEPPELIN_VERSION=$(_bde_brew_linked_version apache-zeppelin) || return
  export ZEPPELIN_HOME="${cellar}/apache-zeppelin/${ZEPPELIN_VERSION}/libexec"
  export ZEPPELIN_CONF_DIR="${ZEPPELIN_HOME}/conf"
}

# Only macOS is supported; on any other system this file is a no-op.
if [ "$(uname)" = "Darwin" ]; then
  _bde_setup
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment