Install Spark 0.9.1 on an EMR cluster
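# Runs on every node of an EMR cluster (for example as a bootstrap action or
# cluster step): it unpacks Spark 0.9.1 and Scala 2.10.4 next to the existing
# Hadoop installation, writes spark-env.sh, and then starts either the
# standalone master or a worker, depending on the node's role.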
cd /home/hadoop/
# Download the pre-built Spark 0.9.1 distribution from S3 (adjust the bucket to wherever the tarball is hosted)
hadoop fs -copyToLocal s3://prediction-data/spark-jobs/dist/0.9.1/spark-0.9.1-emr.tgz ./spark-0.9.1-emr.tgz
# Download Scala 2.10.4
wget http://www.scala-lang.org/files/archive/scala-2.10.4.tgz
tar -xvzf scala-2.10.4.tgz
tar -xvzf spark-0.9.1-emr.tgz
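# Symlink the versioned directories to the stable paths used below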
ln -sf spark-0.9.1-emr spark
ln -sf /home/hadoop/scala-2.10.4 /home/hadoop/scala
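# The master IP comes from the JobTracker address in mapred-site.xml, Spark's
# scratch space from the instance-store mounts under /mnt, and this node's
# public hostname from the EC2 instance metadata service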
MASTER=$(grep -i "job.tracker<" /home/hadoop/conf/mapred-site.xml | grep -o '[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}')
SPACE=$(mount | grep mnt | awk '{print $3"/spark/"}' | xargs | sed 's/ /,/g')
PUB_HOSTNAME=$(GET http://169.254.169.254/latest/meta-data/public-hostname)
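# Write Spark's runtime configuration to spark-env.sh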
touch /home/hadoop/spark/conf/spark-env.sh
echo "export SPARK_CLASSPATH=/home/hadoop/spark/jars/*">> /home/hadoop/spark/conf/spark-env.sh
echo "export SPARK_MASTER_IP=$MASTER">> /home/hadoop/spark/conf/spark-env.sh
echo "export MASTER=spark://$MASTER:7077" >> /home/hadoop/spark/conf/spark-env.sh
echo "export SPARK_LIBRARY_PATH=/home/hadoop/native/Linux-amd64-64" >> /home/hadoop/spark/conf/spark-env.sh
echo "export SPARK_JAVA_OPTS=\"-Dspark.local.dir=$SPACE\"" >> /home/hadoop/spark/conf/spark-env.sh
echo "export SPARK_WORKER_DIR=/mnt/var/log/hadoop/userlogs/" >> /home/hadoop/spark/conf/spark-env.sh
echo "export SPARK_HOME=/home/hadoop/spark" >> /home/hadoop/spark/conf/spark-env.sh
echo "export SCALA_HOME=/home/hadoop/scala" >> /home/hadoop/spark/conf/spark-env.sh
#cp /home/hadoop/spark/conf/metrics.properties.aws /home/hadoop/spark/conf/metrics.properties
#cp /home/hadoop/lib/EmrMetrics*.jar /home/hadoop/spark/jars/
#cp /home/hadoop/lib/gson-* /home/hadoop/spark/jars/
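# Give Spark the cluster's Hadoop configuration and core jar so HDFS and S3 paths resolve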
cp /home/hadoop/conf/core-site.xml /home/hadoop/spark/conf/
cp /home/hadoop/hadoop-core.jar /home/hadoop/spark/jars/hadoop-core-1.0.4.jar
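# instance.json indicates whether this node is the EMR master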
if grep -Fq "\"isMaster\": true" /mnt/var/lib/info/instance.json; then
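# Master node: start the Spark standalone master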
/home/hadoop/spark/sbin/start-master.sh
else
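# Worker node: wait until the master accepts connections on port 7077, then start a worker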
until nc -z $MASTER 7077; do
echo "Can't connect to the master, sleeping for 20 seconds"
sleep 20
done
echo "Conneting to the master was successful"
echo "export SPARK_JAVA_OPTS=\"-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Dspark.local.dir=$SPACE\"" >> /home/hadoop/spark/conf/spark-env.sh
echo "export SPARK_PUBLIC_DNS=$PUB_HOSTNAME" >> /home/hadoop/spark/conf/spark-env.sh
/home/hadoop/spark/sbin/spark-daemon.sh start org.apache.spark.deploy.worker.Worker `hostname` spark://$MASTER:7077
fi