Skip to content

Instantly share code, notes, and snippets.

@andre3k1
Created August 4, 2013 20:25
Show Gist options
  • Save andre3k1/6151799 to your computer and use it in GitHub Desktop.
#!/bin/bash
# EMR bootstrap: install Scala + Spark and start the Spark daemons.
# Set shell options via `set` rather than on the shebang line, so they
# still apply when the script is invoked as `bash script.sh`.
#   -e: abort on any command failure; -v: echo each line as it is read.
set -ev
# Download and unpack Scala 2.9.3 (required to build Spark 0.7.x).
wget http://www.scala-lang.org/files/archive/scala-2.9.3.tgz
tar -zxf scala-2.9.3.tgz
rm -f scala-2.9.3.tgz   # a plain file: -r not needed
export SCALA_HOME=/home/hadoop/scala-2.9.3
# Download the Spark 0.7.2 source release and build it in place.
wget http://spark-project.org/files/spark-0.7.2-sources.tgz
tar -zxf spark-0.7.2-sources.tgz
rm -f spark-0.7.2-sources.tgz   # a plain file: -r not needed
mv spark-0.7.2 /home/hadoop/spark
# Guard the cd explicitly so a failed mv can never leave us building
# in the wrong directory (defensive even under `set -e`).
cd /home/hadoop/spark || exit 1
# EMR ships Hadoop 1.0.3 but Spark 0.7.2 defaults to 1.0.4 — patch the build.
sed -i 's/val HADOOP_VERSION = "1.0.4"/val HADOOP_VERSION = "1.0.3"/g' project/SparkBuild.scala
./sbt/sbt package
# Configure Spark.
# The Spark master runs on the same node as the Hadoop JobTracker:
# extract its IPv4 address from mapred-site.xml.
MASTER=$(grep -i "job.tracker<" /home/hadoop/conf/mapred-site.xml | grep -o '[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}')
# Reverse-resolve the IP to a DNS name: keep the last whitespace-separated
# field of `host` output and strip the trailing dot. Quote $MASTER, and do
# both edits in one sed process instead of two.
MASTER_DNS=$(host "$MASTER" | sed -e 's/^.* //' -e 's/\.$//')
# Use every mounted /mnt* volume as Spark scratch space: build a
# comma-separated list of <mountpoint>/spark/ paths.
SPACE=$(mount | grep mnt | awk '{print $3"/spark/"}' | xargs | sed 's/ /,/g')
# Write Spark's environment file. $MASTER_DNS and $SPACE expand now, at
# bootstrap time, so the generated file contains concrete values.
{
  printf '%s\n' '# Spark configuration variables'
  printf '%s\n' 'export SCALA_HOME=/home/hadoop/scala-2.9.3'
  printf '%s\n' "export MASTER=spark://$MASTER_DNS:7077"
  printf '%s\n' 'export SPARK_LIBRARY_PATH=/home/hadoop/native/Linux-amd64-64'
  printf '%s\n' "export SPARK_JAVA_OPTS=\"-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Dspark.local.dir=$SPACE\""
} > /home/hadoop/spark/conf/spark-env.sh
# Generate the properties file consumed by Spark jobs. AWS credentials
# are taken from the bootstrap environment ($AWS_ACCESS_KEY / $AWS_SECRET_KEY),
# which is assumed to provide them — they are not set in this script.
{
  printf 'spark.master=spark://%s:7077\n' "$MASTER_DNS"
  printf 'spark.home=/home/hadoop/spark\n'
  printf 'hdfs.root=hdfs://%s:9000\n' "$MASTER"
  printf 'aws.access.key=%s\n' "$AWS_ACCESS_KEY"
  printf 'aws.secret.key=%s\n' "$AWS_SECRET_KEY"
} > /home/hadoop/spark.properties
# Install Hadoop libraries: make Hadoop's gson / AWS SDK jars and its
# core-site.xml visible to Spark.
spark_jars=/home/hadoop/spark/lib_managed/jars
cp /home/hadoop/lib/gson-* "$spark_jars/"
cp /home/hadoop/lib/aws-java-sdk-* "$spark_jars/"
cp /home/hadoop/conf/core-site.xml /home/hadoop/spark/conf/
# Replace the bundled hadoop-core with EMR's 1.0.3 jar under the name
# the Spark build expects.
cp /home/hadoop/hadoop-core.jar "$spark_jars/hadoop-core-1.0.3.jar"
# Start master + slave instances.
# Test the grep directly in the `if` condition: the original ran
# `grep -Fq …` as a bare statement and then checked `$?`, which under the
# script's `set -e` aborts the whole script on slave nodes (where grep
# returns 1) before the branch is ever taken.
if grep -Fq '"isMaster":true' /mnt/var/lib/info/instance.json; then
  /home/hadoop/spark/bin/start-master.sh
else
  # Slave node: wait until the master accepts connections on port 7077
  # before registering. `until nc -z …` replaces the fragile
  # `$?`-driven while loop (whose bare `nc` also tripped `set -e`).
  until nc -z "$MASTER" 7077; do
    sleep 20
  done
  /home/hadoop/spark/bin/start-slave.sh 1 "spark://$MASTER_DNS:7077"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment