Skip to content

Instantly share code, notes, and snippets.

@dapangmao
Last active September 1, 2020 16:28
Show Gist options
  • Save dapangmao/e3ab5cac2a70b95bc95d to your computer and use it in GitHub Desktop.
How to set up a Spark cluster on DigitalOcean
# Manual setup notes: connect to the droplet over VPN, then prepare the box.
sudo openvpn --config *.opvn
apt-get update
apt-get install -y vim

# Download Spark, then extract it.  These must be two separate steps:
# wget writes the tarball to disk, so the original `wget … | tar zxf`
# pipe fed tar no data and extracted nothing.
wget http://d3kbcqa49mib13.cloudfront.net/spark-1.3.0-bin-hadoop2.4.tgz
tar zxf spark-1.3.0-bin-hadoop2.4.tgz

# Stage the Spark tarball in HDFS so Mesos executors can fetch it.
hadoop fs -mkdir /spark
hadoop fs -put spark-1.3.0-bin-hadoop2.4.tgz /spark
hadoop fs -du -h /spark

# Point Spark at Mesos (run from the Spark conf/ directory).
cp spark-env.sh.template spark-env.sh
export MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so
export SPARK_EXECUTOR_URI=hdfs://10.132.70.244:50070/spark/spark-1.3.0-bin-hadoop2.4.tgz
export MASTER=mesos://10.132.70.244:5050/

# Smoke test: start a PySpark shell against the Mesos master.
# The two commented lines below are typed INSIDE the pyspark REPL,
# not at the shell prompt (running them as shell commands would fail).
./pyspark --master mesos://10.132.70.244:5050
# a = sc.parallelize(xrange(1, 100000, 2))
# b = a.collect()
#!/bin/bash
#
# Install Spark 1.3.0 on a Mesos/HDFS node and wire it to the cluster.
#
# Usage: ./install_spark.sh <master-ip>
# Env:   set IPYTHON (e.g. IPYTHON=yes) to also launch an IPython
#        notebook server backed by PySpark.
set -euo pipefail
set -x

MASTER_IP=${1:?usage: $0 <master-ip>}
SPARK_MASTER="mesos://zk://${MASTER_IP}:2181/mesos"
SPARK_DIR=spark-1.3.0-bin-hadoop2.4
SPARK_FILE=${SPARK_DIR}.tgz

# Install packages
# apt-get -y install tmux
# apt-get -y install mosh

# Get Spark and put it in HDFS.  Download and extraction are separate
# steps: wget writes the tarball to disk, so the original
# `wget … | tar zxf` pipe carried no data and extracted nothing.
wget "http://d3kbcqa49mib13.cloudfront.net/${SPARK_FILE}"
tar zxf "${SPARK_FILE}"
hdfs dfs -mkdir -p /tmp            # -p: don't fail if /tmp already exists
hdfs dfs -put "${SPARK_FILE}" /tmp
rm "${SPARK_FILE}"

# Configure Spark
cd "${SPARK_DIR}/conf"
cp spark-env.sh.template spark-env.sh
cat >> spark-env.sh <<EOF
export MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so
export SPARK_EXECUTOR_URI=hdfs://${MASTER_IP}/tmp/${SPARK_FILE}
export MASTER=${SPARK_MASTER}
export SPARK_LOCAL_IP=${MASTER_IP}
export SPARK_PUBLIC_DNS=${MASTER_IP}
EOF

cp spark-defaults.conf.template spark-defaults.conf
# Use ${SPARK_FILE} so the executor URI always matches the tarball we
# actually uploaded (the original hard-coded a stale 1.2.0 name here,
# so executors would have fetched a file that does not exist in HDFS).
cat >> spark-defaults.conf <<EOF
spark.executor.uri hdfs://${MASTER_IP}/tmp/${SPARK_FILE}
EOF

# Quieten Spark's console logging.
sed 's/log4j.rootCategory=INFO/log4j.rootCategory=WARN/' \
  < log4j.properties.template > log4j.properties
cd ..

# Update login environment.  $(pwd) pins an absolute path to the Spark
# install (the original baked in a relative path that only worked from
# the download directory); \$PATH is escaped so the user's PATH is
# extended at login time rather than frozen at install time.
cat >> ~/.profile <<EOF
export PATH=$(pwd)/bin:\$PATH
export SPARK_MASTER=${SPARK_MASTER}
EOF

# Run an IPython notebook server (opt-in: set IPYTHON non-empty).
if [[ -n "${IPYTHON:-}" ]]; then
  apt-get -y install python-dev
  pip install pyzmq
  pip install "ipython[notebook]"
  mkdir -p ~/notebook
  IPYTHON_OPTS="notebook --ip=0.0.0.0 --no-browser --notebook-dir=${HOME}/notebook --no-stdout --no-stderr " \
    bin/pyspark --master "${SPARK_MASTER}" &
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment