Vagrantfile for Spark 2.3 on YARN with CentOS 7 and Hadoop 2.8 (3 hosts)
#!/bin/sh
# Bring the worker nodes up before the master: master.sh copies SSH keys and
# the Hadoop configuration to node1/node2 while provisioning, so both workers
# must already be running.
vagrant up node1 node2 master
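
Once the command returns, all three VMs should be running; a quick check from the host:

vagrant status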
#!/bin/bash
# master.sh — provisions the master node (invoked from the Vagrantfile with the hostname as $1).
# bash rather than sh: the script uses source and brace expansion below.
set -x
HOSTNAME=$1
: "Set hostname" && {
    sudo hostname "$HOSTNAME"
    echo "$HOSTNAME" | sudo tee /etc/hostname > /dev/null
}
: "Edit hosts file" && {
grep 192.168.33.10 /etc/hosts > /dev/null
if [ $? -ne 0 ] ; then
cat << 'EOF' | sudo tee -a /etc/hosts > /dev/null
192.168.33.10 master
192.168.33.11 node1
192.168.33.12 node2
EOF
fi
}
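# Optional: confirm that the static entries resolve; getent queries the same
# NSS lookup path the Hadoop daemons will use.
: "Check name resolution" && {
    getent hosts master node1 node2
}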
: "Install common packages" && {
sudo yum -y install epel-release
sudo yum -y install java-1.8.0-openjdk-devel openssh-clients rsync wget sshpass
}
: "Download Hadoop" && {
ls | grep hadoop-*.tar.gz > /dev/null
if [ $? -ne 0 ]; then
wget http://ftp.riken.jp/net/apache/hadoop/common/hadoop-2.8.3/hadoop-2.8.3.tar.gz -nv
tar xf hadoop-2.8.3.tar.gz
fi
}
: "Download Apache Spark" && {
ls | grep spark-*.tgz > /dev/null
if [ $? -ne 0 ]; then
wget http://ftp.riken.jp/net/apache/spark/spark-2.3.0/spark-2.3.0-bin-hadoop2.7.tgz -nv
tar xf spark-2.3.0-bin-hadoop2.7.tgz
fi
}
: "Set environment variables to shell RC file" && {
grep JAVA_HOME /etc/hosts > /dev/null
if [ $? -ne 0 ]; then
cat << 'EOF' >> ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk
export HADOOP_HOME=~/hadoop-2.8.3
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$PATH
export SPARK_HOME=~/spark-2.3.0-bin-hadoop2.7
export PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$JAVA_HOME/bin:$PATH
EOF
fi
source ~/.bashrc
}
: "Hadoop execution check" && {
hadoop version
}
: "Install SSH public key to all nodes" && {
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
for node in master node1 node2; do
sshpass -p "vagrant" ssh-copy-id -i ~/.ssh/id_rsa.pub -o "StrictHostKeyChecking no" $node
done;
}
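# Optional: prove that key-based login works before any config is pushed out;
# BatchMode makes ssh fail fast instead of prompting for a password.
: "Check passwordless SSH" && {
    for node in master node1 node2; do
        ssh -o BatchMode=yes "$node" hostname
    done
}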
: "Setting configuration files" && {
: "etc/hadoop/slaves" && {
cat << 'EOF' > $HADOOP_HOME/etc/hadoop/slaves
node1
node2
EOF
}
: "etc/hadoop/core-site.xml" && {
grep fs.defaultFS $HADOOP_HOME/etc/hadoop/core-site.xml
if [ $? -ne 0 ]; then
cat << 'EOF' > /tmp/core-site.xml.property
<property>
<name>fs.defaultFS</name>
<value>hdfs://192.168.33.10:9000</value>
</property>
EOF
sed -i -e '
/^<configuration>$/r /tmp/core-site.xml.property
/^$/d
' $HADOOP_HOME/etc/hadoop/core-site.xml
fi
}
: "etc/hadoop/hdfs-site.xml" && {
grep dfs.replication $HADOOP_HOME/etc/hadoop/hdfs-site.xml
if [ $? -ne 0 ]; then
cat << 'EOF' > /tmp/hdfs-site.xml.property
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>192.168.33.10:50090</value>
</property>
EOF
sed -i -e '
/^<configuration>$/r /tmp/hdfs-site.xml.property
/^$/d
' $HADOOP_HOME/etc/hadoop/hdfs-site.xml
fi
}
: "etc/hadoop/mapred-site.xml" && {
grep mapreduce.framework.nam $HADOOP_HOME/etc/hadoop/mapred-site.xml
if [ $? -ne 0 ]; then
cp $HADOOP_HOME/etc/hadoop/mapred-site.xml{.template,}
cat << 'EOF' > /tmp/mapred-site.xml.property
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
EOF
sed -i -e '
/^<configuration>$/r /tmp/mapred-site.xml.property
/^$/d
' $HADOOP_HOME/etc/hadoop/mapred-site.xml
fi
}
: "etc/hadoop/yarn-site.xml" && {
grep yarn.nodemanager.aux-service $HADOOP_HOME/etc/hadoop/yarn-site.xml
if [ $? -ne 0 ]; then
cat << 'EOF' > /tmp/yarn-site.xml.property
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
EOF
sed -i -e '
/^<configuration>$/r /tmp/yarn-site.xml.property
/^$/d
' $HADOOP_HOME/etc/hadoop/yarn-site.xml
fi
}
: "Copy to nodes" && {
for node in node1 node2; do
scp $HADOOP_HOME/etc/hadoop/* $node:$HADOOP_HOME/etc/hadoop/
done;
}
}
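# Optional: confirm the spliced-in settings are what Hadoop actually sees.
# `hdfs getconf -confKey` reads the effective configuration, so a typo in the
# XML splice shows up here rather than at format or start time.
: "Check effective configuration" && {
    hdfs getconf -confKey fs.defaultFS
    hdfs getconf -confKey dfs.replication
}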
: "Format HDFS" && {
$HADOOP_HOME/bin/hdfs namenode -format
}
: "Start daemons" && {
: "HDFS" && {
jps | grep NameNode
if [ $? -ne 0 ]; then
$HADOOP_HOME/sbin/start-dfs.sh
fi
}
: "YARN" && {
jps | grep ResourceManager
if [ $? -ne 0 ]; then
$HADOOP_HOME/sbin/start-yarn.sh
fi
}
: "MapReduce JobHistory server" && {
jps | grep JobHistoryServer
if [ $? -ne 0 ]; then
$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh --config $HADOOP_CONF_DIR start historyserver
fi
}
}
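# Optional: verify the cluster from the master. The report should list two
# live DataNodes and two NodeManagers; the web UIs sit on the Hadoop 2.x
# default ports (NameNode 50070, ResourceManager 8088).
: "Cluster health check" && {
    hdfs dfsadmin -report
    yarn node -list
}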
: "Setup Python3" && {
: "Install Python3" && {
sudo yum -y install https://centos7.iuscommunity.org/ius-release.rpm
sudo yum clean all
sudo yum -y install python36u-pip
}
: "Install Jupyter-Notebook and others" && {
sudo pip3.6 install pandas
}
: "Configure to launch pyspark" && {
cat << 'EOF' >> ~/.bashrc
export PYSPARK_PYTHON=/usr/bin/python3.6
EOF
}
: "Prepare to launch PySpark script" && {
cat << 'EOF' > launch-pyspark.sh
#!/bin/sh
pyspark --master yarn
EOF
chmod +x launch-pyspark.sh
}
}
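
Once master.sh finishes, a quick smoke test from inside the master VM (a sketch: sc is predefined in the pyspark shell, and the integers 0..99 sum to 4950):

vagrant ssh master
./launch-pyspark.sh
>>> sc.parallelize(range(100)).sum()

Jupyter can also act as the PySpark driver over the forwarded port 8888, using Spark's standard driver-side variables:

PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS='notebook --ip=0.0.0.0' pyspark --master yarn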
#!/bin/bash
# nodes.sh — provisions a worker node (invoked from the Vagrantfile with the hostname as $1).
set -x
HOSTNAME=$1
: "Set hostname" && {
    sudo hostname "$HOSTNAME"
    echo "$HOSTNAME" | sudo tee /etc/hostname > /dev/null
}
: "Edit hosts file" && {
grep 192.168.33.10 /etc/hosts > /dev/null
if [ $? -ne 0 ] ; then
cat << 'EOF' | sudo tee -a /etc/hosts > /dev/null
192.168.33.10 master
192.168.33.11 node1
192.168.33.12 node2
EOF
fi
}
: "Install common packages" && {
sudo yum -y install java-1.8.0-openjdk-devel openssh-clients rsync wget
}
: "Download Hadoop" && {
ls | grep hadoop-*.tar.gz > /dev/null
if [ $? -ne 0 ]; then
wget http://ftp.riken.jp/net/apache/hadoop/common/hadoop-2.8.3/hadoop-2.8.3.tar.gz -nv
tar xf hadoop-2.8.3.tar.gz
fi
}
: "Set environment variables to shell RC file" && {
grep JAVA_HOME /etc/hosts > /dev/null
if [ $? -ne 0 ]; then
cat << 'EOF' >> ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk
export HADOOP_HOME=~/hadoop-2.8.3
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$PATH
EOF
fi
source ~/.bashrc
}
: "Setup Python3" && {
: "Install Python3" && {
sudo yum -y install https://centos7.iuscommunity.org/ius-release.rpm
sudo yum clean all
sudo yum -y install python36u-pip
}
}
: "Hadoop execution check" && {
hadoop version
}
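
The workers start no daemons themselves; start-dfs.sh and start-yarn.sh on the master launch them over SSH. Once the master has been provisioned, each node should be running a DataNode and a NodeManager (jps ships with the openjdk-devel package):

vagrant ssh node1 -c 'jps'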
# -*- mode: ruby -*-
# vi: set ft=ruby :

# Vagrantfile — defines the master VM and two worker VMs.
# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  config.vm.define :master, primary: true do |master|
    master.vm.box = "bento/centos-7.4"
    master.vm.network "private_network", ip: "192.168.33.10"
    # Forward the Jupyter Notebook port to the host.
    master.vm.network :forwarded_port, host: 8888, guest: 8888
    master.vm.provider "virtualbox" do |vb|
      vb.memory = "1024"
    end
    master.vm.provision "shell", privileged: false do |s|
      s.path = "master.sh"
      s.args = "master"
    end
  end

  (1..2).each do |i|
    node_name = "node#{i}"
    config.vm.define node_name do |node|
      node.vm.box = "bento/centos-7.4"
      node.vm.network "private_network", ip: "192.168.33.1#{i}"
      node.vm.provider "virtualbox" do |vb|
        vb.memory = "1024"
      end
      node.vm.provision "shell", privileged: false do |s|
        s.path = "nodes.sh"
        s.args = node_name
      end
    end
  end

  if Vagrant.has_plugin?("vagrant-proxyconf") && ENV['http_proxy']
    config.proxy.http = ENV['http_proxy']
    config.proxy.https = ENV['https_proxy']
    config.proxy.no_proxy = "localhost,127.0.0.1"
  end
end
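
The proxy block only takes effect when the vagrant-proxyconf plugin is installed and http_proxy is set in the host environment; the plugin can be added with a standard Vagrant command:

vagrant plugin install vagrant-proxyconf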