Vagrantfile for Hadoop (3.3) Cluster with Hive (3.1)
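
This gist is made up of four files: a one-line driver script that boots the
VMs (workers first, because master.sh then provisions the entire cluster over
SSH), the provisioning scripts master.sh and nodes.sh, and the Vagrantfile
that ties them together. Everything targets CentOS 7 (bento/centos-7),
Hadoop 3.3.6, and Hive 3.1.3.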
#!/bin/sh
# Boot the workers first, then the master: master.sh provisions the
# entire cluster (including node1 and node2) over SSH.
vagrant up node1 node2 master

#!/bin/bash
# master.sh -- provisions the master and, from there, the whole cluster.
# (bash rather than sh: "set -o pipefail" is a bashism)
set -euxo pipefail
HOSTNAME=$1

: "Set hostname" && {
  sudo hostname "$HOSTNAME"
  echo "$HOSTNAME" | sudo tee /etc/hostname > /dev/null
}
: "Edit hosts file" && {
if ! bash -c "grep 192.168.56.10 /etc/hosts" ; then
cat << 'EOF' | sudo tee -a /etc/hosts > /dev/null
192.168.56.10 master
192.168.56.11 node1
192.168.56.12 node2
EOF
fi
}
: "Install common packages" && {
sudo yum -y install epel-release
sudo yum -y install java-1.8.0-openjdk-devel openssh-clients rsync wget sshpass
}
: "Download Hadoop" && {
if ! bash -c "ls | grep hadoop-*.tar.gz"; then
wget http://ftp.riken.jp/net/apache/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz -nv
tar xf hadoop-3.3.6.tar.gz
fi
}
: "Set environment variables to shell RC file" && {
if ! bash -c "grep JAVA_HOME ~/.bashrc"; then
cat << 'EOF' >> ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk
export HADOOP_HOME=~/hadoop-3.3.6
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$PATH
EOF
fi
set +u
source ~/.bashrc
set -u
}
: "Hadoop execution check" && {
hadoop version
}
: "Install SSH public key to all nodes" && {
ssh-keygen -t ed25519 -P '' -f ~/.ssh/id_ed25519
for node in master node1 node2; do
sshpass -p "vagrant" ssh-copy-id -i ~/.ssh/id_ed25519.pub -o "StrictHostKeyChecking no" $node
done;
}
: "Copy Hadoop directory to nodes" && {
for node in node1 node2; do
scp -r $HADOOP_HOME $node:~/
done;
}
: "Setting configuration files" && {
: "etc/hadoop/workers" && {
cat << 'EOF' > $HADOOP_HOME/etc/hadoop/workers
node1
node2
EOF
}
: "etc/hadoop/core-site.xml" && {
if ! bash -c "grep fs.defaultFS $HADOOP_HOME/etc/hadoop/core-site.xml"; then
cat << 'EOF' > /tmp/core-site.xml.property
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
EOF
sed -i -e '
/^<configuration>$/r /tmp/core-site.xml.property
/^$/d
' $HADOOP_HOME/etc/hadoop/core-site.xml
fi
}
: "etc/hadoop/hdfs-site.xml" && {
if ! bash -c "grep dfs.replication $HADOOP_HOME/etc/hadoop/hdfs-site.xml" ; then
cat << 'EOF' > /tmp/hdfs-site.xml.property
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:50090</value>
</property>
EOF
sed -i -e '
/^<configuration>$/r /tmp/hdfs-site.xml.property
/^$/d
' $HADOOP_HOME/etc/hadoop/hdfs-site.xml
fi
}
: "etc/hadoop/mapred-site.xml" && {
if ! bash -c "grep mapreduce.framework.nam $HADOOP_HOME/etc/hadoop/mapred-site.xml"; then
cat << 'EOF' > /tmp/mapred-site.xml.property
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
</property>
EOF
sed -i -e '
/^<configuration>$/r /tmp/mapred-site.xml.property
/^$/d
' $HADOOP_HOME/etc/hadoop/mapred-site.xml
fi
}
: "etc/hadoop/yarn-site.xml" && {
if ! bash -c "grep yarn.nodemanager.aux-service $HADOOP_HOME/etc/hadoop/yarn-site.xml"; then
cat << 'EOF' > /tmp/yarn-site.xml.property
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ,HADOOP_MAPRED_HOME</value>
</property>
EOF
sed -i -e '
/^<configuration>$/r /tmp/yarn-site.xml.property
/^$/d
' $HADOOP_HOME/etc/hadoop/yarn-site.xml
fi
}
: "Copy to workers" && {
for node in node1 node2; do
scp $HADOOP_HOME/etc/hadoop/* $node:$HADOOP_HOME/etc/hadoop/
done;
}
}
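
# (Addition, not in the original gist.) Sanity-check that the spliced-in
# settings are what Hadoop will actually read; hdfs getconf resolves keys
# against $HADOOP_CONF_DIR.
: "Check effective configuration" && {
  hdfs getconf -confKey fs.defaultFS     # expect hdfs://master:9000
  hdfs getconf -confKey dfs.replication  # expect 2
}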
: "Format HDFS" && {
$HADOOP_HOME/bin/hdfs namenode -format -force
}
: "Start daemons" && {
: "HDFS" && {
if ! bash -c "jps | grep NameNode"; then
$HADOOP_HOME/sbin/start-dfs.sh
fi
}
: "YARN" && {
if ! bash -c "jps | grep ResourceManager"; then
$HADOOP_HOME/sbin/start-yarn.sh
fi
}
: "MapReduce JobHistory server" && {
if ! bash -c "jps | grep JobHistoryServer" ; then
$HADOOP_HOME/bin/mapred --daemon start historyserver
fi
}
}
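
# (Addition, not in the original gist.) Rough health check: list the local
# daemons and ask HDFS/YARN which workers have registered. Freshly started
# DataNodes/NodeManagers can take a few seconds to appear, so failures here
# are not treated as fatal.
: "Cluster health check" && {
  jps
  hdfs dfsadmin -report || true
  yarn node -list || true
}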
: "Setup Hive" && {
: "Download Hive (3.x)" && {
if ! bash -c "ls | grep apache-hive-*.tar.gz" ; then
wget http://ftp.riken.jp/net/apache/hive/hive-3.1.3/apache-hive-3.1.3-bin.tar.gz -nv
tar xf apache-hive-3.1.3-bin.tar.gz
fi
}
: "Set environment variables to shell RC file" && {
if ! bash -c "grep HIVE_HOME ~/.bashrc" ; then
cat << 'EOF' >> ~/.bashrc
export HIVE_HOME=~/apache-hive-3.1.3-bin
export PATH=$HIVE_HOME/bin:$PATH
EOF
fi
set +u
source ~/.bashrc
set -u
}
: "Setup HDFS working directory" && {
$HADOOP_HOME/bin/hadoop fs -mkdir -p /user/hive/warehouse
$HADOOP_HOME/bin/hadoop fs -chmod g+w /user/hive/warehouse
$HADOOP_HOME/bin/hadoop fs -mkdir -p /tmp
$HADOOP_HOME/bin/hadoop fs -chmod g+w /tmp
$HIVE_HOME/bin/schematool -dbType derby -initSchema --verbose
}
}
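
# (Addition, not in the original gist.) Minimal Hive smoke test; the table
# name is arbitrary. DDL only, so it exercises the Derby metastore without
# launching a YARN job. Assumes the schematool step above succeeded.
: "Hive smoke test" && {
  "$HIVE_HOME"/bin/hive -e 'CREATE TABLE IF NOT EXISTS smoke_test (v INT); SHOW TABLES;'
}
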
#!/bin/bash
# nodes.sh -- provisions the worker nodes (filename from the Vagrantfile).
set -euxo pipefail

HOSTNAME=$1

: "Set hostname" && {
  sudo hostname "$HOSTNAME"
  echo "$HOSTNAME" | sudo tee /etc/hostname > /dev/null
}
: "Edit hosts file" && {
if ! bash -c "grep 192.168.56.10 /etc/hosts" ; then
cat << 'EOF' | sudo tee -a /etc/hosts > /dev/null
192.168.56.10 master
192.168.56.11 node1
192.168.56.12 node2
EOF
fi
}
: "Install common packages" && {
sudo yum -y install java-1.8.0-openjdk-devel openssh-clients rsync wget
}
: "Set environment variables to shell RC file" && {
if ! bash -c "grep JAVA_HOME /etc/hosts" ; then
cat << 'EOF' >> ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk
export HADOOP_HOME=~/hadoop-3.3.6
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$PATH
EOF
fi
}
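
# Note (addition): nodes.sh installs no Hadoop bits itself. master.sh later
# copies ~/hadoop-3.3.6 and its configuration to each worker over SSH, which
# is why only the JDK and SSH tooling are needed here.
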
# -*- mode: ruby -*-
# vi: set ft=ruby :

# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  config.vm.define :master, primary: true do |master|
    master.vm.box = "bento/centos-7"
    master.vm.network "private_network", ip: "192.168.56.10"
    master.vm.provider "virtualbox" do |vb|
      vb.memory = "8192"
    end
    master.vm.provision "shell", privileged: false do |s|
      s.path = "master.sh"
      s.args = "master"
    end
  end

  (1..2).each {|i|
    node_name = "node" + i.to_s
    config.vm.define node_name do |node|
      node.vm.box = "bento/centos-7"
      node.vm.network "private_network", ip: "192.168.56.1" + i.to_s
      node.vm.provider "virtualbox" do |vb|
        vb.memory = "4096"
      end
      node.vm.provision "shell", privileged: false do |s|
        s.path = "nodes.sh"
        s.args = node_name
      end
    end
  }

  if Vagrant.has_plugin?("vagrant-proxyconf") && ENV['http_proxy']
    config.proxy.http = ENV['http_proxy']
    config.proxy.https = ENV['https_proxy']
    config.proxy.no_proxy = "localhost,127.0.0.1"
  end
end
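
To bring the cluster up, run the driver script at the top (vagrant up node1
node2 master) and then vagrant ssh master. With the defaults above, the
NameNode web UI should be reachable from the host at http://192.168.56.10:9870
and the ResourceManager UI at http://192.168.56.10:8088; those are the stock
Hadoop 3 ports, so adjust if you override them.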