* Vagrantfile for Pivotal HD 1.0.1 CE - Defines the type of VMs required and how to configure and provision those machines
* pcc_provision.sh - Provisioning is applied only to the pcc VM. It follows the PHD_10_Guid.pdf instructions to install PCC on the pcc VM and PHD on the phd[1..3] VMs. Annotations in the script provide additional information.
#!/bin/bash
# Make sure you have the latest Vagrant and VirtualBox installed.
# 1. Add the CentOS-6.2-x86_64 box to your Vagrant configuration.
vagrant box add CentOS-6.2-x86_64 https://s3.amazonaws.com/Vagrant_BaseBoxes/centos-6.2-x86_64-201306301713.box
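# (Optional sanity check, not in the original guide) Confirm the box was registered;
# the name must match the 'vm.box' value used in the Vagrantfile.
vagrant box list | grep CentOS-6.2-x86_64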
# 2. Download and uncompress PHD 1.0.1 CE.
wget "http://bitcast-a.v1.o1.sjc1.bitgravity.com/greenplum/pivotal-sw/phd_1.0.1.0-19_community.tar.gz"
tar -xzf ./phd_1.0.1.0-19_community.tar.gz
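# (Optional sanity check) Make sure the archive extracted into the expected folder before continuing.
[ -d PHD_1.0.1_CE ] || { echo "PHD_1.0.1_CE not found - extraction failed?"; exit 1; }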
cd PHD_1.0.1_CE
# 3. Download the Oracle JDK6 installation binary.
wget --no-cookies --header "Cookie: gpw_e24=http%3A%2F%2Fwww.oracle.com" "http://download.oracle.com/otn-pub/java/jdk/6u45-b06/jdk-6u45-linux-x64-rpm.bin" --no-check-certificate
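# The 'Cookie: gpw_e24=...' header and --no-check-certificate let wget pass Oracle's
# license-acceptance redirect unattended. (Optional check) Verify the binary is not empty:
[ -s jdk-6u45-linux-x64-rpm.bin ] || echo "Warning: jdk-6u45-linux-x64-rpm.bin is missing or empty"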
# 4. Add the Vagrantfile and the provisioning script that create the multi-VM Pivotal HD cluster.
wget "https://gist.github.com/tzolov/6269716/download" -O gist.tar.gz
tar --strip-components=1 -xzf ./gist.tar.gz
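# (Optional sanity check) The gist archive should have delivered both files used in step 5.
ls Vagrantfile pcc_provision.sh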
# 5. Run Vagrant and watch it build, deploy and start the cluster.
vagrant up
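# When provisioning completes, the PCC web UI is reachable through the forwarded port
# defined in the Vagrantfile (guest port 5000 -> host port 5000).

# ********************************************************************************
# * pcc_provision.sh - provisioning script applied only to the pcc VM
# ********************************************************************************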
#!/bin/bash
# Note: The default pwd is /home/vagrant.
# Note: 'root' is the default user and cannot be switched mid-script: "$ sudo su - gpadmin" will not work!
# Use the inline syntax instead: "$ su - -c "some command" gpadmin".
CLUSTER_NAME=PHD_C1
SERVICES=hdfs,yarn,pig,zookeeper,hbase,hive
MASTER_NODE=phd1.localdomain
MASTER_AND_SLAVES=$MASTER_NODE,phd2.localdomain,phd3.localdomain
ROOT_PASSWORD=vagrant
GPADMIN_PASSWORD=gpadmin
echo "********************************************************************************"
echo "* Prepare PCC (Pivotal Control Center) Perquisites "
echo "********************************************************************************"
# If missing, try to download the Oracle JDK6 installation binary.
if [ ! -f /vagrant/jdk-6u45-linux-x64-rpm.bin ];
then
cd /vagrant; wget -O jdk-6u45-linux-x64-rpm.bin --no-cookies --header "Cookie: gpw_e24=http%3A%2F%2Fwww.oracle.com" "http://download.oracle.com/otn-pub/java/jdk/6u45-b06/jdk-6u45-linux-x64-rpm.bin" --no-check-certificate; cd ~
fi
# Ensure that all installation packages are available in the folder where 'vagrant up' is executed.
# Note: '{ ...; exit 1; }' (not a subshell) is required so that a missing package aborts the provisioning.
[ ! -f /vagrant/jdk-6u45-linux-x64-rpm.bin ] && { echo "Cannot find jdk-6u45-linux-x64-rpm.bin in the vagrant startup directory"; exit 1; }
[ ! -f /vagrant/PCC-2.0.1.84.121.163.x86_64.tar.gz ] && { echo "Cannot find PCC-2.0.1.84.121.163.x86_64.tar.gz in the vagrant startup directory"; exit 1; }
[ ! -f /vagrant/PHD-1.0.1.0-19.tar.gz ] && { echo "Cannot find PHD-1.0.1.0-19.tar.gz in the vagrant startup directory"; exit 1; }
# Report SELinux status and disable the iptables firewall.
sestatus; chkconfig iptables off; service iptables stop; service iptables status
# Install required packages.
yum -y install httpd mod_ssl postgresql postgresql-devel postgresql-server compat-readline5 createrepo sigar nc expect
# Install Oracle Java 6 on the PCC (Admin) node.
cp /vagrant/jdk-6u45-linux-x64-rpm.bin .; chmod a+x ./jdk-6u45-linux-x64-rpm.bin; sudo ./jdk-6u45-linux-x64-rpm.bin; java -version
echo "********************************************************************************"
echo "* Install PCC (Pivotal Control Center) "
echo "********************************************************************************"
service commander stop
# Copy, uncompress and enter the PCC package folder
cp /vagrant/PCC-2.0.1.84.121.163.x86_64.tar.gz* /home/vagrant/; tar --no-same-owner -xzvf ./PCC-2.0.1.84.121.163.x86_64.tar.gz; cd /home/vagrant/PCC-2.0.1.84
# Install PCC as root using root's login shell (Note: this will not work without the '-' option).
su - -c "cd /home/vagrant/PCC-2.0.1.84; ./install" root
echo "********************************************************************************"
echo "* Prepare Hosts for Cluster: $CLUSTER_NAME "
echo "********************************************************************************"
# Add the Sun Java JDK RPM installer binary (jdk-6u45-linux-x64-rpm.bin). It will be installed on all cluster hosts automatically.
# The JDK installer needs to have execute permission. If the JDK version you have downloaded is only available as a .rpm file
# (not as a Linux binary installer .bin file), you need to install the JDK by hand on all cluster nodes.
su - -c "cp /vagrant/jdk-6u45-linux-x64-rpm.bin .; chmod a+x ./jdk-6u45-linux-x64-rpm.bin" gpadmin
echo "Importing Pivotal HD, PHDTools packages into the PCC local yum repository ..."
# (Required) For installing PHD
su - -c "cp /vagrant/PHD-1.0.1.0-19.tar.gz* .; tar -xzf ./PHD-1.0.1.0-19.tar.gz; icm_client import -p ./PHD-1.0.1.0-19" gpadmin
# (Optional) Import DataLoader and UUS installation packages
#su - -c "cp /vagrant/PHDTools-1.0.1-19.tar.gz* .; tar -xzf ./PHDTools-1.0.1-19.tar.gz; icm_client import -p ./PHDTools-1.0.1-19" gpadmin
# Create a hostfile (HostFile.txt) that contains the hostnames of all your cluster nodes, separated by newlines.
# Important: the hostfile must contain every node in the cluster EXCEPT the Admin node (i.e. except pcc.localdomain).
su - -c "echo $MASTER_AND_SLAVES | tr , '\n' > /home/gpadmin/HostFile.txt" gpadmin
# Prepare the cluster nodes for Pivotal HD.
# Note: preparehosts prompts for user input (the root and gpadmin passwords). The 'expect' tool is used
# to emulate this user interaction.
cat > /home/gpadmin/preparehosts.exp <<EOF
#!/usr/bin/expect -f
set timeout 100
spawn icm_client preparehosts --hostfile=./HostFile.txt --java=jdk-6u45-linux-x64-rpm.bin --ntp --selinuxoff --iptablesoff
expect "Please enter the root user's password. This is required for creating gpadmin users on the cluster nodes:"
send -- "$ROOT_PASSWORD\r"
expect "Please enter a non-empty password to be used for the gpadmin user:"
send -- "$GPADMIN_PASSWORD\r"
send -- "\r"
expect eof
EOF
chown gpadmin:gpadmin /home/gpadmin/preparehosts.exp; chmod a+x /home/gpadmin/preparehosts.exp
# Prepare all PHD hosts
su - -c "expect -f /home/gpadmin/preparehosts.exp" gpadmin
# Verify that all hosts are prepared for installation
su - -c "icm_client scanhosts -f ./HostFile.txt" gpadmin
echo "********************************************************************************"
echo "* Deploy Cluster: $CLUSTER_NAME "
echo "********************************************************************************"
# Fetch the default cluster configuration templates.
su - -c "icm_client fetch-template -o ~/ClusterConfigDir" gpadmin
# Modify clusterConfig.xml. Assign the hosts to Hadoop service roles following this convention:
#
# MASTER_NODE: client, namenode, secondarynamenode, yarn-resourcemanager, mapreduce-historyserver,
#              hbase-master, hive-server, hive-metastore, hawq-master, hawq-standbymaster
#
# MASTER_AND_SLAVES: datanode, yarn-nodemanager, zookeeper-server, hbase-regionserver, hawq-segment
sed -e "s/<clusterName>.*<\/clusterName>/<clusterName>$CLUSTER_NAME<\/clusterName>/g;\
s/<services>.*<\/services>/<services>$SERVICES<\/services>/g;\
s/<client>.*<\/client>/<client>$MASTER_NODE<\/client>/g;\
s/<namenode>.*<\/namenode>/<namenode>$MASTER_NODE<\/namenode>/g;\
s/<datanode>.*<\/datanode>/<datanode>$MASTER_AND_SLAVES<\/datanode>/g;\
s/<secondarynamenode>.*<\/secondarynamenode>/<secondarynamenode>$MASTER_NODE<\/secondarynamenode>/g;\
s/<yarn-resourcemanager>.*<\/yarn-resourcemanager>/<yarn-resourcemanager>$MASTER_NODE<\/yarn-resourcemanager>/g;\
s/<yarn-nodemanager>.*<\/yarn-nodemanager>/<yarn-nodemanager>$MASTER_AND_SLAVES<\/yarn-nodemanager>/g;\
s/<mapreduce-historyserver>.*<\/mapreduce-historyserver>/<mapreduce-historyserver>$MASTER_NODE<\/mapreduce-historyserver>/g;\
s/<zookeeper-server>.*<\/zookeeper-server>/<zookeeper-server>$MASTER_AND_SLAVES<\/zookeeper-server>/g;\
s/<hbase-master>.*<\/hbase-master>/<hbase-master>$MASTER_NODE<\/hbase-master>/g;\
s/<hbase-regionserver>.*<\/hbase-regionserver>/<hbase-regionserver>$MASTER_AND_SLAVES<\/hbase-regionserver>/g;\
s/<hive-server>.*<\/hive-server>/<hive-server>$MASTER_NODE<\/hive-server>/g;\
s/<hive-metastore>.*<\/hive-metastore>/<hive-metastore>$MASTER_NODE<\/hive-metastore>/g;" /home/gpadmin/ClusterConfigDir/clusterConfig.xml > /home/gpadmin/ClusterConfigDir/clusterConfig.tmp.xml
# Overwrite the default clusterConfig.xml with the modified version.
su - -c "cp ~/ClusterConfigDir/clusterConfig.tmp.xml ~/ClusterConfigDir/clusterConfig.xml" gpadmin
# As gpadmin, deploy the PHD cluster. Pivotal HD manager deploys clusters using input from the cluster
# configuration directory, which contains files that describe the topology and configuration of the cluster
# and the installation procedure.
su - -c "icm_client deploy -c ~/ClusterConfigDir" gpadmin
echo "********************************************************************************"
echo "* Start Cluster: $CLUSTER_NAME "
echo "********************************************************************************"
su - -c "icm_client start -l $CLUSTER_NAME" gpadmin
# -*- mode: ruby -*-
# vi: set ft=ruby :
# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"
# Embedded provisioning script, common to all cluster hosts and the PCC node.
$phd_provision_script = <<SCRIPT
#!/bin/bash
# Set the timezone to Europe/Amsterdam and restart NTP.
/etc/init.d/ntpd stop; mv /etc/localtime /etc/localtime.bak; ln -s /usr/share/zoneinfo/Europe/Amsterdam /etc/localtime; /etc/init.d/ntpd start
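# The static /etc/hosts entries below must match the private_network IPs assigned to the VMs in this Vagrantfile.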
cat > /etc/hosts <<EOF
127.0.0.1 localhost.localdomain localhost
::1 localhost6.localdomain6 localhost6
10.211.55.100 pcc.localdomain pcc
10.211.55.101 phd1.localdomain phd1
10.211.55.102 phd2.localdomain phd2
10.211.55.103 phd3.localdomain phd3
EOF
yum -y install postgresql-devel nc expect
SCRIPT
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|

  config.vm.define :phd1 do |phd1|
    phd1.vm.box = "CentOS-6.2-x86_64"
    phd1.vm.provider :virtualbox do |v|
      v.name = "phd1"
      v.customize ["modifyvm", :id, "--memory", "3072"]
    end
    phd1.vm.provider "vmware_fusion" do |v|
      v.name = "phd1"
      v.vmx["memsize"] = "4096"
    end
    phd1.vm.hostname = "phd1.localdomain"
    phd1.vm.network :private_network, ip: "10.211.55.101"
    phd1.vm.provision :shell, :inline => $phd_provision_script
  end
  config.vm.define :phd2 do |phd2|
    phd2.vm.box = "CentOS-6.2-x86_64"
    phd2.vm.provider :virtualbox do |v|
      v.name = "phd2"
      v.customize ["modifyvm", :id, "--memory", "1024"]
    end
    phd2.vm.provider "vmware_fusion" do |v|
      v.name = "phd2"
      v.vmx["memsize"] = "1024"
    end
    phd2.vm.hostname = "phd2.localdomain"
    phd2.vm.network :private_network, ip: "10.211.55.102"
    phd2.vm.provision :shell, :inline => $phd_provision_script
  end
  config.vm.define :phd3 do |phd3|
    phd3.vm.box = "CentOS-6.2-x86_64"
    phd3.vm.provider :virtualbox do |v|
      v.name = "phd3"
      v.customize ["modifyvm", :id, "--memory", "1024"]
    end
    phd3.vm.provider "vmware_fusion" do |v|
      v.name = "phd3"
      v.vmx["memsize"] = "1024"
    end
    phd3.vm.hostname = "phd3.localdomain"
    phd3.vm.network :private_network, ip: "10.211.55.103"
    phd3.vm.provision :shell, :inline => $phd_provision_script
  end
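  # Note: 'vagrant up' provisions the machines in definition order, so the phd1..phd3 nodes are
  # already running when the pcc VM's pcc_provision.sh invokes 'icm_client preparehosts' against them.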
  config.vm.define :pcc do |pcc|
    pcc.vm.box = "CentOS-6.2-x86_64"
    pcc.vm.provider :virtualbox do |v|
      v.name = "pcc"
      v.customize ["modifyvm", :id, "--memory", "350"]
    end
    pcc.vm.provider "vmware_fusion" do |v|
      v.name = "pcc"
      v.vmx["memsize"] = "350"
    end
    pcc.vm.hostname = "pcc.localdomain"
    pcc.vm.network :private_network, ip: "10.211.55.100"
    pcc.vm.network :forwarded_port, guest: 5000, host: 5000
    pcc.vm.provision :shell, :inline => $phd_provision_script
    pcc.vm.provision :shell, :path => "pcc_provision.sh"
  end
end