Vagrantfile for Pivotal HD 1.0.1 CE + HAWQ 1.1.0-8
* Vagrantfile - Defines the VMs required and how to configure and provision those machines.
* pcc_provision.sh - Provisioning is applied only to the pcc VM. It follows the PHD_10_Guide.pdf instructions to install PCC on the pcc VM and PHD on the phd[1..3] VMs. Annotations in the script can provide addition…
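A minimal usage sketch (the package file names are the ones the provisioning script below expects, and the VM names and start order come from the Vagrantfile; having VirtualBox or VMware Fusion and the CentOS-6.2-x86_64 base box installed is assumed):

    # Place the installation packages next to the Vagrantfile:
    #   PCC-2.0.1.84.121.163.x86_64.tar.gz
    #   PHD-1.0.1.0-19.tar.gz
    #   PADS-1.1.0-8.tar.gz          (downloaded automatically if missing)
    #   jdk-6u45-linux-x64-rpm.bin   (downloaded automatically if missing)
    vagrant up   # brings up phd1..3 first (definition order), then pcc, which runs pcc_provision.sh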
pcc_provision.sh
#!/bin/bash
# All configuration and installation steps applied here follow the PHD installation guide:
# http://bitcast-a.v1.o1.sjc1.bitgravity.com/greenplum/pivotal-docs/PHD_10_Guide.pdf
#
# Note: The default pwd is /home/vagrant.
#
# Note: By default, Vagrant shares your project directory (that is, the one with the Vagrantfile)
# to the /vagrant directory in your guest VMs.
#
# Note: 'root' is the default user. You cannot switch users inside this script: "$ sudo su - gpadmin" will not work!
# Use the inline syntax instead: "$ su - -c "some command" gpadmin".
# Sets the cluster name to be used in PCC (Pivotal Control Center)
CLUSTER_NAME=PHD_C1
# List of Hadoop services to be deployed with this installation.
# Note: Hive is disabled because the phd2 and phd3 VMs are configured with just 1GB of memory (Vagrantfile)! To enable Hive,
# increase the memory of the VMs to at least 2GB (edit the Vagrantfile) and then add 'hive' to the $SERVICES variable.
# Alternatively, if you don't have enough physical memory, you can remove one VM (phd3 for example) and increase the memory
# of the remaining VMs. For this you need to remove the phd3 definition from the Vagrantfile and from the $MASTER_AND_SLAVES
# list (see the commented example below).
SERVICES=hdfs,yarn,pig,zookeeper,hbase,gpxf,hawq
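# Example (a sketch only, not part of the default setup): a two-node layout with Hive enabled,
# assuming phd3 has been removed from the Vagrantfile and the remaining VMs have at least 2GB each:
#   SERVICES=hdfs,yarn,pig,zookeeper,hbase,hive,gpxf,hawq
#   MASTER_AND_SLAVES=$MASTER_NODE,phd2.localdomain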
# Sets the DNS name of the VM used as the Master node for all Hadoop services (e.g. namenode, hawq master, jobtracker ...)
# Note: The Master node is not the Admin node (where PCC runs). By convention the Admin node is pcc.localdomain.
MASTER_NODE=phd1.localdomain
# By default the HAWQ master is collocated with the other master services.
HAWQ_MASTER=$MASTER_NODE
# List of all Pivotal HD nodes in the cluster (including the master node)
MASTER_AND_SLAVES=$MASTER_NODE,phd2.localdomain,phd3.localdomain
# By default all nodes will be used as HAWQ segment hosts. Edit the $HAWQ_SEGMENT_HOSTS variable to change this setup.
HAWQ_SEGMENT_HOSTS=$MASTER_AND_SLAVES
# The client node defaults to the MASTER node
CLIENT_NODE=$MASTER_NODE
# Root password required for creating gpadmin users on the cluster nodes.
# (By default the Vagrant box's root password is 'vagrant' - used below.)
ROOT_PASSWORD=vagrant
# Non-empty password to be used for the gpadmin user. Required by the PHD installation.
GPADMIN_PASSWORD=gpadmin
echo "********************************************************************************" | |
echo "* Prepare PCC (Pivotal Control Center) Perquisites " | |
echo "********************************************************************************" | |
# If missing try to download the Oracle JDK6 installation binary. | |
if [ ! -f /vagrant/jdk-6u45-linux-x64-rpm.bin ]; | |
then | |
cd /vagrant; wget --cookies=off --header "Cookie: gpw_e24=http%3A%2F%2Fwww.oracle.com" "http://download.oracle.com/otn-pub/java/jdk/6u45-b06/jdk-6u45-linux-x64-rpm.bin"; cd ~ | |
fi | |
# Ensure that all installation packages are available in the same folder where the 'vagrant up' is executed. | |
[ ! -f /vagrant/jdk-6u45-linux-x64-rpm.bin ] && ( echo "Can not find jdk-6u45-linux-x64-rpm.bin in the vagrant startup directory"; exit 1 ) | |
[ ! -f /vagrant/PCC-2.0.1.84.121.163.x86_64.tar.gz ] && ( echo "Can not find PCC-2.0.1.84.121.163.x86_64.tar.gz in the vagrant startup directory"; exit 1 ) | |
[ ! -f /vagrant/PHD-1.0.1.0-19.tar.gz ] && ( echo "Can not find PHD-1.0.1.0-19.tar.gz in the vagrant startup directory"; exit 1 ) | |
# <HAWQ> If PADS (e.g. HAWQ) is not available locally, download it from the public distribution. | |
if [ ! -f /vagrant/PADS-1.1.0-8.tar.gz ]; | |
then | |
cd /vagrant; wget http://bitcast-a.v1.o1.sjc1.bitgravity.com/greenplum/pivotal-sw/PADS-1.1.0-8.tar.gz; cd ~ | |
fi | |
# </HAWQ> | |
# Disable security. | |
sestatus; chkconfig iptables off; service iptables stop; service iptables status | |
# Install required packages. | |
yum -y install httpd mod_ssl postgresql postgresql-devel postgresql-server compat-readline5 createrepo sigar nc expect | |
# Install Oracle Java 6 on PCC (e.g Admin) node. | |
cp /vagrant/jdk-6u45-linux-x64-rpm.bin .; chmod a+x ./jdk-6u45-linux-x64-rpm.bin; sudo ./jdk-6u45-linux-x64-rpm.bin; java -version | |
echo "********************************************************************************" | |
echo "* Install PCC (Pivotal Control Center) " | |
echo "********************************************************************************" | |
service commander stop | |
# Copy, uncompress and enter the PCC package folder | |
tar --no-same-owner -xzvf /vagrant/PCC-2.0.1.84.121.163.x86_64.tar.gz --directory /home/vagrant/; cd /home/vagrant/PCC-2.0.1.84 | |
# Install PCC as root using root's login shell (Note: will not work without the '-' option) | |
su - -c "cd /home/vagrant/PCC-2.0.1.84; ./install" root | |
echo "********************************************************************************" | |
echo "* Prepare Hosts for Cluster: $CLUSTER_NAME " | |
echo "********************************************************************************" | |
# Add Sun Java JDK RPM installer binary (jdk-6u45-linux-x64-rpm.bin). Will be installed on all cluster hosts automatically. | |
# The JDK rpm file need to have execute permission. If the JDK version you have downloaded is only available as a .rpm file | |
# (not as a Linux binary installer .bin file) you need to install the JDK by hand on all cluster nodes. | |
su - -c "cp /vagrant/jdk-6u45-linux-x64-rpm.bin .; chmod a+x ./jdk-6u45-linux-x64-rpm.bin" gpadmin | |
echo "Import PHD & PADS packages into the PCC local yum repository ..." | |
# (Required) For installing PHD | |
su - -c "tar -xzf /vagrant/PHD-1.0.1.0-19.tar.gz --directory ~; icm_client import -p ./PHD-1.0.1.0-19" gpadmin | |
# <<HAQW>> | |
# Import HAWQ packages in the local yum repo | |
su - -c "tar -xzf /vagrant/PADS-1.1.0-8.tar.gz --directory ~; icm_client import -p ./PADS-1.1.0-8" gpadmin | |
# <</HAWQ>> | |
# (Optional) Import DataLoader and UUS installation packages | |
#su - -c "tar -xzf /vagrant/PHDTools-1.0.1-19.tar.gz --directory ~; icm_client import -p ./PHDTools-1.0.1-19" gpadmin | |
# Create a hostfile (HostFile.txt) that contains the hostnames of all cluster nodes (except pcc) separated by newlines. | |
# Important: The hostfile should contain all nodes within your cluster EXCEPT the Admin node (e.g. except pcc.localdomain). | |
su - -c "echo $MASTER_AND_SLAVES | tr , '\n' > /home/gpadmin/HostFile.txt" gpadmin | |
echo "Prepare cluster hosts" | |
# Preparing the Cluster Nodes for Pivotal HD | |
# Note: preparehosts expects user inputs like root and gpadmin passwords. The 'expect' tool is used to emulate this user interaction. | |
cat > /home/gpadmin/preparehosts.exp <<EOF | |
#!/usr/bin/expect -f | |
set timeout 100 | |
spawn icm_client preparehosts --hostfile=./HostFile.txt --java=jdk-6u45-linux-x64-rpm.bin --ntp --selinuxoff --iptablesoff | |
expect "Please enter the root user's password. This is required for creating gpadmin users on the cluster nodes:" | |
send -- "$ROOT_PASSWORD\r" | |
expect "Please enter a non-empty password to be used for the gpadmin user:" | |
send -- "$GPADMIN_PASSWORD\r" | |
send -- "\r" | |
expect eof | |
EOF | |
chown gpadmin:gpadmin /home/gpadmin/preparehosts.exp; chmod a+x /home/gpadmin/preparehosts.exp | |
# Prepare all PHD hosts | |
su - -c "expect -f /home/gpadmin/preparehosts.exp" gpadmin | |
# <<HAWQ>> | |
echo "Prepare HAWQ hosts" | |
# Set vm.overcommit_memory to 1 to prevent OOM and other VM issues. | |
sed -i 's/vm.overcommit_memory = 2/vm.overcommit_memory = 1/g' /usr/lib/gphd/gphdmgr/hawq_sys_config/sysctl.conf | |
# Prepare all Hawq hosts. Asumes that HAWQ is deployed on all hosts. | |
su - -c "icm_client prepare-hawq-hosts -f ./HostFile.txt -g /usr/lib/gphd/gphdmgr/hawq_sys_config/" gpadmin | |
# <</HAWQ>> | |
# Verify that all hosts are prepared for installation | |
su - -c "icm_client scanhosts -f ./HostFile.txt" gpadmin | |
echo "********************************************************************************" | |
echo "* Deploy Cluster: $CLUSTER_NAME " | |
echo "********************************************************************************" | |
# Cluster is deployed as gpadmin user! | |
# Pivotal HD manager deploys clusters using input from the cluster configuration directory. This cluster | |
# configuration directory contains files that describes the topology and configuration for the cluster and the | |
# installation procedure. | |
# Fetch the default Cluster Configuration Templates. | |
su - -c "icm_client fetch-template -o ~/ClusterConfigDir" gpadmin | |
# Use the following convention to assign cluster hosts to Hadoop service roles. All changes are | |
# applied to the ~/ClusterConfigDir/clusterConfig.xml file, generated in the previous step. | |
# Note: By default HAWQ_MASTER=MASTER_NODE, CLIENT_NODE=MASTER_NODE and HAWQ_SEGMENT_HOSTS=MASTER_AND_SLAVES | |
# --------------------------------------------------------------------------------------------------------- | |
# Hosts | Services | |
# --------------------------------------------------------------------------------------------------------- | |
# MASTER_NODE | client, namenode, secondarynameonde, yarn-resourcemanager, mapreduce-historyserver, | |
# | hbase-master,hive-server,hive-metastore,hawq-master,hawq-standbymaste,hawq-segment, | |
# | gpxf-agent | |
# | | |
# MASTER_AND_SLAVES | datanode,yarn-nodemanager,zookeeper-server,hbase-regionserver,hawq-segment,gpxf-agent | |
# --------------------------------------------------------------------------------------------------------- | |
# Apply the mapping convention (above) to the default clusterConfig.xml. | |
sed -i "\ | |
s/<clusterName>.*<\/clusterName>/<clusterName>$CLUSTER_NAME<\/clusterName>/g;\ | |
s/<services>.*<\/services>/<services>$SERVICES<\/services>/g;\ | |
s/<client>.*<\/client>/<client>$CLIENT_NODE<\/client>/g;\ | |
s/<namenode>.*<\/namenode>/<namenode>$MASTER_NODE<\/namenode>/g;\ | |
s/<datanode>.*<\/datanode>/<datanode>$MASTER_AND_SLAVES<\/datanode>/g;\ | |
s/<secondarynamenode>.*<\/secondarynamenode>/<secondarynamenode>$MASTER_NODE<\/secondarynamenode>/g;\ | |
s/<yarn-resourcemanager>.*<\/yarn-resourcemanager>/<yarn-resourcemanager>$MASTER_NODE<\/yarn-resourcemanager>/g;\ | |
s/<yarn-nodemanager>.*<\/yarn-nodemanager>/<yarn-nodemanager>$MASTER_AND_SLAVES<\/yarn-nodemanager>/g;\ | |
s/<mapreduce-historyserver>.*<\/mapreduce-historyserver>/<mapreduce-historyserver>$MASTER_NODE<\/mapreduce-historyserver>/g;\ | |
s/<zookeeper-server>.*<\/zookeeper-server>/<zookeeper-server>$MASTER_AND_SLAVES<\/zookeeper-server>/g;\ | |
s/<hbase-master>.*<\/hbase-master>/<hbase-master>$MASTER_NODE<\/hbase-master>/g;\ | |
s/<hbase-regionserver>.*<\/hbase-regionserver>/<hbase-regionserver>$MASTER_AND_SLAVES<\/hbase-regionserver>/g;\ | |
s/<hive-server>.*<\/hive-server>/<hive-server>$MASTER_NODE<\/hive-server>/g;\ | |
s/<hive-metastore>.*<\/hive-metastore>/<hive-metastore>$MASTER_NODE<\/hive-metastore>/g;\ | |
s/<hawq-master>.*<\/hawq-master>/<hawq-master>$HAWQ_MASTER<\/hawq-master>/g;\ | |
s/<hawq-standbymaster>.*<\/hawq-standbymaster>/<hawq-standbymaster>$HAWQ_MASTER<\/hawq-standbymaster>/g;\ | |
s/<hawq-segment>.*<\/hawq-segment>/<hawq-segment>$HAWQ_SEGMENT_HOSTS<\/hawq-segment>/g;" /home/gpadmin/ClusterConfigDir/clusterConfig.xml | |
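# (Optional) A quick sanity check that the substitutions above took effect:
#   grep -E "<clusterName>|<namenode>|<hawq-master>|<hawq-segment>" /home/gpadmin/ClusterConfigDir/clusterConfig.xml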
# Use ICM to perform the deploy
su - -c "icm_client deploy -c ~/ClusterConfigDir" gpadmin
echo "********************************************************************************"
echo "* HAWQ - post deploy configuration "
echo "********************************************************************************"
# <<HAWQ>>
su - -c "echo $HAWQ_SEGMENT_HOSTS | tr , '\n' > /home/gpadmin/HAWQ_Segment_Hosts.txt" gpadmin
su - -c "\
scp /home/gpadmin/HAWQ_Segment_Hosts.txt gpadmin@$HAWQ_MASTER:/home/gpadmin/HAWQ_Segment_Hosts.txt;\
ssh gpadmin@$HAWQ_MASTER 'source /usr/local/hawq/greenplum_path.sh;\
/usr/local/hawq/bin/gpssh-exkeys -f /home/gpadmin/HAWQ_Segment_Hosts.txt -p $GPADMIN_PASSWORD'" gpadmin
# <</HAWQ>>
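# (Optional) A hedged check that the key exchange worked: gpadmin on the HAWQ master should now
# reach every segment host without a password, e.g.:
#   su - -c "ssh gpadmin@$HAWQ_MASTER 'ssh -o BatchMode=yes phd2.localdomain hostname'" gpadmin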
echo "********************************************************************************" | |
echo "* Start Cluster: $CLUSTER_NAME " | |
echo "********************************************************************************" | |
su - -c "icm_client start -l $CLUSTER_NAME" gpadmin | |
echo "********************************************************************************" | |
echo "* Initialise HAWQ " | |
echo "********************************************************************************" | |
# <<HAWQ>> | |
su - -c "ssh gpadmin@$HAWQ_MASTER '/etc/init.d/hawq init'" gpadmin; | |
# <</HAWQ>> |
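# (Optional) A hedged smoke test, assuming HAWQ ships the standard Greenplum psql client
# and creates a 'postgres' database during init:
#   su - -c "ssh gpadmin@$HAWQ_MASTER 'source /usr/local/hawq/greenplum_path.sh; psql postgres -c \"SELECT version();\"'" gpadmin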
Vagrantfile
# -*- mode: ruby -*-
# vi: set ft=ruby :

# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"

# Embedded provisioning script common for all cluster hosts and PCC.
$phd_provision_script = <<SCRIPT
#!/bin/bash
# Install the packages required for all cluster and admin nodes
yum -y install postgresql-devel nc expect ed ntp
# Set the timezone and run NTP (set to Europe/Amsterdam time).
/etc/init.d/ntpd stop; mv /etc/localtime /etc/localtime.bak; ln -s /usr/share/zoneinfo/Europe/Amsterdam /etc/localtime; /etc/init.d/ntpd start
cat > /etc/hosts <<EOF
127.0.0.1       localhost.localdomain   localhost
::1             localhost6.localdomain6 localhost6
10.211.55.100   pcc.localdomain         pcc
10.211.55.101   phd1.localdomain        phd1
10.211.55.102   phd2.localdomain        phd2
10.211.55.103   phd3.localdomain        phd3
EOF
SCRIPT
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|

  config.vm.define :phd1 do |phd1|
    phd1.vm.box = "CentOS-6.2-x86_64"
    phd1.vm.provider :virtualbox do |v|
      v.name = "phd1"
      v.customize ["modifyvm", :id, "--memory", "2048"]
    end
    phd1.vm.provider "vmware_fusion" do |v|
      v.name = "phd1"
      v.vmx["memsize"] = "2048"
    end
    phd1.vm.hostname = "phd1.localdomain"
    phd1.vm.network :private_network, ip: "10.211.55.101"
    phd1.vm.provision :shell, :inline => $phd_provision_script
  end

  config.vm.define :phd2 do |phd2|
    phd2.vm.box = "CentOS-6.2-x86_64"
    phd2.vm.provider :virtualbox do |v|
      v.name = "phd2"
      v.customize ["modifyvm", :id, "--memory", "1024"]
    end
    phd2.vm.provider "vmware_fusion" do |v|
      v.name = "phd2"
      v.vmx["memsize"] = "1024"
    end
    phd2.vm.hostname = "phd2.localdomain"
    phd2.vm.network :private_network, ip: "10.211.55.102"
    phd2.vm.provision :shell, :inline => $phd_provision_script
  end

  config.vm.define :phd3 do |phd3|
    phd3.vm.box = "CentOS-6.2-x86_64"
    phd3.vm.provider :virtualbox do |v|
      v.name = "phd3"
      v.customize ["modifyvm", :id, "--memory", "1024"]
    end
    phd3.vm.provider "vmware_fusion" do |v|
      v.name = "phd3"
      v.vmx["memsize"] = "1024"
    end
    phd3.vm.hostname = "phd3.localdomain"
    phd3.vm.network :private_network, ip: "10.211.55.103"
    phd3.vm.provision :shell, :inline => $phd_provision_script
  end
  config.vm.define :pcc do |pcc|
    pcc.vm.box = "CentOS-6.2-x86_64"
    pcc.vm.provider :virtualbox do |v|
      v.name = "pcc"
      v.customize ["modifyvm", :id, "--memory", "352"]
    end
    pcc.vm.provider "vmware_fusion" do |v|
      v.name = "pcc"
      v.vmx["memsize"] = "352"
    end
    pcc.vm.hostname = "pcc.localdomain"
    pcc.vm.network :private_network, ip: "10.211.55.100"
    pcc.vm.network :forwarded_port, guest: 5000, host: 5000
    pcc.vm.provision :shell, :inline => $phd_provision_script
    pcc.vm.provision :shell, :path => "pcc_provision.sh"
  end
end