Automated build script to create HAWQ (HDB) single VM sandbox on HDP
###########################################################################################################################
##HDB on HDP sandbox setup script
###Pre-reqs:
#- Laptop with at least 10-12 GB RAM (mine has 16 GB)
#- ISO image of CentOS 6.7 or later downloaded from [here](http://isoredirect.centos.org/centos/6/isos/x86_64/).
# - In my case, I used CentOS-6.7-x86_64-bin-DVD1.iso.
##### Setup CentOS 6.7 or later on VM
#- Start a CentOS VM using ISO
# - Open VMWare Fusion and click File > New > Install from disc/image > Use another disk
# - Select the iso file > Deselect easy install > Customize settings > name: HDB_sandbox
# - Under Processors and memory, set memory to 8048 MB and processors to 4
# - Press Play to start VM
#- Go through CentOS install wizard
# - Install > Skip > Next > English > US English > Basic Storage Devices > Yes, discard
# - Change hostname to sandbox.hortonworks.com and click Configure Network > double click "eth0"
# - Select 'Connect automatically' > Apply > Close > Next > America/Los Angeles > Next > password: hadoop > Use anyway > Use all space > Next > Basic server > Next
# - Then select "Write changes to disk" and this should install CentOS. Click Reboot once done
# - Once the VM boots, log in as root/hadoop and then run the 'ip a' command to find its IP.
# - SSH into the VM from your laptop by opening a terminal window and typing 'ssh root@<ip_address_of_VM>'. Log in as root/hadoop
# - Run the below script. You can customize its behavior by modifying the exports at the top of the script. Otherwise, to run with the defaults, just run the below one-liner and wait about 30 minutes:
# curl -sSL https://gist.github.com/abajwa-hw/32e0714d9f5f4b8910e1cfc865dbc004/raw | sudo -E sh
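#   Alternatively (hypothetical usage; the filename hdb_sandbox.sh is just an example), download the script first so you can review or edit the exports before running it:
#   curl -sSL https://gist.github.com/abajwa-hw/32e0714d9f5f4b8910e1cfc865dbc004/raw -o hdb_sandbox.sh
#   sudo sh hdb_sandbox.sh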
###########################################################################################################################
#Customize HDB install bits location
export HDB_DOWNLOAD_LOC=https://www.dropbox.com/s/5rzhqxajbd5pq9k/hdb-2.0.0.0-22126.tar.gz
export HDB_AMBARI_DOWNLOAD_LOC=https://www.dropbox.com/s/6ik8f3r472f7mzq/hdb-ambari-plugin-2.0.0-448.tar.gz
#Customize which services to deploy and other configs
export ambari_services="HDFS MAPREDUCE2 YARN ZOOKEEPER HIVE ZEPPELIN SPARK HAWQ PXF"
export ambari_password="admin"
export cluster_name=hdp
export host_count=1
################
# Script start #
################
set -e
ip=$(/sbin/ip -o -4 addr list eth0 | awk '{print $4}' | cut -d/ -f1)
#add /etc/hosts entry
echo "${ip} $(hostname -f) $(hostname) sandbox.hortonworks.com" | sudo tee -a /etc/hosts
#remove any files from previous install attempts
rm -rf /staging
rm -rf ~/ambari-bootstrap
rm -rf /usr/lib/hue
rm -f /etc/init.d/startup_script
#install python sh module from pip - used later by sandbox splash screen page
yum install -y epel-release
yum install -y python-pip
pip install sh
#install bootstrap - for now use Dan's fork with _ fix
yum install -y git python-argparse
cd ~
#git clone https://github.com/seanorama/ambari-bootstrap.git
git clone https://github.com/dbbaskette/ambari-bootstrap.git
#install Ambari
echo "Installing Ambari..."
install_ambari_server=true ~/ambari-bootstrap/ambari-bootstrap.sh
#install zeppelin service defn
git clone https://github.com/hortonworks-gallery/ambari-zeppelin-service.git /var/lib/ambari-server/resources/stacks/HDP/2.4/services/ZEPPELIN
sed -i.bak '/dependencies for all/a \ "ZEPPELIN_MASTER-START": ["NAMENODE-START", "DATANODE-START"],' /var/lib/ambari-server/resources/stacks/HDP/2.4/role_command_order.json
#HAWQ setup
echo "Setting up HAWQ service defn..."
mkdir /staging
chmod a+rx /staging
cd /staging
wget ${HDB_DOWNLOAD_LOC}
wget ${HDB_AMBARI_DOWNLOAD_LOC}
tar -xvzf /staging/hdb-2.0.0.0-*.tar.gz -C /staging/
tar -xvzf /staging/hdb-ambari-plugin-2.0.0-*.tar.gz -C /staging/
yum install -y httpd
service httpd start
chkconfig httpd on
cd /staging/hdb*
./setup_repo.sh
cd /staging/hdb-ambari-plugin*
./setup_repo.sh
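#optional check (an addition; assumes the repo ids created by setup_repo.sh contain "hdb"): verify yum can see the local repos before installing the plugin
yum repolist | grep -i hdb || echo "WARNING: HDB repos not visible to yum"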
yum install -y hdb-ambari-plugin
#restart Ambari
echo "Restarting Ambari..."
service ambari-server restart
service ambari-agent restart
sleep 5
curl -u admin:admin -H X-Requested-By:ambari http://localhost:8080/api/v1/hosts
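#hedged addition (not in the original script): poll the Ambari hosts API for up to ~2 min until this host shows as registered, instead of relying on the single call above
for i in $(seq 1 24); do
  curl -su admin:admin http://localhost:8080/api/v1/hosts | grep -q "$(hostname -f)" && break
  echo "Waiting for Ambari agent to register (attempt $i)..."
  sleep 5
done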
#make VM look like sandbox
echo "Make VM look like sandbox..."
cd ~
wget https://github.com/abajwa-hw/security-workshops/raw/master/scripts/startup-HDB.zip
unzip startup-HDB.zip -d /
ln -s /usr/lib/hue/tools/start_scripts/startup_script /etc/init.d/startup_script
rm -f startup-HDB.zip
echo "vmware" > /virtualization
#boot in text only and remove rhgb
#plymouth-set-default-theme text
sed -i "s/rhgb//g" /boot/grub/grub.conf
#add startup_script and splash page to startup
echo "setterm -blank 0" >> /etc/rc.local
echo "/etc/rc.d/init.d/startup_script start" >> /etc/rc.local
echo "python /usr/lib/hue/tools/start_scripts/splash.py" >> /etc/rc.local
#provide custom configs for HAWQ, and HDFS proxy users
echo "Creating custom configs..."
cat << EOF > ~/ambari-bootstrap/deploy/configuration-custom.json
{
  "configurations" : {
    "hdfs-site": {
      "dfs.allow.truncate": "true",
      "dfs.block.access.token.enable": "false",
      "dfs.block.local-path-access.user": "gpadmin",
      "dfs.client.read.shortcircuit": "true",
      "dfs.client.socket-timeout": "300000000",
      "dfs.client.use.legacy.blockreader.local": "false",
      "dfs.datanode.handler.count": "60",
      "dfs.datanode.socket.write.timeout": "7200000",
      "dfs.namenode.handler.count": "600",
      "dfs.support.append": "true"
    },
    "hawq-site": {
      "hawq_master_address_port": "10432"
    },
    "hawq-env": {
      "hawq_password": "gpadmin"
    },
    "core-site": {
      "hadoop.proxyuser.root.groups": "*",
      "hadoop.proxyuser.root.hosts": "*",
      "ipc.client.connection.maxidletime": "3600000",
      "ipc.client.connect.timeout": "300000",
      "ipc.server.listen.queue.size": "3300"
    }
  }
}
EOF
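#optional sanity check (an addition, not part of the original script): make sure the JSON above parses before handing it to the deploy step
python -m json.tool < ~/ambari-bootstrap/deploy/configuration-custom.json > /dev/null || echo "WARNING: configuration-custom.json is not valid JSON"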
echo "Starting cluster install..."
#generate BP using Ambari recommendation API and deploy HDP
cd ~/ambari-bootstrap/deploy/
./deploy-recommended-cluster.bash
sleep 5
#wait until cluster deployed
source ~/ambari-bootstrap/extras/ambari_functions.sh
ambari_configs
ambari_wait_request_complete 1
##post install steps
cd ~
echo "Update Zeppelin configs for HAWQ"
curl -sSL https://gist.githubusercontent.com/abajwa-hw/0fd9772c916fac3fc5912f462168799a/raw | sudo -E python
echo "Downloading demo HAWQ demo notebook and restarting Zeppelin"
notebook_id=2BQPFYB1X
sudo -u zeppelin mkdir /usr/hdp/current/zeppelin-server/lib/notebook/$notebook_id
sudo -u zeppelin wget https://gist.githubusercontent.com/abajwa-hw/2f72d084dd1d0c5889783ecf0cd967ab/raw -O /usr/hdp/current/zeppelin-server/lib/notebook/$notebook_id/note.json
curl -u admin:$ambari_password -i -H 'X-Requested-By: zeppelin' -X PUT -d '{"RequestInfo": {"context" :"Stop ZEPPELIN via REST"}, "Body": {"ServiceInfo": {"state": "INSTALLED"}}}' http://localhost:8080/api/v1/clusters/$cluster_name/services/ZEPPELIN
sleep 30
curl -u admin:$ambari_password -i -H 'X-Requested-By: zeppelin' -X PUT -d '{"RequestInfo": {"context" :"Start ZEPPELIN via REST"}, "Body": {"ServiceInfo": {"state": "STARTED"}}}' http://localhost:8080/api/v1/clusters/$cluster_name/services/ZEPPELIN
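#hedged addition (not in the original script): instead of assuming the restart finished, poll the service state Ambari reports for up to ~2 min; a stricter approach would track the request id returned by the PUT above
for i in $(seq 1 12); do
  state=$(curl -su admin:$ambari_password "http://localhost:8080/api/v1/clusters/$cluster_name/services/ZEPPELIN?fields=ServiceInfo/state" | grep '"state"' | awk -F'"' '{print $4}')
  echo "ZEPPELIN service state: $state"
  [ "$state" = "STARTED" ] && break
  sleep 10
done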
echo "import data into hive"
cd /tmp
wget https://raw.githubusercontent.com/abajwa-hw/security-workshops/master/data/sample_07.csv
hive -e "CREATE TABLE sample_07 (
code string ,
description string ,
total_emp int ,
salary int )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TextFile; "
hive -e "load data local inpath '/tmp/sample_07.csv' into table sample_07;"
echo "import retail sample data from pivotal github"
cd /tmp
git clone https://github.com/pivotalsoftware/pivotal-samples.git
cd /tmp/pivotal-samples/sample-data/
sudo -u hdfs ./load_data_to_HDFS.sh
sudo -u hdfs hdfs dfs -chmod -R 777 /retail_demo
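#optional check (an addition, not part of the original script): confirm the retail sample data landed in HDFS
sudo -u hdfs hdfs dfs -ls /retail_demo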
hive -f /tmp/pivotal-samples/hive/create_hive_tables.sql
echo "getting ready to export VM"
rm -f /etc/udev/rules.d/*-persistent-net.rules
sed -i '/^HWADDR/d' /etc/sysconfig/network-scripts/ifcfg-eth0
sed -i '/^UUID/d' /etc/sysconfig/network-scripts/ifcfg-eth0
echo "reduce VM size"
wget http://dev2.hortonworks.com.s3.amazonaws.com/stuff/zero_machine.sh
chmod +x zero_machine.sh
./zero_machine.sh
/bin/rm -f zero_machine.sh
echo "Configure local connections to HAWQ and reload HAWQ configs.."
ip=$(/sbin/ip -o -4 addr list eth0 | awk '{print $4}' | cut -d/ -f1)
echo "# File is generated from ${SCRIPT}" > /data/hawq/master/pg_hba.conf
echo "local all gpadmin ident" >> /data/hawq/master/pg_hba.conf
echo "host all gpadmin 127.0.0.1/28 trust" >> /data/hawq/master/pg_hba.conf
echo "host all all ${ip}/32 trust" >> /data/hawq/master/pg_hba.conf
set +e
sudo -u gpadmin bash -c "source /usr/local/hawq/greenplum_path.sh; hawq stop cluster -a --reload"
set -e
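#optional post-reload check (hedged assumptions: the reload leaves HAWQ running and the master listens on port 10432 as set in hawq-site above)
sudo -u gpadmin bash -c "source /usr/local/hawq/greenplum_path.sh; psql -p 10432 -d template1 -c 'select version();'" || echo "WARNING: HAWQ connectivity check failed (non-fatal)"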
echo "Install is complete. Access Ambari on port 8080, Zeppelin on port 9995"
echo "A demo HAWQ notebook is available at http://VM_ADDRESS:9995/#/notebook/2BQPFYB1X"
echo "To take an export of this VM, shutdown and stop the VM first then export the .ova file by running below from on your local laptop (replace HDB_sandbox with the name of your VM). This will export the .ova file in your Mac's Downloads dir"
echo "/Applications/VMware\ Fusion.app/Contents/Library/VMware\ OVF\ Tool/ovftool --acceptAllEulas ~/Documents/Virtual\ Machines.localized/HDB_sandbox.vmwarevm/HDB_sandbox.vmx ~/Downloads/HDB_sandbox.ova"
exit 0