###########################################################################################################################
##HDB on HDP sandbox setup script
###Pre-reqs:
#- Laptop with at least 10-12 GB RAM (mine has 16 GB)
#- ISO image of CentOS 6.7 or later downloaded from [here](http://isoredirect.centos.org/centos/6/isos/x86_64/).
# - In my case, I used CentOS-6.7-x86_64-bin-DVD1.iso.
##### Setup CentOS 6.7 or later on VM
#- Start a CentOS VM using the ISO
# - Open VMware Fusion and click File > New > Install from disc/image > Use another disk
# - Select the iso file > Deselect easy install > Customize settings > name: HDB_sandbox
# - Under Processors and memory, set memory to 8048 MB and Processors to 4
# - Press Play to start the VM
#- Go through the CentOS install wizard
# - Install > Skip > Next > English > US English > Basic Storage Devices > Yes, discard
# - Change hostname to sandbox.hortonworks.com and click Configure Network > double click "eth0"
# - Select 'Connect automatically' > Apply > Close > Next > America/Los Angeles > Next > password: hadoop > Use anyway > Use all space > Next > Basic server > Next
# - Then select "Write changes to disk" and this should install CentOS. Click Reboot once done
# - Once the VM boots, login as root/hadoop and run the 'ip a' command to find its IP.
# - SSH into the VM from your laptop by opening a terminal window and typing 'ssh root@<ip_address_of_VM>'. Login as root/hadoop
# - Run the below script. You can customize its behavior by modifying the exports below. Otherwise, to run the script with defaults, just run the one-liner below and wait about 30 min:
# curl -sSL https://gist.github.com/abajwa-hw/32e0714d9f5f4b8910e1cfc865dbc004/raw | sudo -E sh
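# - Alternatively (a sketch; the local filename is arbitrary), download the script first so you can review it or edit the exports before running:
# curl -sSL https://gist.github.com/abajwa-hw/32e0714d9f5f4b8910e1cfc865dbc004/raw -o hdb_sandbox.sh
# sudo -E sh hdb_sandbox.sh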
###########################################################################################################################
#Customize HDB install bits location
export HDB_DOWNLOAD_LOC=https://www.dropbox.com/s/5rzhqxajbd5pq9k/hdb-2.0.0.0-22126.tar.gz
export HDB_AMBARI_DOWNLOAD_LOC=https://www.dropbox.com/s/6ik8f3r472f7mzq/hdb-ambari-plugin-2.0.0-448.tar.gz
#Customize which services to deploy and other configs
export ambari_services="HDFS MAPREDUCE2 YARN ZOOKEEPER HIVE ZEPPELIN SPARK HAWQ PXF"
export ambari_password="admin"
export cluster_name=hdp
export host_count=1
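#Example (an illustrative alternative, not the default): to deploy a leaner cluster, trim the service list before running, e.g.
#  export ambari_services="HDFS MAPREDUCE2 YARN ZOOKEEPER HAWQ PXF"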
################
# Script start #
################
set -e
ip=$(/sbin/ip -o -4 addr list eth0 | awk '{print $4}' | cut -d/ -f1)
#add /etc/hosts entry
echo "${ip} $(hostname -f) $(hostname) sandbox.hortonworks.com" | sudo tee -a /etc/hosts
#remove any files from previous install attempts
rm -rf /staging
rm -rf ~/ambari-bootstrap
rm -rf /usr/lib/hue
rm -f /etc/init.d/startup_script
#install python sh module from pip - used later by sandbox splash screen page
yum install -y epel-release
yum install -y python-pip
pip install sh
#install bootstrap - for now use Dan's fork with _ fix
yum install -y git python-argparse
cd ~
#git clone https://github.com/seanorama/ambari-bootstrap.git
git clone https://github.com/dbbaskette/ambari-bootstrap.git
#install Ambari
echo "Installing Ambari..."
install_ambari_server=true ~/ambari-bootstrap/ambari-bootstrap.sh
#install zeppelin service defn
git clone https://github.com/hortonworks-gallery/ambari-zeppelin-service.git /var/lib/ambari-server/resources/stacks/HDP/2.4/services/ZEPPELIN
sed -i.bak '/dependencies for all/a \ "ZEPPELIN_MASTER-START": ["NAMENODE-START", "DATANODE-START"],' /var/lib/ambari-server/resources/stacks/HDP/2.4/role_command_order.json
#HAWQ setup
echo "Setting up HAWQ service defn..."
mkdir /staging
chmod a+rx /staging
cd /staging
wget ${HDB_DOWNLOAD_LOC}
wget ${HDB_AMBARI_DOWNLOAD_LOC}
tar -xvzf /staging/hdb-2.0.0.0-*.tar.gz -C /staging/
tar -xvzf /staging/hdb-ambari-plugin-2.0.0-*.tar.gz -C /staging/
yum install -y httpd
service httpd start
chkconfig httpd on
cd /staging/hdb*
./setup_repo.sh
cd /staging/hdb-ambari-plugin*
./setup_repo.sh
yum install -y hdb-ambari-plugin
#restart Ambari
echo "Restarting Ambari..."
service ambari-server restart
service ambari-agent restart
sleep 5
curl -u admin:admin -H X-Requested-By:ambari http://localhost:8080/api/v1/hosts
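#The hosts call above should list this VM's FQDN once the agent has re-registered; a quick check (a sketch):
#  curl -su admin:admin http://localhost:8080/api/v1/hosts | grep host_name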
#make VM look like sandbox
echo "Make VM look like sandbox..."
cd ~
wget https://github.com/abajwa-hw/security-workshops/raw/master/scripts/startup-HDB.zip
unzip startup-HDB.zip -d /
ln -s /usr/lib/hue/tools/start_scripts/startup_script /etc/init.d/startup_script
rm -f startup-HDB.zip
echo "vmware" > /virtualization
#boot in text-only mode and remove rhgb
#plymouth-set-default-theme text
sed -i "s/rhgb//g" /boot/grub/grub.conf
#add startup_script and splash page to startup
echo "setterm -blank 0" >> /etc/rc.local
echo "/etc/rc.d/init.d/startup_script start" >> /etc/rc.local
echo "python /usr/lib/hue/tools/start_scripts/splash.py" >> /etc/rc.local
#provide custom configs for HAWQ and HDFS proxy users
echo "Creating custom configs..."
cat << EOF > ~/ambari-bootstrap/deploy/configuration-custom.json
{
  "configurations" : {
    "hdfs-site": {
      "dfs.allow.truncate": "true",
      "dfs.block.access.token.enable": "false",
      "dfs.block.local-path-access.user": "gpadmin",
      "dfs.client.read.shortcircuit": "true",
      "dfs.client.socket-timeout": "300000000",
      "dfs.client.use.legacy.blockreader.local": "false",
      "dfs.datanode.handler.count": "60",
      "dfs.datanode.socket.write.timeout": "7200000",
      "dfs.namenode.handler.count": "600",
      "dfs.support.append": "true"
    },
    "hawq-site": {
      "hawq_master_address_port": "10432"
    },
    "hawq-env": {
      "hawq_password": "gpadmin"
    },
    "core-site": {
      "hadoop.proxyuser.root.groups": "*",
      "hadoop.proxyuser.root.hosts": "*",
      "ipc.client.connection.maxidletime": "3600000",
      "ipc.client.connect.timeout": "300000",
      "ipc.server.listen.queue.size": "3300"
    }
  }
}
EOF
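#Optional: validate the generated JSON before the deploy consumes it (json.tool ships with the system Python)
#  python -m json.tool < ~/ambari-bootstrap/deploy/configuration-custom.json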
echo "Starting cluster install..." | |
#generate BP using Ambari recommendation API and deploy HDP | |
cd ~/ambari-bootstrap/deploy/ | |
./deploy-recommended-cluster.bash | |
sleep 5 | |
#wait until cluster deployed | |
source ~/ambari-bootstrap/extras/ambari_functions.sh | |
ambari_configs | |
ambari_wait_request_complete 1 | |
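#If the wait helper is interrupted, install progress can also be polled directly via the Ambari API
#(a sketch; request id 1 is the initial cluster-install request):
#  curl -su admin:$ambari_password http://localhost:8080/api/v1/clusters/$cluster_name/requests/1 | grep -E 'request_status|progress_percent'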
##post install steps
cd ~
echo "Update Zeppelin configs for HAWQ"
curl -sSL https://gist.githubusercontent.com/abajwa-hw/0fd9772c916fac3fc5912f462168799a/raw | sudo -E python
echo "Downloading demo HAWQ notebook and restarting Zeppelin"
notebook_id=2BQPFYB1X
sudo -u zeppelin mkdir /usr/hdp/current/zeppelin-server/lib/notebook/$notebook_id
sudo -u zeppelin wget https://gist.githubusercontent.com/abajwa-hw/2f72d084dd1d0c5889783ecf0cd967ab/raw -O /usr/hdp/current/zeppelin-server/lib/notebook/$notebook_id/note.json
curl -u admin:$ambari_password -i -H 'X-Requested-By: zeppelin' -X PUT -d '{"RequestInfo": {"context" :"Stop ZEPPELIN via REST"}, "Body": {"ServiceInfo": {"state": "INSTALLED"}}}' http://localhost:8080/api/v1/clusters/$cluster_name/services/ZEPPELIN
sleep 30
curl -u admin:$ambari_password -i -H 'X-Requested-By: zeppelin' -X PUT -d '{"RequestInfo": {"context" :"Start ZEPPELIN via REST"}, "Body": {"ServiceInfo": {"state": "STARTED"}}}' http://localhost:8080/api/v1/clusters/$cluster_name/services/ZEPPELIN
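#Rather than relying on the fixed sleep, you could poll until Ambari reports the service STARTED
#(a sketch of the same service API used above; assumes Ambari's default pretty-printed JSON output):
#  until curl -su admin:$ambari_password http://localhost:8080/api/v1/clusters/$cluster_name/services/ZEPPELIN | grep -q '"state" : "STARTED"'; do sleep 10; done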
echo "import data into hive" | |
cd /tmp | |
wget https://raw.githubusercontent.com/abajwa-hw/security-workshops/master/data/sample_07.csv | |
hive -e "CREATE TABLE sample_07 ( | |
code string , | |
description string , | |
total_emp int , | |
salary int ) | |
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TextFile; " | |
hive -e "load data local inpath '/tmp/sample_07.csv' into table sample_07;" | |
echo "import retail sample data from pivotal github" | |
cd /tmp | |
git clone https://github.com/pivotalsoftware/pivotal-samples.git | |
cd /tmp/pivotal-samples/sample-data/ | |
sudo -u hdfs ./load_data_to_HDFS.sh | |
sudo -u hdfs hdfs dfs -chmod -R 777 /retail_demo | |
hive -f /tmp/pivotal-samples/hive/create_hive_tables.sql | |
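#Optional sanity check: the retail data should now be in HDFS and registered in Hive
#  sudo -u hdfs hdfs dfs -ls /retail_demo
#  hive -e "show tables in retail_demo;"  # assumes the sample SQL creates its tables in a retail_demo database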
echo "getting ready to export VM" | |
rm -f /etc/udev/rules.d/*-persistent-net.rules | |
sed -i '/^HWADDR/d' /etc/sysconfig/network-scripts/ifcfg-eth0 | |
sed -i '/^UUID/d' /etc/sysconfig/network-scripts/ifcfg-eth0 | |
echo "reduce VM size" | |
wget http://dev2.hortonworks.com.s3.amazonaws.com/stuff/zero_machine.sh | |
chmod +x zero_machine.sh | |
./zero_machine.sh | |
/bin/rm -f zero_machine.sh | |
echo "Configure local connections to HAWQ and reload HAWQ configs.." | |
ip=$(/sbin/ip -o -4 addr list eth0 | awk '{print $4}' | cut -d/ -f1) | |
echo "# File is generated from ${SCRIPT}" > /data/hawq/master/pg_hba.conf | |
echo "local all gpadmin ident" >> /data/hawq/master/pg_hba.conf | |
echo "host all gpadmin 127.0.0.1/28 trust" >> /data/hawq/master/pg_hba.conf | |
echo "host all all ${ip}/32 trust" >> /data/hawq/master/pg_hba.conf | |
set +e
sudo -u gpadmin bash -c "source /usr/local/hawq/greenplum_path.sh; hawq stop cluster -a --reload"
set -e
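#Optional check (a sketch): confirm HAWQ accepts a local connection on the custom master port 10432 set above
#  sudo -u gpadmin bash -c "source /usr/local/hawq/greenplum_path.sh; psql -p 10432 -d postgres -c 'select version();'"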
echo "Install is complete. Access Ambari on port 8080, Zeppelin on port 9995" | |
echo "A demo HAWQ notebook is available at http://VM_ADDRESS:9995/#/notebook/2BQPFYB1X" | |
echo "To take an export of this VM, shutdown and stop the VM first then export the .ova file by running below from on your local laptop (replace HDB_sandbox with the name of your VM). This will export the .ova file in your Mac's Downloads dir" | |
echo "/Applications/VMware\ Fusion.app/Contents/Library/VMware\ OVF\ Tool/ovftool --acceptAllEulas ~/Documents/Virtual\ Machines.localized/HDB_sandbox.vmwarevm/HDB_sandbox.vmx ~/Downloads/HDB_sandbox.ova" | |
exit 0 |