Skip to content

Instantly share code, notes, and snippets.

@abajwa-hw
abajwa-hw / test-configuration-custom.json
Last active March 1, 2018 03:06
test-configuration-custom.json for Ambari 2.6.1/HDP 2.6.4/HDF 3.0.2
{
"configurations": {
"core-site": {
"fs.trash.interval": "4320"
},
"ams-grafana-env": {
"metrics_grafana_password": "StrongPassword"
},
"hdfs-site": {
"dfs.replication" : "1",
@abajwa-hw
abajwa-hw / export_atlas.sh
Last active September 15, 2018 01:04
Export Hive DB artifacts from Atlas to zip
curl -X POST -u admin:admin -H "Content-Type: application/json" -H "Cache-Control: no-cache" -d '{
"itemsToExport": [
{ "typeName": "kafka_topic", "uniqueAttributes": { "name": "FOREX" } },
{ "typeName": "kafka_topic", "uniqueAttributes": { "name": "PRIVATE" } },
{ "typeName": "hbase_table", "uniqueAttributes": { "name": "T_FOREX" } },
{ "typeName": "hbase_table", "uniqueAttributes": { "name": "T_PRIVATE" } },
{ "typeName": "hive_db", "uniqueAttributes": { "name": "finance" } },
{ "typeName": "hive_db", "uniqueAttributes": { "name": "hortoniabank" } },
{ "typeName": "hive_db", "uniqueAttributes": { "name": "cost_savings" } },
{ "typeName": "hive_db", "uniqueAttributes": { "name": "consent_master" } },
@abajwa-hw
abajwa-hw / setup_hdp_hdf_ccdemo.sh
Last active July 25, 2022 12:43
Install latest HDP 2.6 and Nifi (HDF 3.0) and credit card demo on CentOS 6.6
#Sets up single node HDP+Nifi+credit card demo on CentOS 6.6
#Run script below and then use ~/CreditCardTransactionMonitor/startDemoServices.sh to start demo

# User-overridable settings: each variable keeps any value already exported in
# the environment and otherwise falls back to the default after ':-'.
export ambari_password="${ambari_password:-admin}"
export host_count="${host_count:-1}"   #choose number of nodes
export ambari_services="${ambari_services:-HDFS HIVE PIG SPARK MAPREDUCE2 TEZ YARN ZOOKEEPER ZEPPELIN NIFI HBASE PHOENIX STORM KAFKA AMBARI_INFRA ATLAS}" #AMBARI_METRICS can be added post-install
export hdp_ver="${hdp_ver:-2.6}"

# Pinned artifacts for this demo (not overridable).
export hdf_mpack='http://public-repo-1.hortonworks.com/HDF/centos6/3.x/updates/3.0.1.1/tars/hdf_ambari_mp/hdf-ambari-mpack-3.0.1.1-5.tar.gz'
export vm_name='sandbox'
export ambari_version='2.5.1.0'
@abajwa-hw
abajwa-hw / softlayer_user_onboarding.sh
Last active July 16, 2017 20:29
Script to onboard users for sandbox tutorials on SoftLayer
#Script to onboard users for sandbox tutorials. One liner to download and run:
#curl -sSL https://gist.github.com/abajwa-hw/176aa9f7a43d46068206cbdb96179e00/raw | sudo -E bash

# Overridable settings: each keeps a pre-exported value and otherwise falls
# back to the default after ':-'.
export ambari_pass=${ambari_pass:-'IBMDem0s!'} #ambari password
export refresh_notebooks=${refresh_notebooks:-'false'} #whether to pull latest Zeppelin notebooks from github. Disabling as there may not be access

# Base URL for the local Ambari REST API (not exported; used only within this
# script).  NOTE(review): https on port 8080 — confirm Ambari was configured
# for SSL on that port.
ambari_url="https://localhost:8080/api/v1"

# Stop the firewall so sandbox/UI ports are reachable (CentOS 6 'service'
# tooling; assumes passwordless sudo when run via the one-liner above).
echo "Turning off iptables..."
sudo service iptables stop
@abajwa-hw
abajwa-hw / generate_hive_tpcds.sh
Last active January 31, 2018 01:25
Generate TPC-DS dataset for Hive
#To run - export any variables then execute below:
#curl -sSL https://gist.github.com/abajwa-hw/2e49079e5d89692b9eace82d0c25c4ab/raw | sudo -E sh
#sudo -u hdfs -s
#cd /home/hdfs

# Tunables: pre-export either variable to override its default.
#set java home and size of dataset (in GB). Min is 2
export java_home="${java_home:-/usr/java/default}"   # JDK location used by the generator
export tpcds_size_gb="${tpcds_size_gb:-2}"           # dataset size in GB (minimum is 2)
@abajwa-hw
abajwa-hw / deploy_vanilla_hdp.sh
Last active May 10, 2020 07:01
vanilla HDP install
# Launch Centos/RHEL 7 VM with at least 8 vcpu / 32Gb+ memory / 100Gb disk
#To run - export any variables then execute below:
#export cluster_name=DESIRED_CLUSTER_NAME; curl -sSL https://gist.github.com/abajwa-hw/7794ea013c96f3f41c4a8b10aeeccd4d/raw | sudo -E sh

# User-overridable settings: each keeps a pre-exported value and otherwise
# falls back to the default after ':-'.
export cluster_name="${cluster_name:-hdp}"
export ambari_password="${ambari_password:-BadPass#1}"   #Ambari password
export host_count="${host_count:-1}"                     #choose number of nodes
export ambari_services="${ambari_services:-HDFS HIVE PIG MAPREDUCE2 TEZ YARN ZOOKEEPER ZEPPELIN AMBARI_INFRA_SOLR KAFKA SPARK2}" #AMBARI_METRICS can be added post-install
export hdp_ver="${hdp_ver:-3.1}"

# Pinned Ambari release matching this HDP line (not overridable).
export ambari_version='2.7.3.0'
@abajwa-hw
abajwa-hw / setup_trucking_demo.sh
Last active May 2, 2019 01:37
Deploy trucking demo on single node HDP 2.5 (Centos 6)
#assuming HDP 2.5 single node already installed with Hbase/Storm/Kafka started

# Demo lives under the invoking user's home directory; '~' expands at
# assignment time, so demo_root holds an absolute path.
export demo_root=~
export JAVA_HOME=/usr/java/default

# Work from the demo root for the fetch/build steps that follow.
# NOTE(review): ${demo_root} is unquoted — fine here since $HOME rarely
# contains spaces, but quoting would be safer.
cd ${demo_root}

# Build tooling plus git/wget needed to fetch and compile the demo sources
# (assumes passwordless sudo).
sudo yum -y groupinstall "Development Tools"
sudo yum install -y wget git
@abajwa-hw
abajwa-hw / install_ambari_hdf.sh
Created October 19, 2016 14:41
Install Ambari and HDF 2.0 mpack on RHEL7
#Based on http://docs.hortonworks.com/HDPDocuments/HDF2/HDF-2.0.0/bk_ambari-installation/content/index.html

# Target OS flavour and Ambari release; both are interpolated into the
# repo/mpack URLs below, so change them together.
export host_os=centos7
export ambari_version=2.4.1.0
export hdf_ambari_mpack_url="http://public-repo-1.hortonworks.com/HDF/${host_os}/2.x/updates/2.0.0.0/tars/hdf_ambari_mp/hdf-ambari-mpack-2.0.0.0-579.tar.gz"
export ambari_repo="http://public-repo-1.hortonworks.com/ambari/${host_os}/2.x/updates/${ambari_version}/ambari.repo"

#1. turn off firewall
# RHEL7/CentOS7 uses firewalld; stop it now and disable it across reboots so
# Ambari/HDF ports stay reachable (assumes the script runs as root).
systemctl stop firewalld.service
systemctl disable firewalld
@abajwa-hw
abajwa-hw / start_hdp_on_boot.sh
Last active December 21, 2018 02:19
Script to setup startup of HDP on boot
#curl -sSL https://gist.github.com/abajwa-hw/4bf004d0fb065d404760eaeebc15e74e/raw | sudo -E sh
#################################
## write out start_services.sh #
#################################
# Write the boot-time startup script verbatim: the quoted 'EOS' delimiter
# suppresses all expansion inside the heredoc, and the <<- form strips
# leading tabs (tabs only, not spaces) from its body.  NOTE(review): the
# heredoc terminator lies beyond this excerpt, so its body is left untouched.
cat > /root/start_services.sh <<-'EOS'
export ambari_user=demokitadmin
export ambari_pass=BadPass#1
@abajwa-hw
abajwa-hw / flow.xml
Last active November 10, 2016 08:28
Twitter demo flow.xml
<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<flowController encoding-version=\"1.0\">\n <maxTimerDrivenThreadCount>10</maxTimerDrivenThreadCount>\n <maxEventDrivenThreadCount>5</maxEventDrivenThreadCount>\n <rootGroup>\n <id>7c84501d-d10c-407c-b9f3-1d80e38fe36a</id>\n <name>NiFi Flow</name>\n <position x=\"0.0\" y=\"0.0\"/>\n <comment/>\n <processGroup>\n <id>f5ca9391-0f9a-4e95-35d7-0fc2188fe61b</id>\n <name>Twitter Dashboard - Push tweets into HDFS/Solr</name>\n <position x=\"469.0\" y=\"166.0\"/>\n <comment/>\n <processor>\n <id>f6327477-fb7d-4af0-bae5-88e29c322093</id>\n <name>PutSolrContentStream</name>\n <position x=\"570.1624606053042\" y=\"398.4867144979376\"/>\n <styles/>\n <comment/>\n <class>org.apache.nifi.processors.solr.PutSolrContentStream</class>\n <maxConcurrentTasks>1</maxConcurrentTasks>\n <schedulingPeriod>0 sec</schedulingPeriod>\n <penalizationPeriod>30 sec</penaliza