Skip to content

Instantly share code, notes, and snippets.

@deanproctor
Last active April 22, 2020 15:22
Show Gist options
  • Save deanproctor/415ac1c68a7808e98ee92828c1dc4044 to your computer and use it in GitHub Desktop.
Save deanproctor/415ac1c68a7808e98ee92828c1dc4044 to your computer and use it in GitHub Desktop.
Bash script to install StreamSets Data Collector on Ubuntu 16.04
#!/usr/bin/env bash
#
# Installs StreamSets Data Collector (SDC) on Ubuntu.
# All tunables live in the constants below; the rest of the script only
# reads them.

# Release of the open-source SDC tarballs to download.
SDC_VER="3.13.0"

# Filesystem layout. SDC_ROOT is where tarballs are unpacked; SDC_HOME is the
# final, version-less installation directory beneath it.
SDC_ROOT="/opt"
SDC_HOME="${SDC_ROOT}/streamsets-datacollector"
SDC_CONF="/etc/sdc"
SDC_LOG="/var/log/sdc"
SDC_DATA="/var/lib/sdc"
SDC_RESOURCES="/var/lib/sdc-resources"

# Space-separated package lists (iterated with word splitting later on).
# OS_PACKAGES entries are combined with SDC_VER to build the download URL.
OS_PACKAGES="core azure-keyvault-credentialstore-lib azure-lib crypto-lib \
jdbc-lib jks-credentialstore-lib jython_2_7-lib orchestrator-lib \
wholefile-transformer-lib"
# ENT_PACKAGES entries already carry their own version suffix.
ENT_PACKAGES="oracle-lib-1.1.0 snowflake-lib-1.3.0"
# Create every directory the installation uses (mkdir -p makes this a no-op
# for directories that already exist).
printf '%s\n' "Creating installation directories"
install_dirs=("$SDC_ROOT" "$SDC_CONF" "$SDC_LOG" "$SDC_DATA" "$SDC_RESOURCES")
sudo mkdir -p "${install_dirs[@]}"

# Install Java 8, wget and ntp; apt's normal output is discarded so only
# errors reach the terminal.
printf '%s\n' "Installing dependencies"
sudo apt-get -y -qq update >/dev/null
sudo apt-get -y -qq install openjdk-8-jdk-headless wget ntp >/dev/null
# Download the StreamSets tarballs and unpack them directly from the network
# stream (nothing is staged on disk), then fetch the Oracle JDBC driver.
# Any failed download or extraction aborts the script: continuing would leave
# a partial installation for the later steps to operate on.

# fetch_and_unpack URL DEST
#   Streams the gzip'd tarball at URL into tar, extracting beneath DEST.
#   The body is a subshell with pipefail enabled, so a wget failure makes the
#   function return non-zero even if tar itself exits 0.
fetch_and_unpack() (
  set -o pipefail
  wget -qO - "$1" | sudo tar xzf - -C "$2"
)

# abort_download WHAT
#   Reports which artifact failed on stderr and terminates the script.
abort_download() {
  printf 'ERROR: failed to download or extract %s\n' "$1" >&2
  exit 1
}

echo "Downloading StreamSets packages..."
# The package lists are intentionally unquoted: they are space-separated
# strings that rely on word splitting.
for pkg in $OS_PACKAGES; do
  printf '\t%s\n' "$pkg"
  fetch_and_unpack \
    "https://archives.streamsets.com/datacollector/${SDC_VER}/tarball/streamsets-datacollector-${pkg}-${SDC_VER}.tgz" \
    "$SDC_ROOT" \
    || abort_download "$pkg"
done

# Enterprise libraries are extracted inside the versioned SDC directory
# rather than beside it.
for pkg in $ENT_PACKAGES; do
  printf '\t%s\n' "$pkg"
  fetch_and_unpack \
    "https://archives.streamsets.com/datacollector/enterprise/tarball/enterprise/streamsets-datacollector-${pkg}.tgz" \
    "${SDC_HOME}-${SDC_VER}" \
    || abort_download "$pkg"
done

printf '\t%s\n' "Oracle JDBC driver"
# NOTE(review): the driver is pulled from a personal S3 bucket with no
# checksum verification; consider pinning a hash or using an official source.
sudo wget -qP "${SDC_HOME}-${SDC_VER}/streamsets-libs-extras/streamsets-datacollector-jdbc-lib/lib/" \
  https://dean-bucket.s3.amazonaws.com/ojdbc8.jar \
  || abort_download "Oracle JDBC driver"
# Move the versioned download directory to its final, version-less location,
# seed the config directory, and create the service account that owns it all.

echo "Renaming download directory to $SDC_HOME"
# If the target already exists, mv would move the new directory *inside* it
# and the remaining steps would silently operate on the old installation.
if [[ -e "$SDC_HOME" ]]; then
  echo "ERROR: $SDC_HOME already exists; remove or rename it before re-running" >&2
  exit 1
fi
sudo mv "${SDC_HOME}-${SDC_VER}" "$SDC_HOME"

echo "Installing config files to $SDC_CONF"
# The glob is left outside the quotes so that it still expands.
sudo cp -R "$SDC_HOME"/etc/* "$SDC_CONF"

echo "Creating sdc user"
# Create the group and the user independently, and only when missing, so a
# pre-existing sdc group no longer causes user creation to be skipped.
getent group sdc >/dev/null || sudo groupadd -r sdc
# NOTE(review): on Ubuntu the nologin binary is usually /usr/sbin/nologin;
# confirm /sbin/nologin is the intended shell path.
id -u sdc >/dev/null 2>&1 \
  || sudo useradd -r -d "$SDC_HOME" -g sdc -s /sbin/nologin sdc

echo "Setting sdc user permissions"
sudo chown -R sdc:sdc "$SDC_HOME" "$SDC_CONF" "$SDC_LOG" "$SDC_DATA" "$SDC_RESOURCES"
echo "Updating SDC configuration settings..."

# Files edited in place below.
sdc_unit_file="$SDC_HOME/systemd/sdc.service"
sdc_props_file="$SDC_CONF/sdc.properties"

# Point the stock systemd unit at the directories configured at the top of
# this script. Expressions are applied to each line in the order listed.
printf '\tUpdating service directories\n'
sudo sed -i \
  -e "s@/opt/streamsets-datacollector@$SDC_HOME@" \
  -e "s@/etc/sdc@$SDC_CONF@" \
  -e "s@/var/log\/sdc@$SDC_LOG@" \
  -e "s@/var/lib/sdc@$SDC_DATA@" \
  "$sdc_unit_file"

# Swap the two port settings: http.port becomes -1 and https.port takes over
# 18630.
printf '\tEnabling HTTPS\n'
sudo sed -i \
  -e 's/http.port=18630/http.port=-1/' \
  -e 's/https.port=-1/https.port=18630/' \
  "$sdc_props_file"

# Raise the maximum batch size and uncomment the parser limit.
printf '\tIncreasing batch and parser limits\n'
sudo sed -i \
  -e 's/production.maxBatchSize=1000/production.maxBatchSize=100000/' \
  -e 's/#parser.limit=5335040/parser.limit=5335040/' \
  "$sdc_props_file"
# Size the JVM heap to 80% of total system memory and, on hosts with more
# than 8 GiB, switch the garbage collector to G1.
#
# LC_ALL=C pins the "Mem:" row label, which `free` otherwise localises; awk
# selects that row directly and prints its second column (total, in MiB).
TOTAL_MEM=$(LC_ALL=C free -m | awk '/^Mem:/ {print $2; exit}')

if [[ "$TOTAL_MEM" =~ ^[0-9]+$ ]]; then
  # Pass the value with -v instead of splicing it into the awk program text.
  JAVA_MEM=$(awk -v total="$TOTAL_MEM" 'BEGIN {printf "%.0f", total * .8}')
  echo -e "\tIncreasing JVM memory to ${JAVA_MEM}M"
  sudo sed -i "s/-Xmx1024m -Xms1024m/-Xmx${JAVA_MEM}m -Xms${JAVA_MEM}m/" \
    "$SDC_HOME/libexec/sdc-env.sh"

  if (( TOTAL_MEM > 8192 )); then
    echo -e "\tChanging garbage collector to G1GC"
    sudo sed -i 's/-XX:+UseConcMarkSweepGC -XX:+UseParNewGC/-XX:+UseG1GC/' \
      "$SDC_HOME/libexec/sdc-env.sh"
  fi
else
  # Without a numeric total the substitution would write "-Xmxm -Xmsm" into
  # sdc-env.sh, so leave the shipped JVM settings untouched instead.
  echo "WARNING: could not determine total memory; leaving JVM settings unchanged" >&2
fi
# Register the SDC CLI environment in the invoking user's ~/.bash_profile and
# load it into this shell so the CLI call later in the script sees it.
echo -e "\tInstalling SDC CLI options"

profile_file="$HOME/.bash_profile"
# NOTE(review): "password" is presumably the password of the keystore shipped
# in the SDC config directory; confirm, and change it for production use.
cli_opts_line="export SDC_CLI_JAVA_OPTS=\"-Djavax.net.ssl.trustStore=$SDC_CONF/keystore.jks -Djavax.net.ssl.trustStorePassword=password\""
# \$PATH stays literal so it is expanded when the profile is sourced.
path_line="PATH=\$PATH:$SDC_HOME/bin"

# Append each line only if that exact line is not already present, so
# re-running the installer does not pile up duplicates.
for profile_line in "$cli_opts_line" "$path_line"; do
  grep -qxF -- "$profile_line" "$profile_file" 2>/dev/null \
    || printf '%s\n' "$profile_line" >> "$profile_file"
done

# NOTE(review): bash login shells read ~/.bash_profile in preference to
# ~/.profile, so creating this file can hide an existing ~/.profile.
source ~/.bash_profile
echo "Setting system timezone to UTC"
sudo timedatectl set-timezone UTC

# Copy the (already customised) unit files into systemd's directory and make
# systemd re-read its configuration.
echo "Installing sdc service files"
for unit_file in sdc.service sdc.socket; do
  sudo cp "$SDC_HOME/systemd/$unit_file" "/etc/systemd/system/$unit_file"
done
sudo systemctl daemon-reload

# ntp is enabled through the SysV compatibility helper, then started.
echo "Starting ntp service"
sudo /lib/systemd/systemd-sysv-install enable ntp
sudo systemctl -q start ntp

# Start sdc now and enable it for subsequent boots.
echo "Starting sdc service"
sudo systemctl -q start sdc
sudo systemctl -q enable sdc
# Poll the SDC REST endpoint through the CLI until it answers, giving up
# after SDC_START_TIMEOUT seconds (default 600; override via the environment)
# instead of waiting forever when the service fails to come up.
# The check treats any output line containing "info" as a successful ping.
SDC_START_TIMEOUT=${SDC_START_TIMEOUT:-600}
# SECONDS is bash's built-in counter of seconds since the shell started.
start_deadline=$(( SECONDS + SDC_START_TIMEOUT ))

until "$SDC_HOME/bin/streamsets" cli -U https://localhost:18630 ping | grep -q info; do
  if (( SECONDS >= start_deadline )); then
    echo "ERROR: sdc did not respond within ${SDC_START_TIMEOUT}s; check 'systemctl status sdc' and the logs in $SDC_LOG" >&2
    exit 1
  fi
  echo -e "\tWaiting for sdc to start..."
  sleep 5
done
echo "Done"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment