Skip to content

Instantly share code, notes, and snippets.

@oschrenk
Last active December 30, 2015 09:39
Show Gist options
  • Save oschrenk/7810631 to your computer and use it in GitHub Desktop.
Save oschrenk/7810631 to your computer and use it in GitHub Desktop.
Configuring Hadoop on OS X for the first time
#!/usr/bin/env bash
#
# Installs Hadoop through Homebrew and prepares a local setup: edits the files
# under conf/, sets up an SSH key for localhost, formats HDFS and starts the
# Hadoop daemons.
#
# bash (not plain sh) is required: the ERR trap used below is a bash feature.

#######################################
# Reports where a command failed and with which status.
# Arguments:
#   $1 - line number at which the failure occurred
#   $2 - exit status of the failed command
# Outputs:
#   Two diagnostic lines on stderr.
#######################################
error_handler() {
  printf 'Error occurred in script at line: %s.\n' "${1}" >&2
  printf 'Line exited with status: %s\n' "${2}" >&2
}

# The single quotes defer expansion, so LINENO and $? are read at the moment
# the trap fires rather than when it is installed.
trap 'error_handler "${LINENO}" "$?"' ERR

# Abort the script as soon as a command fails outside of a condition.
set -o errexit
echo ""
echo "Installing Dependencies"
echo "-----------------"
# xmlstarlet is used further down to read and edit the XML files under conf/.
brew install xmlstarlet

echo ""
echo "Installing Hadoop"
echo "-----------------"
brew install hadoop

# Take the version number from the "hadoop:" line of `brew info`. Dots are
# matched literally and each component may have several digits (e.g. 2.10.1);
# only the first match is kept.
HADOOP_VERSION=$(brew info hadoop | grep "hadoop:" | grep -oE "[0-9]+(\.[0-9]+)+" | head -n 1)
if [ -z "$HADOOP_VERSION" ]; then
  echo "Could not determine the Hadoop version from 'brew info hadoop'." >&2
  exit 1
fi
HADOOP_LIBEXEC_PATH="$(brew --prefix)/Cellar/hadoop/$HADOOP_VERSION/libexec"

# Every later step uses paths relative to this directory (conf/..., bin/...).
cd "$HADOOP_LIBEXEC_PATH"
printf '%s\n' "" "Configuring Hadoop" "------------------"

# conf/hadoop-env.sh
file="conf/hadoop-env.sh"
# The file counts as configured when at least one line mentions HADOOP_OPTS
# and is not a comment (leading spaces before the '#' are ignored).
if ! grep HADOOP_OPTS "$file" | grep -qv "^ *#"; then
  echo " Configuring $file."
  # Appended after two newlines so the assignment starts on its own line.
  printf "\n\nHADOOP_OPTS=\"-Djava.net.preferIPv4Stack=true -Djava.security.krb5.realm=OX.AC.UK -Djava.security.krb5.kdc=kdc0.ox.ac.uk:kdc1.ox.ac.uk\"" >> "$file"
else
  echo " WARN HADOOP_OPTS already set, not changing file."
fi
# conf/core-site.xml
file="conf/core-site.xml"
# Only touch the file when it has no <property> content yet.
if [ -z "$(xmlstarlet sel -T -t -v "/configuration/property" "$file")" ]; then
  echo " Configuring $file"

  # First property: fs.default.name = hdfs://localhost:9000
  # (-L edits the file in place, -s adds a child node, -a appends a sibling.)
  xml ed -L -s /configuration -t elem -n property -v "" "$file"
  xml ed -L -s /configuration/property -t elem -n name -v "fs.default.name" "$file"
  xml ed -L -a /configuration/property/name -t elem -n value -v "hdfs://localhost:9000" "$file"

  # Second property: hadoop.tmp.dir. It is created under the temporary name
  # "property2" so the XPath expressions select only this element, and is
  # renamed to "property" once it is complete.
  xml ed -L -s /configuration -t elem -n property2 -v "" "$file"
  xml ed -L -s /configuration/property2 -t elem -n name -v "hadoop.tmp.dir" "$file"
  # Single quotes keep ${user.name} away from the shell, so it is written to
  # the file verbatim (presumably for Hadoop to substitute - confirm).
  xml ed -L -a /configuration/property2/name -t elem -n value -v '/Users/${user.name}/.hadoop-store' "$file"
  xml ed -L -r /configuration/property2 -v property "$file"
else
  echo " WARN $file already configured, not changing file."
fi
# conf/hdfs-site.xml
file="conf/hdfs-site.xml"
# Only touch the file when it has no <property> content yet.
if [ -z "$(xmlstarlet sel -T -t -v "/configuration/property" "$file")" ]; then
  echo " Configuring $file"

  # Single property: dfs.replication = 1
  xml ed -L -s /configuration -t elem -n property -v "" "$file"
  xml ed -L -s /configuration/property -t elem -n name -v "dfs.replication" "$file"
  xml ed -L -a /configuration/property/name -t elem -n value -v "1" "$file"
else
  echo " WARN $file already configured, not changing file."
fi
# conf/mapred-site.xml
file="conf/mapred-site.xml"
# Only touch the file when it has no <property> content yet.
if [ -n "$(xmlstarlet sel -T -t -v "/configuration/property" "$file")" ]; then
  echo " WARN $file already configured, not changing file."
else
  echo " Configuring $file"

  # First property: mapred.job.tracker = localhost:9001
  xml ed -L -s /configuration -t elem -n property -v "" "$file"
  xml ed -L -s /configuration/property -t elem -n name -v "mapred.job.tracker" "$file"
  xml ed -L -a /configuration/property/name -t elem -n value -v "localhost:9001" "$file"

  # Second property: one map task slot per physical CPU core. It is built
  # under the temporary name "property2" and renamed afterwards; addressing it
  # as /configuration/property would also match the first property and add a
  # second <name>/<value> pair to it.
  xml ed -L -s /configuration -t elem -n property2 -v "" "$file"
  xml ed -L -s /configuration/property2 -t elem -n name -v "mapred.tasktracker.map.tasks.maximum" "$file"
  xml ed -L -a /configuration/property2/name -t elem -n value -v "$(sysctl -n hw.physicalcpu)" "$file"
  xml ed -L -r /configuration/property2 -v property "$file"
fi
# Section banner plus a short explanation of why SSH is needed.
printf '%s\n' \
  "" \
  "Configuring SSH" \
  "---------------" \
  " Hadoop manages all its nodes using ssh" \
  "" \
  " Enabling Remote Login on OS X"

# Turns on the system's Remote Login setting; run through sudo.
sudo systemsetup -setremotelogin on
# Dedicated key pair used for SSH connections to localhost.
SSH_KEY_FILENAME="$HOME/.ssh/id_rsa_hadoop"
if [ -f "$SSH_KEY_FILENAME" ]; then
  echo " WARN $SSH_KEY_FILENAME already exists"
else
  echo " Creating passwordless SSH key for localhost at $SSH_KEY_FILENAME"
  # -P "" gives the key an empty passphrase.
  ssh-keygen -t rsa -f "$SSH_KEY_FILENAME" -P ""
fi

# The second whitespace-separated field of the .pub file is the key body.
# A missing .pub file makes awk fail, which stops the script here instead of
# running grep with an empty pattern.
ssh_public_key=$(awk '{print $2}' "$SSH_KEY_FILENAME.pub")
if [ -z "$ssh_public_key" ]; then
  echo "Could not read a public key from $SSH_KEY_FILENAME.pub" >&2
  exit 1
fi

ssh_authorized_keys="$HOME/.ssh/authorized_keys"
# -F matches the key as a fixed string rather than a regular expression; the
# -f test avoids a grep error when authorized_keys does not exist yet.
if [ -f "$ssh_authorized_keys" ] && grep -qF -- "$ssh_public_key" "$ssh_authorized_keys"; then
  echo " INFO SSH key already authorized"
else
  echo " Authorizing SSH key"
  cat "$SSH_KEY_FILENAME.pub" >> "$ssh_authorized_keys"
fi
# Make SSH use the Hadoop key for localhost, unless the SSH config already
# mentions localhost anywhere.
ssh_config_file="$HOME/.ssh/config"
# The -f test avoids a grep error when the config file does not exist yet.
if [ -f "$ssh_config_file" ] && grep -q localhost "$ssh_config_file"; then
  echo " WARN localhost is already configured in SSH config"
else
  echo " Changing SSH config to use $SSH_KEY_FILENAME for localhost"
  CURRENT_USER=$(whoami)
  # Values are passed as printf arguments, not spliced into the format string,
  # so a '%' or backslash in the user name or path is written literally.
  printf '\n\nHost localhost\n\tUser %s\n\tIdentityFile %s\n' \
    "$CURRENT_USER" "$SSH_KEY_FILENAME" >> "$ssh_config_file"
fi
echo ""
echo "Formatting HDFS"
echo "---------------"
echo ""
bin/hadoop namenode -format

# The daemons are started before any "hadoop fs" command is issued: creating
# directories in HDFS needs a running NameNode, and a failing mkdir would
# abort the script before Hadoop is ever launched.
echo ""
echo "Launching Hadoop"
echo "---------------"
echo ""
bin/start-all.sh

# Block until the NameNode has left safe mode and accepts writes.
bin/hadoop dfsadmin -safemode wait

echo " Creating /user/$(whoami) directory so that hadoop fs -ls works as expected"
# bin/hadoop is used throughout so the installation configured above is the
# one being addressed, independent of what "hadoop" resolves to on PATH.
bin/hadoop fs -mkdir /user
bin/hadoop fs -mkdir "/user/$(whoami)"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment