notes on getting hadoop (both single node and a cluster) going on Ubuntu LTS; these are just notes to myself and have some gaps in them. Not meant to be a tutorial or a walkthrough.
sudo apt-get update
sudo apt-get install vim
# generate keys and copy them over
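# (gap in the notes) if no keypair exists yet, generate one first -- passphrase-less here for convenience:
ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa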
ssh-copy-id -i id_rsa.pub vagrant@hnclient # install the master's public key on the client node
# update /etc/hosts on each node: slaves need to know at least the master; the master needs to know the slave nodes
# make sure master can ssh into localhost
cat .ssh/id_rsa.pub >> .ssh/authorized_keys
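# quick check -- both of these should run without a password prompt:
ssh localhost true
ssh vagrant@hnclient true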
# re-read: http://stackoverflow.com/questions/8872807/hadoop-datanodes-cannot-find-namenode
# on both master & slave make sure that /etc/hosts has the right entries
# and that on master, `netstat -tuplen` shows the namenode bound to the "external" IP and not loopback / 127.x
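# roughly what /etc/hosts should hold (IPs made up -- use the real ones):
#   192.168.33.10  hnname
#   192.168.33.11  hnclient
# on Ubuntu, watch out for the default `127.0.1.1 <hostname>` line; it's what makes the namenode bind to loopback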
sudo apt-get install -y openjdk-7-jdk
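# sanity check the install
java -version # should report 1.7.x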
wget http://mirror.nexcess.net/apache/hadoop/common/hadoop-1.2.1/hadoop-1.2.1-bin.tar.gz
tar zxvf hadoop-1.2.1-bin.tar.gz
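# (gap in the notes) everything below assumes the tree lives at /usr/local/hadoop, so something like:
sudo mv hadoop-1.2.1 /usr/local/hadoop
sudo chown -R vagrant:vagrant /usr/local/hadoop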
# add to ~/.bashrc:
export HADOOP_PREFIX=/usr/local/hadoop
export PATH=$PATH:$HADOOP_PREFIX/bin
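source ~/.bashrc # pick up the new PATH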
sudo vim /usr/local/hadoop/conf/hadoop-env.sh # put in the following...
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64/
export HADOOP_OPTS=-Djava.net.preferIPv4Stack=true # disable IPv6 because it doesn't play nice w/ Hadoop
vim /usr/local/hadoop/conf/core-site.xml # put in the following
<<-HERE
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://hnname:10001</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/usr/local/hadoop/tmp</value>
  </property>
</configuration>
HERE
# watch the xml. don't accidentally write `key` where `name` should be
# make sure the host and port aren't misspelled, e.g. hname instead of hnname
sudo vim /usr/local/hadoop/conf/mapred-site.xml
<<-HERE
<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>hnname:10002</value>
  </property>
</configuration>
HERE
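# another gap: conf/hdfs-site.xml isn't covered here; for a cluster you'd likely want at least dfs.replication, e.g.
sudo vim /usr/local/hadoop/conf/hdfs-site.xml
<<-HERE
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>2</value> <!-- 1 for a single node; no more than the number of datanodes -->
  </property>
</configuration>
HERE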
sudo rm -rf /usr/local/hadoop/tmp
sudo rm -rf /usr/local/hadoop/logs
sudo mkdir /usr/local/hadoop/tmp
sudo mkdir /usr/local/hadoop/logs
sudo chown vagrant:vagrant /usr/local/hadoop/tmp
sudo chown vagrant:vagrant /usr/local/hadoop/logs
hadoop namenode -format
start-all.sh
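# sanity check: `jps` on the master should list NameNode, SecondaryNameNode & JobTracker
# (plus DataNode & TaskTracker on a single-node setup; slaves show just those two)
jps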
# if you get errors because it can't open logs...
sudo mkdir /usr/local/hadoop/logs/
sudo chown vagrant /usr/local/hadoop/logs/
hadoop namenode -format # refresh w/ this
start-all.sh
# visit the namenode gui at masterhost:50070 (jobtracker gui is on 50030)
# update conf/masters and conf/slaves
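# e.g., with the hostnames from above (masters is really "where the secondary namenode runs"):
#   conf/masters -> hnname
#   conf/slaves  -> hnclient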
# test w/ hadoop jar /usr/local/hadoop/hadoop-examples-1.2.1.jar wordcount /data/words.txt /data/results
# hadoop dfs -cat /data/results/part-r-00000
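# n.b. the input has to be in HDFS before the wordcount test runs, e.g.:
# hadoop dfs -mkdir /data
# hadoop dfs -put words.txt /data/words.txt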