Last active Dec 29, 2015
notes on getting hadoop (both single node and a cluster) going on Ubuntu LTS; these are just notes to myself and have some gaps in them. Not meant to be a tutorial or a walkthrough.
sudo apt-get update
sudo apt-get install vim
# generate keys and copy them over
ssh-copy-id -i vagrant@hnclient # move node ssh key to the client
# update /etc/hosts on each node so it knows at least the master; the master must also know about the slave nodes
# make sure master can ssh into localhost
cat .ssh/id_rsa.pub >> .ssh/authorized_keys # assumes the default RSA key name; adjust if your key file differs
# re-read:
# on both master & slave make sure that /etc/hosts is rocking
# and that on master, when you do `netstat -tuplen` that the namenode is bound to the "external" ip and not loopback / 127..
sudo apt-get install openjdk-7-jdk <<-EOF
tar zxvf hadoop-1.2.1-bin.tar.gz
# edit bash rc file
export HADOOP_PREFIX=/usr/local/hadoop
sudo vim /usr/local/hadoop/conf/hadoop-env.sh # put in the following...
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64/
export HADOOP_OPTS=-Djava.net.preferIPv4Stack=true # disable IPv6 because it doesn't play nice w/ Hadoop
vim /usr/local/hadoop/conf/core-site.xml # put in the following
# watch the xml. don't accidentally call `name` `key`
# make sure the host and port names are not misspelled, e.g. hname for hnname
sudo vim /usr/local/hadoop/conf/mapred-site.xml
sudo rm -rf /usr/local/hadoop/tmp
sudo rm -rf /usr/local/hadoop/logs
sudo mkdir /usr/local/hadoop/tmp
sudo mkdir /usr/local/hadoop/logs
sudo chown vagrant:vagrant /usr/local/hadoop/tmp
sudo chown vagrant:vagrant /usr/local/hadoop/logs
hadoop namenode -format
# if you get errors because it can't open logs...
sudo mkdir /usr/local/hadoop/logs/
sudo chown vagrant /usr/local/hadoop/logs/
hadoop namenode -format # refresh w/ this
# visit the master server gui at masterhost:50070
# update /usr/local/hadoop/conf/masters
# update /usr/local/hadoop/conf/slaves
# test w/ hadoop jar /usr/local/hadoop/hadoop-examples-1.2.1.jar wordcount /data/words.txt /data/results
# hadoop dfs -cat /data/results/part-r-00000
