Installing Hadoop on Ubuntu
# Installing Hadoop on Ubuntu
# http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH4/latest/CDH4-Quick-Start/cdh4qs_topic_3_2.html
# While we're at it, let's install the JDK ...
echo "y" | sudo apt-get install openjdk-6-jdk
# ... and Yelp's mrjob
pip install mrjob
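# Sanity check (not in the original gist): make sure the JDK and mrjob
# actually landed before moving on.
java -version
python -c "import mrjob"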
# Install maven
echo "y" | sudo apt-get install maven
# download and install this Debian package from Cloudera.
wget http://archive.cloudera.com/cdh4/one-click-install/lucid/amd64/cdh4-repository_1.0_all.deb
sudo dpkg -i cdh4-repository_1.0_all.deb
# add cloudera public key to repository
curl -s http://archive.cloudera.com/cdh4/ubuntu/lucid/amd64/cdh/archive.key | sudo apt-key add -
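# Optional check (not in the original gist): the Cloudera key should now
# show up in apt's keyring.
apt-key list | grep -i cloudera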
# install hadoop
echo "y" | sudo apt-get update
echo "y" | sudo apt-get install hadoop-0.20-conf-pseudo
# then check that everything is working correctly:
dpkg -L hadoop-0.20-conf-pseudo
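# Assuming CDH4's usual alternatives layout, the pseudo-distributed config
# lands in /etc/hadoop/conf.pseudo, with /etc/hadoop/conf pointing at it:
ls -l /etc/hadoop/conf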
# install scalding
echo "y" | sudo apt-get install scala
echo "y" | sudo apt-get install git
git clone https://github.com/twitter/scalding.git
cd scalding
./sbt update && ./sbt test && ./sbt assembly
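# A smoke test for the build; scripts/scald.rb and the tutorial path below
# are assumptions about the Scalding repo layout, so adjust if they moved:
scripts/scald.rb --local tutorial/Tutorial0.scala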
# Format the NameNode
sudo -u hdfs hdfs namenode -format
# Start HDFS
for x in `cd /etc/init.d ; ls hadoop-hdfs-*` ; do sudo service $x start ; done
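# Confirm the HDFS daemons came up, mirroring the start loop above:
for x in `cd /etc/init.d ; ls hadoop-hdfs-*` ; do sudo service $x status ; done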
# Create the /tmp Directory
sudo -u hdfs hadoop fs -mkdir /tmp
sudo -u hdfs hadoop fs -chmod -R 1777 /tmp
# Create MapReduce system directories:
sudo -u hdfs hadoop fs -mkdir -p /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
sudo -u hdfs hadoop fs -chmod 1777 /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
sudo -u hdfs hadoop fs -chown -R mapred /var/lib/hadoop-hdfs/cache/mapred
# Verify HDFS File Structure
sudo -u hdfs hadoop fs -ls -R /
# Start MapReduce
for x in `cd /etc/init.d ; ls hadoop-0.20-mapreduce-*` ; do sudo service $x start ; done
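# With the JobTracker up, this should print an empty job list rather than
# a connection error:
hadoop job -list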
# Create a Hadoop user directory
export HDUSER=$USER
sudo -u hdfs hadoop fs -mkdir /user/$HDUSER
sudo -u hdfs hadoop fs -chown $HDUSER /user/$HDUSER
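# Verify the new home directory and its owner:
sudo -u hdfs hadoop fs -ls /user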
# Test Hadoop
hadoop fs -mkdir input
hadoop fs -put /etc/hadoop/conf/*.xml input
hadoop fs -ls input
/usr/bin/hadoop jar /usr/lib/hadoop-0.20-mapreduce/hadoop-examples.jar grep input output 'dfs[a-z.]+'
hadoop fs -ls
hadoop fs -ls output
hadoop fs -cat output/part-00000 | head
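# Note (not in the original gist): the example job refuses to run again
# while the output directory exists, so clear it before any re-run:
hadoop fs -rm -r output   # on older fs shells: hadoop fs -rmr output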
# Since the server is behind a firewall, run these commands from your local
# machine: they forward the job tracker, task tracker, and namenode web UIs
# from the remote host (here, a DigitalOcean droplet) to your local one.
# Then visit localhost:50030 in a browser to see the results ...
# ssh -f -N -L 50030:localhost:50030 user@digitalocean
# ssh -f -N -L 50060:localhost:50060 user@digitalocean
# ssh -f -N -L 50070:localhost:50070 user@digitalocean