Last active
June 12, 2017 19:50
-
-
Save kwarodom/9638402e3eecd6c91d1cc6d447e5d37a to your computer and use it in GitHub Desktop.
Instructions for installing Hadoop on Ubuntu (> 16.04), used in the DEPA Big Data Training Class.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Walkthrough for installing Hadoop (HDFS, YARN, MapReduce) on Ubuntu 17.04 64-bit.
# WARNING: DO NOT RUN THIS FILE as a script. Several steps are interactive
# (ssh, nano) or need manual edits; copy the commands one step at a time.

# Refresh the package index.
sudo apt-get update

# Set up key-based SSH login to localhost.
sudo apt-get install -y openssh-server
# RSA is used instead of DSA: OpenSSH 7.0 and later disable ssh-dss keys by
# default, so a DSA key would not be accepted for the login below.
ssh-keygen -t rsa -P '' -f "$HOME/.ssh/id_rsa"
# sshd reads "authorized_keys" (plural); absolute paths keep this independent
# of the current directory.
cat "$HOME/.ssh/id_rsa.pub" >> "$HOME/.ssh/authorized_keys"
chmod 600 "$HOME/.ssh/authorized_keys"
# Check that the login works without a password; type "exit" to come back.
ssh localhost
# Set up Java (Oracle JDK 8). Guide:
# https://www.digitalocean.com/community/tutorials/how-to-install-java-with-apt-get-on-ubuntu-16-04
# NOTE(review): the webupd8team/java PPA appears to have been discontinued;
# confirm it still provides oracle-java8-installer before relying on it.
sudo add-apt-repository ppa:webupd8team/java
sudo apt-get update
sudo apt-get install oracle-java8-installer

# Pick which installed Java is the system default (interactive prompt).
sudo update-alternatives --config java

# These exports only affect the current shell. Add the same two lines to
# ~/.bashrc so that new shells get them as well.
export JAVA_HOME=/usr/lib/jvm/java-8-oracle
export PATH=$PATH:$JAVA_HOME/bin
# Set up Hadoop.

# 1. Download Hadoop 2.7.2.
# The Apache archive is used because it keeps every past release, whereas
# download mirrors drop old versions.
wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.2/hadoop-2.7.2.tar.gz

# 2. Create the install, log and data directories.
# The owner defaults to "arit"; override it with HADOOP_OWNER=<user> when your
# login name is different.
hadoop_owner="${HADOOP_OWNER:-arit}"
sudo mkdir -p /usr/local/hadoop
sudo chown -R "${hadoop_owner}:${hadoop_owner}" /usr/local/hadoop
# -p so that re-running this step does not fail when the directory exists.
sudo mkdir -p /var/log/hadoop
sudo chown -R "${hadoop_owner}:${hadoop_owner}" /var/log/hadoop
sudo mkdir -p /var/hadoop_data/namenode /var/hadoop_data/datanode
sudo chown -R "${hadoop_owner}:${hadoop_owner}" /var/hadoop_data

# 3. Unpack Hadoop and move it into place.
tar -xvf hadoop-2.7.2.tar.gz
sudo mv ./hadoop-2.7.2/* /usr/local/hadoop
# The files were moved with sudo, so hand ownership back to the Hadoop user.
sudo chown -R "${hadoop_owner}:${hadoop_owner}" /usr/local/hadoop
# 4. Set up the Hadoop environment variables.
cd ~/
nano .bashrc
# TODO: add the following lines to .bashrc by hand (this could be scripted):
# export HADOOP_HOME=/usr/local/hadoop
# export PATH=$PATH:$HADOOP_HOME/bin
# export PATH=$PATH:$HADOOP_HOME/sbin

# 5. Edit hadoop-env.sh to set the Java home and the Hadoop log directory.
cd /usr/local/hadoop/etc/hadoop
nano hadoop-env.sh
# TODO: add the following lines to hadoop-env.sh by hand (this could be scripted):
# export JAVA_HOME=/usr/lib/jvm/java-8-oracle
# export HADOOP_LOG_DIR=/var/log/hadoop

# 6. Edit yarn-env.sh (same directory as step 5) to set the YARN log directory.
nano yarn-env.sh
# TODO: add the following line to yarn-env.sh by hand (this could be scripted):
# export YARN_LOG_DIR=/var/log/hadoop
# NOTE(review): the commands below must be on PATH; the .bashrc edits from
# step 4 presumably only take effect in a new shell or after
# "source ~/.bashrc" - confirm before continuing.

# 7. Format the HDFS file system.
hdfs namenode -format

# 8. Start Hadoop.
# 8.1 Start HDFS.
start-dfs.sh
# TODO: check that the two processes NameNode and DataNode are running.
jps

# 8.2 Start YARN.
start-yarn.sh
# TODO: check that the two processes ResourceManager and NodeManager are running.
jps

# 9. Stop Hadoop (YARN first, then HDFS).
stop-yarn.sh
stop-dfs.sh
# TODO: check which Hadoop processes are still listed.
jps
# 10. Try MapReduce with the bundled example jobs.
cd /usr/local/hadoop/share/hadoop/mapreduce
# "pi" example, called with the arguments 10 and 1000.
hadoop jar hadoop-mapreduce-examples-2.7.2.jar pi 10 1000

# Prepare input and output locations for the word-count example.
sudo mkdir /usr/lab
# Type or paste some text to be counted, then save.
sudo nano /usr/lab/wc-data.txt
sudo mkdir /usr/lab/output
# NOTE(review): /usr/lab/output is created with sudo; confirm that the user
# running the job below is allowed to write into it.

# 11. Run word count and watch the progress on the terminal.
hadoop jar hadoop-mapreduce-examples-2.7.2.jar wordcount /usr/lab/wc-data.txt /usr/lab/output/o2
# 12. With HDFS, YARN and MapReduce installed, configure the files below.
#     Main package directory : /usr/local/hadoop
#     Data directories       : /var/hadoop_data/namenode
#                              /var/hadoop_data/datanode
#     Log directory          : /var/log/hadoop
cd /usr/local/hadoop/etc/hadoop

# 12.1 core-site.xml
nano core-site.xml
# TODO: paste the XML below into core-site.xml by hand. It is fed to the no-op
# ":" builtin so that bash skips over it instead of trying to run it; nothing
# is written to disk by this here-document.
: <<'CORE_SITE_XML'
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
  <property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
  </property>
</configuration>
CORE_SITE_XML
# 12.2 hdfs-site.xml (edited in /usr/local/hadoop/etc/hadoop)
nano hdfs-site.xml
# TODO: paste the XML below into hdfs-site.xml by hand. It is fed to the no-op
# ":" builtin so that bash skips over it instead of trying to run it; nothing
# is written to disk by this here-document.
# The DataNode storage property is dfs.datanode.data.dir; the name
# "dfs.namenode.data.dir" is not a Hadoop property and would be ignored.
: <<'HDFS_SITE_XML'
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/var/hadoop_data/namenode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/var/hadoop_data/datanode</value>
  </property>
  <property>
    <name>dfs.namenode.acls.enabled</name>
    <value>true</value>
  </property>
</configuration>
HDFS_SITE_XML
# 12.3 yarn-site.xml (edited in /usr/local/hadoop/etc/hadoop)
nano yarn-site.xml
# TODO: paste the XML below into yarn-site.xml by hand. It is fed to the no-op
# ":" builtin so that bash skips over it instead of trying to run it; nothing
# is written to disk by this here-document.
: <<'YARN_SITE_XML'
<configuration>
  <!-- Site specific YARN configuration properties -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>localhost</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>localhost:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>localhost:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>localhost:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>localhost:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>localhost:8088</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
</configuration>
YARN_SITE_XML
# 12.4 mapred-site.xml (edited in /usr/local/hadoop/etc/hadoop)
# Start from the template that sits next to the other config files.
cp mapred-site.xml.template mapred-site.xml
nano mapred-site.xml
# TODO: paste the XML below into mapred-site.xml by hand. It is fed to the
# no-op ":" builtin so that bash skips over it instead of trying to run it;
# nothing is written to disk by this here-document.
: <<'MAPRED_SITE_XML'
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
MAPRED_SITE_XML
# 13. Initialise HDFS: format the NameNode.
# "hdfs namenode -format" is used here, matching step 7; the older
# "hadoop namenode -format" spelling is deprecated in Hadoop 2.x.
hdfs namenode -format

# 14. Start Hadoop.
# 14.1 Start HDFS.
start-dfs.sh
# TODO: check that the two processes NameNode and DataNode are running.
jps

# 14.2 Start YARN.
start-yarn.sh
# TODO: check that the two processes ResourceManager and NodeManager are running.
jps

# 15. Stop Hadoop (YARN first, then HDFS).
stop-yarn.sh
stop-dfs.sh
# TODO: check which Hadoop processes are still listed.
jps
# 16. Post-installation check: open these pages in a browser, replacing <IP>
#     with the address of the machine running Hadoop.
#       NameNode web UI        : http://<IP>:50070/
#       ResourceManager web UI : http://<IP>:8088
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment