Skip to content

Instantly share code, notes, and snippets.

@arturmkrtchyan
Last active August 29, 2015 14:16
Show Gist options
  • Save arturmkrtchyan/d7a5efe21b18051823f1 to your computer and use it in GitHub Desktop.
Hadoop Cluster Installation
10.64.200.48
10.64.200.49
10.64.200.50
10.64.200.51
# Create a dedicated user and group for the Hadoop daemons.
sudo addgroup hadoop
sudo adduser --ingroup hadoop hduser
sudo adduser hduser sudo
# Build prerequisites for compiling Hadoop with native libraries.
sudo apt-get install maven build-essential zlib1g-dev cmake pkg-config libssl-dev
# Fetch the Hadoop 2.6.0 source from the permanent Apache archive.
# (The old apache.mirror.anlx.net/hadoop/core/ mirror path no longer exists.)
wget https://archive.apache.org/dist/hadoop/common/hadoop-2.6.0/hadoop-2.6.0-src.tar.gz
tar -xvf hadoop-2.6.0-src.tar.gz
cd hadoop-2.6.0-src/
# Build a binary distribution tarball with native libs;
# skip javadoc and tests to keep the build time down.
mvn clean package -Pdist,native -Dmaven.javadoc.skip=true -DskipTests -Dtar
### jps on Namenode
8257 NameNode
8674 ResourceManager
8511 SecondaryNameNode
### jps on DataNode
21595 DataNode
22139 NodeManager
# Distribute the freshly built Hadoop tarball and our SSH public key
# to every node of the cluster (and keep a local copy).
dist=/home/hduser/hadoop-2.6.0-src/hadoop-dist/target/hadoop-2.6.0.tar.gz
cp "$dist" /home/hduser/
for node in 10.64.200.48 10.64.200.49 10.64.200.50 10.64.200.51; do
  scp "$dist" "hduser@${node}:/home/hduser/"
  ssh-copy-id -i /home/hduser/.ssh/id_rsa.pub "hduser@${node}"
done
<!-- core-site.xml: identical on every node; points all daemons and
     clients at the NameNode. -->
<configuration>
<property>
<name>fs.defaultFS</name>
<!-- No port given, so the default NameNode RPC port (8020) is used. -->
<value>hdfs://10.64.200.47/</value>
<description>NameNode URI</description>
</property>
</configuration>
<!-- hdfs-site.xml for the DataNode/worker hosts. -->
<configuration>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///usr/local/hadoop/data/datanode</value>
    <description>DataNode directory</description>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
    <description>Default block replication factor.</description>
  </property>
  <property>
    <!-- "dfs.permissions" is deprecated in Hadoop 2.x; the current key
         is dfs.permissions.enabled (the old name is still mapped). -->
    <name>dfs.permissions.enabled</name>
    <value>false</value>
    <description>Disable HDFS permission checks (test cluster only; do not use in production).</description>
  </property>
  <property>
    <name>dfs.datanode.use.datanode.hostname</name>
    <value>false</value>
    <description>Connect to DataNodes by IP rather than reported hostname.</description>
  </property>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>10.64.200.47:50070</value>
    <description>Your NameNode hostname for http access.</description>
  </property>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>10.64.200.47:50090</value>
    <description>Your Secondary NameNode hostname for http access.</description>
  </property>
</configuration>
# Create the DataNode storage and log directories.
# Quote $HADOOP_INSTALL so the paths survive spaces/globs; requires the
# variable to be exported already (see the ~/.bashrc exports).
mkdir -pv "$HADOOP_INSTALL/data/datanode"
mkdir -pv "$HADOOP_INSTALL/logs"
# Derive JAVA_HOME from the resolved location of the java binary.
export JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")
Hadoop 2.6.0
Subversion Unknown -r Unknown
Compiled by hduser on ${date}
Compiled with protoc 2.5.0
From source with checksum ${checksum}
This command was run using /usr/local/hadoop-2.6.0/share/hadoop/common/hadoop-common-2.6.0.jar
<!-- hdfs-site.xml for the NameNode host. -->
<configuration>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///usr/local/hadoop/data/namenode</value>
    <description>NameNode directory for namespace and transaction logs storage.</description>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
    <description>Default block replication factor.</description>
  </property>
  <property>
    <!-- "dfs.permissions" is deprecated in Hadoop 2.x; the current key
         is dfs.permissions.enabled (the old name is still mapped). -->
    <name>dfs.permissions.enabled</name>
    <value>false</value>
    <description>Disable HDFS permission checks (test cluster only; do not use in production).</description>
  </property>
  <property>
    <name>dfs.datanode.use.datanode.hostname</name>
    <value>false</value>
    <description>Connect to DataNodes by IP rather than reported hostname.</description>
  </property>
  <property>
    <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
    <value>false</value>
    <description>Allow DataNodes to register by IP without reverse-DNS resolution.</description>
  </property>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>10.64.200.47:50070</value>
    <description>Your NameNode hostname for http access.</description>
  </property>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>10.64.200.47:50090</value>
    <description>Your Secondary NameNode hostname for http access.</description>
  </property>
</configuration>
# Unpack the built distribution and keep a version-neutral symlink.
sudo tar -xvf /home/hduser/hadoop-2.6.0.tar.gz -C /usr/local/
sudo ln -s /usr/local/hadoop-2.6.0 /usr/local/hadoop
sudo chown -R hduser:hadoop /usr/local/hadoop-2.6.0
# Install Oracle Java 8.
# NOTE(review): the webupd8team PPA no longer serves Oracle installers;
# on current Ubuntu releases use "apt-get install openjdk-8-jdk" instead.
sudo add-apt-repository ppa:webupd8team/java
sudo apt-get update
sudo apt-get install oracle-java8-installer
java -version
sudo apt-get install oracle-java8-set-default
# Build protobuf 2.5.0 (required by the Hadoop 2.6 build).
# protobuf.googlecode.com is dead; fetch from the GitHub release instead.
wget https://github.com/google/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.gz
tar -xvf protobuf-2.5.0.tar.gz
cd protobuf-2.5.0/
# Configure and build as a regular user; only the install step needs root.
./configure
make
make check
sudo make install
sudo ldconfig
protoc --version
<!-- mapred-site.xml: run MapReduce jobs on YARN instead of the
     classic (MRv1) JobTracker framework. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
# Create the NameNode storage and log directories.
mkdir -pv "$HADOOP_INSTALL/data/namenode"
mkdir -pv "$HADOOP_INSTALL/logs"
# Switch to the hadoop user and set up passwordless SSH to localhost.
sudo su - hduser
ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
# sshd's StrictModes rejects keys with loose permissions; tighten them.
chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys
ssh localhost
# Hadoop environment — append these exports to hduser's ~/.bashrc.
export JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")
export HADOOP_INSTALL=/usr/local/hadoop
export PATH="$PATH:$HADOOP_INSTALL/bin:$HADOOP_INSTALL/sbin"
export HADOOP_MAPRED_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_HOME=$HADOOP_INSTALL
export HADOOP_HDFS_HOME=$HADOOP_INSTALL
export YARN_HOME=$HADOOP_INSTALL
export HADOOP_HOME=$HADOOP_INSTALL
export HADOOP_CONF_DIR="$HADOOP_HOME/etc/hadoop"
#### Word Count Example
wget http://www.gutenberg.org/cache/epub/1661/pg1661.txt
# -p creates the missing parent /samples; a plain -mkdir fails when the
# parent directory does not exist yet.
hdfs dfs -mkdir -p /samples/input
hdfs dfs -put pg1661.txt /samples/input
hadoop jar "$HADOOP_INSTALL/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar" wordcount /samples/input /samples/output
hdfs dfs -cat /samples/output/part* | less
#### PI calculation example with 16 maps and 100000 samples, run the following command:
hadoop jar "$HADOOP_INSTALL/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar" pi 16 100000
#### To see all examples run the following command:
hadoop jar "$HADOOP_INSTALL/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar"
# Start the HDFS and YARN daemons (run on the NameNode host; the daemons
# must be up before running the example jobs above).
start-dfs.sh
start-yarn.sh
<!-- yarn-site.xml: 10.64.200.47 hosts the ResourceManager. -->
<configuration>
<!-- Enable the shuffle auxiliary service that MapReduce needs on every
     NodeManager. -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<!-- Where NodeManagers report their status/heartbeats. -->
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>10.64.200.47:8025</value>
</property>
<!-- Where ApplicationMasters request/release containers. -->
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>10.64.200.47:8030</value>
</property>
<!-- Where clients submit applications. -->
<property>
<name>yarn.resourcemanager.address</name>
<value>10.64.200.47:8050</value>
</property>
</configuration>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment