Skip to content

Instantly share code, notes, and snippets.

@yzhang1991
Last active April 30, 2020 03:00
Show Gist options
  • Save yzhang1991/95112c9d25c575c5c28a to your computer and use it in GitHub Desktop.
Save yzhang1991/95112c9d25c575c5c28a to your computer and use it in GitHub Desktop.
Hadoop install
sudo apt-get install -y lib64z1-dev autoconf automake libtool libssl-dev findbugs
findbugs: http://findbugs.sourceforge.net/downloads.html
I found this may be useful: https://frankfzw.wordpress.com/2015/01/22/install-hadoop-2-6-0-on-ubuntu-server-14-04/
http://www.bogotobogo.com/Hadoop/BigData_hadoop_Install_on_ubuntu_single_node_cluster.php
apt-get update
apt-get install default-jdk
addgroup hadoop
adduser --ingroup hadoop hduser
adduser hduser sudo
cp -r .ssh /home/hduser/.ssh
chown -R hduser:hadoop /home/hduser/.ssh
wget http://mirrors.sonic.net/apache/hadoop/common/hadoop-2.6.0/hadoop-2.6.0.tar.gz
tar xvzf hadoop-2.6.0.tar.gz
mv hadoop-2.6.0 /usr/local/hadoop
chown -R hduser:hadoop /usr/local/hadoop
su hduser
cd ~
echo "#HADOOP VARIABLES START" >> ~/.bashrc
echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> ~/.bashrc
echo "export HADOOP_INSTALL=/usr/local/hadoop" >> ~/.bashrc
echo "export PATH=\$PATH:\$HADOOP_INSTALL/bin" >> ~/.bashrc
echo "export PATH=\$PATH:\$HADOOP_INSTALL/sbin" >> ~/.bashrc
echo "export HADOOP_MAPRED_HOME=\$HADOOP_INSTALL" >> ~/.bashrc
echo "export HADOOP_COMMON_HOME=\$HADOOP_INSTALL" >> ~/.bashrc
echo "export HADOOP_HDFS_HOME=\$HADOOP_INSTALL" >> ~/.bashrc
echo "export YARN_HOME=\$HADOOP_INSTALL" >> ~/.bashrc
echo "export HADOOP_COMMON_LIB_NATIVE_DIR=\$HADOOP_INSTALL/lib/native" >> ~/.bashrc
echo "export HADOOP_OPTS=\"-Djava.library.path=\$HADOOP_INSTALL/lib\"" >> ~/.bashrc
echo "#HADOOP VARIABLES END" >> ~/.bashrc
source ~/.bashrc
echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh
export HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=/usr/local/hadoop/lib/native"
export HADOOP_COMMON_LIB_NATIVE_DIR="/usr/local/hadoop/lib/native/"
sudo mkdir -p /app/hadoop/tmp
sudo chown hduser:hadoop /app/hadoop/tmp
vim /usr/local/hadoop/etc/hadoop/core-site.xml
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/app/hadoop/tmp</value>
<description>A base for other temporary directories.</description>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:54310</value>
<description>The name of the default file system. A URI whose
scheme and authority determine the FileSystem implementation. The
uri's scheme determines the config property (fs.SCHEME.impl) naming
the FileSystem implementation class. The uri's authority is used to
determine the host, port, etc. for a filesystem.</description>
</property>
</configuration>
cp /usr/local/hadoop/etc/hadoop/mapred-site.xml.template /usr/local/hadoop/etc/hadoop/mapred-site.xml
vim /usr/local/hadoop/etc/hadoop/mapred-site.xml
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>localhost:54311</value>
<description>The host and port that the MapReduce job tracker runs
at. If "local", then jobs are run in-process as a single map
and reduce task.
</description>
</property>
</configuration>
sudo mkdir -p /usr/local/hadoop_store/hdfs/namenode
sudo mkdir -p /usr/local/hadoop_store/hdfs/datanode
sudo chown -R hduser:hadoop /usr/local/hadoop_store
vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
<description>Default block replication.
The actual number of replications can be specified when the file is created.
The default is used if replication is not specified in create time.
</description>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/hadoop_store/hdfs/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/hadoop_store/hdfs/datanode</value>
</property>
</configuration>
cd /usr/local/hadoop_store/hdfs/namenode
hadoop namenode -format
cd /usr/local/hadoop/sbin
start-dfs.sh
stop-dfs.sh
http://localhost:50070
Expand to more nodes:
vim /etc/hosts
SSH
vim /usr/local/hadoop/etc/hadoop/slaves
hdfs1
hdfs2
vim /usr/local/hadoop/etc/hadoop/core-site.xml
vim /usr/local/hadoop/etc/hadoop/mapred-site.xml
vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml
hadoop namenode -format
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment