Last active March 18, 2018 18:32
Install Hadoop on Ubuntu 16.04

Copy line by line and paste to an interactive shell

Create a new user named hadoop

sudo useradd --create-home --shell /bin/bash hadoop   # Create the hadoop user with a home directory and bash as its login shell
sudo passwd hadoop                                    # Set the hadoop user's password (interactive prompt)
sudo adduser hadoop sudo                              # Add hadoop to the sudo group (administrator rights)

Log out of the current user and log in as hadoop

sudo apt-get update         # Refresh the package index (does not upgrade installed packages)

Install ssh

# Install the SSH server and set up key-based, password-less login to localhost.
sudo apt-get install openssh-server
mkdir -p ~/.ssh                         # -p: no error if the directory already exists
chmod 700 ~/.ssh                        # sshd ignores keys when ~/.ssh is too permissive
cd ~/.ssh/
ssh-keygen -t rsa                       # Press Enter at every prompt to accept the defaults
cat ./id_rsa.pub >> ./authorized_keys   # Authorize the freshly generated public key
chmod 600 ./authorized_keys             # Restrict the file to its owner, as sshd requires

Install Java

Install a Java JDK. The Oracle JDK is recommended, but OpenJDK also works.

Install OpenJDK

sudo apt-get update # Refresh the package lists
sudo apt-get install openjdk-8-jdk # Install the OpenJDK 8 JDK package

Install the Oracle Java JDK

# NOTE(review): the webupd8team/java PPA appears to have been discontinued — confirm it
# still resolves; otherwise use the OpenJDK package instead.
sudo add-apt-repository ppa:webupd8team/java # Add the PPA as a package source
sudo apt-get update # Refresh the package lists
sudo apt-get install oracle-java8-installer


sudo update-alternatives --config java # Interactively choose which installed java is the default

Download & Install Hadoop

# Download the Hadoop 3.0.0 release tarball from the Apache archive into ~/Downloads
wget -P ~/Downloads https://archive.apache.org/dist/hadoop/common/hadoop-3.0.0/hadoop-3.0.0.tar.gz
sudo tar zxvf ~/Downloads/hadoop-3.0.0.tar.gz -C /usr/local   # Unpack under /usr/local
sudo mv /usr/local/hadoop-3.0.0 /usr/local/hadoop             # Drop the version suffix from the directory name
sudo chown -R hadoop /usr/local/hadoop                        # Make the hadoop user the owner of the installation

Configure hadoop

# Record JAVA_HOME in Hadoop's environment file. The value is derived from the real
# location of the `java` binary with the trailing "/bin/java" (or "/jre/bin/java")
# stripped, so it points at the JDK root.
echo "export JAVA_HOME=$(readlink -f "$(command -v java)" | sed -E 's:(/jre)?/bin/java$::')" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh

Configure core-site.xml

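sudo vim /usr/local/hadoop/etc/hadoop/core-site.xml

<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/usr/local/hadoop/tmp</value>
        <description>Abase for other temporary directories.</description>
    </property>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
</configuration>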

Configure hdfs-site.xml

# Open hdfs-site.xml for editing.
# NOTE(review): the properties to add are not shown in this guide; the official
# single-node setup sets dfs.replication to 1 — confirm before use.
sudo vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml

Configure environment

# Append the Java/Hadoop environment to ~/.bashrc and load it into the current shell.
# JAVA_HOME is resolved once, now, from the real location of the `java` binary.
# Every other `$` is escaped so it is written literally and expanded only when
# ~/.bashrc is sourced. HADOOP_HOME is exported before the PATH entry that uses it.
cat >> ~/.bashrc <<EOF

export JAVA_HOME=$(readlink -f "$(command -v java)" | sed -E 's:(/jre)?/bin/java$::')
export HADOOP_HOME=/usr/local/hadoop
export PATH=\$PATH:\$JAVA_HOME/bin
export PATH=\$PATH:\$HADOOP_HOME/bin
export HADOOP_CLASSPATH=\${JAVA_HOME}/lib/tools.jar
EOF
source ~/.bashrc

start hadoop

cd /usr/local/hadoop
bin/hdfs namenode -format      # Format the NameNode (needed on first start only)
sbin/start-dfs.sh              # Start the HDFS daemons
jps                            # List running JVM processes to check that the daemons started

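Note: do not run `hdfs namenode -format` repeatedly. If you run it again after changing the configuration, answer N when asked whether to re-format.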

To stop the Hadoop daemons, run sbin/stop-dfs.sh; to start them again later, just run sbin/start-dfs.sh.

Test hadoop

Save the WordCount source code (WordCount.java from the Hadoop MapReduce Tutorial) into /usr/local/hadoop.

WordCount v1.0

cd /usr/local/hadoop
# Compile WordCount.java first; this relies on HADOOP_CLASSPATH pointing at the JDK's tools.jar.
bin/hadoop com.sun.tools.javac.Main WordCount.java
# Package the compiled classes into a jar that `hadoop jar` can run.
jar -cvf wordcount.jar *.class

# Upload the local input files to HDFS, run the WordCount job, then inspect the result.
bin/hadoop fs -mkdir -p /input_wordcount        # -p: no error if the directory already exists
bin/hadoop fs -put input/* /input_wordcount/    # Upload the local files under ./input (e.g. file1, file2)

bin/hadoop fs -ls /                             # List the HDFS root
bin/hadoop fs -ls /input_wordcount              # Should show file1 and file2
bin/hadoop fs -cat /input_wordcount/file1       # Print one input file
# Run the job; /output_wordcount must not exist yet, otherwise the job refuses to start.
bin/hadoop jar wordcount.jar WordCount /input_wordcount /output_wordcount

bin/hadoop fs -ls /output_wordcount
bin/hadoop fs -cat /output_wordcount/part-r-00000     # Show the word counts
