@lyekumchew
Last active April 7, 2020 05:08
hadoop
# Hadoop settings
export HDFS_NAMENODE_USER="hduser"
export HDFS_DATANODE_USER="hduser"
export HDFS_SECONDARYNAMENODE_USER="hduser"
export YARN_RESOURCEMANAGER_USER="hduser"
export YARN_NODEMANAGER_USER="hduser"
export HADOOP_HOME=/home/hduser/hadoop-3.2.1
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
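A quick sanity check after appending the export block above to ~/.bashrc (a minimal sketch; it assumes the tarball has already been unpacked to /home/hduser/hadoop-3.2.1, which happens in a later step):
# Reload the shell configuration and confirm the Hadoop commands resolve
source ~/.bashrc
which hadoop hdfs yarn
hadoop version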
OS: Debian 10.3; Platform: DigitalOcean; Specs: 2 cores, 2 GB RAM, 60 GB disk
Hadoop version: 3.2.1
# Add the hadoop group and the hduser user on every server
addgroup hadoop
adduser --ingroup hadoop hduser
usermod -aG sudo hduser
# Switch to hduser for the remaining steps
su hduser
# Generate a key pair on master
ssh-keygen -t rsa -f ~/.ssh/id_rsa
# Add master's public key to authorized_keys on all 4 servers
mkdir -p ~/.ssh && echo "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCmzSu7ppJ/f+AmpyCjL5GO1mimVQKCX6RUbnlskU/f7Ehmxc0jwX0ohnaaecSkXeH5Lzc9Xhu5mxcx0vuwCXVAS/DPFGKCe0aBcjMA0CES21EsBcQJ3t5eLK/Zj6qgtGW5Pv2IUoGk4FWk7skNu57T3uUuTqZvynHxhpU2P1uZHtDL6CTtw63YNLSoNl3j7dz/FngeJimk0sjr+nqxXL/okEdrrEutzPph6sX6kRYBi/97nzqtqx0rr1w+9tMfe2F+QtXLxIEjsV4vLJXHjSAMFmeJfUhUjT8rYcMBlV4l2Y6M3iXFLFxgHNgVzyplniFP6K29FTlFHf0/wXv754VH hduser@master" >> ~/.ssh/authorized_keys
# Once the key is in place, ssh from master to each of the three slaves and accept the host key
ssh hduser@slave-01
ssh hduser@slave-02
ssh hduser@slave-03
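If password authentication is still enabled on the slaves, ssh-copy-id is an alternative to pasting the public key by hand; a hedged sketch using the hostnames above:
# Push master's public key to each slave, then verify passwordless login works
for h in slave-01 slave-02 slave-03; do ssh-copy-id -i ~/.ssh/id_rsa.pub "hduser@$h"; done
for h in slave-01 slave-02 slave-03; do ssh -o BatchMode=yes "hduser@$h" hostname; done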
# Create the HDFS data directory
sudo mkdir -p /usr/local/hadoop/hdfs/data
sudo chown -R hduser:hadoop /usr/local/hadoop/hdfs/data
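Note that the hdfs-site.xml further down points dfs.namenode.name.dir and dfs.datanode.data.dir at /usr/local/hadoop/data/..., not at the path created here. Assuming those config values are kept, the directories that actually need to exist (the namenode dir matters on master, the datanode dir on the slaves) would be:
sudo mkdir -p /usr/local/hadoop/data/namenode /usr/local/hadoop/data/datanode
sudo chown -R hduser:hadoop /usr/local/hadoop/data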
<!-- core-site.xml -->
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://master:9000</value>
  </property>
</configuration>
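fs.default.name is the deprecated alias of fs.defaultFS in Hadoop 2.x/3.x; both still work, but new configs usually use the latter. The effective value can be checked from any node with:
hdfs getconf -confKey fs.defaultFS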
# Download hadoop-3.2.1 and extract it. Extraction can take quite a while, so running it inside screen is recommended; do this on every machine
wget https://archive.apache.org/dist/hadoop/common/hadoop-3.2.1/hadoop-3.2.1.tar.gz && tar -zxvf hadoop-3.2.1.tar.gz
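A minimal way to run the download and extraction inside screen, as suggested above:
# Start a named session, run the download/extract inside it, detach with Ctrl-a d
screen -S hadoop-setup
wget https://archive.apache.org/dist/hadoop/common/hadoop-3.2.1/hadoop-3.2.1.tar.gz && tar -zxvf hadoop-3.2.1.tar.gz
# Reattach later with: screen -r hadoop-setup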
# In the extracted Hadoop directory, set the Java installation path in hadoop-env.sh
vim etc/hadoop/hadoop-env.sh
# Install OpenJDK 8 on Debian 10. The default Debian 10 repositories no longer ship a JDK this old,
# so an unofficial (AdoptOpenJDK) repository is used here [1]; Hadoop 3.x only supports JDK 8 [2]
sudo apt-get install -y software-properties-common
wget -qO - https://adoptopenjdk.jfrog.io/adoptopenjdk/api/gpg/key/public | sudo apt-key add -
sudo add-apt-repository --yes https://adoptopenjdk.jfrog.io/adoptopenjdk/deb/
sudo apt-get update
sudo apt-get install -y adoptopenjdk-8-hotspot
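With adoptopenjdk-8-hotspot installed, the JAVA_HOME line in etc/hadoop/hadoop-env.sh would look roughly like the following; the exact JVM directory is an assumption, ls /usr/lib/jvm shows what the package actually created:
# etc/hadoop/hadoop-env.sh -- point Hadoop at the JDK 8 installation
export JAVA_HOME=/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64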
# Set the Hadoop environment variables in .bashrc (the export block at the top of these notes)
# Configure yarn-site.xml...
# Copy the finished configuration files to every slave server (one scp per host;
# a single scp with brace-expanded host:path targets would treat all but the last as sources)
for h in slave-{01..03}; do scp ~/hadoop-3.2.1/etc/hadoop/* "$h":/home/hduser/hadoop-3.2.1/etc/hadoop/; done
<!-- hdfs-site.xml -->
<configuration>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/usr/local/hadoop/data/namenode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/usr/local/hadoop/data/datanode</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
</configuration>
# /etc/hosts
10.130.149.135 master
10.130.157.48 slave-01
10.130.157.65 slave-02
10.130.116.38 slave-03
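These mappings have to be present in /etc/hosts on all four machines so that master and the slave-0x names resolve; one way to append them (run on every node):
sudo tee -a /etc/hosts <<'EOF'
10.130.149.135 master
10.130.157.48 slave-01
10.130.157.65 slave-02
10.130.116.38 slave-03
EOF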
<!-- mapred-site.xml -->
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
  </property>
  <property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
  </property>
  <property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.resource.mb</name>
    <value>512</value>
  </property>
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>256</value>
  </property>
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>256</value>
  </property>
</configuration>
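One step the notes skip: on a brand-new cluster the NameNode has to be formatted once before the first start-dfs.sh (run on master only; this erases any existing HDFS metadata):
hdfs namenode -format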
# Start HDFS, YARN, and the MapReduce job history server (run on master)
start-dfs.sh
start-yarn.sh
mr-jobhistory-daemon.sh start historyserver
# Stop everything again (mr-jobhistory-daemon.sh still works in 3.x but is deprecated in favor of "mapred --daemon start|stop historyserver")
stop-dfs.sh
stop-yarn.sh
mr-jobhistory-daemon.sh stop historyserver
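After starting, a quick way to confirm the daemons came up; jps is run on each node, and the web UI ports are the Hadoop 3.x defaults:
# On master, expect NameNode, SecondaryNameNode, ResourceManager and JobHistoryServer; on each slave, DataNode and NodeManager
jps
# Check that all three DataNodes registered with the NameNode
hdfs dfsadmin -report
# Web UIs: NameNode http://master:9870, ResourceManager http://master:8088, JobHistoryServer http://master:19888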
# Per-host form of the configuration copy shown earlier
scp ~/hadoop-3.2.1/etc/hadoop/* slave-01:/home/hduser/hadoop-3.2.1/etc/hadoop/
scp ~/hadoop-3.2.1/etc/hadoop/* slave-02:/home/hduser/hadoop-3.2.1/etc/hadoop/
scp ~/hadoop-3.2.1/etc/hadoop/* slave-03:/home/hduser/hadoop-3.2.1/etc/hadoop/
<!-- yarn-site.xml -->
<configuration>
  <property>
    <name>yarn.acl.enable</name>
    <value>0</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>master</value>
  </property>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>1536</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>1536</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>128</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
</configuration>
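With the memory limits above (1536 MB per NodeManager, 256 MB map/reduce containers, 512 MB for the AM), a small example job is a reasonable end-to-end check; the jar path assumes the stock 3.2.1 tarball layout:
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.1.jar pi 2 10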