Last active
November 16, 2021 11:32
-
-
Save CalvinHaynes/6f872a1fb4ba6717a293011f6bfb63a2 to your computer and use it in GitHub Desktop.
Hadoop集群 Dockerfile v1.0.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# syntax=docker/dockerfile:1
# Lab 1: base-environment Dockerfile for building a Hadoop server cluster.
FROM ubuntu:20.04

# Target architecture (not referenced by later steps yet; kept for compatibility).
ARG arch_name=amd64

# Component versions — override with --build-arg to bump without editing the file.
ARG HADOOP_VERSION=3.2.2
ARG SCALA_VERSION=2.12.6
ARG SPARK_VERSION=3.2.0

# Build-time only: keep apt non-interactive during the build without baking the
# setting into the runtime environment (an ENV here would leak into containers).
ARG DEBIAN_FRONTEND=noninteractive

# 1. Back up the original apt source list.
RUN cp /etc/apt/sources.list /etc/apt/sources.backup.list
# 2. Replace it with the mirror list shipped next to this Dockerfile for much
#    faster downloads.
COPY sources.list /etc/apt/sources.list

# Optional proxy for restricted networks (uncomment and adjust the port):
# RUN export hostip=$(cat /etc/resolv.conf |grep -oP '(?<=nameserver\ ).*') && \
#     export https_proxy="http://${hostip}:7890" && \
#     export http_proxy="http://${hostip}:7890"

# Timezone and locale for every cluster node.
ENV TZ=Asia/Shanghai \
    LANG=en_US.utf8 \
    LANGUAGE=en_US.UTF-8

# 3. Refresh the package index and install everything the cluster needs in ONE
#    layer — splitting `update` from `install` across layers risks installing
#    from a stale cache. Skip recommended packages and drop the apt lists in
#    the same layer to keep the image small.
#    pdsh (parallel distributed shell) runs commands on many remote hosts at
#    once — useful for batch configuration, deployment, and file distribution
#    across the cluster; its default remote command is set to ssh below.
RUN apt-get update && apt-get install -y --no-install-recommends \
        iputils-ping \
        net-tools \
        openjdk-8-jdk \
        openssh-client \
        openssh-server \
        openssl \
        pdsh \
        ssh \
        sudo \
        vim \
        wget \
    && rm -rf /var/lib/apt/lists/* \
    && echo ssh >> /etc/pdsh/rcmd_default

# Passwordless SSH for root — Hadoop daemons manage each other over SSH.
# `service ssh start` is appended to ~/.bashrc so the daemon comes up when an
# interactive shell is opened in the container.
RUN ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa && \
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys && \
    chmod 600 ~/.ssh/authorized_keys && \
    chmod 700 ~/.ssh && \
    echo "service ssh start" >> ~/.bashrc

# Install Hadoop (download, unpack, and delete the tarball in one layer so the
# archive never persists in the image).
RUN wget https://dlcdn.apache.org/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz && \
    tar -xzf hadoop-${HADOOP_VERSION}.tar.gz && \
    mv hadoop-${HADOOP_VERSION} /usr/local/hadoop && \
    rm hadoop-${HADOOP_VERSION}.tar.gz

# Install Scala.
RUN wget https://downloads.lightbend.com/scala/${SCALA_VERSION}/scala-${SCALA_VERSION}.tgz && \
    tar -xzf scala-${SCALA_VERSION}.tgz && \
    mv scala-${SCALA_VERSION} /usr/local/scala && \
    rm scala-${SCALA_VERSION}.tgz

# Install Spark (the build pinned to the Hadoop 3.2 / Scala 2.13 distribution).
RUN wget https://dlcdn.apache.org/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.2-scala2.13.tgz && \
    tar -xzf spark-${SPARK_VERSION}-bin-hadoop3.2-scala2.13.tgz && \
    mv spark-${SPARK_VERSION}-bin-hadoop3.2-scala2.13 /usr/local/spark && \
    rm spark-${SPARK_VERSION}-bin-hadoop3.2-scala2.13.tgz

# Directories referenced later by the Hadoop configuration files. A single
# `mkdir -p` of the leaf paths creates every intermediate directory too.
RUN mkdir -p /usr/local/hadoop/hadoop_data/hdfs/tmp \
             /usr/local/hadoop/hadoop_data/hdfs/namenode \
             /usr/local/hadoop/hadoop_data/hdfs/datanode \
             /usr/local/hadoop/hadoop_data/hdfs/edits \
             /usr/local/hadoop/hadoop_data/hdfs/checkpoints/edits

# Hadoop and Spark web UI / RPC ports. EXPOSE is documentation only — publish
# with `docker run -p` to reach them from outside.
EXPOSE 8080 8081 8088 50090 9864 19888 50070

CMD ["/bin/bash"]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment