Last active
October 16, 2021 22:19
-
-
Save avcaliani/d988386328d42289c65945ac322f323a to your computer and use it in GitHub Desktop.
🐳 Spark - Mini Cluster
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compose stack for a minimal Spark standalone cluster: one master and one
# worker, both built from the same image. The image defaults to the "master"
# role (see Dockerfile ENV); the worker overrides SPARK_WORKLOAD.
# NOTE: `version` is ignored by Compose v2+ but kept for older docker-compose.
version: '3'
services:
  spark-master:
    user: root
    container_name: spark-master
    build: .
    image: spark
    ports:
      - "9090:8080"   # master web UI (container port 8080)
      - "7077:7077"   # master RPC port workers connect to
    volumes:
      - ./spark/jobs:/jobs
      - ./DATALAKE:/DATALAKE

  spark-worker:
    user: root
    container_name: spark-worker
    build: .
    image: spark      # same image as the master; role chosen via env below
    ports:
      - "9091:8080"   # worker web UI (container port 8080)
      - "7000:7000"   # worker port
    depends_on:
      - spark-master
    environment:
      - SPARK_WORKLOAD=worker         # image default is "master"
      - SPARK_LOCAL_IP=spark-worker
    volumes:
      - ./spark/jobs:/jobs
      - ./DATALAKE:/DATALAKE
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Image for a Spark standalone node (master or worker); the role is selected
# at runtime by SPARK_WORKLOAD (see init-spark.sh and docker-compose).
FROM python:3.9

# Java / Spark locations, pinned versions, and PATH wiring.
ENV JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64"
ENV SPARK_HOME="/opt/spark"
ENV SPARK_VERSION="3.1.2"
ENV HADOOP_VERSION="3.2"
ENV PATH="$SPARK_HOME/bin:$PATH"
ENV PYSPARK_PYTHON=python
ENV PATH="$SPARK_HOME/python:$PATH"

# Spark Config — defaults make this container a master; compose overrides
# SPARK_WORKLOAD / SPARK_LOCAL_IP for workers.
ENV SPARK_WORKLOAD="master" \
    SPARK_LOCAL_IP="spark-master" \
    SPARK_LOG_DIR="/opt/spark/logs"

# Master Config
ENV SPARK_MASTER_PORT=7077 \
    SPARK_MASTER_WEBUI_PORT=8080 \
    SPARK_MASTER_LOG="/opt/spark/logs/spark-master.out"

# Worker Config
ENV SPARK_MASTER="spark://spark-master:7077" \
    SPARK_WORKER_WEBUI_PORT=8080 \
    SPARK_WORKER_PORT=7000 \
    SPARK_WORKER_LOG="/opt/spark/logs/spark-worker.out" \
    SPARK_WORKER_CORES=1 \
    SPARK_WORKER_MEMORY="1G" \
    SPARK_DRIVER_MEMORY="1G" \
    SPARK_EXECUTOR_MEMORY="1G"

WORKDIR /opt

# Java — sid repo added so openjdk-11 resolves on this base image.
# NOTE(review): verify this is still needed; newer Debian releases ship
# openjdk-11 natively, and pulling from sid can destabilize apt.
RUN echo "deb http://ftp.us.debian.org/debian sid main" >> /etc/apt/sources.list
# update + install in the same layer (avoids the stale apt-cache bug),
# skip recommended packages, and remove the package lists to keep the layer small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends gcc-10-base openjdk-11-jdk \
    && apt-get -y autoremove \
    && rm -rf /var/lib/apt/lists/*

# Spark — fetch the pinned distribution from the Apache archive.
# NOTE(review): prefer `ADD --checksum=sha256:…` (BuildKit) or curl+sha check
# so the download is integrity-verified.
ADD "https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION.tgz" .
RUN tar -xzf spark*.tgz && rm -f spark*.tgz && mv spark* spark

# Spark Config — COPY, not ADD, for plain local files (hadolint DL3020).
COPY log4j.properties /opt/spark/conf/log4j.properties
COPY init-spark.sh /

# Create the log dir and point both log files at the container's stdout so
# `docker logs` captures master/worker output.
RUN mkdir -p $SPARK_LOG_DIR \
    && ln -sf /dev/stdout $SPARK_MASTER_LOG \
    && ln -sf /dev/stdout $SPARK_WORKER_LOG

# Documentation only (EXPOSE does not publish ports): web UI, master RPC, REST.
EXPOSE 8080 7077 6066

WORKDIR /opt/spark
CMD ["/bin/bash", "/init-spark.sh"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash -xe
# Container entrypoint: start a Spark standalone master or worker depending
# on $SPARK_WORKLOAD (set by the Dockerfile ENV / docker-compose override).

# Activating Spark environment variables
. "/opt/spark/bin/load-spark-env.sh"

# Master Node
if [ "$SPARK_WORKLOAD" == "master" ]; then
  SPARK_MASTER_HOST=$(hostname)
  export SPARK_MASTER_HOST
  # `exec` so spark-class replaces this shell as PID 1 and receives SIGTERM
  # directly from `docker stop` (without it the JVM would be a child process
  # and miss the signal).
  exec /opt/spark/bin/spark-class "org.apache.spark.deploy.master.Master" \
    --ip "$SPARK_MASTER_HOST" \
    --port "$SPARK_MASTER_PORT" \
    --webui-port "$SPARK_MASTER_WEBUI_PORT" \
    >>"$SPARK_MASTER_LOG"
# Worker Node
elif [ "$SPARK_WORKLOAD" == "worker" ]; then
  exec /opt/spark/bin/spark-class "org.apache.spark.deploy.worker.Worker" \
    --webui-port "$SPARK_WORKER_WEBUI_PORT" \
    "$SPARK_MASTER" \
    >>"$SPARK_WORKER_LOG"
else
  # Misconfiguration: report on stderr and fail with a non-zero status so the
  # container exits visibly instead of "succeeding" with nothing running.
  echo "Undefined Workload Type $SPARK_WORKLOAD, must specify: master or worker" >&2
  exit 1
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Log4j 1.x configuration installed into /opt/spark/conf by the Dockerfile.
# Root logger at INFO, routed to the single console appender below.
log4j.rootCategory=INFO, console

# Spark :: Log :: Console Appender (targets stderr)
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yyyy/MM/dd HH:mm:ss} %p [%t] %c: %m%n

# Spark :: Log :: raise thresholds for noisy framework loggers
log4j.logger.org.apache.spark=WARN
log4j.logger.org.apache.hadoop=ERROR
log4j.logger.org.sparkproject.jetty=WARN
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment