# docker run -it --rm --name sparkdev -e DISPLAY=${DISPLAY} -v `pwd`:/home/developer/SparkCourse -v /tmp/.X11-unix:/tmp/.X11-unix -v ~/.PyCharmCE2019.2:/home/developer/.PyCharmCE2019.2 -p 4040:4040 benjamintanweihao/sparkdev
# docker exec -it sparkdev /opt/pycharm/bin/pycharm.sh
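# Notes on the commands above (assuming a Linux host running an X server):
#   -e DISPLAY and -v /tmp/.X11-unix let GUI applications in the container (PyCharm) draw on the host display,
#   -v `pwd`:/home/developer/SparkCourse mounts the course material into the developer user's home,
#   -v ~/.PyCharmCE2019.2 persists PyCharm's configuration across container restarts,
#   -p 4040:4040 exposes the web UI of a running Spark application.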
FROM debian:stretch
LABEL maintainer="Getty Images https://github.com/gettyimages"
RUN apt-get update \
 && apt-get install -y locales \
 && dpkg-reconfigure -f noninteractive locales \
 && locale-gen C.UTF-8 \
 && /usr/sbin/update-locale LANG=C.UTF-8 \
 && echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen \
 && locale-gen \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*
# Users with other locales should set this in their derivative image
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
ENV LC_ALL en_US.UTF-8
RUN apt-get update \
 && apt-get install -y curl unzip \
      python3 python3-setuptools \
 && ln -s /usr/bin/python3 /usr/bin/python \
 && easy_install3 pip py4j \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*
# http://blog.stuart.axelbrooke.com/python-3-on-spark-return-of-the-pythonhashseed
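# Setting PYTHONHASHSEED to a fixed value disables Python 3's per-process hash randomization, so
# hash-partitioned operations (reduceByKey, distinct, ...) place keys consistently across executors;
# PYTHONIOENCODING forces UTF-8 on Python's standard streams regardless of the container locale.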
ENV PYTHONHASHSEED 0
ENV PYTHONIOENCODING UTF-8
ENV PIP_DISABLE_PIP_VERSION_CHECK 1
# JAVA and PYTHON
RUN apt-get update && apt-get install --no-install-recommends -y \
      openjdk-8-jre \
      python python-dev python-setuptools python-pip \
      python3 python3-dev python3-setuptools python3-pip \
      gcc git openssh-client less curl \
      libxtst-dev libxext-dev libxrender-dev libfreetype6-dev \
      libfontconfig1 libgtk2.0-0 libxslt1.1 libxxf86vm1 \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/* \
 && useradd -ms /bin/bash developer
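# The libxtst/libxext/libxrender/fontconfig/gtk2 packages cover PyCharm's GUI dependencies when its
# windows are forwarded over the mounted X11 socket, and `useradd -m` gives the non-root developer
# account a home directory for the bind mounts used in the run command at the top.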
# HADOOP
ENV HADOOP_VERSION 3.0.0
ENV HADOOP_HOME /usr/hadoop-$HADOOP_VERSION
ENV HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
ENV PATH $PATH:$HADOOP_HOME/bin
RUN curl -sL --retry 3 \
      "http://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz" \
      | gunzip \
      | tar -x -C /usr/ \
 && rm -rf $HADOOP_HOME/share/doc \
 && chown -R root:root $HADOOP_HOME
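# The archive unpacks to /usr/hadoop-$HADOOP_VERSION, which matches $HADOOP_HOME; the bundled
# documentation is removed to keep the image smaller.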
# SPARK
ENV SPARK_VERSION 2.4.1
ENV SPARK_PACKAGE spark-${SPARK_VERSION}-bin-without-hadoop
ENV SPARK_HOME /usr/spark-${SPARK_VERSION}
ENV SPARK_DIST_CLASSPATH="$HADOOP_HOME/etc/hadoop/*:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/hdfs/lib/*:$HADOOP_HOME/share/hadoop/yarn/lib/*:$HADOOP_HOME/share/hadoop/yarn/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/tools/lib/*"
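# The "without-hadoop" Spark build ships no Hadoop jars of its own, so SPARK_DIST_CLASSPATH must list
# the Hadoop directories installed above; Spark's launcher adds it to the JVM classpath at startup.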
ENV PATH $PATH:${SPARK_HOME}/bin
RUN curl -sL --retry 3 \
      "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_PACKAGE}.tgz" \
      | gunzip \
      | tar -x -C /usr/ \
 && mv /usr/$SPARK_PACKAGE $SPARK_HOME \
 && chown -R root:root $SPARK_HOME
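# Unlike the Hadoop archive, the Spark tarball's top-level directory carries the full package name,
# hence the extra mv to land it at $SPARK_HOME.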
WORKDIR /opt/pycharm
ARG pycharm_source=https://download.jetbrains.com/python/pycharm-community-192.6603.24.tar.gz
RUN curl -fsSL $pycharm_source -o /opt/pycharm/installer.tgz \
 && tar --strip-components=1 -xzf installer.tgz \
 && rm installer.tgz \
 && /usr/bin/python2 /opt/pycharm/helpers/pydev/setup_cython.py build_ext --inplace \
 && /usr/bin/python3 /opt/pycharm/helpers/pydev/setup_cython.py build_ext --inplace
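# setup_cython.py pre-builds the optional Cython speedups for PyCharm's pydevd debugger for both
# interpreters, so debugging inside the container does not warn about missing debugger speedups.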
RUN pip3 install apache-beam[docs,test]
WORKDIR $SPARK_HOME
USER developer
ENV HOME /home/developer
CMD ["bin/spark-class", "org.apache.spark.deploy.master.Master"]
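# The default command starts a standalone Spark master. A sketch of typical interactive use instead,
# assuming the container was started as "sparkdev" per the run command at the top ($SPARK_HOME/bin is
# already on PATH; my_script.py is a placeholder for your own job):
#   docker exec -it sparkdev pyspark                      # interactive PySpark shell (local[*] master)
#   docker exec -it sparkdev spark-submit my_script.py    # run a script with spark-submit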