-
-
Save benjamintanweihao/29c85fcd598cc7ccab4c9b3baba3f96f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Usage
# Start the container (shares the host X11 socket, the current directory and the
# PyCharm settings directory, and publishes port 4040):
#   docker run -it --rm --name sparkdev -e DISPLAY=${DISPLAY} -v `pwd`:/home/developer/SparkCourse -v /tmp/.X11-unix:/tmp/.X11-unix -v ~/.PyCharmCE2019.2:/home/developer/.PyCharmCE2019.2 -p 4040:4040 benjamintanweihao/sparkdev
# Launch PyCharm inside the running container:
#   docker exec -it sparkdev /opt/pycharm/bin/pycharm.sh
FROM debian:stretch

# MAINTAINER is deprecated; a label carries the same author information.
LABEL org.opencontainers.image.authors="Getty Images https://github.com/gettyimages"
# Install the locales package, set C.UTF-8 as the system default LANG, then
# enable en_US.UTF-8 in /etc/locale.gen and regenerate the locales.
# The apt lists are removed in the same layer so they do not persist in the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends locales \
 && dpkg-reconfigure -f noninteractive locales \
 && locale-gen C.UTF-8 \
 && /usr/sbin/update-locale LANG=C.UTF-8 \
 && echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen \
 && locale-gen \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*

# Users with other locales should set this in their derivative image
ENV LANG=en_US.UTF-8 \
    LANGUAGE=en_US:en \
    LC_ALL=en_US.UTF-8
# Bootstrap Python 3, point /usr/bin/python at it, and install pip and py4j
# through easy_install3.
# NOTE(review): recommended packages are intentionally still installed here;
# curl's recommends presumably provide the CA certificates that the HTTPS
# downloads rely on - confirm before adding --no-install-recommends.
RUN apt-get update \
 && apt-get install -y \
      curl \
      python3 \
      python3-setuptools \
      unzip \
 && ln -s /usr/bin/python3 /usr/bin/python \
 && easy_install3 pip py4j \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*

# Fixed hash seed for Python 3 on Spark, see:
# http://blog.stuart.axelbrooke.com/python-3-on-spark-return-of-the-pythonhashseed
ENV PYTHONHASHSEED=0 \
    PYTHONIOENCODING=UTF-8 \
    PIP_DISABLE_PIP_VERSION_CHECK=1
# JAVA and PYTHON
# Installs the Java 8 runtime, the Python 2 and Python 3 toolchains, a few
# command-line tools, and X11/font/GTK libraries (presumably for the PyCharm
# GUI - confirm). Packages are listed one per line, sorted, for easier diffs.
# The final step creates the unprivileged "developer" user with a home
# directory and bash as login shell.
RUN apt-get update && apt-get install --no-install-recommends -y \
      curl \
      gcc \
      git \
      less \
      libfontconfig1 \
      libfreetype6-dev \
      libgtk2.0-0 \
      libxext-dev \
      libxrender-dev \
      libxslt1.1 \
      libxtst-dev \
      libxxf86vm1 \
      openjdk-8-jre \
      openssh-client \
      python \
      python-dev \
      python-pip \
      python-setuptools \
      python3 \
      python3-dev \
      python3-pip \
      python3-setuptools \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/* \
 && useradd -ms /bin/bash developer
# HADOOP
# Each variable gets its own ENV instruction because later ones reference
# earlier ones, and values set within a single ENV cannot see each other.
ENV HADOOP_VERSION=3.0.0
ENV HADOOP_HOME=/usr/hadoop-$HADOOP_VERSION
ENV HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
ENV PATH=$PATH:$HADOOP_HOME/bin
# Download to a temporary file instead of piping into gunzip/tar: with -f, an
# HTTP error or failed transfer now stops the build at the curl step rather
# than being hidden behind the exit status of the last command in a pipe.
# The archive is deleted in the same layer so it does not bloat the image.
# NOTE(review): the download is not checksum-verified.
RUN curl -fsSL --retry 3 \
      "https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz" \
      -o /tmp/hadoop.tar.gz \
 && tar -xzf /tmp/hadoop.tar.gz -C /usr/ \
 && rm /tmp/hadoop.tar.gz \
 && rm -rf "$HADOOP_HOME/share/doc" \
 && chown -R root:root "$HADOOP_HOME"
# SPARK
# The "without-hadoop" Spark build is used; SPARK_DIST_CLASSPATH points it at
# the jars and configuration under $HADOOP_HOME instead.
ENV SPARK_VERSION=2.4.1
ENV SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-without-hadoop
ENV SPARK_HOME=/usr/spark-${SPARK_VERSION}
# The hdfs/* entry was listed twice in the original; the duplicate is dropped
# (the order of first occurrences is unchanged).
ENV SPARK_DIST_CLASSPATH="$HADOOP_HOME/etc/hadoop/*:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/hdfs/lib/*:$HADOOP_HOME/share/hadoop/yarn/lib/*:$HADOOP_HOME/share/hadoop/yarn/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/tools/lib/*"
ENV PATH=$PATH:${SPARK_HOME}/bin
# Download to a temporary file instead of piping into gunzip/tar so that a
# failed download (-f makes HTTP errors fatal) stops the build at the curl
# step. The archive is removed in the same layer, and the unpacked directory
# is renamed to $SPARK_HOME.
# NOTE(review): the download is not checksum-verified.
RUN curl -fsSL --retry 3 \
      "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_PACKAGE}.tgz" \
      -o /tmp/spark.tgz \
 && tar -xzf /tmp/spark.tgz -C /usr/ \
 && rm /tmp/spark.tgz \
 && mv "/usr/$SPARK_PACKAGE" "$SPARK_HOME" \
 && chown -R root:root "$SPARK_HOME"
# PYCHARM
# Unpack PyCharm Community into /opt/pycharm (the archive's top-level directory
# is stripped) and build the pydev Cython extensions in place for both Python 2
# and Python 3. Override the download location with
# --build-arg pycharm_source=<url>.
WORKDIR /opt/pycharm
ARG pycharm_source=https://download.jetbrains.com/python/pycharm-community-192.6603.24.tar.gz
RUN curl -fsSL "$pycharm_source" -o /opt/pycharm/installer.tgz \
 && tar --strip-components=1 -xzf installer.tgz \
 && rm installer.tgz \
 && /usr/bin/python2 /opt/pycharm/helpers/pydev/setup_cython.py build_ext --inplace \
 && /usr/bin/python3 /opt/pycharm/helpers/pydev/setup_cython.py build_ext --inplace
# Install Apache Beam with its "docs" and "test" extras for Python 3.
# The requirement is quoted so the shell cannot treat [docs,test] as a glob
# pattern, and --no-cache-dir keeps pip's download cache out of the layer.
RUN pip3 install --no-cache-dir "apache-beam[docs,test]"
# Runtime configuration: start in the Spark installation directory as the
# unprivileged "developer" user.
WORKDIR $SPARK_HOME
USER developer
ENV HOME=/home/developer
# The relative path resolves against WORKDIR, i.e. $SPARK_HOME/bin/spark-class;
# the default command starts a Spark standalone master.
CMD ["bin/spark-class", "org.apache.spark.deploy.master.Master"]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment