Created
November 15, 2017 06:23
-
-
Save eiso/7c46cc7afe55a4a3e5e443c70d1ad679 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# syntax=docker/dockerfile:1
# srcd/engine — demo image bundling the borges repository packer,
# Apache Spark 2.2.0, the sourced-engine package and Jupyter Notebook.
# Runs as root on purpose: jupyter is started with --allow-root and
# borges writes into the image workdir at container start.
FROM ubuntu:16.04

# Absolute WORKDIR (DL3000); Docker creates it if missing.
WORKDIR /sourced

# All OS packages in ONE layer: update+install combined so the apt cache
# is never stale, --no-install-recommends keeps the image lean, and the
# list files are removed in the same layer that created them. The blanket
# `apt-get upgrade` was dropped (DL3005) — bump the base tag instead.
# ca-certificates is explicit because --no-install-recommends would
# otherwise skip it and break the HTTPS wget calls below.
RUN DEBIAN_FRONTEND=noninteractive apt-get update \
    && apt-get install -y --no-install-recommends \
       ca-certificates \
       openjdk-8-jre \
       python \
       python3.5 \
       python3-pip \
       vim \
       wget \
    && rm -rf /var/lib/apt/lists/*

ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64

# borges: download, install the binary, and delete the archive in the
# same layer so it never persists in the image.
RUN wget -q https://github.com/src-d/borges/releases/download/v0.8.3/borges_v0.8.3_linux_amd64.tar.gz \
    && tar -xzf borges_v0.8.3_linux_amd64.tar.gz \
    && cp borges_linux_amd64/borges /usr/local/bin \
    && rm -rf borges_v0.8.3_linux_amd64.tar.gz borges_linux_amd64

# repos.txt stays in /sourced: the runtime CMD packs from it again.
RUN wget -q https://gist.githubusercontent.com/eiso/160b28cb38fae87ad54cb4c8e812a265/raw/11f499c07cee464685796ea1c54da8dcdda22ef1/repos.txt \
    && borges pack --file=repos.txt --to=demo

RUN pip3 install --no-cache-dir --upgrade pip

# Spark 2.2.0: fetch via the Apache mirror redirector, unpack, and drop
# the tarball in the same layer.
RUN wget -q -O spark-2.2.0-bin-hadoop2.7.tgz "https://www.apache.org/dyn/mirrors/mirrors.cgi?action=download&filename=spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz" \
    && tar -xzf spark-2.2.0-bin-hadoop2.7.tgz \
    && rm spark-2.2.0-bin-hadoop2.7.tgz

ENV SPARK_HOME=/sourced/spark-2.2.0-bin-hadoop2.7
ENV PATH=${PATH}:${SPARK_HOME}/bin \
    PYSPARK_PYTHON=python3

# NOTE(review): installed with python2's pip while PYSPARK_PYTHON is
# python3, as in the original — preserved, but worth confirming upstream.
RUN pip install --no-cache-dir sourced-engine

# Babelfish endpoint config, baked into spark-defaults.conf at build
# time ($VARS below expand during the build).
ENV SPARK_BBLFSH_HOST=spark.tech.sourced.bblfsh.grpc.host \
    SPARK_BBLFSH_PORT=spark.tech.sourced.bblfsh.grpc.port \
    BBLFSH_HOST=bblfshd \
    BBLFSH_PORT=9432
RUN printf "$SPARK_BBLFSH_HOST $BBLFSH_HOST\n$SPARK_BBLFSH_PORT $BBLFSH_PORT" > ${SPARK_HOME}/conf/spark-defaults.conf

# Jupyter Notebook support, installed and wired up in a single layer.
RUN pip3 install --no-cache-dir jupyter jupyter-spark \
    && jupyter serverextension enable --py jupyter_spark \
    && jupyter nbextension install --py jupyter_spark \
    && jupyter nbextension enable --py jupyter_spark \
    && jupyter nbextension enable --py widgetsnbextension

ENV ENGINE_VERSION=0.1.7
# $ENGINE_VERSION expands at build time; value must be quoted because it
# contains spaces.
ENV PYSPARK_SUBMIT_ARGS="--packages tech.sourced:engine:$ENGINE_VERSION pyspark-shell"

# Launcher scripts in /bin-style wrappers since aliases don't work with
# `docker exec`. ${ENGINE_VERSION}/${SPARK_HOME} are written literally
# (single quotes) and expand when the script runs. Fix: the originals
# pointed at /usr/local/bin/pyspark and /usr/local/bin/spark-shell,
# which the Spark tarball never installs — they live in ${SPARK_HOME}/bin.
RUN printf '#! /bin/sh\n"${SPARK_HOME}/bin/pyspark" --packages "tech.sourced:engine:${ENGINE_VERSION}"\n' > /sourced/pyspark \
    && printf '#! /bin/sh\n"${SPARK_HOME}/bin/spark-shell" --packages "tech.sourced:engine:${ENGINE_VERSION}"\n' > /sourced/spark-shell \
    && printf '#! /bin/sh\n/usr/local/bin/jupyter notebook --ip=0.0.0.0 --allow-root\n' > /sourced/jupyter \
    && chmod u+x /sourced/pyspark /sourced/spark-shell /sourced/jupyter

ENV PATH=/sourced:${PATH}

# Documentation only: the jupyter wrapper serves on the notebook default.
EXPOSE 8888

# Exec-form CMD; the inner sh is needed for `&&`, and `exec` makes sleep
# PID 1 so `docker stop` signals reach it.
CMD ["/bin/sh", "-c", "borges pack --file=repos.txt --to=repositories && exec sleep infinity"]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment