Created
November 16, 2017 18:31
-
-
Save eiso/5e7d39a0f77fe34202aa9f90d980419c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# srcd/engine
FROM ubuntu:16.04

# Release tag of src-d/engine checked out and published below.
ENV ENGINE_VERSION=0.1.7

# update + install in one layer (a cached, stale `apt-get update` layer would
# otherwise poison later installs); --no-install-recommends and the list
# cleanup keep the layer small. Blanket `apt-get upgrade` removed — prefer
# bumping the base image tag instead (hadolint DL3005).
# ca-certificates is needed for the https downloads below.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      git \
      vim \
      wget \
 && rm -rf /var/lib/apt/lists/*

# Absolute path (DL3000); WORKDIR creates the directory if it is missing.
WORKDIR /builds
# Install borges v0.8.3: fetch, unpack, copy the binary onto PATH, and remove
# the tarball/extract dir in the SAME layer (a later `rm` would not shrink
# the image).
RUN wget -q https://github.com/src-d/borges/releases/download/v0.8.3/borges_v0.8.3_linux_amd64.tar.gz \
 && tar -xvf borges_v0.8.3_linux_amd64.tar.gz \
 && cp borges_linux_amd64/borges /usr/local/bin \
 && rm -rf borges_v0.8.3_linux_amd64.tar.gz borges_linux_amd64

RUN mkdir -p /sourced

# Pinned gist revision listing the sample repositories to pack.
RUN wget -q https://gist.githubusercontent.com/eiso/160b28cb38fae87ad54cb4c8e812a265/raw/11f499c07cee464685796ea1c54da8dcdda22ef1/repos.txt

# Pack the sample repos into siva files under /sourced at build time.
RUN borges pack --file=repos.txt --to=/sourced/sample-repos
# Python (2 + 3.5) and a Java 8 runtime — all required to run Spark and the
# engine's Python bindings. Fresh `apt-get update` in the same layer so the
# package index is never stale.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      openjdk-8-jre \
      python \
      python3-pip \
      python3.5 \
 && rm -rf /var/lib/apt/lists/*

RUN pip3 install --no-cache-dir --upgrade pip

ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64

# Download, unpack, and delete the Spark tarball in one layer.
RUN wget -q -O spark-2.2.0-bin-hadoop2.7.tgz "https://www.apache.org/dyn/mirrors/mirrors.cgi?action=download&filename=spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz" \
 && tar -xvzf spark-2.2.0-bin-hadoop2.7.tgz \
 && rm spark-2.2.0-bin-hadoop2.7.tgz

ENV SPARK_HOME=/builds/spark-2.2.0-bin-hadoop2.7
ENV PATH=${PATH}:/builds/spark-2.2.0-bin-hadoop2.7/bin
# Make pyspark drive workers with Python 3.
ENV PYSPARK_PYTHON=python3
# Clone the engine, check out the pinned release tag, and publish it to the
# local ivy repository so Spark can later resolve it via --packages.
RUN git clone https://github.com/src-d/engine.git
WORKDIR /builds/engine
RUN git checkout tags/v${ENGINE_VERSION} \
 && ./sbt publishLocal
WORKDIR /builds

# Python bindings for the engine. NOTE(review): unpinned — consider
# `sourced-engine==${ENGINE_VERSION}` if the PyPI release tracks the tag.
RUN pip install --no-cache-dir sourced-engine
# Spark property keys and the values pointing the engine's bblfsh gRPC client
# at the bblfshd container (expected to be reachable as host `bblfshd`).
ENV SPARK_BBLFSH_HOST=spark.tech.sourced.bblfsh.grpc.host
ENV SPARK_BBLFSH_PORT=spark.tech.sourced.bblfsh.grpc.port
ENV BBLFSH_HOST=bblfshd
ENV BBLFSH_PORT=9432

# Write the two properties into spark-defaults.conf so every
# pyspark/spark-shell invocation picks them up. Data is passed as printf
# arguments (not as the format string) so stray % characters can't be
# misinterpreted; a trailing newline terminates the last property line.
RUN printf '%s %s\n%s %s\n' \
      "$SPARK_BBLFSH_HOST" "$BBLFSH_HOST" \
      "$SPARK_BBLFSH_PORT" "$BBLFSH_PORT" \
      > /builds/spark-2.2.0-bin-hadoop2.7/conf/spark-defaults.conf
# Jupyter Notebook support with the jupyter-spark extension (progress bars
# for running Spark jobs inside notebooks). Single layer, no pip cache.
RUN pip3 install --no-cache-dir jupyter jupyter-spark \
 && jupyter serverextension enable --py jupyter_spark \
 && jupyter nbextension install --py jupyter_spark \
 && jupyter nbextension enable --py jupyter_spark \
 && jupyter nbextension enable --py widgetsnbextension
# Pull the locally-published engine jar into every pyspark session.
# ${ENGINE_VERSION} expands at build time from the ENV declared above.
ENV PYSPARK_SUBMIT_ARGS="--packages tech.sourced:engine:${ENGINE_VERSION} pyspark-shell"

# Wrapper scripts instead of shell aliases, since aliases don't work with
# `docker exec`. Each script is generated and made executable in ONE layer,
# with `>` (truncate) rather than `>>` so a rebuilt layer can't duplicate
# lines. The single-quoted ${ENGINE_VERSION} is deliberately left unexpanded
# in the script text: it resolves at container runtime from the persisted ENV.
# NOTE(review): the scripts call /usr/local/bin/pyspark and
# /usr/local/bin/spark-shell, but Spark was unpacked under
# /builds/spark-2.2.0-bin-hadoop2.7/bin — confirm these paths exist, or use
# the bare command names and rely on PATH.
RUN { echo '#! /bin/sh'; \
      echo '/usr/local/bin/pyspark --packages "tech.sourced:engine:${ENGINE_VERSION}"'; \
    } > /builds/pyspark \
 && chmod u+x /builds/pyspark

RUN { echo '#! /bin/sh'; \
      echo '/usr/local/bin/spark-shell --packages "tech.sourced:engine:${ENGINE_VERSION}"'; \
    } > /builds/spark-shell \
 && chmod u+x /builds/spark-shell

RUN { echo '#! /bin/sh'; \
      echo '/usr/local/bin/jupyter notebook --ip=0.0.0.0 --allow-root'; \
    } > /builds/jupyter \
 && chmod u+x /builds/jupyter
# Put the wrapper scripts first on PATH so they shadow the real binaries.
ENV PATH=/builds:${PATH}

WORKDIR /sourced

# Seed the user workspace with the example notebooks shipped with the engine.
RUN cp /builds/engine/_examples/notebooks/* /sourced/

# Explicit `sh -c` exec form: the command needs a shell for `&&`, and
# `sleep infinity` keeps the container alive for `docker exec` sessions.
# NOTE(review): expects /sourced/repos.txt to exist at runtime (presumably
# volume-mounted by the user) — the build-time repos.txt lives in /builds.
CMD ["/bin/sh", "-c", "borges pack --file=/sourced/repos.txt --to=/sourced/user-repos && sleep infinity"]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment