Skip to content

Instantly share code, notes, and snippets.

@eiso
Created November 16, 2017 18:31
Show Gist options
  • Save eiso/5e7d39a0f77fe34202aa9f90d980419c to your computer and use it in GitHub Desktop.
Save eiso/5e7d39a0f77fe34202aa9f90d980419c to your computer and use it in GitHub Desktop.
# srcd/engine — dev image bundling borges, Apache Spark, the source{d}
# engine (Scala + Python bindings) and Jupyter Notebook.
# NOTE(review): intentionally runs as root (jupyter --allow-root); add a
# USER directive before shipping this anywhere beyond local development.
FROM ubuntu:16.04

# ENGINE_VERSION must be ENV (not ARG): the wrapper scripts and
# PYSPARK_SUBMIT_ARGS expand it at container runtime.
ENV ENGINE_VERSION=0.1.7
ARG BORGES_VERSION=0.8.3
ARG SPARK_VERSION=2.2.0
ARG HADOOP_VERSION=2.7

# Single apt layer: update+install combined so the package index is never
# stale, recommends skipped and the index removed to keep the layer small.
# The blanket `apt-get upgrade` was dropped — bump the base tag instead.
RUN apt-get update && apt-get install -y --no-install-recommends \
      ca-certificates \
      git \
      openjdk-8-jre \
      python \
      python3.5 \
      python3-pip \
      vim \
      wget \
    && rm -rf /var/lib/apt/lists/*
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64

# Absolute path (DL3000); WORKDIR creates it, no mkdir needed.
WORKDIR /builds

# borges: fetch, install and remove the archive in the same layer so the
# tarball never persists in the image.
RUN wget -q https://github.com/src-d/borges/releases/download/v${BORGES_VERSION}/borges_v${BORGES_VERSION}_linux_amd64.tar.gz \
    && tar -xzf borges_v${BORGES_VERSION}_linux_amd64.tar.gz \
    && cp borges_linux_amd64/borges /usr/local/bin/ \
    && rm -rf borges_v${BORGES_VERSION}_linux_amd64.tar.gz borges_linux_amd64

# Pre-pack a sample repository set into /sourced.
RUN mkdir /sourced \
    && wget -q https://gist.githubusercontent.com/eiso/160b28cb38fae87ad54cb4c8e812a265/raw/11f499c07cee464685796ea1c54da8dcdda22ef1/repos.txt \
    && borges pack --file=repos.txt --to=/sourced/sample-repos

# Spark: download, unpack and drop the tarball in one layer.
RUN wget -q -O spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz "https://www.apache.org/dyn/mirrors/mirrors.cgi?action=download&filename=spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" \
    && tar -xzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
    && rm spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
# ARG values are expanded at build time, so these become literal paths.
ENV SPARK_HOME=/builds/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}
ENV PATH=${PATH}:${SPARK_HOME}/bin \
    PYSPARK_PYTHON=python3

# Build the engine from its tagged release and publish it locally for sbt.
RUN git clone https://github.com/src-d/engine.git
WORKDIR /builds/engine
RUN git checkout tags/v${ENGINE_VERSION} \
    && ./sbt publishLocal
WORKDIR /builds

# Upgrade pip in its own layer: the new pip lands in /usr/local/bin, which
# must win PATH resolution in a *fresh* shell before the next install runs
# (the Ubuntu 16.04 /usr/bin/pip3 wrapper breaks after a pip upgrade).
RUN pip3 install --no-cache-dir --upgrade pip
RUN pip3 install --no-cache-dir sourced-engine

# Point the engine at an external bblfshd via spark-defaults.conf.
# The shell expands these ENV values at build time into the conf file.
ENV SPARK_BBLFSH_HOST=spark.tech.sourced.bblfsh.grpc.host \
    SPARK_BBLFSH_PORT=spark.tech.sourced.bblfsh.grpc.port \
    BBLFSH_HOST=bblfshd \
    BBLFSH_PORT=9432
RUN printf "$SPARK_BBLFSH_HOST $BBLFSH_HOST\n$SPARK_BBLFSH_PORT $BBLFSH_PORT\n" > ${SPARK_HOME}/conf/spark-defaults.conf

# Jupyter Notebook with Spark integration, one pip layer.
RUN pip3 install --no-cache-dir jupyter jupyter-spark \
    && jupyter serverextension enable --py jupyter_spark \
    && jupyter nbextension install --py jupyter_spark \
    && jupyter nbextension enable --py jupyter_spark \
    && jupyter nbextension enable --py widgetsnbextension
ENV PYSPARK_SUBMIT_ARGS="--packages tech.sourced:engine:${ENGINE_VERSION} pyspark-shell"

# Wrapper scripts on PATH rather than shell aliases (aliases don't survive
# `docker exec`). ${ENGINE_VERSION} is single-quoted so it expands when the
# script runs inside the container, not at build time. `exec` keeps the
# launched process as PID 1 of the exec'd shell; "$@" forwards extra args.
# NOTE(review): pyspark/spark-shell live under ${SPARK_HOME}/bin, not
# /usr/local/bin — paths kept as the original had them; confirm they resolve.
RUN printf '#!/bin/sh\nexec /usr/local/bin/pyspark --packages "tech.sourced:engine:${ENGINE_VERSION}" "$@"\n' > /builds/pyspark \
    && printf '#!/bin/sh\nexec /usr/local/bin/spark-shell --packages "tech.sourced:engine:${ENGINE_VERSION}" "$@"\n' > /builds/spark-shell \
    && printf '#!/bin/sh\nexec /usr/local/bin/jupyter notebook --ip=0.0.0.0 --allow-root "$@"\n' > /builds/jupyter \
    && chmod u+x /builds/pyspark /builds/spark-shell /builds/jupyter
ENV PATH=/builds:${PATH}

# Documentation only: default Jupyter Notebook port.
EXPOSE 8888

WORKDIR /sourced
RUN cp /builds/engine/_examples/notebooks/* /sourced/

# Pack the user's mounted repos.txt at startup, then keep the container
# alive so notebooks/shells can be exec'd into it.
CMD ["/bin/sh", "-c", "borges pack --file=/sourced/repos.txt --to=/sourced/user-repos && sleep infinity"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment