# Source: GitHub gist eiso/7c46cc7afe55a4a3e5e443c70d1ad679 (by @eiso, created November 15, 2017).
# srcd/engine
# Base layer: Ubuntu 16.04 plus the wget and vim command-line tools.
FROM ubuntu:16.04

# Refresh the package index, apply pending upgrades and install the tools in a
# single layer, so a cached (stale) `apt-get update` layer can never be paired
# with a newer install step.
# ca-certificates is listed explicitly: --no-install-recommends would otherwise
# skip it, and the HTTPS downloads performed with wget further down need it.
# The apt lists are intentionally left in place because later instructions in
# this file install additional packages.
RUN apt-get -y update \
 && apt-get -y upgrade \
 && apt-get -y install --no-install-recommends \
      ca-certificates \
      vim \
      wget
# Everything this image adds lives under /sourced. The path is absolute so it
# does not depend on whatever working directory the base image happens to set.
WORKDIR /sourced

# Install the borges v0.8.3 release binary into /usr/local/bin.
# Download, unpack, copy and clean-up share one layer so that neither the
# tarball nor the unpacked directory is kept in the image.
RUN wget https://github.com/src-d/borges/releases/download/v0.8.3/borges_v0.8.3_linux_amd64.tar.gz \
 && tar -xvf borges_v0.8.3_linux_amd64.tar.gz \
 && cp borges_linux_amd64/borges /usr/local/bin \
 && rm -rf borges_v0.8.3_linux_amd64.tar.gz borges_linux_amd64

# Fetch the repository list (pinned to one gist revision). repos.txt stays in
# /sourced because the container's default command reads it again at start-up.
RUN wget https://gist.githubusercontent.com/eiso/160b28cb38fae87ad54cb4c8e812a265/raw/11f499c07cee464685796ea1c54da8dcdda22ef1/repos.txt

# Run `borges pack` over the list at build time, writing into ./demo.
RUN borges pack --file=repos.txt --to=demo
# Python (2.x and 3.5 with pip) and a Java 8 runtime.
# `apt-get update` runs in the same layer as the install so the package index
# is never older than the install step; the lists are removed afterwards since
# no later instruction in this file uses apt.
# Recommended packages are kept on purpose here.
# NOTE(review): python3-pip's recommends presumably provide setuptools/wheel
# and the build toolchain that pip source installs need — confirm before
# adding --no-install-recommends.
RUN apt-get -y update \
 && apt-get -y install \
      openjdk-8-jre \
      python \
      python3-pip \
      python3.5 \
 && rm -rf /var/lib/apt/lists/*

# Upgrade pip, but stay below 21: pip 21.0 dropped support for Python 3.5,
# which is the interpreter behind pip3 in this image.
RUN pip3 install --no-cache-dir --upgrade "pip<21"

ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
# Apache Spark 2.2.0 (Hadoop 2.7 build), unpacked under /sourced.
# The tarball is fetched from the Apache archive, which gives one fixed URL
# instead of going through the mirror-selection CGI, and it is deleted in the
# same layer so it does not stay in the image.
# NOTE(review): consider verifying the published checksum of the tarball.
RUN wget -O spark-2.2.0-bin-hadoop2.7.tgz https://archive.apache.org/dist/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz \
 && tar -xvzf spark-2.2.0-bin-hadoop2.7.tgz \
 && rm spark-2.2.0-bin-hadoop2.7.tgz

# SPARK_HOME is set in its own instruction so the PATH entry below can be
# derived from it (a variable is only visible to later instructions).
ENV SPARK_HOME=/sourced/spark-2.2.0-bin-hadoop2.7
ENV PATH=${PATH}:${SPARK_HOME}/bin \
    PYSPARK_PYTHON=python3
# Install the sourced-engine Python package. pip3 is used explicitly so the
# package lands in the same Python 3 environment as the other pip3 installs in
# this file and the PYSPARK_PYTHON=python3 setting.
RUN pip3 install --no-cache-dir sourced-engine

# Spark property names (SPARK_BBLFSH_*) and the values to assign to them
# (BBLFSH_*); both pairs are consumed by the printf below.
ENV SPARK_BBLFSH_HOST=spark.tech.sourced.bblfsh.grpc.host \
    SPARK_BBLFSH_PORT=spark.tech.sourced.bblfsh.grpc.port \
    BBLFSH_HOST=bblfshd \
    BBLFSH_PORT=9432

# Write spark-defaults.conf as two "<property> <value>" lines.
# The variables are passed as printf arguments rather than embedded in the
# format string, so a '%' or '\' in a value cannot corrupt the output, and the
# file now ends with a newline.
RUN printf '%s %s\n%s %s\n' \
      "$SPARK_BBLFSH_HOST" "$BBLFSH_HOST" \
      "$SPARK_BBLFSH_PORT" "$BBLFSH_PORT" \
      > /sourced/spark-2.2.0-bin-hadoop2.7/conf/spark-defaults.conf
# Adding Jupyter Notebook support.
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip3 install --no-cache-dir jupyter

# Install the jupyter-spark extension and switch on its server and notebook
# parts, plus the widgets notebook extension, in the same layer as the install.
RUN pip3 install --no-cache-dir jupyter-spark \
 && jupyter serverextension enable --py jupyter_spark \
 && jupyter nbextension install --py jupyter_spark \
 && jupyter nbextension enable --py jupyter_spark \
 && jupyter nbextension enable --py widgetsnbextension
# Version of the tech.sourced:engine Spark package to load.
ENV ENGINE_VERSION=0.1.7
# Submit arguments exported for PySpark; kept in a separate instruction so
# that ${ENGINE_VERSION} above is already defined when it is expanded here.
ENV PYSPARK_SUBMIT_ARGS="--packages tech.sourced:engine:${ENGINE_VERSION} pyspark-shell"
# Need to use real executables since aliases don't work with `docker exec`.
# Three small /bin/sh wrappers are written into /sourced:
#   pyspark      -> /usr/local/bin/pyspark     with the engine package loaded
#   spark-shell  -> /usr/local/bin/spark-shell with the engine package loaded
#   jupyter      -> /usr/local/bin/jupyter notebook, listening on 0.0.0.0
# Each wrapper `exec`s the real program (no extra shell process is left
# behind) and forwards any extra command-line arguments through "$@".
# ${ENGINE_VERSION} is single-quoted on purpose: it is written literally into
# the script and resolved from the container environment when the script runs.
# The files are created with '>' so a rebuild can never append a second copy.
RUN printf '%s\n' \
      '#! /bin/sh' \
      'exec /usr/local/bin/pyspark --packages "tech.sourced:engine:${ENGINE_VERSION}" "$@"' \
      > /sourced/pyspark \
 && printf '%s\n' \
      '#! /bin/sh' \
      'exec /usr/local/bin/spark-shell --packages "tech.sourced:engine:${ENGINE_VERSION}" "$@"' \
      > /sourced/spark-shell \
 && printf '%s\n' \
      '#! /bin/sh' \
      'exec /usr/local/bin/jupyter notebook --ip=0.0.0.0 --allow-root "$@"' \
      > /sourced/jupyter \
 && chmod u+x /sourced/pyspark /sourced/spark-shell /sourced/jupyter

# /sourced goes first on PATH so the wrappers above shadow the programs of the
# same name installed elsewhere.
ENV PATH=/sourced:${PATH}
# Default command: run `borges pack` over repos.txt into ./repositories and,
# if that succeeds, sleep forever so the container keeps running.
# Written as an explicit `/bin/sh -c` invocation because the `&&` chain needs
# a shell to interpret it.
CMD ["/bin/sh", "-c", "borges pack --file=repos.txt --to=repositories && sleep infinity"]
# (GitHub gist page footer removed; it is not part of the Dockerfile.)