Created
November 17, 2017 02:25
-
-
Save eiso/0e8aa2979a7dd6d38bf34c929a9418a6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# srcd/engine
# Base image for the source{d} engine environment.
FROM ubuntu:16.04

# Engine release to install. This stays an ENV (not an ARG) because later
# instructions in this file reference it both at build time (git checkout,
# pip install) and at run time (the generated wrapper scripts expand
# ${ENGINE_VERSION} when they are executed).
ENV ENGINE_VERSION=0.1.7
# System toolchain, Python 3 build dependencies and native libraries needed by
# the Python packages installed further down. One package per line, sorted,
# so that additions and removals produce minimal diffs.
# DEBIAN_FRONTEND is set inline so it does not leak into the runtime
# environment. The apt package lists are intentionally kept: a later
# `apt-get install` in this file relies on them.
RUN apt-get -y update \
    && DEBIAN_FRONTEND=noninteractive apt-get -y install \
        build-essential \
        curl \
        cython3 \
        gdb \
        git \
        graphviz \
        libcairo2 \
        libfreetype6-dev \
        libjpeg-turbo8-dev \
        liblz4-dev \
        liblzo2-dev \
        libpng-dev \
        libprotobuf-dev \
        libsnappy-dev \
        libxft-dev \
        libyaml-dev \
        locales \
        lzop \
        net-tools \
        pbzip2 \
        pkg-config \
        python \
        python3-cffi \
        python3-dbg \
        python3-dev \
        python3-pip \
        vim \
        wget \
        zlib1g-dev \
    && apt-get clean \
    && locale-gen en_GB.UTF-8 \
    && update-locale LANG=en_GB.UTF-8

# update-locale only writes /etc/default/locale, which is read by login
# sessions. Processes started by RUN, CMD or `docker exec` do not go through
# a login, so export the generated locale explicitly.
ENV LANG=en_GB.UTF-8
# Upgrade the distribution pip. The upper bound keeps the upgrade on a
# release line that still supports the Python 3.5 interpreter shipped with
# Ubuntu 16.04 (pip 21 dropped Python 3.5); an unbounded upgrade can install
# a pip that no longer runs on this interpreter.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir --upgrade 'pip<21'
# Build area for downloaded and compiled artifacts. Absolute path so the
# result does not depend on whatever working directory was in effect before;
# later instructions in this file already refer to it as /builds.
WORKDIR /builds
# Install and run borges
# Release to download; overridable with --build-arg BORGES_VERSION=...
ARG BORGES_VERSION=0.8.3

# Download the release tarball, install the binary, and delete the tarball
# and the extracted directory in the same layer so they do not stay in the
# image.
RUN wget "https://github.com/src-d/borges/releases/download/v${BORGES_VERSION}/borges_v${BORGES_VERSION}_linux_amd64.tar.gz" \
    && tar -xvf "borges_v${BORGES_VERSION}_linux_amd64.tar.gz" \
    && cp borges_linux_amd64/borges /usr/local/bin \
    && rm -rf "borges_v${BORGES_VERSION}_linux_amd64.tar.gz" borges_linux_amd64

# Sample repository list, pinned to a specific gist revision.
RUN wget https://gist.githubusercontent.com/eiso/160b28cb38fae87ad54cb4c8e812a265/raw/11f499c07cee464685796ea1c54da8dcdda22ef1/repos.txt

# Run `borges pack` over that list at build time, writing its output to
# /builds/sample-repos.
RUN borges pack --file=repos.txt --to=/builds/sample-repos
# Install Java and Spark
# Refresh the package index in the same layer as the install so this step
# never runs against a stale cached index, then drop the lists this layer
# created.
RUN apt-get update \
    && apt-get -y install openjdk-8-jre \
    && rm -rf /var/lib/apt/lists/*
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64

# Fetch Spark from the Apache release archive, which keeps every published
# release (the mirror network only carries current ones). Download, extract
# and delete the tarball in one layer so the archive does not remain in the
# image.
RUN wget -O spark-2.2.0-bin-hadoop2.7.tgz "https://archive.apache.org/dist/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz" \
    && tar -xvzf spark-2.2.0-bin-hadoop2.7.tgz \
    && rm spark-2.2.0-bin-hadoop2.7.tgz

# SPARK_HOME gets its own instruction: variables set in an ENV instruction
# are only visible to the instructions that follow it.
ENV SPARK_HOME=/builds/spark-2.2.0-bin-hadoop2.7
# Spark's bin directory is appended, so executables already on PATH keep
# precedence over it.
ENV PATH=${PATH}:${SPARK_HOME}/bin \
    PYSPARK_PYTHON=python3
# Git clone and install the engine
# Clone into an explicit path and check out the release tag that matches
# ENGINE_VERSION.
RUN git clone https://github.com/src-d/engine.git /builds/engine \
    && git -C /builds/engine checkout "tags/v${ENGINE_VERSION}"

# Build the engine with the sbt launcher shipped in the repository and
# publish it to the local repository.
WORKDIR /builds/engine
RUN ./sbt publishLocal

# Install the matching Python package. pip3 is used, as everywhere else in
# this file, so the package is installed for the Python 3 interpreter;
# --no-cache-dir keeps pip's download cache out of the layer.
WORKDIR /builds
RUN pip3 install --no-cache-dir "sourced-engine==${ENGINE_VERSION}"
# bblfsh connection settings. The SPARK_BBLFSH_* variables hold the Spark
# property names; BBLFSH_HOST / BBLFSH_PORT hold the values written for them.
ENV SPARK_BBLFSH_HOST=spark.tech.sourced.bblfsh.grpc.host \
    SPARK_BBLFSH_PORT=spark.tech.sourced.bblfsh.grpc.port \
    BBLFSH_HOST=bblfshd \
    BBLFSH_PORT=9432

# Write one "<property> <value>" line per setting into spark-defaults.conf.
# The variables are passed as printf arguments rather than embedded in the
# format string, so a '%' or '\' in a value cannot be misread as a format
# directive, and the file ends with a newline.
RUN printf '%s %s\n%s %s\n' \
        "$SPARK_BBLFSH_HOST" "$BBLFSH_HOST" \
        "$SPARK_BBLFSH_PORT" "$BBLFSH_PORT" \
    > /builds/spark-2.2.0-bin-hadoop2.7/conf/spark-defaults.conf
# Adding Jupyter Notebook support
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip3 install --no-cache-dir jupyter

# Install jupyter-spark and enable its server and notebook extensions, plus
# the widgets notebook extension.
RUN pip3 install --no-cache-dir jupyter-spark \
    && jupyter serverextension enable --py jupyter_spark \
    && jupyter nbextension install --py jupyter_spark \
    && jupyter nbextension enable --py jupyter_spark \
    && jupyter nbextension enable --py widgetsnbextension

# Submit arguments that add the engine package for the configured
# ENGINE_VERSION. Quoted key=value form because the value contains spaces.
ENV PYSPARK_SUBMIT_ARGS="--packages tech.sourced:engine:${ENGINE_VERSION} pyspark-shell"
# Adding data science standard packages
# --no-cache-dir keeps pip's download cache out of the image layer.
# NOTE(review): versions are unpinned, so the installed set depends on the
# build date; consider pinning for reproducible builds.
RUN pip3 install --no-cache-dir \
        PyYAML \
        PyGitHub \
        requests \
        numpy \
        scipy \
        pandas \
        spacy \
        matplotlib \
        cairocffi \
        networkx \
        pydot \
        lz4 \
        python-lzo \
        protobuf \
        parquet
# Import matplotlib.pyplot once at build time with the non-interactive Agg
# backend selected. NOTE(review): presumably this pre-builds matplotlib's
# caches so the first import at run time is fast - confirm.
RUN ["python3", "-c", "import matplotlib; matplotlib.use('Agg'); import matplotlib.pyplot"]
# Wrapper executables are used instead of shell aliases, since aliases don't
# work with `docker exec`.
# Each wrapper is created with `>` so it always holds exactly the two lines
# below, and uses `exec` so the real program replaces the wrapper shell.
# ${ENGINE_VERSION} is single-quoted on purpose: it is written literally and
# expanded from the container environment when the wrapper runs.
# Arguments passed to the wrappers are not forwarded (unchanged behavior).
RUN printf '%s\n' \
        '#! /bin/sh' \
        'exec /usr/local/bin/pyspark --packages "tech.sourced:engine:${ENGINE_VERSION}"' \
        > /builds/pyspark \
    && printf '%s\n' \
        '#! /bin/sh' \
        'exec /usr/local/bin/spark-shell --packages "tech.sourced:engine:${ENGINE_VERSION}"' \
        > /builds/spark-shell \
    && printf '%s\n' \
        '#! /bin/sh' \
        'exec /usr/local/bin/jupyter notebook --ip=0.0.0.0 --allow-root' \
        > /builds/jupyter \
    && chmod u+x /builds/pyspark /builds/spark-shell /builds/jupyter

# /builds goes first on PATH so the wrappers shadow the commands of the same
# name installed elsewhere.
ENV PATH=/builds:${PATH}
# Working directory for the running container.
WORKDIR /sourced

# Container start-up, written as the explicit `/bin/sh -c` invocation that a
# shell-form CMD expands to. In order, stopping at the first failure:
#   1. move the sample repository list to /sourced/sample-repos.txt
#   2. move the sample repositories packed at build time into /sourced/
#   3. copy the engine's example notebooks into /sourced/
#   4. run `borges pack` over /sourced/repos.txt into /sourced/user-repos
#   5. sleep forever so the container stays up
# NOTE(review): nothing in this file creates /sourced/repos.txt; presumably
# it is supplied by the user (e.g. through a mounted volume) - confirm.
# NOTE(review): steps 1-2 use `mv`, so they look non-repeatable if the same
# container is started a second time - confirm whether restarts must work.
CMD ["/bin/sh", "-c", "mv /builds/repos.txt /sourced/sample-repos.txt && mv /builds/sample-repos /sourced/ && cp /builds/engine/_examples/notebooks/* /sourced/ && borges pack --file=/sourced/repos.txt --to=/sourced/user-repos && sleep infinity"]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment