Created
March 29, 2018 19:56
-
-
Save campoy/d48de3f6aef356e641df36a1fea6a778 to your computer and use it in GitHub Desktop.
One liner gemini
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Single-container image bundling Cassandra 3.11.x, Spark 2.2.0 and a build of
# src-d/gemini. The default command is /startup.sh (copied in at the end).
FROM ubuntu:16.04

# Everything is installed in a single RUN so that the build-only tools (curl,
# git) and the apt package lists are removed in the same layer that created
# them; deleting them in a later layer would not shrink the image.
RUN apt-get update && \
# JDK (the original file notes this is meant to provide Java 8) plus download
# tools; ca-certificates is listed explicitly because the HTTPS fetches below
# depend on it.
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        default-jdk \
        git && \
# Register the Cassandra 3.11.x apt repository and its signing keys.
    echo "deb http://www.apache.org/dist/cassandra/debian 311x main" >> /etc/apt/sources.list.d/cassandra.sources.list && \
# Download KEYS to a file instead of piping into apt-key: with the default
# /bin/sh there is no pipefail, so a failed download would go unnoticed.
    curl -fsSL https://www.apache.org/dist/cassandra/KEYS -o /tmp/cassandra.KEYS && \
    apt-key add /tmp/cassandra.KEYS && \
    rm -f /tmp/cassandra.KEYS && \
# NOTE(review): confirm this keyserver is still reachable; if the KEYS file
# above already contains key A278B781FE4B2BDA this step may be redundant.
    apt-key adv --keyserver pool.sks-keyservers.net --recv-key A278B781FE4B2BDA && \
# Second update is required so apt sees the newly added Cassandra repository.
    apt-get update && apt-get install -y --no-install-recommends cassandra && \
# Build gemini; the clone target is explicit so the following cd does not
# depend on the build's working directory.
    git clone https://github.com/src-d/gemini.git /gemini && \
    cd /gemini && ./sbt assembly && ./sbt package && \
# Install Spark (the original file notes it needs to be 2.2.0), fetched over
# HTTPS from the Apache release archive rather than a third-party HTTP mirror.
    curl -fsSL https://archive.apache.org/dist/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz -o /spark.tgz && \
    mkdir -p /spark && tar -xvzf /spark.tgz -C /spark --strip-components=1 && rm -f /spark.tgz && \
# -y is mandatory here: without it apt-get asks for confirmation and aborts
# in a non-interactive build.
    apt-get purge -y --auto-remove curl git && rm -rf /var/lib/apt/lists/*

ENV SPARK_HOME=/spark

# COPY instead of ADD: this is a plain local file, none of ADD's extras apply.
COPY startup.sh /
RUN chmod +x /startup.sh

CMD ["/startup.sh"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Container start-up script: launches Cassandra, waits until it answers CQL
# queries, hashes everything under /repositories with gemini, then prints the
# duplicate report.

# start cassandra as root (-R; not recommended, but kept from the original)
# logs available in /var/log/cassandra
cassandra -R &> /dev/null

# poll until cqlsh can run a query, i.e. cassandra is accepting connections
until cqlsh -e "describe tables;" &> /dev/null
do
    echo "waiting for cassandra, sleeping for 5 seconds"
    sleep 5
done

# Use an absolute path so the script does not depend on the container's
# working directory, and stop with an error instead of silently skipping the
# hashing step when the gemini checkout is missing.
cd /gemini || { echo "cannot change directory to /gemini" >&2; exit 1; }

# start hashing
/gemini/hash -h localhost /repositories

# finish reporting all duplicates across all repositories;
# exec replaces this shell so the report process receives container signals
# and its exit status becomes the container's exit status
exec /gemini/report
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment