Skip to content

Instantly share code, notes, and snippets.

@billmetangmo
Created February 5, 2018 13:49
Show Gist options
  • Save billmetangmo/ce5ae78bfe6833beb2eb04efaa67fa5f to your computer and use it in GitHub Desktop.
Save billmetangmo/ce5ae78bfe6833beb2eb04efaa67fa5f to your computer and use it in GitHub Desktop.
Dockerfile for Spark + RDMA from HiBD (The Ohio State University)
# Base image pinned to a fixed major release instead of the moving `latest`
# tag, so rebuilds always start from the same distribution.
# NOTE(review): CentOS 7 is assumed because the RDMA packages installed later
# use CentOS 7-era package names — confirm before changing the tag.
FROM centos:7
# Labels must use the key="value" form. With the unquoted legacy form
# (`LABEL maintainer Bill ...`), Docker treats everything after the first word
# as the value of that single key, so `description=...` would be swallowed
# into `maintainer` instead of becoming its own label.
LABEL maintainer="Bill METANGMO @billmetangmo github" \
      description="Optimized for HPC machine learning & Graph processing apps"
# Labels kept for reference but not currently applied:
#dependencies="bash,procps,openjdk8-jre-base,openssh,ca-certificates"\
#external="scala"
# Proxy settings are build arguments, not ENV values: this image is meant to be
# used in client environments where the build-time proxy would make no sense.
ARG http_proxy=""
ARG https_proxy=""
ARG no_proxy="localhost,127.0.0.1"
# Scala release to download.
ARG SCALA_VERSION="2.11.1"
# Install steps are ordered from least likely to change to most likely to change.
# The download steps use relative paths and clean up under /tmp.
WORKDIR /tmp
# InfiniBand / RDMA user-space libraries and tools, plus wget for the download
# steps that follow. Everything is installed in a single yum transaction
# (one dependency resolution instead of three), one package per line in
# alphabetical order so changes are easy to diff.
# `yum clean all` does not always empty /var/cache/yum, so the directory is
# removed in the same layer to keep the cache out of the image.
RUN yum --disablerepo=updates -y install \
        ibutils \
        libibcm \
        libibcommon \
        libibmad \
        libibumad \
        libibumad-devel \
        libibverbs-devel \
        libibverbs-devel-static \
        libibverbs-utils \
        libmlx4 \
        libmlx5 \
        librdmacm \
        librdmacm-devel \
        librdmacm-utils \
        rdma \
        wget \
    && yum clean all \
    && rm -rf /var/cache/yum
# Install locations for Scala and RDMA-Spark; the RUN steps reference them and
# they remain set in the running container.
# The whitespace around the line continuation is required: Docker joins
# continued lines verbatim, so `...scala\` directly followed by `SPARK_HOME=...`
# would produce one fused SCALA_HOME value and leave SPARK_HOME unset.
ENV SCALA_HOME=/usr/local/share/scala \
    SPARK_HOME=/usr/local/share/spark
################## SPARK-RDMA ###########################
# Download the RDMA-Spark 0.9.4 binary tarball into the current directory,
# unpack it, move the tree to SPARK_HOME, drop the Windows *.cmd launchers and
# activate the shipped `slaves` and `spark-env.sh` templates by renaming them.
RUN wget http://hibd.cse.ohio-state.edu/download/hibd/rdma-spark-0.9.4-bin.tar.gz && \
    tar -xzf rdma-spark-0.9.4-bin.tar.gz && \
    rm -f "/tmp/rdma-spark-0.9.4-bin.tar.gz" && \
    mv /tmp/rdma-spark-0.9.4-bin "${SPARK_HOME}" && \
    rm "${SPARK_HOME}/bin/"*.cmd && \
    for tpl in slaves spark-env.sh; do \
        mv "${SPARK_HOME}/conf/${tpl}.template" "${SPARK_HOME}/conf/${tpl}" || exit 1; \
    done
################## SCALA ###########################
# Download the Scala ${SCALA_VERSION} distribution, keep only its bin/ and lib/
# directories under SCALA_HOME (Windows *.bat launchers removed first), symlink
# the launchers into /usr/bin, then delete the tarball and the unpacked tree.
RUN wget "https://downloads.lightbend.com/scala/${SCALA_VERSION}/scala-${SCALA_VERSION}.tgz" && \
    tar -xzf "scala-${SCALA_VERSION}.tgz" && \
    rm "/tmp/scala-${SCALA_VERSION}/bin/"*.bat && \
    mkdir "${SCALA_HOME}" && \
    mv "/tmp/scala-${SCALA_VERSION}/bin" "/tmp/scala-${SCALA_VERSION}/lib" "${SCALA_HOME}" && \
    ln -s "${SCALA_HOME}/bin/"* "/usr/bin/" && \
    rm -rf "/tmp/scala-${SCALA_VERSION}.tgz" "/tmp/scala-${SCALA_VERSION}"
################## JAVA ###########################
# Install a headless Java 8 runtime from the distribution's own repositories.
# This replaces a direct download of Oracle's jre-8u131 RPM, which was fetched
# over plain HTTP with --no-check-certificate and a pre-set licence-acceptance
# cookie, so the package could not be authenticated and the build depended on
# a vendor URL outside the package manager's control.
# NOTE(review): confirm RDMA-Spark has no hard dependency on the Oracle JRE.
# The yum cache is removed in the same layer so it does not end up in the image.
RUN yum -y install java-1.8.0-openjdk-headless && \
    yum clean all && \
    rm -rf /var/cache/yum
# Rewrite `nohup --` to `nohup` in spark-daemon.sh, then append the InfiniBand
# switches and the native-library paths for executors and the driver to
# spark-defaults.conf (the file is created if it does not exist yet).
RUN sed -i -e "s/nohup --/nohup/g" "${SPARK_HOME}/sbin/spark-daemon.sh" && \
    { \
        echo "spark.ib.enabled true"; \
        echo "hadoop.ib.enabled false"; \
        echo "spark.executor.extraLibraryPath $SPARK_HOME/lib/native/Linux-amd64-64"; \
        echo "spark.driver.extraLibraryPath $SPARK_HOME/lib/native/Linux-amd64-64"; \
    } >> "${SPARK_HOME}/conf/spark-defaults.conf"
# Start containers inside the Spark installation directory.
WORKDIR /usr/local/share/spark
# Exec (JSON-array) form: bash runs directly as PID 1 and receives signals, and
# arguments given to `docker run` are passed to bash. The shell form would wrap
# it in `/bin/sh -c` and silently drop those arguments.
ENTRYPOINT ["/bin/bash"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment