Skip to content

Instantly share code, notes, and snippets.

@mik-laj
Last active October 12, 2020 15:11
Show Gist options
  • Save mik-laj/c090f51c970e4f6898e6fd986ea1e12e to your computer and use it in GitHub Desktop.
Save mik-laj/c090f51c970e4f6898e6fd986ea1e12e to your computer and use it in GitHub Desktop.
# Dockerfile with gcloud
# Extends the base Airflow image with the Google Cloud SDK, installed by piping
# Google's installer script into bash.
# NOTE(review): no `ARG BASE_AIRFLOW_IMAGE` is visible before this FROM; confirm how
# the value is supplied to the build.
FROM ${BASE_AIRFLOW_IMAGE}
# After the installer runs, append two lines to /home/airflow/.bashrc that source the
# SDK's path.bash.inc and completion.bash.inc.
# NOTE(review): assumes the installer places the SDK in /home/airflow/google-cloud-sdk
# and does not wait for interactive input during the build; confirm.
RUN curl https://sdk.cloud.google.com | bash \
    && echo "source /home/airflow/google-cloud-sdk/path.bash.inc" >> /home/airflow/.bashrc \
    && echo "source /home/airflow/google-cloud-sdk/completion.bash.inc" >> /home/airflow/.bashrc
# Dockerfile with Java
# Builds on the base Airflow image and adds the AdoptOpenJDK 8 HotSpot JRE.
FROM ${BASE_AIRFLOW_IMAGE}

# The package installation and filesystem changes below are performed as UID 0.
USER 0

# Install Java
# Steps, all in a single layer:
#   1. create the man1/man7 directories
#      (NOTE(review): presumably required by the JRE package's install scripts on
#      images that ship without man pages; confirm),
#   2. register the AdoptOpenJDK signing key and its "buster" apt repository,
#   3. install the JRE without recommended packages,
#   4. purge unneeded packages and drop the apt caches and package lists.
RUN mkdir -pv /usr/share/man/man1 /usr/share/man/man7 \
    && curl -fsSL https://adoptopenjdk.jfrog.io/adoptopenjdk/api/gpg/key/public \
        | apt-key add - \
    && echo 'deb https://adoptopenjdk.jfrog.io/adoptopenjdk/deb/ buster main' \
        > /etc/apt/sources.list.d/adoptopenjdk.list \
    && apt-get update \
    && apt-get install --no-install-recommends -y \
        adoptopenjdk-8-hotspot-jre \
    && apt-get autoremove -yqq --purge \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Location of the JRE installed above (amd64 package layout).
ENV JAVA_HOME=/usr/lib/jvm/adoptopenjdk-8-hotspot-jre-amd64

# Directory that later steps can drop extra jars into.
RUN mkdir -p /opt/spark/jars
# Install Hadoop
# Downloads the Hadoop release tarball and its detached signature, verifies the
# signature against the keys published in the Apache Hadoop KEYS file, and unpacks
# the release under /opt (HADOOP_HOME=/opt/hadoop-<version>).
# HADOOP_VERSION can be overridden with --build-arg.
ARG HADOOP_VERSION=2.9.2
ENV HADOOP_HOME=/opt/hadoop-$HADOOP_VERSION
# The KEYS file is downloaded to disk and imported from there. The previous
# `curl ... | gpg --import KEYS` made gpg read a non-existent local file named
# "KEYS" and ignore the piped data, so the signing keys were never imported.
# `gpg --verify` is given both the signature and the signed file explicitly so the
# check does not rely on gpg guessing the data file name.
# The downloads are removed in the same layer so they do not persist in the image.
RUN HADOOP_URL="https://www.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz" \
    && set -x \
    && curl -fSL https://dist.apache.org/repos/dist/release/hadoop/common/KEYS -o /tmp/hadoop.KEYS \
    && gpg --import /tmp/hadoop.KEYS \
    && curl -fSL "$HADOOP_URL" -o /tmp/hadoop.tar.gz \
    && curl -fSL "$HADOOP_URL.asc" -o /tmp/hadoop.tar.gz.asc \
    && gpg --verify /tmp/hadoop.tar.gz.asc /tmp/hadoop.tar.gz \
    && tar -xvf /tmp/hadoop.tar.gz -C /opt/ \
    && rm /tmp/hadoop.tar.gz /tmp/hadoop.tar.gz.asc /tmp/hadoop.KEYS \
    && ln -s "${HADOOP_HOME}/etc/hadoop" /etc/hadoop \
    && mkdir "${HADOOP_HOME}/logs" \
    && mkdir /hadoop-data
# /etc/hadoop is the symlink to ${HADOOP_HOME}/etc/hadoop created above.
ENV HADOOP_CONF_DIR=/etc/hadoop
ENV MULTIHOMED_NETWORK=1
ENV USER=root
# Put the Hadoop command-line tools on PATH.
ENV PATH=$HADOOP_HOME/bin/:$PATH
# Install GCS connector for staging jars in GCS
# GCS_VARIANT / GCS_VERSION select which connector jar is downloaded; both can be
# overridden with --build-arg.
ARG GCS_VARIANT="hadoop2"
ARG GCS_VERSION="2.1.5"
# -f makes curl exit non-zero on an HTTP error (for example a 404 for an unknown
# variant/version) instead of saving the error page as the .jar and letting the
# build succeed with a broken file.
RUN GCS_JAR_PATH="/opt/spark/jars/gcs-connector-${GCS_VARIANT}-${GCS_VERSION}.jar" \
    && GCS_JAR_URL="https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-${GCS_VARIANT}-${GCS_VERSION}.jar" \
    && curl -fSL "${GCS_JAR_URL}" -o "${GCS_JAR_PATH}"
# Prepend the downloaded jar to HADOOP_CLASSPATH, keeping any value already set.
ENV HADOOP_CLASSPATH="/opt/spark/jars/gcs-connector-${GCS_VARIANT}-${GCS_VERSION}.jar:$HADOOP_CLASSPATH"
# Switch away from UID 0 once the installation steps are done.
# NOTE(review): AIRFLOW_UID is not defined in this file; presumably it is provided
# by the base image, so confirm.
USER ${AIRFLOW_UID}
# Dockerfile with kubectl
# Extends the base Airflow image with the kubectl binary (linux/amd64) for the
# version named in the release bucket's stable.txt at build time.
FROM ${BASE_AIRFLOW_IMAGE}
# Writing to /usr/local/bin is done as UID 0.
USER 0
# -f on both curl calls makes an HTTP error abort the build instead of yielding an
# empty/garbage version string or an error page saved as the kubectl binary.
RUN KUBECTL_VERSION="$(curl -fsSL https://storage.googleapis.com/kubernetes-release/release/stable.txt)" \
    && KUBECTL_URL="https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl" \
    && curl -fSL "${KUBECTL_URL}" --output /usr/local/bin/kubectl \
    && chmod +x /usr/local/bin/kubectl
# Switch away from UID 0 once the installation is done.
# NOTE(review): AIRFLOW_UID is not defined in this file; presumably it is provided
# by the base image, so confirm.
USER ${AIRFLOW_UID}
@mik-laj
Copy link
Author

mik-laj commented Oct 9, 2020

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment