Skip to content

Instantly share code, notes, and snippets.

@thiagarajan-n
Created December 15, 2018 00:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thiagarajan-n/90d4d2800b6313732346127080367051 to your computer and use it in GitHub Desktop.
Save thiagarajan-n/90d4d2800b6313732346127080367051 to your computer and use it in GitHub Desktop.
#
# Copyright 2017 StreamSets Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Base image: Alpine Linux 3.6.
FROM alpine:3.6
# Image metadata: maintainer contact.
LABEL maintainer="Adam Kunicki <adam@streamsets.com>"
# glibc installation courtesy https://github.com/jeanblanchard/docker-alpine-glibc
ENV GLIBC_VERSION=2.25-r0
# Download and install glibc.
# curl runs with -f (--fail) so an HTTP error aborts the build at the download step
# instead of saving the server's error page as the signing key or as a package.
# apk runs with --no-cache so no package index is left behind in the layer.
# Note: libidn is required as a workaround for addressing AWS Kinesis Producer issue (https://github.com/awslabs/amazon-kinesis-producer/issues/86)
RUN apk add --no-cache curl && \
    curl -fLo /etc/apk/keys/sgerrand.rsa.pub https://alpine-pkgs.sgerrand.com/sgerrand.rsa.pub && \
    curl -fLo glibc.apk "https://github.com/sgerrand/alpine-pkg-glibc/releases/download/${GLIBC_VERSION}/glibc-${GLIBC_VERSION}.apk" && \
    curl -fLo glibc-bin.apk "https://github.com/sgerrand/alpine-pkg-glibc/releases/download/${GLIBC_VERSION}/glibc-bin-${GLIBC_VERSION}.apk" && \
    apk add --no-cache glibc-bin.apk glibc.apk && \
    /usr/glibc-compat/sbin/ldconfig /lib /usr/glibc-compat/lib && \
    echo 'hosts: files mdns4_minimal [NOTFOUND=return] dns mdns4' >> /etc/nsswitch.conf && \
    apk add --no-cache libidn && \
    apk del curl && \
    rm -f glibc.apk glibc-bin.apk
# JRE installation courtesy https://github.com/jeanblanchard/docker-java
# Java version, package flavour, expected archive checksum and Oracle download URL element.
ENV JAVA_VERSION_MAJOR=8 \
    JAVA_VERSION_MINOR=191 \
    JAVA_VERSION_BUILD=12 \
    JAVA_PACKAGE=server-jre \
    JAVA_SHA256_SUM=8d6ead9209fd2590f3a8778abbbea6a6b68e02b8a96500e2e77eabdbcaaebcae \
    JAVA_URL_ELEMENT=2787e4a523244c269598db4e85c51e0c
# Download, verify and unarchive Java.
# - The output file name and the URL are separate arguments: the line continuation must be
#   preceded by a space, otherwise both are glued into a single word.
# - curl keeps -k (no TLS certificate verification); the integrity of the download relies on
#   the SHA-256 check that follows. -f makes an HTTP error fail the step immediately.
# - "sha256sum -c" reads lines of the form "<hash>  <file>" with TWO spaces between the fields.
# - tar decompresses the archive itself (-z) so that no pipe can hide a decompression failure.
RUN apk add --no-cache curl && \
    mkdir -p /opt && \
    curl -fjkLH "Cookie: oraclelicense=accept-securebackup-cookie" -o java.tar.gz \
      "http://download.oracle.com/otn-pub/java/jdk/${JAVA_VERSION_MAJOR}u${JAVA_VERSION_MINOR}-b${JAVA_VERSION_BUILD}/${JAVA_URL_ELEMENT}/${JAVA_PACKAGE}-${JAVA_VERSION_MAJOR}u${JAVA_VERSION_MINOR}-linux-x64.tar.gz" && \
    echo "${JAVA_SHA256_SUM}  java.tar.gz" | sha256sum -c - && \
    tar -xzf java.tar.gz -C /opt && \
    rm -f java.tar.gz && \
    ln -s "/opt/jdk1.${JAVA_VERSION_MAJOR}.0_${JAVA_VERSION_MINOR}" /opt/jdk && \
    apk del curl
# Java environment: JAVA_HOME is /opt/jdk and its bin directory is appended to PATH.
# Two instructions are needed because the second one reads the value set by the first.
ENV JAVA_HOME=/opt/jdk
ENV PATH="${PATH}:${JAVA_HOME}/bin"
# The SDC user gets a fixed UID/GID because certain test environments require it to be consistent
# throughout the cluster. 20159 is used because it is above the default value of YARN's
# min.user.id property.
ARG SDC_UID=20159
# Packages installed for the remaining build steps and for the running container.
RUN apk add --no-cache \
      bash \
      curl \
      krb5 \
      krb5-libs \
      libstdc++ \
      libuuid \
      sed
# Begin Data Collector installation
# Build arguments: SDC version, download URL of the tarball and the user name (defaults to the UID).
ARG SDC_VERSION=3.2.0.0-SNAPSHOT
ARG SDC_URL=https://archives.streamsets.com/datacollector/3.2.0.0/tarball/streamsets-datacollector-core-3.2.0.0.tgz
ARG SDC_USER=${SDC_UID}
# SDC_HOME is where executables and related files are installed. Used in setup_mapr script.
ARG SDC_HOME="/opt/streamsets-datacollector-${SDC_VERSION}"

# The paths below should generally be attached to a VOLUME for persistence.
#   SDC_CONF                        configuration files; can be shared.
#   SDC_DATA                        collector state; do not share between containers.
#   SDC_LOG                         optional volume for file based logs.
#   SDC_RESOURCES                   resource files such as runtime:conf resources and Hadoop configuration.
#   STREAMSETS_LIBRARIES_EXTRA_DIR  extra libraries such as JDBC drivers.
#   USER_LIBRARIES_DIR              custom stage libraries.
ENV SDC_CONF=/etc/sdc \
    SDC_DATA=/data \
    SDC_DIST=${SDC_HOME} \
    SDC_HOME=${SDC_HOME} \
    SDC_LOG=/logs \
    SDC_RESOURCES=/resources \
    USER_LIBRARIES_DIR=/opt/streamsets-datacollector-user-libs
# Declared in its own instruction because it reads SDC_DIST, which is set by the ENV above.
ENV STREAMSETS_LIBRARIES_EXTRA_DIR="${SDC_DIST}/streamsets-libs-extras"
# Stage the SDC configuration script (together with any local *.tgz files) in /tmp and run it.
COPY sdc-configure.sh *.tgz /tmp/
RUN /tmp/sdc-configure.sh
# Additional stage libraries are installed only when the SDC_LIBS build argument is non-empty.
ARG SDC_LIBS
RUN [ -z "${SDC_LIBS}" ] || "${SDC_DIST}/bin/streamsets" stagelibs -install="${SDC_LIBS}"
# Copy files in $PROJECT_ROOT/resources dir to the SDC_RESOURCES dir.
COPY resources/ ${SDC_RESOURCES}/
# Copy local "sdc-extras" libs to STREAMSETS_LIBRARIES_EXTRA_DIR.
# Local files should be placed in appropriate stage lib subdirectories. For example
# to add a JDBC driver like my-jdbc.jar to the JDBC stage lib, the local file my-jdbc.jar
# should be at the location $PROJECT_ROOT/sdc-extras/streamsets-datacollector-jdbc-lib/lib/my-jdbc.jar
COPY sdc-extras/ ${STREAMSETS_LIBRARIES_EXTRA_DIR}/
# Make sure every SDC directory exists, belongs to group 0 and grants the group the same
# permissions as the owner.
# - SDC_RESOURCES is part of the list, so no separate chgrp/chmod layer is needed after its COPY.
# - "set -e" with one command per statement makes a failure in ANY iteration fail the build;
#   an "a && b && c" chain inside the loop only reports the outcome of the last iteration.
# - "$i" is quoted so a path is never word-split or glob-expanded.
RUN set -e; \
    for i in \
      "${SDC_DIST}/streamsets-libs" \
      "${SDC_CONF}" \
      "${SDC_DATA}" \
      "${SDC_LOG}" \
      "${SDC_RESOURCES}" \
      "${STREAMSETS_LIBRARIES_EXTRA_DIR}" \
      "${USER_LIBRARIES_DIR}"; \
    do \
      mkdir -p "$i"; \
      chgrp -R 0 "$i"; \
      chmod -R g=u "$i"; \
    done
# Disabled alternative: give form-realm.properties to group 0 with the owner's permissions.
# RUN chgrp -R 0 /etc/sdc/form-realm.properties && \
# chmod -R g=u /etc/sdc/form-realm.properties
# Remove every group/other permission bit from form-realm.properties and set its owner and
# group to root.
# NOTE(review): the container later runs as SDC_USER, which will not be able to read this
# file with these permissions — confirm this is intended.
RUN chmod go-rwx /etc/sdc/form-realm.properties && \
chown -R root:root /etc/sdc/form-realm.properties
# Give the group the same permissions on /etc/passwd as its owner.
# NOTE(review): presumably so the entrypoint can add a passwd entry when the container is
# started with an arbitrary UID — confirm.
RUN chmod g=u /etc/passwd
# Port documented as the one the container listens on.
EXPOSE 18630
# Run as the SDC user instead of root.
USER ${SDC_USER}
# NOTE(review): no visible step installs "uid_entrypoint"; confirm it is available on PATH
# in the final image.
ENTRYPOINT ["uid_entrypoint"]
#!/usr/bin/env bash
# Copyright 2017 StreamSets Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Abort on the first failing command and trace every command as it is executed.
set -e
set -x
# Install SDC only when the distribution directory does not exist yet.
if [ ! -d "${SDC_DIST}" ]; then
  # Create SDC user and group.
  addgroup -S -g "${SDC_UID}" "${SDC_USER}"
  adduser -S -u "${SDC_UID}" -G "${SDC_USER}" "${SDC_USER}"
  # Obtain the SDC tarball as /tmp/sdc.tgz: use the first local /tmp/*.tgz if there is one,
  # otherwise download SDC_URL. When the glob matches nothing, $f is the literal pattern and
  # the -e test fails, which selects the download branch.
  # An explicit if/else is used so that a failing mv aborts the script instead of silently
  # falling through to the download (which "test && mv || curl" would do).
  for f in /tmp/*.tgz; do
    if [ -e "$f" ]; then
      # A tarball that is already named sdc.tgz is used in place; mv onto itself would fail.
      if [ "$f" != /tmp/sdc.tgz ]; then
        mv "$f" /tmp/sdc.tgz
      fi
    else
      # -f: fail on an HTTP error instead of saving the error page as the tarball.
      curl -f -o /tmp/sdc.tgz -L "${SDC_URL}"
    fi
    break
  done
  # Extract SDC, dropping the top-level directory of the archive.
  mkdir "${SDC_DIST}"
  tar xzf /tmp/sdc.tgz --strip-components 1 -C "${SDC_DIST}"
  rm -rf /tmp/sdc.tgz
  # Move configuration to SDC_CONF.
  mv "${SDC_DIST}/etc" "${SDC_CONF}"
fi
# Append the stdout appender to the log4j configuration so that logs show up in `docker logs`.
sed -i 's|INFO, streamsets|INFO, streamsets,stdout|' "${SDC_CONF}/sdc-log4j.properties"
# Workaround to address SDC-8005: when the distribution ships a user-libs directory,
# copy it to USER_LIBRARIES_DIR; otherwise do nothing.
[ ! -d "${SDC_DIST}/user-libs" ] || cp -R "${SDC_DIST}/user-libs" "${USER_LIBRARIES_DIR}"
# Create necessary directories (existing ones are left untouched by mkdir -p).
for dir in /mnt "${SDC_DATA}" "${SDC_LOG}" "${SDC_RESOURCES}" "${USER_LIBRARIES_DIR}"; do
  mkdir -p "${dir}"
done
# Update sdc-security.policy to include the custom stage library directory.
# The codebase URL is built from USER_LIBRARIES_DIR (the heredoc delimiter is unquoted, so the
# variable is expanded) instead of a hard-coded path, so the grant always matches the directory
# that is actually used. With USER_LIBRARIES_DIR=/opt/streamsets-datacollector-user-libs the
# generated text is "file:///opt/streamsets-datacollector-user-libs/-".
cat >> "${SDC_CONF}/sdc-security.policy" << EOF
// custom stage library directory
grant codebase "file://${USER_LIBRARIES_DIR}/-" {
permission java.security.AllPermission;
};
EOF
# Use short option -s as long option --status is not supported on alpine linux.
sed -i 's|--status|-s|' "${SDC_DIST}/libexec/_stagelibs"
# Setup filesystem permissions: each directory is given to group 0 and the group receives the
# same permissions as the owner.
# chgrp and chmod are separate statements so that "set -e" aborts the script when either one
# fails; in "chgrp ... && chmod ...", a failing chgrp is exempt from errexit.
# "$i" is quoted so a path is never word-split or glob-expanded.
for i in \
  "${SDC_DIST}/streamsets-libs" \
  "${SDC_CONF}" \
  "${SDC_DATA}" \
  "${SDC_LOG}" \
  "${SDC_RESOURCES}" \
  "${STREAMSETS_LIBRARIES_EXTRA_DIR}" \
  "${USER_LIBRARIES_DIR}"
do
  chgrp -R 0 "$i"
  chmod -R g=u "$i"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment