Created
December 15, 2018 00:59
-
-
Save thiagarajan-n/90d4d2800b6313732346127080367051 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# Copyright 2017 StreamSets Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM alpine:3.6
LABEL maintainer="Adam Kunicki <adam@streamsets.com>"

# glibc installation courtesy https://github.com/jeanblanchard/docker-alpine-glibc
ENV GLIBC_VERSION=2.25-r0

# Download and install glibc.
# Note: libidn is required as a workaround for addressing AWS Kinesis Producer issue
# (https://github.com/awslabs/amazon-kinesis-producer/issues/86).
#
# curl runs with -f so that an HTTP error aborts the build instead of an error
# page being stored as the apk signing key or as a package file.
# --no-cache keeps the apk index out of the layer, so /var/cache/apk needs no
# manual cleanup afterwards.
RUN apk add --no-cache curl && \
    curl -fLo /etc/apk/keys/sgerrand.rsa.pub https://alpine-pkgs.sgerrand.com/sgerrand.rsa.pub && \
    curl -fLo glibc.apk "https://github.com/sgerrand/alpine-pkg-glibc/releases/download/${GLIBC_VERSION}/glibc-${GLIBC_VERSION}.apk" && \
    curl -fLo glibc-bin.apk "https://github.com/sgerrand/alpine-pkg-glibc/releases/download/${GLIBC_VERSION}/glibc-bin-${GLIBC_VERSION}.apk" && \
    apk add --no-cache glibc-bin.apk glibc.apk && \
    /usr/glibc-compat/sbin/ldconfig /lib /usr/glibc-compat/lib && \
    echo 'hosts: files mdns4_minimal [NOTFOUND=return] dns mdns4' >> /etc/nsswitch.conf && \
    apk add --no-cache libidn && \
    apk del curl && \
    rm -f glibc.apk glibc-bin.apk
# JRE installation courtesy https://github.com/jeanblanchard/docker-java

# Java version, package flavour, expected SHA-256 of the archive and the
# path element used in the Oracle download URL.
ENV JAVA_VERSION_MAJOR=8 \
    JAVA_VERSION_MINOR=191 \
    JAVA_VERSION_BUILD=12 \
    JAVA_PACKAGE=server-jre \
    JAVA_SHA256_SUM=8d6ead9209fd2590f3a8778abbbea6a6b68e02b8a96500e2e77eabdbcaaebcae \
    JAVA_URL_ELEMENT=2787e4a523244c269598db4e85c51e0c

# Download, verify and unarchive Java.
#  * curl no longer uses -k: TLS certificates are verified on the redirect chain,
#    and -f makes an HTTP error fail the step immediately.
#  * The checksum line uses TWO spaces between digest and file name, which is the
#    format `sha256sum -c` expects (BusyBox rejects a single space).
#  * tar -xzf replaces `gunzip | tar`, so a decompression failure cannot be hidden
#    behind the exit status of the last command in a pipe.
RUN apk add --no-cache curl && \
    mkdir -p /opt && \
    curl -fjL -H "Cookie: oraclelicense=accept-securebackup-cookie" -o java.tar.gz \
      "http://download.oracle.com/otn-pub/java/jdk/${JAVA_VERSION_MAJOR}u${JAVA_VERSION_MINOR}-b${JAVA_VERSION_BUILD}/${JAVA_URL_ELEMENT}/${JAVA_PACKAGE}-${JAVA_VERSION_MAJOR}u${JAVA_VERSION_MINOR}-linux-x64.tar.gz" && \
    echo "${JAVA_SHA256_SUM}  java.tar.gz" | sha256sum -c - && \
    tar -xzf java.tar.gz -C /opt && \
    rm -f java.tar.gz && \
    ln -s "/opt/jdk1.${JAVA_VERSION_MAJOR}.0_${JAVA_VERSION_MINOR}" /opt/jdk && \
    apk del curl
# Point JAVA_HOME at the /opt/jdk symlink and append its bin directory to PATH.
# Two separate instructions are required: PATH reads the JAVA_HOME value that
# the preceding ENV instruction defines.
ENV JAVA_HOME=/opt/jdk
ENV PATH="${PATH}:${JAVA_HOME}/bin"
# The SDC user gets a fixed UID/GID because certain test environments need it to
# be identical on every node of the cluster. 20159 is used because it is above
# the default value of YARN's min.user.id property.
ARG SDC_UID=20159

# Runtime packages needed by Data Collector and its configuration script.
RUN apk add --no-cache \
      bash \
      curl \
      krb5 \
      krb5-libs \
      libstdc++ \
      libuuid \
      sed
# ---- Data Collector installation: build arguments ----
# NOTE(review): SDC_VERSION defaults to a -SNAPSHOT label while SDC_URL points at
# the 3.2.0.0 release tarball; the install directory name is derived from
# SDC_VERSION. Confirm this mismatch is intentional.
ARG SDC_VERSION=3.2.0.0-SNAPSHOT
ARG SDC_URL=https://archives.streamsets.com/datacollector/3.2.0.0/tarball/streamsets-datacollector-core-3.2.0.0.tgz
ARG SDC_USER=${SDC_UID}
# SDC_HOME: installation directory for executables and related files.
# Used in setup_mapr script.
ARG SDC_HOME="/opt/streamsets-datacollector-${SDC_VERSION}"

# ---- Data Collector installation: runtime environment ----
# These locations should generally be backed by a VOLUME for persistence:
#   SDC_CONF       configuration files; may be shared.
#   SDC_DATA       collector state; do NOT share between containers.
#   SDC_LOG        optional volume for file based logs.
#   SDC_RESOURCES  resource files such as runtime:conf resources and Hadoop
#                  configuration.
#   STREAMSETS_LIBRARIES_EXTRA_DIR
#                  extra libraries such as JDBC drivers.
#   USER_LIBRARIES_DIR
#                  custom stage libraries.
ENV SDC_CONF="/etc/sdc" \
    SDC_DATA="/data" \
    SDC_DIST="${SDC_HOME}" \
    SDC_HOME="${SDC_HOME}" \
    SDC_LOG="/logs" \
    SDC_RESOURCES="/resources" \
    USER_LIBRARIES_DIR="/opt/streamsets-datacollector-user-libs"
# Separate instruction: this value is built from the SDC_DIST variable set above.
ENV STREAMSETS_LIBRARIES_EXTRA_DIR="${SDC_DIST}/streamsets-libs-extras"
# Stage the SDC configuration script, plus any *.tgz found in the build context,
# under /tmp and execute the script.
COPY sdc-configure.sh *.tgz /tmp/
RUN /tmp/sdc-configure.sh

# Optionally install additional stage libraries; nothing happens when the
# SDC_LIBS build argument is empty or not supplied.
ARG SDC_LIBS
RUN test -z "${SDC_LIBS}" || "${SDC_DIST}/bin/streamsets" stagelibs -install="${SDC_LIBS}"
# Copy files in $PROJECT_ROOT/resources dir to the SDC_RESOURCES dir.
COPY resources/ ${SDC_RESOURCES}/

# Copy local "sdc-extras" libs to STREAMSETS_LIBRARIES_EXTRA_DIR.
# Local files should be placed in appropriate stage lib subdirectories. For example,
# to add a JDBC driver like my-jdbc.jar to the JDBC stage lib, the local file my-jdbc.jar
# should be at the location $PROJECT_ROOT/sdc-extras/streamsets-datacollector-jdbc-lib/lib/my-jdbc.jar
COPY sdc-extras/ ${STREAMSETS_LIBRARIES_EXTRA_DIR}/

# Make sure every SDC directory exists, belongs to group 0 and grants the group
# the same permissions as the owner.
#  * SDC_RESOURCES is handled here as well, so no separate chgrp/chmod layer is
#    needed after the COPY above.
#  * `set -e` with one command per statement makes a failure on ANY directory
#    abort the build; an `a && b` chain inside the loop would only surface a
#    failure that happens in the last iteration.
RUN set -e; \
    for dir in \
      "${SDC_DIST}/streamsets-libs" \
      "${SDC_CONF}" \
      "${SDC_DATA}" \
      "${SDC_LOG}" \
      "${SDC_RESOURCES}" \
      "${STREAMSETS_LIBRARIES_EXTRA_DIR}" \
      "${USER_LIBRARIES_DIR}"; \
    do \
      mkdir -p "${dir}"; \
      chgrp -R 0 "${dir}"; \
      chmod -R g=u "${dir}"; \
    done
# Disabled alternative that would keep the realm file accessible to group 0:
# RUN chgrp -R 0 /etc/sdc/form-realm.properties && \
#     chmod -R g=u /etc/sdc/form-realm.properties

# Hand form-realm.properties to root:root and strip all group/other permissions.
# NOTE(review): the image later switches to the non-root SDC_USER, which cannot
# read a root-owned file without group/other bits - confirm this is intended.
RUN chown -R root:root /etc/sdc/form-realm.properties && \
    chmod go-rwx /etc/sdc/form-realm.properties

# Give group members the same permissions on /etc/passwd as its owner.
# Presumably this lets the entrypoint register an arbitrary runtime UID - TODO confirm.
RUN chmod g=u /etc/passwd

# NOTE(review): nothing visible in this Dockerfile installs "uid_entrypoint";
# it has to be resolvable through PATH at container start - verify.
ENTRYPOINT ["uid_entrypoint"]
USER ${SDC_USER}
EXPOSE 18630
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Copyright 2017 StreamSets Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Abort on the first failing command and trace every command that is executed.
set -e
set -x

# Install SDC only when the distribution directory does not exist yet.
if [ ! -d "${SDC_DIST}" ]; then
  # Create SDC user and group.
  addgroup -S -g "${SDC_UID}" "${SDC_USER}"
  adduser -S -u "${SDC_UID}" -G "${SDC_USER}" "${SDC_USER}"

  # Locate the SDC tarball: use the first *.tgz found in /tmp, otherwise
  # download it from SDC_URL.
  sdc_tarball=/tmp/sdc.tgz
  local_tarball=
  for f in /tmp/*.tgz; do
    # With no match the glob stays literal, so the existence test is required.
    if [ -e "$f" ]; then
      local_tarball="$f"
    fi
    break
  done

  # An explicit if/else is used instead of `test && mv || curl`: with that form a
  # failed mv would silently fall through to a download.
  if [ -n "${local_tarball}" ]; then
    # The glob may match the target name itself; moving a file onto itself fails.
    if [ "${local_tarball}" != "${sdc_tarball}" ]; then
      mv "${local_tarball}" "${sdc_tarball}"
    fi
  else
    # -f: fail on HTTP errors instead of saving the error page as the tarball.
    curl -f -o "${sdc_tarball}" -L "${SDC_URL}"
  fi

  # Unpack into SDC_DIST, dropping the archive's top-level directory.
  mkdir "${SDC_DIST}"
  tar xzf "${sdc_tarball}" --strip-components 1 -C "${SDC_DIST}"
  rm -rf "${sdc_tarball}"

  # Move the bundled configuration directory to SDC_CONF.
  mv "${SDC_DIST}/etc" "${SDC_CONF}"
fi
# Append "stdout" to the logger line so that log output becomes visible
# through `docker logs`.
log4j_conf="${SDC_CONF}/sdc-log4j.properties"
sed -i 's|INFO, streamsets|INFO, streamsets,stdout|' "${log4j_conf}"

# Workaround to address SDC-8005: when the distribution ships a user-libs
# directory, copy it to USER_LIBRARIES_DIR.
bundled_user_libs="${SDC_DIST}/user-libs"
[ ! -d "${bundled_user_libs}" ] || cp -R "${bundled_user_libs}" "${USER_LIBRARIES_DIR}"

# Create necessary directories.
mkdir -p /mnt "${SDC_DATA}" "${SDC_LOG}" "${SDC_RESOURCES}" "${USER_LIBRARIES_DIR}"
# Update sdc-security.policy to include the custom stage library directory.
# The codebase URL is built from USER_LIBRARIES_DIR (expected to be an absolute
# path) rather than a hard-coded copy of its value, so the policy always matches
# the directory that is actually created for custom stage libraries.
cat >> "${SDC_CONF}/sdc-security.policy" << EOF
// custom stage library directory
grant codebase "file://${USER_LIBRARIES_DIR}/-" {
  permission java.security.AllPermission;
};
EOF

# Use short option -s as long option --status is not supported on alpine linux.
sed -i 's|--status|-s|' "${SDC_DIST}/libexec/_stagelibs"
# Setup filesystem permissions: every SDC directory is owned by group 0 and the
# group receives the same permissions as the owner.
#  * Each step is its own statement so that `set -e` aborts on any failure;
#    chained as `chgrp ... && chmod ...`, a failing chgrp would be ignored for
#    every directory except the last one.
#  * mkdir -p first, so a directory that is not part of the distribution is
#    created instead of making chgrp fail.
for dir in \
  "${SDC_DIST}/streamsets-libs" \
  "${SDC_CONF}" \
  "${SDC_DATA}" \
  "${SDC_LOG}" \
  "${SDC_RESOURCES}" \
  "${STREAMSETS_LIBRARIES_EXTRA_DIR}" \
  "${USER_LIBRARIES_DIR}"
do
  mkdir -p "${dir}"
  chgrp -R 0 "${dir}"
  chmod -R g=u "${dir}"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment