Skip to content

Instantly share code, notes, and snippets.

@morgante
Last active August 22, 2016 07:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save morgante/4de89f7d5fb183857c150689c5671bba to your computer and use it in GitHub Desktop.
Dockerfile
# Copyright (c) 2013-2016 Snowplow Analytics Ltd. All rights reserved.
#
# This program is licensed to you under the Apache License Version 2.0, and
# you may not use this file except in compliance with the Apache License
# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the Apache License Version 2.0 is distributed on an "AS
# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the Apache License Version 2.0 for the specific language
# governing permissions and limitations there under.
# This file (application.conf.example) contains a template with
# configuration options for Stream Enrich.
enrich {
  # Sources currently supported are:
  # 'kinesis' for reading Thrift-serialized records from a Kinesis stream
  # 'stdin' for reading Base64-encoded Thrift-serialized records from stdin
  source = "kinesis"

  # Sinks currently supported are:
  # 'kinesis' for writing enriched events to one Kinesis stream and invalid events to another.
  # 'stdouterr' for writing enriched events to stdout and invalid events to stderr.
  # Using "sbt assembly" and "java -jar" is recommended to disable sbt
  # logging.
  sink = "kinesis"

  # AWS credentials
  #
  # If both are set to 'default', use the default AWS credentials provider chain.
  #
  # If both are set to 'iam', use AWS IAM Roles to provision credentials.
  #
  # If both are set to 'env', use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
  aws {
    access-key: "env"
    secret-key: "env"
  }

  streams {
    in: {
      # Kinesis stream to read raw events from (injected via environment).
      raw: ${KINESIS_GOOD_STREAM}

      # Maximum number of records to get from Kinesis per call to GetRecords
      maxRecords: 10000

      # After enrichment, events are accumulated in a buffer before being sent to Kinesis.
      # The buffer is emptied whenever:
      # - the number of stored records reaches record-limit or
      # - the combined size of the stored records reaches byte-limit or
      # - the time in milliseconds since it was last emptied exceeds time-limit when
      #   a new event enters the buffer
      buffer: {
        byte-limit: 4500000   # 4.5 MB
        record-limit: 500     # 500 records
        time-limit: 1000      # 1 second
      }
    }

    out: {
      # Target streams for enriched (good) and invalid (bad) events.
      enriched: ${KINESIS_GOOD_STREAM_ENRICHED}
      bad: ${KINESIS_BAD_STREAM_ENRICHED}

      # Minimum and maximum backoff periods for retrying failed writes
      # - Units: Milliseconds
      backoffPolicy: {
        minBackoff: 3000    # 3 s
        maxBackoff: 600000  # 10 min (600000 ms)
      }
    }

    # "app-name" is used for a DynamoDB table to maintain stream state.
    # You can set it automatically using: "SnowplowKinesisEnrich-${enrich.streams.in.raw}"
    app-name: ${ENRICHER_APP_NAME}

    # LATEST: most recent data.
    # TRIM_HORIZON: oldest available data.
    # Note: This only affects the first run of this application
    # on a stream.
    initial-position = "TRIM_HORIZON"

    region: "us-east-1"
  }

  # Optional section for tracking endpoints
  # monitoring {
  #   snowplow {
  #     collector-uri: "{{collectorUri}}"
  #     collector-port: 80
  #     app-id: "{{enrichAppName}}"
  #     method: "GET"
  #   }
  # }
}
FROM williamyeh/sbt

# All build and runtime artifacts live under /var/app
RUN mkdir /var/app
WORKDIR /var/app

# Bring in the sbt project definition and the application sources
COPY project ./project
COPY src ./src

# Assemble the fat jar
RUN sbt assembly

# Expose the assembled binary under a stable name
RUN ln -s target/scala-2.10/snowplow-stream-enrich-0.8.1 enricher

# Copy configuration last so config edits don't invalidate the build cache
COPY application.conf .
COPY enrichments ./enrichments
COPY resolver.js .

# Launch the enricher with our config, resolver and enrichments by default
ENTRYPOINT ["./enricher"]
CMD ["--config", "application.conf", "--resolver", "file:resolver.js", "--enrichments", "file:enrichments"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment