Skip to content

Instantly share code, notes, and snippets.

/flume.conf Secret

Created August 9, 2013 00:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save anonymous/9f88209d8ab9443aebe8 to your computer and use it in GitHub Desktop.
Save anonymous/9f88209d8ab9443aebe8 to your computer and use it in GitHub Desktop.
# DECLARE
# Names the components of the agent called "agent": one source, one channel
# and one sink. Each name listed here is configured in detail under the
# matching agent.sources.<name>, agent.channels.<name> and agent.sinks.<name>
# key prefixes elsewhere in this file.
agent.sources = ArchiveSource
agent.channels = ArchiveChannel
agent.sinks = ArchiveSink
# DEFINE SOURCES
# ArchiveSource is a spooling-directory ("spooldir") source that ingests the
# files placed in the spoolDir directory below.
agent.sources.ArchiveSource.type = spooldir
agent.sources.ArchiveSource.spoolDir = /data/1/DLStats_Flume/archive
# Number of events handed to the channel per batch.
# NOTE(review): this should stay <= the channel's transactionCapacity
# (10000 for ArchiveChannel in this file) -- re-check if either value changes.
agent.sources.ArchiveSource.batchSize = 5000
# DEFINE INTERCEPTORS
# A single regex_extractor interceptor pulls a date out of each event and
# stores it, converted to epoch milliseconds, in the "timestamp" header.
agent.sources.ArchiveSource.interceptors = ArchiveDateExtractorInterceptor
agent.sources.ArchiveSource.interceptors.ArchiveDateExtractorInterceptor.type = regex_extractor
# Capture group 1 matches text shaped like 09/Aug/2013:00:18
# (2 digits / 3 non-digits / 4 digits : 2 digits : 2 digits).
# Backslashes are doubled because a .properties file treats a single
# backslash as an escape character.
agent.sources.ArchiveSource.interceptors.ArchiveDateExtractorInterceptor.regex = (\\d\\d/\\D\\D\\D/\\d\\d\\d\\d:\\d\\d:\\d\\d)
# One serializer (s1) handles capture group 1.
agent.sources.ArchiveSource.interceptors.ArchiveDateExtractorInterceptor.serializers = s1
agent.sources.ArchiveSource.interceptors.ArchiveDateExtractorInterceptor.serializers.s1.type = org.apache.flume.interceptor.RegexExtractorInterceptorMillisSerializer
# Header name the parsed value is written to.
agent.sources.ArchiveSource.interceptors.ArchiveDateExtractorInterceptor.serializers.s1.name = timestamp
# Date pattern used to parse the captured text; it mirrors the regex above
# (day/month-abbreviation/year:hour:minute, no seconds).
# NOTE(review): events that do not match the regex presumably get no
# "timestamp" header -- confirm how such lines should be handled downstream.
agent.sources.ArchiveSource.interceptors.ArchiveDateExtractorInterceptor.serializers.s1.pattern = dd/MMM/yyyy:HH:mm
# DEFINE CHANNELS
# ArchiveChannel is an in-memory channel buffering events between
# ArchiveSource and ArchiveSink.
# NOTE(review): a memory channel presumably loses queued events if the agent
# stops -- confirm that is acceptable for this archive load.
agent.channels.ArchiveChannel.type = memory
# Maximum number of events held in the channel.
agent.channels.ArchiveChannel.capacity = 1000000
# Maximum number of events per put/take transaction; the source batchSize
# (5000) and the sink hdfs.batchSize (10000) in this file both fit within it.
agent.channels.ArchiveChannel.transactionCapacity = 10000
# DEFINE SINKS
# ArchiveSink writes events to HDFS as plain data streams, bucketed into
# year/month/day directories.
agent.sinks.ArchiveSink.type = hdfs
# %Y/%m/%d are expanded from each event's "timestamp" header.
agent.sinks.ArchiveSink.hdfs.path = hdfs://m0106.mtv.cloudera.com:8020/user/cops/DLStats/archive/%Y/%m/%d
# Files still being written get a leading "." in their name.
agent.sinks.ArchiveSink.hdfs.inUsePrefix = .
agent.sinks.ArchiveSink.hdfs.fileType = DataStream
# Roll policy: roll on size only (256 MiB = 268435456 bytes).
# rollInterval must be disabled explicitly: when it is left unset the HDFS
# sink falls back to its default of 30 seconds, so files would be rolled long
# before reaching rollSize and HDFS would fill up with many small files.
agent.sinks.ArchiveSink.hdfs.rollInterval = 0
agent.sinks.ArchiveSink.hdfs.rollSize = 268435456
# 0 disables rolling by event count.
agent.sinks.ArchiveSink.hdfs.rollCount = 0
# Events written per flush; must not exceed the channel's
# transactionCapacity (10000 for ArchiveChannel in this file).
agent.sinks.ArchiveSink.hdfs.batchSize = 10000
# Close a file after 600 seconds without writes, so buckets that stop
# receiving data are finalized even though they never reach rollSize.
agent.sinks.ArchiveSink.hdfs.idleTimeout = 600
# CONNECT
# Wire the source and the sink to the shared channel. Note the key names:
# the source uses "channels" (plural) while the sink uses "channel"
# (singular).
agent.sources.ArchiveSource.channels = ArchiveChannel
agent.sinks.ArchiveSink.channel = ArchiveChannel
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment