Skip to content

Instantly share code, notes, and snippets.

@ottomata
Created January 16, 2013 21:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ottomata/4551001 to your computer and use it in GitHub Desktop.
Save ottomata/4551001 to your computer and use it in GitHub Desktop.
# Flume agent "webrequest": declares the components that make up the agent.
# Listed in data-flow order: source -> channel -> sink.
webrequest.sources = udp2log
webrequest.channels = file-channel
webrequest.sinks = hdfs-sink
# file-channel: durable channel that buffers events on local disk between
# the source and the sink.
webrequest.channels.file-channel.type = file
# Event data and checkpoint state are kept in sibling directories.
webrequest.channels.file-channel.dataDirs = /var/lib/hadoop/data/e/flume/file-channel/data
webrequest.channels.file-channel.checkpointDir = /var/lib/hadoop/data/e/flume/file-channel/checkpoint
# Time between checkpoints, in milliseconds (1000 = one checkpoint per second).
# NOTE(review): this is much more frequent than Flume's documented default of
# 30000 ms -- confirm it is intentional.
webrequest.channels.file-channel.checkpointInterval = 1000
# udp2log: custom UDP source (not part of stock Flume).
# See: https://issues.apache.org/jira/browse/FLUME-1838
webrequest.sources.udp2log.type = org.apache.flume.source.udp.UDPSource
# Events received by this source are written to file-channel.
webrequest.sources.udp2log.channels = file-channel
# Multicast group address and port to listen on.
# NOTE(review): the exact meaning of host/port/multicast is defined by the custom
# UDPSource class -- presumably "multicast = true" makes it join the group given
# in "host"; verify against the FLUME-1838 patch.
webrequest.sources.udp2log.multicast = true
webrequest.sources.udp2log.host = 233.58.59.1
webrequest.sources.udp2log.port = 8420
# Interceptors on the udp2log source. They run in the order listed.
#
#   1. arrival-timestamp: sets the "timestamp" header of every event to the
#      time the agent received it. This is only a fallback. Without it, a line
#      that does not match the regex below (e.g. a truncated or malformed
#      datagram) would reach the HDFS sink with no "timestamp" header; the sink
#      cannot expand the time escapes in hdfs.path for such an event, fails the
#      transaction, and keeps retrying the same events.
#   2. request-timestamp: when the line matches, overwrites the "timestamp"
#      header with the request time parsed out of the log line itself.
webrequest.sources.udp2log.interceptors = arrival-timestamp request-timestamp
webrequest.sources.udp2log.interceptors.arrival-timestamp.type = timestamp
webrequest.sources.udp2log.interceptors.request-timestamp.type = regex_extractor
# Backslashes are doubled because the properties loader consumes one level of
# escaping; the effective regex is
#   ^.+\s\d+\s(\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d)
# Group 1 captures a yyyy-MM-ddTHH:mm:ss timestamp that follows a
# whitespace-delimited run of digits.
# NOTE(review): presumably the preceding fields are the udp2log hostname and
# sequence number -- confirm against the log format.
webrequest.sources.udp2log.interceptors.request-timestamp.regex = ^.+\\s\\d+\\s(\\d\\d\\d\\d-\\d\\d-\\d\\dT\\d\\d:\\d\\d:\\d\\d)
# Serializer s1 converts capture group 1 to epoch milliseconds using "pattern"
# and stores the result in the header called "timestamp".
# NOTE(review): the pattern carries no time zone, so parsing presumably uses the
# JVM default zone -- confirm it matches the zone the log timestamps are in.
webrequest.sources.udp2log.interceptors.request-timestamp.serializers = s1
webrequest.sources.udp2log.interceptors.request-timestamp.serializers.s1.type = org.apache.flume.interceptor.RegexExtractorInterceptorMillisSerializer
webrequest.sources.udp2log.interceptors.request-timestamp.serializers.s1.name = timestamp
webrequest.sources.udp2log.interceptors.request-timestamp.serializers.s1.pattern = yyyy-MM-dd'T'HH:mm:ss
# hdfs-sink: drains file-channel and writes the events to HDFS.
webrequest.sinks.hdfs-sink.type = hdfs
webrequest.sinks.hdfs-sink.channel = file-channel

# Output files: uncompressed raw event stream, file names prefixed "webrequest".
webrequest.sinks.hdfs-sink.hdfs.fileType = DataStream
webrequest.sinks.hdfs-sink.hdfs.filePrefix = webrequest

# Time-bucketed output directory. The %-escapes are expanded from each event's
# "timestamp" header, rounded down to a multiple of 15 minutes, so %M is one of
# 00/15/30/45 and %S is always 00.
webrequest.sinks.hdfs-sink.hdfs.path = /user/otto/tmp/flume/%Y-%m-%d/%H.%M.%S
webrequest.sinks.hdfs-sink.hdfs.round = true
webrequest.sinks.hdfs-sink.hdfs.roundUnit = minute
webrequest.sinks.hdfs-sink.hdfs.roundValue = 15

# File rolling: start a new file every 60 seconds. A value of 0 disables the
# size-based (bytes) and count-based (events) triggers.
webrequest.sinks.hdfs-sink.hdfs.rollInterval = 60
webrequest.sinks.hdfs-sink.hdfs.rollSize = 0
webrequest.sinks.hdfs-sink.hdfs.rollCount = 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment