public
Created

  • Download Gist
flume.conf
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
# Flume agent "webrequest": declares the named components that make up the
# agent. Each name listed here is configured in detail further down under
# webrequest.<kind>.<name>.*
webrequest.channels = file-channel
webrequest.sources = udp2log
webrequest.sinks = hdfs-sink
 
# Channel which buffers events on disk
# Sits between the udp2log source and the hdfs-sink sink.
webrequest.channels.file-channel.type = file
# Where the channel keeps its checkpoint and its event data files.
webrequest.channels.file-channel.checkpointDir = /var/lib/hadoop/data/e/flume/file-channel/checkpoint
webrequest.channels.file-channel.dataDirs = /var/lib/hadoop/data/e/flume/file-channel/data
# Time between checkpoints, in milliseconds (per the Flume User Guide).
# NOTE(review): 1000 ms is far shorter than Flume's documented default of
# 30000 ms -- confirm this is intentional.
webrequest.channels.file-channel.checkpointInterval = 1000
 
 
# UDPSource Multicast (custom)
# See: https://issues.apache.org/jira/browse/FLUME-1838
# Receives datagrams on multicast group 233.58.59.1, port 8420, and puts each
# event on file-channel. The source class is the custom one from the JIRA
# ticket above, so host/port/multicast are interpreted by that patch.
webrequest.sources.udp2log.channels = file-channel
webrequest.sources.udp2log.type = org.apache.flume.source.udp.UDPSource
webrequest.sources.udp2log.host = 233.58.59.1
webrequest.sources.udp2log.port = 8420
webrequest.sources.udp2log.multicast = true

# Interceptors run in the order listed here.
#
# 1. ingest-timestamp: stamps every event with the arrival time in the
#    "timestamp" header. This is only a fallback: the hdfs-sink path uses
#    %Y-%m-%d/%H.%M.%S escapes, which require that header. Without it, a line
#    that does not match the regex below would make the sink fail on that
#    event and retry it indefinitely, blocking everything behind it.
# 2. request-timestamp: when the line matches, overwrites the "timestamp"
#    header with the request time parsed out of the log line itself.
webrequest.sources.udp2log.interceptors = ingest-timestamp request-timestamp
webrequest.sources.udp2log.interceptors.ingest-timestamp.type = timestamp

# timestamp interceptor extractor
# Backslashes are doubled because the Java properties loader un-escapes them;
# the effective regex is ^.+\s\d+\s(\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d).
# Capture group 1 is a yyyy-MM-ddTHH:mm:ss timestamp preceded by a numeric
# field; serializer s1 parses it with the pattern below and stores the result
# as epoch milliseconds in the "timestamp" header.
webrequest.sources.udp2log.interceptors.request-timestamp.type = regex_extractor
webrequest.sources.udp2log.interceptors.request-timestamp.regex = ^.+\\s\\d+\\s(\\d\\d\\d\\d-\\d\\d-\\d\\dT\\d\\d:\\d\\d:\\d\\d)
webrequest.sources.udp2log.interceptors.request-timestamp.serializers = s1
webrequest.sources.udp2log.interceptors.request-timestamp.serializers.s1.type = org.apache.flume.interceptor.RegexExtractorInterceptorMillisSerializer
webrequest.sources.udp2log.interceptors.request-timestamp.serializers.s1.name = timestamp
webrequest.sources.udp2log.interceptors.request-timestamp.serializers.s1.pattern = yyyy-MM-dd'T'HH:mm:ss
 
 
# hdfs sink
# Drains file-channel and writes the events to HDFS.
webrequest.sinks.hdfs-sink.channel = file-channel
webrequest.sinks.hdfs-sink.type = hdfs
# The %Y-%m-%d/%H.%M.%S escapes are filled in from each event's "timestamp"
# header (set by the request-timestamp interceptor on the udp2log source),
# so events are bucketed by the time found in the log line.
webrequest.sinks.hdfs-sink.hdfs.path = /user/otto/tmp/flume/%Y-%m-%d/%H.%M.%S
webrequest.sinks.hdfs-sink.hdfs.filePrefix = webrequest
# DataStream: write events as an uncompressed stream rather than a SequenceFile.
webrequest.sinks.hdfs-sink.hdfs.fileType = DataStream
# Round the event timestamp down to a multiple of 15 minutes before expanding
# the path, giving one directory per quarter hour (%M is 00/15/30/45 and %S
# is always 00).
webrequest.sinks.hdfs-sink.hdfs.round = true
webrequest.sinks.hdfs-sink.hdfs.roundValue = 15
webrequest.sinks.hdfs-sink.hdfs.roundUnit = minute
# Roll (close and start a new file) every 60 seconds. Per the Flume User
# Guide, 0 disables rolling by event count and by file size, so time is the
# only roll trigger.
webrequest.sinks.hdfs-sink.hdfs.rollInterval = 60
webrequest.sinks.hdfs-sink.hdfs.rollCount = 0
webrequest.sinks.hdfs-sink.hdfs.rollSize = 0

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.