# Download and unpack Apache Flume 1.8.0.
# NOTE: superseded releases are removed from regular Apache mirrors;
# archive.apache.org is the permanent archive, so the link stays valid.
wget https://archive.apache.org/dist/flume/1.8.0/apache-flume-1.8.0-bin.tar.gz
tar xvf apache-flume-1.8.0-bin.tar.gz
cd apache-flume-1.8.0-bin/
准备配置文件:
# Create working copies of the shipped configuration templates.
# The subshell keeps the current directory unchanged afterwards
# (same end state as cd conf/ ... cd ..).
(
  cd conf/
  cp flume-env.sh.template flume-env.sh
  cp flume-conf.properties.template flume.conf
)
设置环境变量:
# Make flume-ng available on PATH for all login shells (needs sudo to write
# under /etc/profile.d), then load it into the current shell immediately.
# $(pwd) replaces the legacy backticks, which are error-prone inside "...".
# \$PATH stays escaped so it is expanded at login time, not now.
echo "export PATH=$(pwd)/bin:\$PATH" | sudo tee /etc/profile.d/flume.sh
source /etc/profile.d/flume.sh
修改配置 conf/flume.conf:
# Agent "agent1": Avro RPC source -> in-memory channel -> logger sink.
# Memory channel buffers events in RAM (fast; events are lost on restart).
agent1.channels.ch1.type = memory
# Avro source: accepts events over Avro RPC on all interfaces, port 41414.
agent1.sources.avro-source1.channels = ch1
agent1.sources.avro-source1.type = avro
agent1.sources.avro-source1.bind = 0.0.0.0
agent1.sources.avro-source1.port = 41414
# Logger sink: writes each received event to the agent's log (testing only).
agent1.sinks.log-sink1.channel = ch1
agent1.sinks.log-sink1.type = logger
# Component declarations: names every channel/source/sink this agent runs.
agent1.channels = ch1
agent1.sources = avro-source1
agent1.sinks = log-sink1
启动服务:
# Start the Flume agent named "agent1" with console debug logging.
# Long options used for readability: --conf-file == -f, --name == -n.
flume-ng agent --conf ./conf/ --conf-file conf/flume.conf --name agent1 -Dflume.root.logger=DEBUG,console
用另一个终端启动客户端:
# Send the contents of /etc/passwd to the agent's Avro source, line by line.
# Long options used for readability: --host == -H, --port == -p, --filename == -F.
flume-ng avro-client --conf conf --host localhost --port 41414 --filename /etc/passwd -Dflume.root.logger=DEBUG,console
输出如下,表示正常:
... Finished
... Closing reader
... Closing RPC client
... Exiting
修改配置 conf/flume.conf:
# Agent "agent1": spooling-directory source -> in-memory channel -> HDFS sink.
#
# BUG FIX: Java .properties files have NO inline comments -- a trailing
# "# ..." after a value becomes PART of the value. The original trailing
# comments on spoolDir / hdfs.path / filePrefix would have corrupted those
# settings. All comments now live on their own lines.

# Memory channel buffers events in RAM (fast; events are lost on restart).
agent1.channels.ch1.type = memory

# Spooling-directory source: watches spoolDir for new files to ingest.
# (The avro-only bind/port settings from the previous config were removed;
# a spooldir source does not listen on a network port.)
agent1.sources.spooldir-source1.channels = ch1
agent1.sources.spooldir-source1.type = spooldir
# Directory holding files awaiting upload.
agent1.sources.spooldir-source1.spoolDir = /home/vagrant/apps/apache-flume-1.8.0-bin/spool

# HDFS sink: writes events into HDFS.
agent1.sinks.hdfs-sink1.channel = ch1
agent1.sinks.hdfs-sink1.type = hdfs
# Destination path in HDFS.
agent1.sinks.hdfs-sink1.hdfs.path = hdfs://localhost:9000/flume
# Prefix added to generated file names.
agent1.sinks.hdfs-sink1.hdfs.filePrefix = test-
agent1.sinks.hdfs-sink1.hdfs.useLocalTimeStamp = true
# Round the timestamp used in path escape sequences down to a multiple of
# roundValue. NOTE(review): without hdfs.roundUnit this rounds to 10 of the
# default unit (second) -- set hdfs.roundUnit = minute if minutes were meant.
agent1.sinks.hdfs-sink1.hdfs.round = true
agent1.sinks.hdfs-sink1.hdfs.roundValue = 10

# Component declarations: names every channel/source/sink this agent runs.
agent1.channels = ch1
agent1.sources = spooldir-source1
agent1.sinks = hdfs-sink1
重启服务。
然后,将文件放到指定的 (spool) 目录中,将自动被检测到并上传,完成以后自动加上 .COMPLETED
后缀。
用浏览器打开 Hadoop 页面 http://localhost:9870/explorer.html#/flume 可以看到已上传的文件。
文件被切成了多个小文件:HDFS sink 默认会按事件数/时间/大小滚动生成新文件(可通过 hdfs.rollCount、hdfs.rollInterval、hdfs.rollSize 调整);副本数则由 HDFS 自身的 dfs.replication 配置决定,并非 Flume 额外复制。