<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
<template> |
|
<description>This template generates messages and puts them onto a Kafka topic. Then another processor gets the messages from

Kafka and puts them on HDFS.

</description>
|
<name>Kerberized Kafka and HDFS</name> |
|
<snippet> |
|
<connections> |
|
<id>2b93ffcd-0698-44a9-86f6-ce0ea6fc4145</id> |
|
<parentGroupId>3bdd324d-db87-4a21-8149-f88d7a46741e</parentGroupId> |
|
<backPressureDataSizeThreshold>0 MB</backPressureDataSizeThreshold> |
|
<backPressureObjectThreshold>0</backPressureObjectThreshold> |
|
<destination> |
|
<groupId>3bdd324d-db87-4a21-8149-f88d7a46741e</groupId> |
|
<id>dbbe5d57-471e-466c-a63e-3c8c018172b6</id> |
|
<type>PROCESSOR</type> |
|
</destination> |
|
<flowFileExpiration>0 sec</flowFileExpiration> |
|
<labelIndex>1</labelIndex> |
|
<name></name> |
|
<selectedRelationships>success</selectedRelationships> |
|
<source> |
|
<groupId>3bdd324d-db87-4a21-8149-f88d7a46741e</groupId> |
|
<id>80df14eb-d6f3-4edd-9ea5-6fc993ade36d</id> |
|
<type>PROCESSOR</type> |
|
</source> |
|
<zIndex>0</zIndex> |
|
</connections> |
|
<connections> |
|
<id>41bc9df4-a296-4481-a896-b4ea5b58dcf3</id> |
|
<parentGroupId>3bdd324d-db87-4a21-8149-f88d7a46741e</parentGroupId> |
|
<backPressureDataSizeThreshold>0 MB</backPressureDataSizeThreshold> |
|
<backPressureObjectThreshold>0</backPressureObjectThreshold> |
|
<destination> |
|
<groupId>3bdd324d-db87-4a21-8149-f88d7a46741e</groupId> |
|
<id>80df14eb-d6f3-4edd-9ea5-6fc993ade36d</id> |
|
<type>PROCESSOR</type> |
|
</destination> |
|
<flowFileExpiration>0 sec</flowFileExpiration> |
|
<labelIndex>1</labelIndex> |
|
<name></name> |
|
<selectedRelationships>success</selectedRelationships> |
|
<source> |
|
<groupId>3bdd324d-db87-4a21-8149-f88d7a46741e</groupId> |
|
<id>b2e49a74-9a08-4232-81d2-7e19fcea8591</id> |
|
<type>PROCESSOR</type> |
|
</source> |
|
<zIndex>0</zIndex> |
|
</connections> |
|
<connections> |
|
<id>ad52b0df-c120-4d62-bcac-bc33c3c2f801</id> |
|
<parentGroupId>3bdd324d-db87-4a21-8149-f88d7a46741e</parentGroupId> |
|
<backPressureDataSizeThreshold>0 MB</backPressureDataSizeThreshold> |
|
<backPressureObjectThreshold>0</backPressureObjectThreshold> |
|
<destination> |
|
<groupId>3bdd324d-db87-4a21-8149-f88d7a46741e</groupId> |
|
<id>8f833885-fb73-4bc7-a212-a33d06c2f8bf</id> |
|
<type>PROCESSOR</type> |
|
</destination> |
|
<flowFileExpiration>0 sec</flowFileExpiration> |
|
<labelIndex>1</labelIndex> |
|
<name></name> |
|
<selectedRelationships>success</selectedRelationships> |
|
<source> |
|
<groupId>3bdd324d-db87-4a21-8149-f88d7a46741e</groupId> |
|
<id>3fd8ca84-5c1d-41d9-9b4d-f2aa31e98db3</id> |
|
<type>PROCESSOR</type> |
|
</source> |
|
<zIndex>0</zIndex> |
|
</connections> |
|
<connections> |
|
<id>3a90a3c8-422a-44d5-8ccd-cc3365b989ec</id> |
|
<parentGroupId>3bdd324d-db87-4a21-8149-f88d7a46741e</parentGroupId> |
|
<backPressureDataSizeThreshold>0 MB</backPressureDataSizeThreshold> |
|
<backPressureObjectThreshold>0</backPressureObjectThreshold> |
|
<destination> |
|
<groupId>3bdd324d-db87-4a21-8149-f88d7a46741e</groupId> |
|
<id>3fd8ca84-5c1d-41d9-9b4d-f2aa31e98db3</id> |
|
<type>PROCESSOR</type> |
|
</destination> |
|
<flowFileExpiration>0 sec</flowFileExpiration> |
|
<labelIndex>1</labelIndex> |
|
<name></name> |
|
<selectedRelationships>success</selectedRelationships> |
|
<source> |
|
<groupId>3bdd324d-db87-4a21-8149-f88d7a46741e</groupId> |
|
<id>861ee80a-c91a-4a19-a53f-a2bc472a824e</id> |
|
<type>PROCESSOR</type> |
|
</source> |
|
<zIndex>0</zIndex> |
|
</connections> |
|
<processors> |
|
<id>b2e49a74-9a08-4232-81d2-7e19fcea8591</id> |
|
<parentGroupId>3bdd324d-db87-4a21-8149-f88d7a46741e</parentGroupId> |
|
<position> |
|
<x>713.5837199119459</x> |
|
<y>174.0684605996851</y> |
|
</position> |
|
<config> |
|
<bulletinLevel>WARN</bulletinLevel> |
|
<comments></comments> |
|
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> |
|
<defaultConcurrentTasks> |
|
<entry> |
|
<key>TIMER_DRIVEN</key> |
|
<value>1</value> |
|
</entry> |
|
<entry> |
|
<key>EVENT_DRIVEN</key> |
|
<value>0</value> |
|
</entry> |
|
<entry> |
|
<key>CRON_DRIVEN</key> |
|
<value>1</value> |
|
</entry> |
|
</defaultConcurrentTasks> |
|
<defaultSchedulingPeriod> |
|
<entry> |
|
<key>TIMER_DRIVEN</key> |
|
<value>0 sec</value> |
|
</entry> |
|
<entry> |
|
<key>CRON_DRIVEN</key> |
|
<value>* * * * * ?</value> |
|
</entry> |
|
</defaultSchedulingPeriod> |
|
<descriptors> |
|
<entry> |
|
<key>ZooKeeper Connection String</key> |
|
<value> |
|
<description>The Connection String to use in order to connect to ZooKeeper. This is often a |
|
comma-separated list of &lt;host&gt;:&lt;port&gt; combinations. For example,
|
host1:2181,host2:2181,host3:2188 |
|
</description> |
|
<displayName>ZooKeeper Connection String</displayName> |
|
<dynamic>false</dynamic> |
|
<name>ZooKeeper Connection String</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Topic Name</key> |
|
<value> |
|
<description>The Kafka Topic to pull messages from</description> |
|
<displayName>Topic Name</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Topic Name</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Zookeeper Commit Frequency</key> |
|
<value> |
|
<defaultValue>60 secs</defaultValue> |
|
<description>Specifies how often to communicate with ZooKeeper to indicate which messages |
|
have been pulled. A longer time period will result in better overall performance but can |
|
result in more data duplication if a NiFi node is lost |
|
</description> |
|
<displayName>Zookeeper Commit Frequency</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Zookeeper Commit Frequency</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Batch Size</key> |
|
<value> |
|
<defaultValue>1</defaultValue> |
|
<description>Specifies the maximum number of messages to combine into a single FlowFile. |
|
These messages will be concatenated together with the &lt;Message Demarcator&gt; string
|
placed between the content of each message. If the messages from Kafka should not be |
|
concatenated together, leave this value at 1. |
|
</description> |
|
<displayName>Batch Size</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Batch Size</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Message Demarcator</key> |
|
<value> |
|
<defaultValue>\n</defaultValue> |
|
<description>Specifies the characters to use in order to demarcate multiple messages from |
|
Kafka. If the &lt;Batch Size&gt; property is set to 1, this value is ignored. Otherwise,
|
for each two subsequent messages in the batch, this value will be placed in between |
|
them. |
|
</description> |
|
<displayName>Message Demarcator</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Message Demarcator</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Client Name</key> |
|
<value> |
|
<defaultValue>NiFi-b2e49a74-9a08-4232-81d2-7e19fcea8591</defaultValue> |
|
<description>Client Name to use when communicating with Kafka</description> |
|
<displayName>Client Name</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Client Name</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Group ID</key> |
|
<value> |
|
<defaultValue>b2e49a74-9a08-4232-81d2-7e19fcea8591</defaultValue> |
|
<description>A Group ID is used to identify consumers that are within the same consumer |
|
group |
|
</description> |
|
<displayName>Group ID</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Group ID</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Kafka Communications Timeout</key> |
|
<value> |
|
<defaultValue>30 secs</defaultValue> |
|
<description>The amount of time to wait for a response from Kafka before determining that |
|
there is a communications error |
|
</description> |
|
<displayName>Kafka Communications Timeout</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Kafka Communications Timeout</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>ZooKeeper Communications Timeout</key> |
|
<value> |
|
<defaultValue>30 secs</defaultValue> |
|
<description>The amount of time to wait for a response from ZooKeeper before determining |
|
that there is a communications error |
|
</description> |
|
<displayName>ZooKeeper Communications Timeout</displayName> |
|
<dynamic>false</dynamic> |
|
<name>ZooKeeper Communications Timeout</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Auto Offset Reset</key> |
|
<value> |
|
<allowableValues> |
|
<displayName>smallest</displayName> |
|
<value>smallest</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<displayName>largest</displayName> |
|
<value>largest</value> |
|
</allowableValues> |
|
<defaultValue>largest</defaultValue> |
|
<description>Automatically reset the offset to the smallest or largest offset available on |
|
the broker |
|
</description> |
|
<displayName>Auto Offset Reset</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Auto Offset Reset</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>security.protocol</key> |
|
<value> |
|
<description>Specifies the value for 'security.protocol' Kafka Configuration.</description> |
|
<displayName>security.protocol</displayName> |
|
<dynamic>true</dynamic> |
|
<name>security.protocol</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
</descriptors> |
|
<lossTolerant>false</lossTolerant> |
|
<penaltyDuration>30 sec</penaltyDuration> |
|
<properties> |
|
<entry> |
|
<key>ZooKeeper Connection String</key> |
|
<value>sandbox.hortonworks.com:2181</value> |
|
</entry> |
|
<entry> |
|
<key>Topic Name</key> |
|
<value>test</value> |
|
</entry> |
|
<entry> |
|
<key>Zookeeper Commit Frequency</key> |
|
<value>60 secs</value> |
|
</entry> |
|
<entry> |
|
<key>Batch Size</key> |
|
<value>1</value> |
|
</entry> |
|
<entry> |
|
<key>Message Demarcator</key> |
|
<value>\n</value> |
|
</entry> |
|
<entry> |
|
<key>Client Name</key> |
|
<value>NiFi-b2e49a74-9a08-4232-81d2-7e19fcea8591</value> |
|
</entry> |
|
<entry> |
|
<key>Group ID</key> |
|
<value>b2e49a74-9a08-4232-81d2-7e19fcea8591</value> |
|
</entry> |
|
<entry> |
|
<key>Kafka Communications Timeout</key> |
|
<value>30 secs</value> |
|
</entry> |
|
<entry> |
|
<key>ZooKeeper Communications Timeout</key> |
|
<value>30 secs</value> |
|
</entry> |
|
<entry> |
|
<key>Auto Offset Reset</key> |
|
<value>largest</value> |
|
</entry> |
|
<entry> |
|
<key>security.protocol</key> |
|
<value>PLAINTEXTSASL</value> |
|
</entry> |
|
</properties> |
|
<runDurationMillis>0</runDurationMillis> |
|
<schedulingPeriod>0 sec</schedulingPeriod> |
|
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> |
|
<yieldDuration>1 sec</yieldDuration> |
|
</config> |
|
<name>GetKafka</name> |
|
<relationships> |
|
<autoTerminate>false</autoTerminate> |
|
<description>All FlowFiles that are created are routed to this relationship</description> |
|
<name>success</name> |
|
</relationships> |
|
<state>STOPPED</state> |
|
<style/> |
|
<supportsEventDriven>false</supportsEventDriven> |
|
<supportsParallelProcessing>true</supportsParallelProcessing> |
|
<type>org.apache.nifi.processors.kafka.GetKafka</type> |
|
</processors> |
|
<processors> |
|
<id>8f833885-fb73-4bc7-a212-a33d06c2f8bf</id> |
|
<parentGroupId>3bdd324d-db87-4a21-8149-f88d7a46741e</parentGroupId> |
|
<position> |
|
<x>300.72454833984375</x> |
|
<y>362.5385437011719</y> |
|
</position> |
|
<config> |
|
<bulletinLevel>WARN</bulletinLevel> |
|
<comments></comments> |
|
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> |
|
<defaultConcurrentTasks> |
|
<entry> |
|
<key>TIMER_DRIVEN</key> |
|
<value>1</value> |
|
</entry> |
|
<entry> |
|
<key>EVENT_DRIVEN</key> |
|
<value>0</value> |
|
</entry> |
|
<entry> |
|
<key>CRON_DRIVEN</key> |
|
<value>1</value> |
|
</entry> |
|
</defaultConcurrentTasks> |
|
<defaultSchedulingPeriod> |
|
<entry> |
|
<key>TIMER_DRIVEN</key> |
|
<value>0 sec</value> |
|
</entry> |
|
<entry> |
|
<key>CRON_DRIVEN</key> |
|
<value>* * * * * ?</value> |
|
</entry> |
|
</defaultSchedulingPeriod> |
|
<descriptors> |
|
<entry> |
|
<key>Known Brokers</key> |
|
<value> |
|
<description>A comma-separated list of known Kafka Brokers in the format &lt;host&gt;:&lt;port&gt;</description>
|
<displayName>Known Brokers</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Known Brokers</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Topic Name</key> |
|
<value> |
|
<description>The Kafka Topic of interest</description> |
|
<displayName>Topic Name</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Topic Name</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>true</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Partition Strategy</key> |
|
<value> |
|
<allowableValues> |
|
<description>Messages will be assigned partitions in a round-robin fashion, sending the |
|
first message to Partition 1, the next Partition to Partition 2, and so on, wrapping |
|
as necessary. |
|
</description> |
|
<displayName>Round Robin</displayName> |
|
<value>Round Robin</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<description>Messages will be assigned to random partitions.</description> |
|
<displayName>Random</displayName> |
|
<value>Random Robin</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<description>The &lt;Partition&gt; property will be used to determine the partition. All
|
messages within the same FlowFile will be assigned to the same partition. |
|
</description> |
|
<displayName>User-Defined</displayName> |
|
<value>User-Defined</value> |
|
</allowableValues> |
|
<defaultValue>Round Robin</defaultValue> |
|
<description>Specifies how messages should be partitioned when sent to Kafka</description> |
|
<displayName>Partition Strategy</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Partition Strategy</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Partition</key> |
|
<value> |
|
<description>Specifies which Kafka Partition to add the message to. If using a message |
|
delimiter, all messages in the same FlowFile will be sent to the same partition. If a |
|
partition is specified but is not valid, then all messages within the same FlowFile will |
|
use the same partition but it remains undefined which partition is used. |
|
</description> |
|
<displayName>Partition</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Partition</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>true</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Kafka Key</key> |
|
<value> |
|
<description>The Key to use for the Message</description> |
|
<displayName>Kafka Key</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Kafka Key</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>true</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Delivery Guarantee</key> |
|
<value> |
|
<allowableValues> |
|
<description>FlowFile will be routed to success after successfully writing the content |
|
to a Kafka node, without waiting for a response. This provides the best performance |
|
but may result in data loss. |
|
</description> |
|
<displayName>Best Effort</displayName> |
|
<value>0</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<description>FlowFile will be routed to success if the message is received by a single |
|
Kafka node, whether or not it is replicated. This is faster than &lt;Guarantee

Replicated Delivery&gt; but can result in data loss if a Kafka node crashes
|
</description> |
|
<displayName>Guarantee Single Node Delivery</displayName> |
|
<value>1</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<description>FlowFile will be routed to failure unless the message is replicated to the |
|
appropriate number of Kafka Nodes according to the Topic configuration |
|
</description> |
|
<displayName>Guarantee Replicated Delivery</displayName> |
|
<value>all</value> |
|
</allowableValues> |
|
<defaultValue>0</defaultValue> |
|
<description>Specifies the requirement for guaranteeing that a message is sent to Kafka |
|
</description> |
|
<displayName>Delivery Guarantee</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Delivery Guarantee</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Message Delimiter</key> |
|
<value> |
|
<description>Specifies the delimiter (interpreted in its UTF-8 byte representation) to use |
|
for splitting apart multiple messages within a single FlowFile. If not specified, the |
|
entire content of the FlowFile will be used as a single message. If specified, the |
|
contents of the FlowFile will be split on this delimiter and each section sent as a |
|
separate Kafka message. Note that if messages are delimited and some messages for a |
|
given FlowFile are transferred successfully while others are not, the messages will be |
|
split into individual FlowFiles, such that those messages that were successfully sent |
|
are routed to the 'success' relationship while other messages are sent to the 'failure' |
|
relationship. |
|
</description> |
|
<displayName>Message Delimiter</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Message Delimiter</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>true</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Max Buffer Size</key> |
|
<value> |
|
<defaultValue>5 MB</defaultValue> |
|
<description>The maximum amount of data to buffer in memory before sending to Kafka |
|
</description> |
|
<displayName>Max Buffer Size</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Max Buffer Size</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Max Record Size</key> |
|
<value> |
|
<defaultValue>1 MB</defaultValue> |
|
<description>The maximum size that any individual record can be.</description> |
|
<displayName>Max Record Size</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Max Record Size</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Communications Timeout</key> |
|
<value> |
|
<defaultValue>30 secs</defaultValue> |
|
<description>The amount of time to wait for a response from Kafka before determining that |
|
there is a communications error |
|
</description> |
|
<displayName>Communications Timeout</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Communications Timeout</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Async Batch Size</key> |
|
<value> |
|
<defaultValue>200</defaultValue> |
|
<description>The number of messages to send in one batch. The producer will wait until |
|
either this number of messages are ready to send or "Queue Buffering Max Time" |
|
is reached. NOTE: This property will be ignored unless the 'Message Delimiter' property |
|
is specified. |
|
</description> |
|
<displayName>Batch Size</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Async Batch Size</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Queue Buffering Max Time</key> |
|
<value> |
|
<defaultValue>5 secs</defaultValue> |
|
<description>Maximum time to buffer data before sending to Kafka. For example a setting of |
|
100 ms will try to batch together 100 milliseconds' worth of messages to send at once. |
|
This will improve throughput but adds message delivery latency due to the buffering. |
|
</description> |
|
<displayName>Queue Buffering Max Time</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Queue Buffering Max Time</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Compression Codec</key> |
|
<value> |
|
<allowableValues> |
|
<description>Compression will not be used for any topic.</description> |
|
<displayName>None</displayName> |
|
<value>none</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<description>Compress messages using GZIP</description> |
|
<displayName>GZIP</displayName> |
|
<value>gzip</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<description>Compress messages using Snappy</description> |
|
<displayName>Snappy</displayName> |
|
<value>snappy</value> |
|
</allowableValues> |
|
<defaultValue>none</defaultValue> |
|
<description>This parameter allows you to specify the compression codec for all data |
|
generated by this producer. |
|
</description> |
|
<displayName>Compression Codec</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Compression Codec</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Client Name</key> |
|
<value> |
|
<description>Client Name to use when communicating with Kafka</description> |
|
<displayName>Client Name</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Client Name</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>security.protocol</key> |
|
<value> |
|
<description>Specifies the value for 'security.protocol' Kafka Configuration.</description> |
|
<displayName>security.protocol</displayName> |
|
<dynamic>true</dynamic> |
|
<name>security.protocol</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
</descriptors> |
|
<lossTolerant>false</lossTolerant> |
|
<penaltyDuration>30 sec</penaltyDuration> |
|
<properties> |
|
<entry> |
|
<key>Known Brokers</key> |
|
<value>sandbox.hortonworks.com:6667</value> |
|
</entry> |
|
<entry> |
|
<key>Topic Name</key> |
|
<value>test</value> |
|
</entry> |
|
<entry> |
|
<key>Partition Strategy</key> |
|
<value>Round Robin</value> |
|
</entry> |
|
<entry> |
|
<key>Partition</key> |
|
</entry> |
|
<entry> |
|
<key>Kafka Key</key> |
|
</entry> |
|
<entry> |
|
<key>Delivery Guarantee</key> |
|
<value>0</value> |
|
</entry> |
|
<entry> |
|
<key>Message Delimiter</key> |
|
</entry> |
|
<entry> |
|
<key>Max Buffer Size</key> |
|
<value>5 MB</value> |
|
</entry> |
|
<entry> |
|
<key>Max Record Size</key> |
|
<value>1 MB</value> |
|
</entry> |
|
<entry> |
|
<key>Communications Timeout</key> |
|
<value>30 secs</value> |
|
</entry> |
|
<entry> |
|
<key>Async Batch Size</key> |
|
<value>200</value> |
|
</entry> |
|
<entry> |
|
<key>Queue Buffering Max Time</key> |
|
<value>5 secs</value> |
|
</entry> |
|
<entry> |
|
<key>Compression Codec</key> |
|
<value>none</value> |
|
</entry> |
|
<entry> |
|
<key>Client Name</key> |
|
<value>nifi</value> |
|
</entry> |
|
<entry> |
|
<key>security.protocol</key> |
|
<value>PLAINTEXTSASL</value> |
|
</entry> |
|
</properties> |
|
<runDurationMillis>0</runDurationMillis> |
|
<schedulingPeriod>0 sec</schedulingPeriod> |
|
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> |
|
<yieldDuration>1 sec</yieldDuration> |
|
</config> |
|
<name>PutKafka</name> |
|
<relationships> |
|
<autoTerminate>true</autoTerminate> |
|
<description>Any FlowFile that cannot be sent to Kafka will be routed to this Relationship</description> |
|
<name>failure</name> |
|
</relationships> |
|
<relationships> |
|
<autoTerminate>true</autoTerminate> |
|
<description>Any FlowFile that is successfully sent to Kafka will be routed to this Relationship |
|
</description> |
|
<name>success</name> |
|
</relationships> |
|
<state>STOPPED</state> |
|
<style/> |
|
<supportsEventDriven>false</supportsEventDriven> |
|
<supportsParallelProcessing>true</supportsParallelProcessing> |
|
<type>org.apache.nifi.processors.kafka.PutKafka</type> |
|
</processors> |
|
<processors> |
|
<id>3fd8ca84-5c1d-41d9-9b4d-f2aa31e98db3</id> |
|
<parentGroupId>3bdd324d-db87-4a21-8149-f88d7a46741e</parentGroupId> |
|
<position> |
|
<x>298.3679766956599</x> |
|
<y>158.79279314187158</y> |
|
</position> |
|
<config> |
|
<bulletinLevel>WARN</bulletinLevel> |
|
<comments></comments> |
|
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> |
|
<defaultConcurrentTasks> |
|
<entry> |
|
<key>TIMER_DRIVEN</key> |
|
<value>1</value> |
|
</entry> |
|
<entry> |
|
<key>EVENT_DRIVEN</key> |
|
<value>0</value> |
|
</entry> |
|
<entry> |
|
<key>CRON_DRIVEN</key> |
|
<value>1</value> |
|
</entry> |
|
</defaultConcurrentTasks> |
|
<defaultSchedulingPeriod> |
|
<entry> |
|
<key>TIMER_DRIVEN</key> |
|
<value>0 sec</value> |
|
</entry> |
|
<entry> |
|
<key>CRON_DRIVEN</key> |
|
<value>* * * * * ?</value> |
|
</entry> |
|
</defaultSchedulingPeriod> |
|
<descriptors> |
|
<entry> |
|
<key>Regular Expression</key> |
|
<value> |
|
<defaultValue>(?s:^.*$)</defaultValue> |
|
<description>The Search Value to search for in the FlowFile content. Only used for 'Literal |
|
Replace' and 'Regex Replace' matching strategies |
|
</description> |
|
<displayName>Search Value</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Regular Expression</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>true</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Replacement Value</key> |
|
<value> |
|
<defaultValue>$1</defaultValue> |
|
<description>The value to insert using the 'Replacement Strategy'. Using "Regex Replace" |
|
back-references to Regular Expression capturing groups are supported, but |
|
back-references that reference capturing groups that do not exist in the regular |
|
expression will be treated as literal value. Back References may also be referenced |
|
using the Expression Language, as '$1', '$2', etc. The single-tick marks MUST be |
|
included, as these variables are not "Standard" attribute names (attribute |
|
names must be quoted unless they contain only numbers, letters, and _). |
|
</description> |
|
<displayName>Replacement Value</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Replacement Value</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>true</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Character Set</key> |
|
<value> |
|
<defaultValue>UTF-8</defaultValue> |
|
<description>The Character Set in which the file is encoded</description> |
|
<displayName>Character Set</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Character Set</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Maximum Buffer Size</key> |
|
<value> |
|
<defaultValue>1 MB</defaultValue> |
|
<description>Specifies the maximum amount of data to buffer (per file or per line, depending |
|
on the Evaluation Mode) in order to apply the replacement. If 'Entire Text' (in |
|
Evaluation Mode) is selected and the FlowFile is larger than this value, the FlowFile |
|
will be routed to 'failure'. In 'Line-by-Line' Mode, if a single line is larger than |
|
this value, the FlowFile will be routed to 'failure'. A default value of 1 MB is |
|
provided, primarily for 'Entire Text' mode. In 'Line-by-Line' Mode, a value such as 8 KB |
|
or 16 KB is suggested. This value is ignored if the &lt;Replacement Strategy&gt; |
|
property is set to one of: Append, Prepend, Always Replace |
|
</description> |
|
<displayName>Maximum Buffer Size</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Maximum Buffer Size</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Replacement Strategy</key> |
|
<value> |
|
<allowableValues> |
|
<description>Insert the Replacement Value at the beginning of the FlowFile or the |
|
beginning of each line (depending on the Evaluation Mode). For "Line-by-Line" |
|
Evaluation Mode, the value will be prepended to each line. For "Entire Text" |
|
evaluation mode, the value will be prepended to the entire text. |
|
</description> |
|
<displayName>Prepend</displayName> |
|
<value>Prepend</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<description>Insert the Replacement Value at the end of the FlowFile or the end of each |
|
line (depending on the Evaluation Mode). For "Line-by-Line" Evaluation |
|
Mode, the value will be appended to each line. For "Entire Text" |
|
evaluation mode, the value will be appended to the entire text. |
|
</description> |
|
<displayName>Append</displayName> |
|
<value>Append</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<description>Interpret the Search Value as a Regular Expression and replace all matches |
|
with the Replacement Value. The Replacement Value may reference Capturing Groups |
|
used in the Search Value by using a dollar-sign followed by the Capturing Group |
|
number, such as $1 or $2. If the Search Value is set to .* then everything is |
|
replaced without even evaluating the Regular Expression. |
|
</description> |
|
<displayName>Regex Replace</displayName> |
|
<value>Regex Replace</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<description>Search for all instances of the Search Value and replace the matches with |
|
the Replacement Value. |
|
</description> |
|
<displayName>Literal Replace</displayName> |
|
<value>Literal Replace</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<description>Always replaces the entire line or the entire contents of the FlowFile |
|
(depending on the value of the &lt;Evaluation Mode&gt; property) and does not bother |
 |
searching for any value. When this strategy is chosen, the &lt;Search Value&gt; |
|
property is ignored. |
|
</description> |
|
<displayName>Always Replace</displayName> |
|
<value>Always Replace</value> |
|
</allowableValues> |
|
<defaultValue>Regex Replace</defaultValue> |
|
<description>The strategy for how and what to replace within the FlowFile's text content. |
|
</description> |
|
<displayName>Replacement Strategy</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Replacement Strategy</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Evaluation Mode</key> |
|
<value> |
|
<allowableValues> |
|
<displayName>Line-by-Line</displayName> |
|
<value>Line-by-Line</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<displayName>Entire text</displayName> |
|
<value>Entire text</value> |
|
</allowableValues> |
|
<defaultValue>Entire text</defaultValue> |
|
<description>Run the 'Replacement Strategy' against each line separately (Line-by-Line) or |
|
buffer the entire file into memory (Entire Text) and run against that. |
|
</description> |
|
<displayName>Evaluation Mode</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Evaluation Mode</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
</descriptors> |
|
<lossTolerant>false</lossTolerant> |
|
<penaltyDuration>30 sec</penaltyDuration> |
|
<properties> |
|
<entry> |
|
<key>Regular Expression</key> |
|
<value>(^.*$)</value> |
|
</entry> |
|
<entry> |
|
<key>Replacement Value</key> |
|
<value>Message from NiFi ${now()}</value> |
|
</entry> |
|
<entry> |
|
<key>Character Set</key> |
|
<value>UTF-8</value> |
|
</entry> |
|
<entry> |
|
<key>Maximum Buffer Size</key> |
|
<value>1 MB</value> |
|
</entry> |
|
<entry> |
|
<key>Replacement Strategy</key> |
|
<value>Regex Replace</value> |
|
</entry> |
|
<entry> |
|
<key>Evaluation Mode</key> |
|
<value>Entire text</value> |
|
</entry> |
|
</properties> |
|
<runDurationMillis>0</runDurationMillis> |
|
<schedulingPeriod>0 sec</schedulingPeriod> |
|
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> |
|
<yieldDuration>1 sec</yieldDuration> |
|
</config> |
|
<name>ReplaceText</name> |
|
<relationships> |
|
<autoTerminate>true</autoTerminate> |
|
<description>FlowFiles that could not be updated are routed to this relationship</description> |
|
<name>failure</name> |
|
</relationships> |
|
<relationships> |
|
<autoTerminate>false</autoTerminate> |
|
<description>FlowFiles that have been successfully processed are routed to this relationship. This |
|
includes both FlowFiles that had text replaced and those that did not. |
|
</description> |
|
<name>success</name> |
|
</relationships> |
|
<state>STOPPED</state> |
|
<style/> |
|
<supportsEventDriven>true</supportsEventDriven> |
|
<supportsParallelProcessing>true</supportsParallelProcessing> |
|
<type>org.apache.nifi.processors.standard.ReplaceText</type> |
|
</processors> |
|
<processors> |
|
<id>861ee80a-c91a-4a19-a53f-a2bc472a824e</id> |
|
<parentGroupId>3bdd324d-db87-4a21-8149-f88d7a46741e</parentGroupId> |
|
<position> |
|
<x>300.12060546875</x> |
|
<y>-21.01484227180481</y> |
|
</position> |
|
<config> |
|
<bulletinLevel>WARN</bulletinLevel> |
|
<comments></comments> |
|
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> |
|
<defaultConcurrentTasks> |
|
<entry> |
|
<key>TIMER_DRIVEN</key> |
|
<value>1</value> |
|
</entry> |
|
<entry> |
|
<key>EVENT_DRIVEN</key> |
|
<value>0</value> |
|
</entry> |
|
<entry> |
|
<key>CRON_DRIVEN</key> |
|
<value>1</value> |
|
</entry> |
|
</defaultConcurrentTasks> |
|
<defaultSchedulingPeriod> |
|
<entry> |
|
<key>TIMER_DRIVEN</key> |
|
<value>0 sec</value> |
|
</entry> |
|
<entry> |
|
<key>CRON_DRIVEN</key> |
|
<value>* * * * * ?</value> |
|
</entry> |
|
</defaultSchedulingPeriod> |
|
<descriptors> |
|
<entry> |
|
<key>File Size</key> |
|
<value> |
|
<description>The size of the file that will be used</description> |
|
<displayName>File Size</displayName> |
|
<dynamic>false</dynamic> |
|
<name>File Size</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Batch Size</key> |
|
<value> |
|
<defaultValue>1</defaultValue> |
|
<description>The number of FlowFiles to be transferred in each invocation</description> |
|
<displayName>Batch Size</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Batch Size</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Data Format</key> |
|
<value> |
|
<allowableValues> |
|
<displayName>Binary</displayName> |
|
<value>Binary</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<displayName>Text</displayName> |
|
<value>Text</value> |
|
</allowableValues> |
|
<defaultValue>Binary</defaultValue> |
|
<description>Specifies whether the data should be Text or Binary</description> |
|
<displayName>Data Format</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Data Format</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Unique FlowFiles</key> |
|
<value> |
|
<allowableValues> |
|
<displayName>true</displayName> |
|
<value>true</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<displayName>false</displayName> |
|
<value>false</value> |
|
</allowableValues> |
|
<defaultValue>false</defaultValue> |
|
<description>If true, each FlowFile that is generated will be unique. If false, a random |
|
value will be generated and all FlowFiles will get the same content but this offers much |
|
higher throughput |
|
</description> |
|
<displayName>Unique FlowFiles</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Unique FlowFiles</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
</descriptors> |
|
<lossTolerant>false</lossTolerant> |
|
<penaltyDuration>30 sec</penaltyDuration> |
|
<properties> |
|
<entry> |
|
<key>File Size</key> |
|
<value>10b</value> |
|
</entry> |
|
<entry> |
|
<key>Batch Size</key> |
|
<value>1</value> |
|
</entry> |
|
<entry> |
|
<key>Data Format</key> |
|
<value>Text</value> |
|
</entry> |
|
<entry> |
|
<key>Unique FlowFiles</key> |
|
<value>false</value> |
|
</entry> |
|
</properties> |
|
<runDurationMillis>0</runDurationMillis> |
|
<schedulingPeriod>10 sec</schedulingPeriod> |
|
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> |
|
<yieldDuration>1 sec</yieldDuration> |
|
</config> |
|
<name>GenerateFlowFile</name> |
|
<relationships> |
|
<autoTerminate>false</autoTerminate> |
|
<description></description> |
|
<name>success</name> |
|
</relationships> |
|
<state>STOPPED</state> |
|
<style/> |
|
<supportsEventDriven>false</supportsEventDriven> |
|
<supportsParallelProcessing>true</supportsParallelProcessing> |
|
<type>org.apache.nifi.processors.standard.GenerateFlowFile</type> |
|
</processors> |
|
<processors> |
|
<id>80df14eb-d6f3-4edd-9ea5-6fc993ade36d</id> |
|
<parentGroupId>3bdd324d-db87-4a21-8149-f88d7a46741e</parentGroupId> |
|
<position> |
|
<x>713.0535438255441</x> |
|
<y>397.78634606092874</y> |
|
</position> |
|
<config> |
|
<bulletinLevel>WARN</bulletinLevel> |
|
<comments></comments> |
|
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> |
|
<defaultConcurrentTasks> |
|
<entry> |
|
<key>TIMER_DRIVEN</key> |
|
<value>1</value> |
|
</entry> |
|
<entry> |
|
<key>EVENT_DRIVEN</key> |
|
<value>0</value> |
|
</entry> |
|
<entry> |
|
<key>CRON_DRIVEN</key> |
|
<value>1</value> |
|
</entry> |
|
</defaultConcurrentTasks> |
|
<defaultSchedulingPeriod> |
|
<entry> |
|
<key>TIMER_DRIVEN</key> |
|
<value>0 sec</value> |
|
</entry> |
|
<entry> |
|
<key>CRON_DRIVEN</key> |
|
<value>* * * * * ?</value> |
|
</entry> |
|
</defaultSchedulingPeriod> |
|
<descriptors> |
|
<entry> |
|
<key>Hadoop Configuration Resources</key> |
|
<value> |
|
<description>A file or comma separated list of files which contains the Hadoop file system |
|
configuration. Without this, Hadoop will search the classpath for a 'core-site.xml' and |
|
'hdfs-site.xml' file or will revert to a default configuration. |
|
</description> |
|
<displayName>Hadoop Configuration Resources</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Hadoop Configuration Resources</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Kerberos Principal</key> |
|
<value> |
|
<description>Kerberos principal to authenticate as. Requires nifi.kerberos.krb5.file to be |
|
set in your nifi.properties |
|
</description> |
|
<displayName>Kerberos Principal</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Kerberos Principal</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Kerberos Keytab</key> |
|
<value> |
|
<description>Kerberos keytab associated with the principal. Requires nifi.kerberos.krb5.file |
|
to be set in your nifi.properties |
|
</description> |
|
<displayName>Kerberos Keytab</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Kerberos Keytab</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Kerberos Relogin Period</key> |
|
<value> |
|
<defaultValue>4 hours</defaultValue> |
|
<description>Period of time which should pass before attempting a kerberos relogin |
|
</description> |
|
<displayName>Kerberos Relogin Period</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Kerberos Relogin Period</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Directory</key> |
|
<value> |
|
<description>The parent HDFS directory to which files should be written</description> |
|
<displayName>Directory</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Directory</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>true</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Conflict Resolution Strategy</key> |
|
<value> |
|
<allowableValues> |
|
<displayName>replace</displayName> |
|
<value>replace</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<displayName>ignore</displayName> |
|
<value>ignore</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<displayName>fail</displayName> |
|
<value>fail</value> |
|
</allowableValues> |
|
<defaultValue>fail</defaultValue> |
|
<description>Indicates what should happen when a file with the same name already exists in |
|
the output directory |
|
</description> |
|
<displayName>Conflict Resolution Strategy</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Conflict Resolution Strategy</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Block Size</key> |
|
<value> |
|
<description>Size of each block as written to HDFS. This overrides the Hadoop |
|
Configuration |
|
</description> |
|
<displayName>Block Size</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Block Size</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>IO Buffer Size</key> |
|
<value> |
|
<description>Amount of memory to use to buffer file contents during IO. This overrides the |
|
Hadoop Configuration |
|
</description> |
|
<displayName>IO Buffer Size</displayName> |
|
<dynamic>false</dynamic> |
|
<name>IO Buffer Size</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Replication</key> |
|
<value> |
|
<description>Number of times that HDFS will replicate each file. This overrides the Hadoop |
|
Configuration |
|
</description> |
|
<displayName>Replication</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Replication</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Permissions umask</key> |
|
<value> |
|
<description>A umask represented as an octal number which determines the permissions of |
|
files written to HDFS. This overrides the Hadoop Configuration dfs.umaskmode |
|
</description> |
|
<displayName>Permissions umask</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Permissions umask</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Remote Owner</key> |
|
<value> |
|
<description>Changes the owner of the HDFS file to this value after it is written. This only |
|
works if NiFi is running as a user that has HDFS super user privilege to change owner |
|
</description> |
|
<displayName>Remote Owner</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Remote Owner</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Remote Group</key> |
|
<value> |
|
<description>Changes the group of the HDFS file to this value after it is written. This only |
|
works if NiFi is running as a user that has HDFS super user privilege to change group |
|
</description> |
|
<displayName>Remote Group</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Remote Group</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Compression codec</key> |
|
<value> |
|
<allowableValues> |
|
<displayName>NONE</displayName> |
|
<value>NONE</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<displayName>DEFAULT</displayName> |
|
<value>DEFAULT</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<displayName>BZIP</displayName> |
|
<value>BZIP</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<displayName>GZIP</displayName> |
|
<value>GZIP</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<displayName>LZ4</displayName> |
|
<value>LZ4</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<displayName>SNAPPY</displayName> |
|
<value>SNAPPY</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<displayName>AUTOMATIC</displayName> |
|
<value>AUTOMATIC</value> |
|
</allowableValues> |
|
<defaultValue>NONE</defaultValue> |
|
<description></description> |
|
<displayName>Compression codec</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Compression codec</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
</descriptors> |
|
<lossTolerant>false</lossTolerant> |
|
<penaltyDuration>30 sec</penaltyDuration> |
|
<properties> |
|
<entry> |
|
<key>Hadoop Configuration Resources</key> |
|
<value>/etc/hadoop/conf/core-site.xml,/etc/hadoop/conf/hdfs-site.xml</value> |
|
</entry> |
|
<entry> |
|
<key>Kerberos Principal</key> |
|
<value>hive/sandbox.hortonworks.com@EXAMPLE.COM</value> |
|
</entry> |
|
<entry> |
|
<key>Kerberos Keytab</key> |
|
<value>/etc/security/keytabs/hive.service.keytab</value> |
|
</entry> |
|
<entry> |
|
<key>Kerberos Relogin Period</key> |
|
<value>4 hours</value> |
|
</entry> |
|
<entry> |
|
<key>Directory</key> |
|
<value>/tmp/nifi</value> |
|
</entry> |
|
<entry> |
|
<key>Conflict Resolution Strategy</key> |
|
<value>fail</value> |
|
</entry> |
|
<entry> |
|
<key>Block Size</key> |
|
</entry> |
|
<entry> |
|
<key>IO Buffer Size</key> |
|
</entry> |
|
<entry> |
|
<key>Replication</key> |
|
</entry> |
|
<entry> |
|
<key>Permissions umask</key> |
|
</entry> |
|
<entry> |
|
<key>Remote Owner</key> |
|
</entry> |
|
<entry> |
|
<key>Remote Group</key> |
|
</entry> |
|
<entry> |
|
<key>Compression codec</key> |
|
<value>NONE</value> |
|
</entry> |
|
</properties> |
|
<runDurationMillis>0</runDurationMillis> |
|
<schedulingPeriod>0 sec</schedulingPeriod> |
|
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> |
|
<yieldDuration>1 sec</yieldDuration> |
|
</config> |
|
<name>PutHDFS</name> |
|
<relationships> |
|
<autoTerminate>true</autoTerminate> |
|
<description>Files that could not be written to HDFS for some reason are transferred to this |
|
relationship |
|
</description> |
|
<name>failure</name> |
|
</relationships> |
|
<relationships> |
|
<autoTerminate>false</autoTerminate> |
|
<description>Files that have been successfully written to HDFS are transferred to this relationship |
|
</description> |
|
<name>success</name> |
|
</relationships> |
|
<state>STOPPED</state> |
|
<style/> |
|
<supportsEventDriven>false</supportsEventDriven> |
|
<supportsParallelProcessing>true</supportsParallelProcessing> |
|
<type>org.apache.nifi.processors.hadoop.PutHDFS</type> |
|
</processors> |
|
<processors> |
|
<id>dbbe5d57-471e-466c-a63e-3c8c018172b6</id> |
|
<parentGroupId>3bdd324d-db87-4a21-8149-f88d7a46741e</parentGroupId> |
|
<position> |
|
<x>711.80810546875</x> |
|
<y>610.7080993652344</y> |
|
</position> |
|
<config> |
|
<bulletinLevel>WARN</bulletinLevel> |
|
<comments></comments> |
|
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> |
|
<defaultConcurrentTasks> |
|
<entry> |
|
<key>TIMER_DRIVEN</key> |
|
<value>1</value> |
|
</entry> |
|
<entry> |
|
<key>EVENT_DRIVEN</key> |
|
<value>0</value> |
|
</entry> |
|
<entry> |
|
<key>CRON_DRIVEN</key> |
|
<value>1</value> |
|
</entry> |
|
</defaultConcurrentTasks> |
|
<defaultSchedulingPeriod> |
|
<entry> |
|
<key>TIMER_DRIVEN</key> |
|
<value>0 sec</value> |
|
</entry> |
|
<entry> |
|
<key>CRON_DRIVEN</key> |
|
<value>* * * * * ?</value> |
|
</entry> |
|
</defaultSchedulingPeriod> |
|
<descriptors> |
|
<entry> |
|
<key>Log Level</key> |
|
<value> |
|
<allowableValues> |
|
<displayName>trace</displayName> |
|
<value>trace</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<displayName>debug</displayName> |
|
<value>debug</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<displayName>info</displayName> |
|
<value>info</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<displayName>warn</displayName> |
|
<value>warn</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<displayName>error</displayName> |
|
<value>error</value> |
|
</allowableValues> |
|
<defaultValue>info</defaultValue> |
|
<description>The Log Level to use when logging the Attributes</description> |
|
<displayName>Log Level</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Log Level</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Log Payload</key> |
|
<value> |
|
<allowableValues> |
|
<displayName>true</displayName> |
|
<value>true</value> |
|
</allowableValues> |
|
<allowableValues> |
|
<displayName>false</displayName> |
|
<value>false</value> |
|
</allowableValues> |
|
<defaultValue>false</defaultValue> |
|
<description>If true, the FlowFile's payload will be logged, in addition to its attributes; |
|
otherwise, just the Attributes will be logged. |
|
</description> |
|
<displayName>Log Payload</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Log Payload</name> |
|
<required>true</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Attributes to Log</key> |
|
<value> |
|
<description>A comma-separated list of Attributes to Log. If not specified, all attributes |
|
will be logged. |
|
</description> |
|
<displayName>Attributes to Log</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Attributes to Log</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Attributes to Ignore</key> |
|
<value> |
|
<description>A comma-separated list of Attributes to ignore. If not specified, no attributes |
|
will be ignored. |
|
</description> |
|
<displayName>Attributes to Ignore</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Attributes to Ignore</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>false</supportsEl> |
|
</value> |
|
</entry> |
|
<entry> |
|
<key>Log prefix</key> |
|
<value> |
|
<description>Log prefix appended to the log lines. It helps to distinguish the output of |
|
multiple LogAttribute processors. |
|
</description> |
|
<displayName>Log prefix</displayName> |
|
<dynamic>false</dynamic> |
|
<name>Log prefix</name> |
|
<required>false</required> |
|
<sensitive>false</sensitive> |
|
<supportsEl>true</supportsEl> |
|
</value> |
|
</entry> |
|
</descriptors> |
|
<lossTolerant>false</lossTolerant> |
|
<penaltyDuration>30 sec</penaltyDuration> |
|
<properties> |
|
<entry> |
|
<key>Log Level</key> |
|
<value>info</value> |
|
</entry> |
|
<entry> |
|
<key>Log Payload</key> |
|
<value>true</value> |
|
</entry> |
|
<entry> |
|
<key>Attributes to Log</key> |
|
</entry> |
|
<entry> |
|
<key>Attributes to Ignore</key> |
|
</entry> |
|
<entry> |
|
<key>Log prefix</key> |
|
</entry> |
|
</properties> |
|
<runDurationMillis>0</runDurationMillis> |
|
<schedulingPeriod>0 sec</schedulingPeriod> |
|
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> |
|
<yieldDuration>1 sec</yieldDuration> |
|
</config> |
|
<name>LogAttribute</name> |
|
<relationships> |
|
<autoTerminate>true</autoTerminate> |
|
<description>All FlowFiles are routed to this relationship</description> |
|
<name>success</name> |
|
</relationships> |
|
<state>STOPPED</state> |
|
<style/> |
|
<supportsEventDriven>true</supportsEventDriven> |
|
<supportsParallelProcessing>true</supportsParallelProcessing> |
|
<type>org.apache.nifi.processors.standard.LogAttribute</type> |
|
</processors> |
|
</snippet> |
|
<timestamp>07/05/2016 11:53:25 UTC</timestamp> |
|
</template> |