Skip to content

Instantly share code, notes, and snippets.

@alopresto
Created February 17, 2017 06:15
Show Gist options
  • Save alopresto/09b30565c9c2632658bc2423a6b3a752 to your computer and use it in GitHub Desktop.
Save alopresto/09b30565c9c2632658bc2423a6b3a752 to your computer and use it in GitHub Desktop.
Generates a flowfile with example CSV multiline text with newlines inside quoted strings. Replaces the newlines inside quotes with an escaped character, then splits each line. Includes LogAttributes processors to debug.
<?xml version="1.0" ?>
<template encoding-version="1.0">
<description>Generates a flowfile with example CSV multiline text with newlines inside quoted strings. Replaces the newlines inside quotes with an escaped character, then splits each line. Includes LogAttributes processors to debug. </description>
<groupId>4aa66ece-015a-1000-4348-efd94f63c4df</groupId>
<name>ReplaceText and SplitText</name>
<snippet>
<connections>
<id>015a1001-919e-1aa8-0000-000000000000</id>
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>4aa66ece-015a-1000-0000-000000000000</groupId>
<id>015a1000-919e-1aa8-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>4aa66ece-015a-1000-0000-000000000000</groupId>
<id>4aa731e3-015a-1000-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>015a1002-919e-1aa8-0000-000000000000</id>
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>4aa66ece-015a-1000-0000-000000000000</groupId>
<id>4aa7e7dc-015a-1000-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>4aa66ece-015a-1000-0000-000000000000</groupId>
<id>015a1000-919e-1aa8-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>4aa7ca80-015a-1000-0000-000000000000</id>
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>4aa66ece-015a-1000-0000-000000000000</groupId>
<id>4aa731e3-015a-1000-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>4aa66ece-015a-1000-0000-000000000000</groupId>
<id>4aa69dbb-015a-1000-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>4aa8e1bb-015a-1000-0000-000000000000</id>
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>4aa66ece-015a-1000-0000-000000000000</groupId>
<id>4aa8919e-015a-1000-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>splits</selectedRelationships>
<source>
<groupId>4aa66ece-015a-1000-0000-000000000000</groupId>
<id>4aa7e7dc-015a-1000-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<processors>
<id>015a1000-919e-1aa8-0000-000000000000</id>
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId>
<position>
<x>7.0</x>
<y>410.0</y>
</position>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Log Level</key>
<value>
<name>Log Level</name>
</value>
</entry>
<entry>
<key>Log Payload</key>
<value>
<name>Log Payload</name>
</value>
</entry>
<entry>
<key>Attributes to Log</key>
<value>
<name>Attributes to Log</name>
</value>
</entry>
<entry>
<key>Attributes to Ignore</key>
<value>
<name>Attributes to Ignore</name>
</value>
</entry>
<entry>
<key>Log prefix</key>
<value>
<name>Log prefix</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Log Level</key>
<value>info</value>
</entry>
<entry>
<key>Log Payload</key>
<value>true</value>
</entry>
<entry>
<key>Attributes to Log</key>
</entry>
<entry>
<key>Attributes to Ignore</key>
</entry>
<entry>
<key>Log prefix</key>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>LogAttribute</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<style></style>
<type>org.apache.nifi.processors.standard.LogAttribute</type>
</processors>
<processors>
<id>4aa69dbb-015a-1000-0000-000000000000</id>
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId>
<position>
<x>0.0</x>
<y>0.0</y>
</position>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>File Size</key>
<value>
<name>File Size</name>
</value>
</entry>
<entry>
<key>Batch Size</key>
<value>
<name>Batch Size</name>
</value>
</entry>
<entry>
<key>Data Format</key>
<value>
<name>Data Format</name>
</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>
<name>Unique FlowFiles</name>
</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>
<name>generate-ff-custom-text</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>File Size</key>
<value>0B</value>
</entry>
<entry>
<key>Batch Size</key>
<value>1</value>
</entry>
<entry>
<key>Data Format</key>
<value>Text</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>false</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>No,NAme,ID,Description
1,Stack,232,"ABCDEFGHIJKLMNO
-- Jiuaslkm asdasdasd"
2,Queue,454,"PQRSTUVWXYZ
-- Other words here"</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>2 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>GenerateFlowFile</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<style></style>
<type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
</processors>
<processors>
<id>4aa731e3-015a-1000-0000-000000000000</id>
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId>
<position>
<x>1.0</x>
<y>228.0</y>
</position>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Regular Expression</key>
<value>
<name>Regular Expression</name>
</value>
</entry>
<entry>
<key>Replacement Value</key>
<value>
<name>Replacement Value</name>
</value>
</entry>
<entry>
<key>Character Set</key>
<value>
<name>Character Set</name>
</value>
</entry>
<entry>
<key>Maximum Buffer Size</key>
<value>
<name>Maximum Buffer Size</name>
</value>
</entry>
<entry>
<key>Replacement Strategy</key>
<value>
<name>Replacement Strategy</name>
</value>
</entry>
<entry>
<key>Evaluation Mode</key>
<value>
<name>Evaluation Mode</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Regular Expression</key>
<value>"(.*?)(\n)(.*?)"</value>
</entry>
<entry>
<key>Replacement Value</key>
<value>"$1\\n$3"</value>
</entry>
<entry>
<key>Character Set</key>
<value>UTF-8</value>
</entry>
<entry>
<key>Maximum Buffer Size</key>
<value>1 MB</value>
</entry>
<entry>
<key>Replacement Strategy</key>
<value>Regex Replace</value>
</entry>
<entry>
<key>Evaluation Mode</key>
<value>Entire text</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>ReplaceText</name>
<relationships>
<autoTerminate>true</autoTerminate>
<name>failure</name>
</relationships>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<style></style>
<type>org.apache.nifi.processors.standard.ReplaceText</type>
</processors>
<processors>
<id>4aa7e7dc-015a-1000-0000-000000000000</id>
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId>
<position>
<x>485.0</x>
<y>408.0</y>
</position>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Line Split Count</key>
<value>
<name>Line Split Count</name>
</value>
</entry>
<entry>
<key>Maximum Fragment Size</key>
<value>
<name>Maximum Fragment Size</name>
</value>
</entry>
<entry>
<key>Header Line Count</key>
<value>
<name>Header Line Count</name>
</value>
</entry>
<entry>
<key>Header Line Marker Characters</key>
<value>
<name>Header Line Marker Characters</name>
</value>
</entry>
<entry>
<key>Remove Trailing Newlines</key>
<value>
<name>Remove Trailing Newlines</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Line Split Count</key>
<value>1</value>
</entry>
<entry>
<key>Maximum Fragment Size</key>
</entry>
<entry>
<key>Header Line Count</key>
<value>1</value>
</entry>
<entry>
<key>Header Line Marker Characters</key>
</entry>
<entry>
<key>Remove Trailing Newlines</key>
<value>true</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>SplitText</name>
<relationships>
<autoTerminate>true</autoTerminate>
<name>failure</name>
</relationships>
<relationships>
<autoTerminate>true</autoTerminate>
<name>original</name>
</relationships>
<relationships>
<autoTerminate>false</autoTerminate>
<name>splits</name>
</relationships>
<style></style>
<type>org.apache.nifi.processors.standard.SplitText</type>
</processors>
<processors>
<id>4aa8919e-015a-1000-0000-000000000000</id>
<parentGroupId>4aa66ece-015a-1000-0000-000000000000</parentGroupId>
<position>
<x>3.0</x>
<y>669.0</y>
</position>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Log Level</key>
<value>
<name>Log Level</name>
</value>
</entry>
<entry>
<key>Log Payload</key>
<value>
<name>Log Payload</name>
</value>
</entry>
<entry>
<key>Attributes to Log</key>
<value>
<name>Attributes to Log</name>
</value>
</entry>
<entry>
<key>Attributes to Ignore</key>
<value>
<name>Attributes to Ignore</name>
</value>
</entry>
<entry>
<key>Log prefix</key>
<value>
<name>Log prefix</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Log Level</key>
<value>info</value>
</entry>
<entry>
<key>Log Payload</key>
<value>true</value>
</entry>
<entry>
<key>Attributes to Log</key>
</entry>
<entry>
<key>Attributes to Ignore</key>
</entry>
<entry>
<key>Log prefix</key>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>LogAttribute</name>
<relationships>
<autoTerminate>true</autoTerminate>
<name>success</name>
</relationships>
<style></style>
<type>org.apache.nifi.processors.standard.LogAttribute</type>
</processors>
</snippet>
<timestamp>02/16/2017 22:15:07 PST</timestamp>
</template>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment