Skip to content

Instantly share code, notes, and snippets.

@ijokarumawak
Created November 29, 2018 01:27
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ijokarumawak/eeaf519a7ceea476fa452f7aa2ee5671 to your computer and use it in GitHub Desktop.
Save ijokarumawak/eeaf519a7ceea476fa452f7aa2ee5671 to your computer and use it in GitHub Desktop.
A NiFi example template that illustrates how to merge multiple XML files into one, using the MergeRecord processor with an XMLReader and an XMLRecordSetWriter.
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<template encoding-version="1.2">
<description></description>
<groupId>39379f66-0167-1000-9951-3cf7c004e310</groupId>
<name>Merge XML Records</name>
<snippet>
<!-- XMLReader controller service (org.apache.nifi.xml.XMLReader).
     The MergeRecord processor in this template refers to it by id through its
     record-reader property.
     Only schema-access-strategy is set explicitly (schema-text-property); every
     other property, including schema-text itself, is listed without a value.
     NOTE(review): presumably NiFi then falls back to each property's default, and
     the schema-text default resolves the avro.schema attribute that GenerateFlowFile
     attaches; confirm against the XMLReader documentation. -->
<controllerServices>
  <id>36c4d83a-ff47-38e2-0000-000000000000</id>
  <parentGroupId>376efa9a-48fc-3e3d-0000-000000000000</parentGroupId>
  <bundle>
    <artifact>nifi-record-serialization-services-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.9.0-SNAPSHOT</version>
  </bundle>
  <comments/>
  <!-- Property descriptors exposed by the service. -->
  <descriptors>
    <entry>
      <key>schema-access-strategy</key>
      <value>
        <name>schema-access-strategy</name>
      </value>
    </entry>
    <entry>
      <key>schema-registry</key>
      <value>
        <identifiesControllerService>org.apache.nifi.schemaregistry.services.SchemaRegistry</identifiesControllerService>
        <name>schema-registry</name>
      </value>
    </entry>
    <entry>
      <key>schema-name</key>
      <value>
        <name>schema-name</name>
      </value>
    </entry>
    <entry>
      <key>schema-version</key>
      <value>
        <name>schema-version</name>
      </value>
    </entry>
    <entry>
      <key>schema-branch</key>
      <value>
        <name>schema-branch</name>
      </value>
    </entry>
    <entry>
      <key>schema-text</key>
      <value>
        <name>schema-text</name>
      </value>
    </entry>
    <entry>
      <key>record_format</key>
      <value>
        <name>record_format</name>
      </value>
    </entry>
    <entry>
      <key>attribute_prefix</key>
      <value>
        <name>attribute_prefix</name>
      </value>
    </entry>
    <entry>
      <key>content_field_name</key>
      <value>
        <name>content_field_name</name>
      </value>
    </entry>
    <entry>
      <key>Date Format</key>
      <value>
        <name>Date Format</name>
      </value>
    </entry>
    <entry>
      <key>Time Format</key>
      <value>
        <name>Time Format</name>
      </value>
    </entry>
    <entry>
      <key>Timestamp Format</key>
      <value>
        <name>Timestamp Format</name>
      </value>
    </entry>
  </descriptors>
  <name>XMLReader</name>
  <persistsState>false</persistsState>
  <!-- Configured values; an entry with only a key carries no explicit value. -->
  <properties>
    <entry>
      <key>schema-access-strategy</key>
      <value>schema-text-property</value>
    </entry>
    <entry>
      <key>schema-registry</key>
    </entry>
    <entry>
      <key>schema-name</key>
    </entry>
    <entry>
      <key>schema-version</key>
    </entry>
    <entry>
      <key>schema-branch</key>
    </entry>
    <entry>
      <key>schema-text</key>
    </entry>
    <entry>
      <key>record_format</key>
    </entry>
    <entry>
      <key>attribute_prefix</key>
    </entry>
    <entry>
      <key>content_field_name</key>
    </entry>
    <entry>
      <key>Date Format</key>
    </entry>
    <entry>
      <key>Time Format</key>
    </entry>
    <entry>
      <key>Timestamp Format</key>
    </entry>
  </properties>
  <state>ENABLED</state>
  <type>org.apache.nifi.xml.XMLReader</type>
</controllerServices>
<!-- XMLRecordSetWriter controller service (org.apache.nifi.xml.XMLRecordSetWriter).
     The MergeRecord processor in this template refers to it by id through its
     record-writer property.
     Two properties are set explicitly: Schema Write Strategy = no-schema and
     root_tag_name = names. All other properties are listed without a value.
     NOTE(review): root_tag_name presumably names the root element wrapped around
     the merged records; confirm against the XMLRecordSetWriter documentation. -->
<controllerServices>
  <id>c3413461-b440-3fa3-0000-000000000000</id>
  <parentGroupId>376efa9a-48fc-3e3d-0000-000000000000</parentGroupId>
  <bundle>
    <artifact>nifi-record-serialization-services-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.9.0-SNAPSHOT</version>
  </bundle>
  <comments/>
  <!-- Property descriptors exposed by the service. -->
  <descriptors>
    <entry>
      <key>Schema Write Strategy</key>
      <value>
        <name>Schema Write Strategy</name>
      </value>
    </entry>
    <entry>
      <key>schema-access-strategy</key>
      <value>
        <name>schema-access-strategy</name>
      </value>
    </entry>
    <entry>
      <key>schema-registry</key>
      <value>
        <identifiesControllerService>org.apache.nifi.schemaregistry.services.SchemaRegistry</identifiesControllerService>
        <name>schema-registry</name>
      </value>
    </entry>
    <entry>
      <key>schema-name</key>
      <value>
        <name>schema-name</name>
      </value>
    </entry>
    <entry>
      <key>schema-version</key>
      <value>
        <name>schema-version</name>
      </value>
    </entry>
    <entry>
      <key>schema-branch</key>
      <value>
        <name>schema-branch</name>
      </value>
    </entry>
    <entry>
      <key>schema-text</key>
      <value>
        <name>schema-text</name>
      </value>
    </entry>
    <entry>
      <key>Date Format</key>
      <value>
        <name>Date Format</name>
      </value>
    </entry>
    <entry>
      <key>Time Format</key>
      <value>
        <name>Time Format</name>
      </value>
    </entry>
    <entry>
      <key>Timestamp Format</key>
      <value>
        <name>Timestamp Format</name>
      </value>
    </entry>
    <entry>
      <key>suppress_nulls</key>
      <value>
        <name>suppress_nulls</name>
      </value>
    </entry>
    <entry>
      <key>pretty_print_xml</key>
      <value>
        <name>pretty_print_xml</name>
      </value>
    </entry>
    <entry>
      <key>root_tag_name</key>
      <value>
        <name>root_tag_name</name>
      </value>
    </entry>
    <entry>
      <key>record_tag_name</key>
      <value>
        <name>record_tag_name</name>
      </value>
    </entry>
    <entry>
      <key>array_wrapping</key>
      <value>
        <name>array_wrapping</name>
      </value>
    </entry>
    <entry>
      <key>array_tag_name</key>
      <value>
        <name>array_tag_name</name>
      </value>
    </entry>
    <entry>
      <key>Character Set</key>
      <value>
        <name>Character Set</name>
      </value>
    </entry>
  </descriptors>
  <name>XMLRecordSetWriter</name>
  <persistsState>false</persistsState>
  <!-- Configured values; an entry with only a key carries no explicit value. -->
  <properties>
    <entry>
      <key>Schema Write Strategy</key>
      <value>no-schema</value>
    </entry>
    <entry>
      <key>schema-access-strategy</key>
    </entry>
    <entry>
      <key>schema-registry</key>
    </entry>
    <entry>
      <key>schema-name</key>
    </entry>
    <entry>
      <key>schema-version</key>
    </entry>
    <entry>
      <key>schema-branch</key>
    </entry>
    <entry>
      <key>schema-text</key>
    </entry>
    <entry>
      <key>Date Format</key>
    </entry>
    <entry>
      <key>Time Format</key>
    </entry>
    <entry>
      <key>Timestamp Format</key>
    </entry>
    <entry>
      <key>suppress_nulls</key>
    </entry>
    <entry>
      <key>pretty_print_xml</key>
    </entry>
    <entry>
      <key>root_tag_name</key>
      <value>names</value>
    </entry>
    <entry>
      <key>record_tag_name</key>
    </entry>
    <entry>
      <key>array_wrapping</key>
    </entry>
    <entry>
      <key>array_tag_name</key>
    </entry>
    <entry>
      <key>Character Set</key>
    </entry>
  </properties>
  <state>ENABLED</state>
  <type>org.apache.nifi.xml.XMLRecordSetWriter</type>
</controllerServices>
<processGroups>
<id>ec43db5a-b2b5-3702-0000-000000000000</id>
<parentGroupId>376efa9a-48fc-3e3d-0000-000000000000</parentGroupId>
<position>
<x>0.0</x>
<y>0.0</y>
</position>
<comments></comments>
<contents>
<!-- Connection carrying the failure relationship.
     Source id 7a076db4... is the MergeRecord processor and destination id
     69201528... is the LogAttribute processor, both in this process group.
     This is the only connection here that defines a bend point. -->
<connections>
  <id>59dd9b8a-63e0-3ef6-0000-000000000000</id>
  <parentGroupId>ec43db5a-b2b5-3702-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <bends>
    <x>474.40303108214266</x>
    <y>412.2104512544876</y>
  </bends>
  <destination>
    <groupId>ec43db5a-b2b5-3702-0000-000000000000</groupId>
    <id>69201528-51d6-30e3-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <loadBalanceCompression>DO_NOT_COMPRESS</loadBalanceCompression>
  <loadBalancePartitionAttribute/>
  <loadBalanceStatus>LOAD_BALANCE_NOT_CONFIGURED</loadBalanceStatus>
  <loadBalanceStrategy>DO_NOT_LOAD_BALANCE</loadBalanceStrategy>
  <name/>
  <selectedRelationships>failure</selectedRelationships>
  <source>
    <groupId>ec43db5a-b2b5-3702-0000-000000000000</groupId>
    <id>7a076db4-2f6c-3ddf-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- Connection carrying the success relationship.
     Source id da443c09... is the GenerateFlowFile processor and destination id
     7a076db4... is the MergeRecord processor, both in this process group. -->
<connections>
  <id>5b093e34-9e77-39ef-0000-000000000000</id>
  <parentGroupId>ec43db5a-b2b5-3702-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>ec43db5a-b2b5-3702-0000-000000000000</groupId>
    <id>7a076db4-2f6c-3ddf-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <loadBalanceCompression>DO_NOT_COMPRESS</loadBalanceCompression>
  <loadBalancePartitionAttribute/>
  <loadBalanceStatus>LOAD_BALANCE_NOT_CONFIGURED</loadBalanceStatus>
  <loadBalanceStrategy>DO_NOT_LOAD_BALANCE</loadBalanceStrategy>
  <name/>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>ec43db5a-b2b5-3702-0000-000000000000</groupId>
    <id>da443c09-0843-3d74-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- Connection carrying the merged relationship.
     Source id 7a076db4... is the MergeRecord processor and destination id
     69201528... is the LogAttribute processor, both in this process group. -->
<connections>
  <id>b79a6367-beb8-32de-0000-000000000000</id>
  <parentGroupId>ec43db5a-b2b5-3702-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>ec43db5a-b2b5-3702-0000-000000000000</groupId>
    <id>69201528-51d6-30e3-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <loadBalanceCompression>DO_NOT_COMPRESS</loadBalanceCompression>
  <loadBalancePartitionAttribute/>
  <loadBalanceStatus>LOAD_BALANCE_NOT_CONFIGURED</loadBalanceStatus>
  <loadBalanceStrategy>DO_NOT_LOAD_BALANCE</loadBalanceStrategy>
  <name/>
  <selectedRelationships>merged</selectedRelationships>
  <source>
    <groupId>ec43db5a-b2b5-3702-0000-000000000000</groupId>
    <id>7a076db4-2f6c-3ddf-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- LogAttribute processor: the terminal step of this flow.
     Both the failure and the merged connections of MergeRecord end here.
     It logs at level info, without payload, every attribute matching the
     regex .* (attributes-to-log-regex).
     The processor is exported in the STOPPED state, so FlowFiles stay in its
     incoming queues until someone starts it. -->
<processors>
  <id>69201528-51d6-30e3-0000-000000000000</id>
  <parentGroupId>ec43db5a-b2b5-3702-0000-000000000000</parentGroupId>
  <position>
    <x>499.9120948028458</x>
    <y>487.3475077335846</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.9.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <!-- Property descriptors exposed by the processor. -->
    <descriptors>
      <entry>
        <key>Log Level</key>
        <value>
          <name>Log Level</name>
        </value>
      </entry>
      <entry>
        <key>Log Payload</key>
        <value>
          <name>Log Payload</name>
        </value>
      </entry>
      <entry>
        <key>Attributes to Log</key>
        <value>
          <name>Attributes to Log</name>
        </value>
      </entry>
      <entry>
        <key>attributes-to-log-regex</key>
        <value>
          <name>attributes-to-log-regex</name>
        </value>
      </entry>
      <entry>
        <key>Attributes to Ignore</key>
        <value>
          <name>Attributes to Ignore</name>
        </value>
      </entry>
      <entry>
        <key>attributes-to-ignore-regex</key>
        <value>
          <name>attributes-to-ignore-regex</name>
        </value>
      </entry>
      <entry>
        <key>Log prefix</key>
        <value>
          <name>Log prefix</name>
        </value>
      </entry>
      <entry>
        <key>character-set</key>
        <value>
          <name>character-set</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <!-- Configured values; an entry with only a key carries no explicit value. -->
    <properties>
      <entry>
        <key>Log Level</key>
        <value>info</value>
      </entry>
      <entry>
        <key>Log Payload</key>
        <value>false</value>
      </entry>
      <entry>
        <key>Attributes to Log</key>
      </entry>
      <entry>
        <key>attributes-to-log-regex</key>
        <value>.*</value>
      </entry>
      <entry>
        <key>Attributes to Ignore</key>
      </entry>
      <entry>
        <key>attributes-to-ignore-regex</key>
      </entry>
      <entry>
        <key>Log prefix</key>
      </entry>
      <entry>
        <key>character-set</key>
        <value>UTF-8</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <executionNodeRestricted>false</executionNodeRestricted>
  <name>LogAttribute</name>
  <!-- success is auto-terminated because no connection in this group takes
       LogAttribute as its source. With the relationship neither connected nor
       auto-terminated, NiFi reports the processor as invalid and refuses to
       start it, so the queued FlowFiles could never be drained. -->
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>success</name>
  </relationships>
  <state>STOPPED</state>
  <style/>
  <type>org.apache.nifi.processors.standard.LogAttribute</type>
</processors>
<!-- MergeRecord processor: the core of this template.
     It receives FlowFiles from GenerateFlowFile over the success connection.
     record-reader and record-writer hold the ids of the XMLReader and
     XMLRecordSetWriter controller services defined in this template.
     Explicit settings: Bin-Packing Algorithm, Keep Only Common Attributes,
     min-records 1, max-records 1000, min-bin-size 0 B, max.bin.count 10.
     correlation-attribute-name, max-bin-size and max-bin-age carry no value.
     Relationships: failure and merged are both connected to LogAttribute;
     original is auto-terminated. -->
<processors>
  <id>7a076db4-2f6c-3ddf-0000-000000000000</id>
  <parentGroupId>ec43db5a-b2b5-3702-0000-000000000000</parentGroupId>
  <position>
    <x>499.9120948028458</x>
    <y>207.07339477539062</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.9.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <!-- Property descriptors exposed by the processor. -->
    <descriptors>
      <entry>
        <key>record-reader</key>
        <value>
          <identifiesControllerService>org.apache.nifi.serialization.RecordReaderFactory</identifiesControllerService>
          <name>record-reader</name>
        </value>
      </entry>
      <entry>
        <key>record-writer</key>
        <value>
          <identifiesControllerService>org.apache.nifi.serialization.RecordSetWriterFactory</identifiesControllerService>
          <name>record-writer</name>
        </value>
      </entry>
      <entry>
        <key>merge-strategy</key>
        <value>
          <name>merge-strategy</name>
        </value>
      </entry>
      <entry>
        <key>correlation-attribute-name</key>
        <value>
          <name>correlation-attribute-name</name>
        </value>
      </entry>
      <entry>
        <key>Attribute Strategy</key>
        <value>
          <name>Attribute Strategy</name>
        </value>
      </entry>
      <entry>
        <key>min-records</key>
        <value>
          <name>min-records</name>
        </value>
      </entry>
      <entry>
        <key>max-records</key>
        <value>
          <name>max-records</name>
        </value>
      </entry>
      <entry>
        <key>min-bin-size</key>
        <value>
          <name>min-bin-size</name>
        </value>
      </entry>
      <entry>
        <key>max-bin-size</key>
        <value>
          <name>max-bin-size</name>
        </value>
      </entry>
      <entry>
        <key>max-bin-age</key>
        <value>
          <name>max-bin-age</name>
        </value>
      </entry>
      <entry>
        <key>max.bin.count</key>
        <value>
          <name>max.bin.count</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <!-- Configured values; an entry with only a key carries no explicit value. -->
    <properties>
      <entry>
        <key>record-reader</key>
        <value>36c4d83a-ff47-38e2-0000-000000000000</value>
      </entry>
      <entry>
        <key>record-writer</key>
        <value>c3413461-b440-3fa3-0000-000000000000</value>
      </entry>
      <entry>
        <key>merge-strategy</key>
        <value>Bin-Packing Algorithm</value>
      </entry>
      <entry>
        <key>correlation-attribute-name</key>
      </entry>
      <entry>
        <key>Attribute Strategy</key>
        <value>Keep Only Common Attributes</value>
      </entry>
      <entry>
        <key>min-records</key>
        <value>1</value>
      </entry>
      <entry>
        <key>max-records</key>
        <value>1000</value>
      </entry>
      <entry>
        <key>min-bin-size</key>
        <value>0 B</value>
      </entry>
      <entry>
        <key>max-bin-size</key>
      </entry>
      <entry>
        <key>max-bin-age</key>
      </entry>
      <entry>
        <key>max.bin.count</key>
        <value>10</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <executionNodeRestricted>false</executionNodeRestricted>
  <name>MergeRecord</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>failure</name>
  </relationships>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>merged</name>
  </relationships>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>original</name>
  </relationships>
  <state>RUNNING</state>
  <style/>
  <type>org.apache.nifi.processors.standard.MergeRecord</type>
</processors>
<!-- GenerateFlowFile processor: the source of test data for this flow.
     Its success relationship is connected to MergeRecord.
     Explicit settings: File Size 0B, Batch Size 3, Data Format Text,
     Unique FlowFiles false, character-set UTF-8, scheduling period 1d.
     generate-ff-custom-text holds a small escaped XML document (a name element
     with first and last children).
     avro.schema holds an Avro record schema with string fields first and last.
     NOTE(review): avro.schema is presumably a user-added property that ends up
     as a FlowFile attribute read by the XMLReader; confirm in the NiFi docs.
     The two multi-line values below are text content, so their line breaks and
     leading columns are data and must not be re-indented. -->
<processors>
  <id>da443c09-0843-3d74-0000-000000000000</id>
  <parentGroupId>ec43db5a-b2b5-3702-0000-000000000000</parentGroupId>
  <position>
    <x>499.9120948028458</x>
    <y>-55.0</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.9.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <!-- Property descriptors exposed by the processor. -->
    <descriptors>
      <entry>
        <key>File Size</key>
        <value>
          <name>File Size</name>
        </value>
      </entry>
      <entry>
        <key>Batch Size</key>
        <value>
          <name>Batch Size</name>
        </value>
      </entry>
      <entry>
        <key>Data Format</key>
        <value>
          <name>Data Format</name>
        </value>
      </entry>
      <entry>
        <key>Unique FlowFiles</key>
        <value>
          <name>Unique FlowFiles</name>
        </value>
      </entry>
      <entry>
        <key>generate-ff-custom-text</key>
        <value>
          <name>generate-ff-custom-text</name>
        </value>
      </entry>
      <entry>
        <key>character-set</key>
        <value>
          <name>character-set</name>
        </value>
      </entry>
      <entry>
        <key>avro.schema</key>
        <value>
          <name>avro.schema</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <!-- Configured values. -->
    <properties>
      <entry>
        <key>File Size</key>
        <value>0B</value>
      </entry>
      <entry>
        <key>Batch Size</key>
        <value>3</value>
      </entry>
      <entry>
        <key>Data Format</key>
        <value>Text</value>
      </entry>
      <entry>
        <key>Unique FlowFiles</key>
        <value>false</value>
      </entry>
      <entry>
        <key>generate-ff-custom-text</key>
        <value>&lt;?xml version="1.0" encoding="UTF-8"?&gt;
&lt;name&gt;
&lt;first&gt;foo&lt;/first&gt;
&lt;last&gt;bar&lt;/last&gt;
&lt;/name&gt;</value>
      </entry>
      <entry>
        <key>character-set</key>
        <value>UTF-8</value>
      </entry>
      <entry>
        <key>avro.schema</key>
        <value>{
"type": "record",
"name": "name",
"namespace": "example.com",
"fields": [
{
"name": "first",
"type": "string"
},
{
"name": "last",
"type": "string"
}
]
}</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>1d</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <executionNodeRestricted>false</executionNodeRestricted>
  <name>GenerateFlowFile</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <state>RUNNING</state>
  <style/>
  <type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
</processors>
</contents>
<name>MergeXMLRecord</name>
<variables/>
</processGroups>
</snippet>
<timestamp>11/29/2018 10:26:50 JST</timestamp>
</template>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment