Created
November 29, 2018 01:27
-
-
Save ijokarumawak/eeaf519a7ceea476fa452f7aa2ee5671 to your computer and use it in GitHub Desktop.
A NiFi example template to illustrate how to merge multiple XML files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8" standalone="yes"?> | |
<template encoding-version="1.2"> | |
<description></description> | |
<groupId>39379f66-0167-1000-9951-3cf7c004e310</groupId> | |
<name>Merge XML Records</name> | |
<snippet> | |
<!--
  Controller service "XMLReader" (org.apache.nifi.xml.XMLReader).
  Referenced by id 36c4d83a-ff47-38e2-0000-000000000000 from the MergeRecord
  processor's "record-reader" property.
  Only "schema-access-strategy" carries an explicit value (schema-text-property);
  every other property entry has no <value> element.
  NOTE(review): with no explicit "schema-text", the reader presumably falls back to
  the component default, which NiFi documents as the avro.schema attribute. Confirm
  against the XMLReader documentation for the NiFi version in use.
-->
<controllerServices>
  <id>36c4d83a-ff47-38e2-0000-000000000000</id>
  <parentGroupId>376efa9a-48fc-3e3d-0000-000000000000</parentGroupId>
  <bundle>
    <artifact>nifi-record-serialization-services-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.9.0-SNAPSHOT</version>
  </bundle>
  <comments></comments>
  <!-- Property descriptors: one entry per property the service exposes. -->
  <descriptors>
    <entry>
      <key>schema-access-strategy</key>
      <value>
        <name>schema-access-strategy</name>
      </value>
    </entry>
    <entry>
      <key>schema-registry</key>
      <value>
        <identifiesControllerService>org.apache.nifi.schemaregistry.services.SchemaRegistry</identifiesControllerService>
        <name>schema-registry</name>
      </value>
    </entry>
    <entry>
      <key>schema-name</key>
      <value>
        <name>schema-name</name>
      </value>
    </entry>
    <entry>
      <key>schema-version</key>
      <value>
        <name>schema-version</name>
      </value>
    </entry>
    <entry>
      <key>schema-branch</key>
      <value>
        <name>schema-branch</name>
      </value>
    </entry>
    <entry>
      <key>schema-text</key>
      <value>
        <name>schema-text</name>
      </value>
    </entry>
    <entry>
      <key>record_format</key>
      <value>
        <name>record_format</name>
      </value>
    </entry>
    <entry>
      <key>attribute_prefix</key>
      <value>
        <name>attribute_prefix</name>
      </value>
    </entry>
    <entry>
      <key>content_field_name</key>
      <value>
        <name>content_field_name</name>
      </value>
    </entry>
    <entry>
      <key>Date Format</key>
      <value>
        <name>Date Format</name>
      </value>
    </entry>
    <entry>
      <key>Time Format</key>
      <value>
        <name>Time Format</name>
      </value>
    </entry>
    <entry>
      <key>Timestamp Format</key>
      <value>
        <name>Timestamp Format</name>
      </value>
    </entry>
  </descriptors>
  <name>XMLReader</name>
  <persistsState>false</persistsState>
  <!-- Property values: an entry without <value> has no explicit setting. -->
  <properties>
    <entry>
      <key>schema-access-strategy</key>
      <value>schema-text-property</value>
    </entry>
    <entry>
      <key>schema-registry</key>
    </entry>
    <entry>
      <key>schema-name</key>
    </entry>
    <entry>
      <key>schema-version</key>
    </entry>
    <entry>
      <key>schema-branch</key>
    </entry>
    <entry>
      <key>schema-text</key>
    </entry>
    <entry>
      <key>record_format</key>
    </entry>
    <entry>
      <key>attribute_prefix</key>
    </entry>
    <entry>
      <key>content_field_name</key>
    </entry>
    <entry>
      <key>Date Format</key>
    </entry>
    <entry>
      <key>Time Format</key>
    </entry>
    <entry>
      <key>Timestamp Format</key>
    </entry>
  </properties>
  <state>ENABLED</state>
  <type>org.apache.nifi.xml.XMLReader</type>
</controllerServices>
<!--
  Controller service "XMLRecordSetWriter" (org.apache.nifi.xml.XMLRecordSetWriter).
  Referenced by id c3413461-b440-3fa3-0000-000000000000 from the MergeRecord
  processor's "record-writer" property.
  Explicit settings: "Schema Write Strategy" = no-schema and "root_tag_name" = names.
  Every other property entry has no <value> element.
-->
<controllerServices>
  <id>c3413461-b440-3fa3-0000-000000000000</id>
  <parentGroupId>376efa9a-48fc-3e3d-0000-000000000000</parentGroupId>
  <bundle>
    <artifact>nifi-record-serialization-services-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.9.0-SNAPSHOT</version>
  </bundle>
  <comments></comments>
  <!-- Property descriptors: one entry per property the service exposes. -->
  <descriptors>
    <entry>
      <key>Schema Write Strategy</key>
      <value>
        <name>Schema Write Strategy</name>
      </value>
    </entry>
    <entry>
      <key>schema-access-strategy</key>
      <value>
        <name>schema-access-strategy</name>
      </value>
    </entry>
    <entry>
      <key>schema-registry</key>
      <value>
        <identifiesControllerService>org.apache.nifi.schemaregistry.services.SchemaRegistry</identifiesControllerService>
        <name>schema-registry</name>
      </value>
    </entry>
    <entry>
      <key>schema-name</key>
      <value>
        <name>schema-name</name>
      </value>
    </entry>
    <entry>
      <key>schema-version</key>
      <value>
        <name>schema-version</name>
      </value>
    </entry>
    <entry>
      <key>schema-branch</key>
      <value>
        <name>schema-branch</name>
      </value>
    </entry>
    <entry>
      <key>schema-text</key>
      <value>
        <name>schema-text</name>
      </value>
    </entry>
    <entry>
      <key>Date Format</key>
      <value>
        <name>Date Format</name>
      </value>
    </entry>
    <entry>
      <key>Time Format</key>
      <value>
        <name>Time Format</name>
      </value>
    </entry>
    <entry>
      <key>Timestamp Format</key>
      <value>
        <name>Timestamp Format</name>
      </value>
    </entry>
    <entry>
      <key>suppress_nulls</key>
      <value>
        <name>suppress_nulls</name>
      </value>
    </entry>
    <entry>
      <key>pretty_print_xml</key>
      <value>
        <name>pretty_print_xml</name>
      </value>
    </entry>
    <entry>
      <key>root_tag_name</key>
      <value>
        <name>root_tag_name</name>
      </value>
    </entry>
    <entry>
      <key>record_tag_name</key>
      <value>
        <name>record_tag_name</name>
      </value>
    </entry>
    <entry>
      <key>array_wrapping</key>
      <value>
        <name>array_wrapping</name>
      </value>
    </entry>
    <entry>
      <key>array_tag_name</key>
      <value>
        <name>array_tag_name</name>
      </value>
    </entry>
    <entry>
      <key>Character Set</key>
      <value>
        <name>Character Set</name>
      </value>
    </entry>
  </descriptors>
  <name>XMLRecordSetWriter</name>
  <persistsState>false</persistsState>
  <!-- Property values: an entry without <value> has no explicit setting. -->
  <properties>
    <entry>
      <key>Schema Write Strategy</key>
      <value>no-schema</value>
    </entry>
    <entry>
      <key>schema-access-strategy</key>
    </entry>
    <entry>
      <key>schema-registry</key>
    </entry>
    <entry>
      <key>schema-name</key>
    </entry>
    <entry>
      <key>schema-version</key>
    </entry>
    <entry>
      <key>schema-branch</key>
    </entry>
    <entry>
      <key>schema-text</key>
    </entry>
    <entry>
      <key>Date Format</key>
    </entry>
    <entry>
      <key>Time Format</key>
    </entry>
    <entry>
      <key>Timestamp Format</key>
    </entry>
    <entry>
      <key>suppress_nulls</key>
    </entry>
    <entry>
      <key>pretty_print_xml</key>
    </entry>
    <!-- Name of the root element that wraps the written records. -->
    <entry>
      <key>root_tag_name</key>
      <value>names</value>
    </entry>
    <entry>
      <key>record_tag_name</key>
    </entry>
    <entry>
      <key>array_wrapping</key>
    </entry>
    <entry>
      <key>array_tag_name</key>
    </entry>
    <entry>
      <key>Character Set</key>
    </entry>
  </properties>
  <state>ENABLED</state>
  <type>org.apache.nifi.xml.XMLRecordSetWriter</type>
</controllerServices>
<processGroups> | |
<id>ec43db5a-b2b5-3702-0000-000000000000</id> | |
<parentGroupId>376efa9a-48fc-3e3d-0000-000000000000</parentGroupId> | |
<position> | |
<x>0.0</x> | |
<y>0.0</y> | |
</position> | |
<comments></comments> | |
<contents> | |
<!--
  Connection for the "failure" relationship.
  Source:      processor 7a076db4-2f6c-3ddf-0000-000000000000 (MergeRecord)
  Destination: processor 69201528-51d6-30e3-0000-000000000000 (LogAttribute)
  Carries one bend point so the line is routed around the "merged" connection
  on the canvas (presumably; the bend has no effect on data flow).
-->
<connections>
  <id>59dd9b8a-63e0-3ef6-0000-000000000000</id>
  <parentGroupId>ec43db5a-b2b5-3702-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <bends>
    <x>474.40303108214266</x>
    <y>412.2104512544876</y>
  </bends>
  <destination>
    <groupId>ec43db5a-b2b5-3702-0000-000000000000</groupId>
    <id>69201528-51d6-30e3-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <loadBalanceCompression>DO_NOT_COMPRESS</loadBalanceCompression>
  <loadBalancePartitionAttribute></loadBalancePartitionAttribute>
  <loadBalanceStatus>LOAD_BALANCE_NOT_CONFIGURED</loadBalanceStatus>
  <loadBalanceStrategy>DO_NOT_LOAD_BALANCE</loadBalanceStrategy>
  <name></name>
  <selectedRelationships>failure</selectedRelationships>
  <source>
    <groupId>ec43db5a-b2b5-3702-0000-000000000000</groupId>
    <id>7a076db4-2f6c-3ddf-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!--
  Connection for the "success" relationship.
  Source:      processor da443c09-0843-3d74-0000-000000000000 (GenerateFlowFile)
  Destination: processor 7a076db4-2f6c-3ddf-0000-000000000000 (MergeRecord)
-->
<connections>
  <id>5b093e34-9e77-39ef-0000-000000000000</id>
  <parentGroupId>ec43db5a-b2b5-3702-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>ec43db5a-b2b5-3702-0000-000000000000</groupId>
    <id>7a076db4-2f6c-3ddf-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <loadBalanceCompression>DO_NOT_COMPRESS</loadBalanceCompression>
  <loadBalancePartitionAttribute></loadBalancePartitionAttribute>
  <loadBalanceStatus>LOAD_BALANCE_NOT_CONFIGURED</loadBalanceStatus>
  <loadBalanceStrategy>DO_NOT_LOAD_BALANCE</loadBalanceStrategy>
  <name></name>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>ec43db5a-b2b5-3702-0000-000000000000</groupId>
    <id>da443c09-0843-3d74-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!--
  Connection for the "merged" relationship.
  Source:      processor 7a076db4-2f6c-3ddf-0000-000000000000 (MergeRecord)
  Destination: processor 69201528-51d6-30e3-0000-000000000000 (LogAttribute)
-->
<connections>
  <id>b79a6367-beb8-32de-0000-000000000000</id>
  <parentGroupId>ec43db5a-b2b5-3702-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>ec43db5a-b2b5-3702-0000-000000000000</groupId>
    <id>69201528-51d6-30e3-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <loadBalanceCompression>DO_NOT_COMPRESS</loadBalanceCompression>
  <loadBalancePartitionAttribute></loadBalancePartitionAttribute>
  <loadBalanceStatus>LOAD_BALANCE_NOT_CONFIGURED</loadBalanceStatus>
  <loadBalanceStrategy>DO_NOT_LOAD_BALANCE</loadBalanceStrategy>
  <name></name>
  <selectedRelationships>merged</selectedRelationships>
  <source>
    <groupId>ec43db5a-b2b5-3702-0000-000000000000</groupId>
    <id>7a076db4-2f6c-3ddf-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!--
  Processor "LogAttribute" (org.apache.nifi.processors.standard.LogAttribute).
  Destination of both the "merged" and the "failure" connections coming from
  MergeRecord.
  Explicit settings: Log Level = info, Log Payload = false,
  attributes-to-log-regex = .*, character-set = UTF-8.
  The processor is exported in state STOPPED.
  NOTE(review): the "success" relationship is not auto-terminated and no connection
  in this template uses this processor as a source. NiFi normally requires every
  relationship to be connected or auto-terminated before a processor can start.
  Confirm whether this is intentional (for example, to keep merged FlowFiles queued
  for inspection).
-->
<processors>
  <id>69201528-51d6-30e3-0000-000000000000</id>
  <parentGroupId>ec43db5a-b2b5-3702-0000-000000000000</parentGroupId>
  <position>
    <x>499.9120948028458</x>
    <y>487.3475077335846</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.9.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <!-- Property descriptors: one entry per property the processor exposes. -->
    <descriptors>
      <entry>
        <key>Log Level</key>
        <value>
          <name>Log Level</name>
        </value>
      </entry>
      <entry>
        <key>Log Payload</key>
        <value>
          <name>Log Payload</name>
        </value>
      </entry>
      <entry>
        <key>Attributes to Log</key>
        <value>
          <name>Attributes to Log</name>
        </value>
      </entry>
      <entry>
        <key>attributes-to-log-regex</key>
        <value>
          <name>attributes-to-log-regex</name>
        </value>
      </entry>
      <entry>
        <key>Attributes to Ignore</key>
        <value>
          <name>Attributes to Ignore</name>
        </value>
      </entry>
      <entry>
        <key>attributes-to-ignore-regex</key>
        <value>
          <name>attributes-to-ignore-regex</name>
        </value>
      </entry>
      <entry>
        <key>Log prefix</key>
        <value>
          <name>Log prefix</name>
        </value>
      </entry>
      <entry>
        <key>character-set</key>
        <value>
          <name>character-set</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <!-- Property values: an entry without <value> has no explicit setting. -->
    <properties>
      <entry>
        <key>Log Level</key>
        <value>info</value>
      </entry>
      <entry>
        <key>Log Payload</key>
        <value>false</value>
      </entry>
      <entry>
        <key>Attributes to Log</key>
      </entry>
      <entry>
        <key>attributes-to-log-regex</key>
        <value>.*</value>
      </entry>
      <entry>
        <key>Attributes to Ignore</key>
      </entry>
      <entry>
        <key>attributes-to-ignore-regex</key>
      </entry>
      <entry>
        <key>Log prefix</key>
      </entry>
      <entry>
        <key>character-set</key>
        <value>UTF-8</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <executionNodeRestricted>false</executionNodeRestricted>
  <name>LogAttribute</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <state>STOPPED</state>
  <style/>
  <type>org.apache.nifi.processors.standard.LogAttribute</type>
</processors>
<!--
  Processor "MergeRecord" (org.apache.nifi.processors.standard.MergeRecord).
  Receives FlowFiles from GenerateFlowFile over the "success" connection and
  routes "merged" and "failure" to LogAttribute; "original" is auto-terminated.
  Reads with controller service 36c4d83a-ff47-38e2-0000-000000000000 (XMLReader)
  and writes with c3413461-b440-3fa3-0000-000000000000 (XMLRecordSetWriter).
  Bin settings: Bin-Packing Algorithm, 1 to 1000 records per bin, minimum bin
  size 0 B, at most 10 bins; no maximum bin size or maximum bin age is set.
  The processor is exported in state RUNNING.
-->
<processors>
  <id>7a076db4-2f6c-3ddf-0000-000000000000</id>
  <parentGroupId>ec43db5a-b2b5-3702-0000-000000000000</parentGroupId>
  <position>
    <x>499.9120948028458</x>
    <y>207.07339477539062</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.9.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <!-- Property descriptors: one entry per property the processor exposes. -->
    <descriptors>
      <entry>
        <key>record-reader</key>
        <value>
          <identifiesControllerService>org.apache.nifi.serialization.RecordReaderFactory</identifiesControllerService>
          <name>record-reader</name>
        </value>
      </entry>
      <entry>
        <key>record-writer</key>
        <value>
          <identifiesControllerService>org.apache.nifi.serialization.RecordSetWriterFactory</identifiesControllerService>
          <name>record-writer</name>
        </value>
      </entry>
      <entry>
        <key>merge-strategy</key>
        <value>
          <name>merge-strategy</name>
        </value>
      </entry>
      <entry>
        <key>correlation-attribute-name</key>
        <value>
          <name>correlation-attribute-name</name>
        </value>
      </entry>
      <entry>
        <key>Attribute Strategy</key>
        <value>
          <name>Attribute Strategy</name>
        </value>
      </entry>
      <entry>
        <key>min-records</key>
        <value>
          <name>min-records</name>
        </value>
      </entry>
      <entry>
        <key>max-records</key>
        <value>
          <name>max-records</name>
        </value>
      </entry>
      <entry>
        <key>min-bin-size</key>
        <value>
          <name>min-bin-size</name>
        </value>
      </entry>
      <entry>
        <key>max-bin-size</key>
        <value>
          <name>max-bin-size</name>
        </value>
      </entry>
      <entry>
        <key>max-bin-age</key>
        <value>
          <name>max-bin-age</name>
        </value>
      </entry>
      <entry>
        <key>max.bin.count</key>
        <value>
          <name>max.bin.count</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <!-- Property values: an entry without <value> has no explicit setting. -->
    <properties>
      <!-- Id of the XMLReader controller service. -->
      <entry>
        <key>record-reader</key>
        <value>36c4d83a-ff47-38e2-0000-000000000000</value>
      </entry>
      <!-- Id of the XMLRecordSetWriter controller service. -->
      <entry>
        <key>record-writer</key>
        <value>c3413461-b440-3fa3-0000-000000000000</value>
      </entry>
      <entry>
        <key>merge-strategy</key>
        <value>Bin-Packing Algorithm</value>
      </entry>
      <entry>
        <key>correlation-attribute-name</key>
      </entry>
      <entry>
        <key>Attribute Strategy</key>
        <value>Keep Only Common Attributes</value>
      </entry>
      <entry>
        <key>min-records</key>
        <value>1</value>
      </entry>
      <entry>
        <key>max-records</key>
        <value>1000</value>
      </entry>
      <entry>
        <key>min-bin-size</key>
        <value>0 B</value>
      </entry>
      <entry>
        <key>max-bin-size</key>
      </entry>
      <entry>
        <key>max-bin-age</key>
      </entry>
      <entry>
        <key>max.bin.count</key>
        <value>10</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <executionNodeRestricted>false</executionNodeRestricted>
  <name>MergeRecord</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>failure</name>
  </relationships>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>merged</name>
  </relationships>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>original</name>
  </relationships>
  <state>RUNNING</state>
  <style/>
  <type>org.apache.nifi.processors.standard.MergeRecord</type>
</processors>
<!--
  Processor "GenerateFlowFile" (org.apache.nifi.processors.standard.GenerateFlowFile).
  Source of the "success" connection into MergeRecord.
  Emits a batch of 3 non-unique text FlowFiles per run, each holding the small
  XML document configured in "generate-ff-custom-text"; scheduling period is 1d.
  The "avro.schema" property holds an Avro record schema (fields "first" and
  "last", both strings) that matches that document.
  NOTE(review): "avro.schema" is presumably a user-defined property that NiFi adds
  to each FlowFile as an attribute for the XMLReader to pick up. Confirm.
  The processor is exported in state RUNNING.
-->
<processors>
  <id>da443c09-0843-3d74-0000-000000000000</id>
  <parentGroupId>ec43db5a-b2b5-3702-0000-000000000000</parentGroupId>
  <position>
    <x>499.9120948028458</x>
    <y>-55.0</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.9.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <!-- Property descriptors: one entry per property the processor exposes. -->
    <descriptors>
      <entry>
        <key>File Size</key>
        <value>
          <name>File Size</name>
        </value>
      </entry>
      <entry>
        <key>Batch Size</key>
        <value>
          <name>Batch Size</name>
        </value>
      </entry>
      <entry>
        <key>Data Format</key>
        <value>
          <name>Data Format</name>
        </value>
      </entry>
      <entry>
        <key>Unique FlowFiles</key>
        <value>
          <name>Unique FlowFiles</name>
        </value>
      </entry>
      <entry>
        <key>generate-ff-custom-text</key>
        <value>
          <name>generate-ff-custom-text</name>
        </value>
      </entry>
      <entry>
        <key>character-set</key>
        <value>
          <name>character-set</name>
        </value>
      </entry>
      <entry>
        <key>avro.schema</key>
        <value>
          <name>avro.schema</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>File Size</key>
        <value>0B</value>
      </entry>
      <entry>
        <key>Batch Size</key>
        <value>3</value>
      </entry>
      <entry>
        <key>Data Format</key>
        <value>Text</value>
      </entry>
      <entry>
        <key>Unique FlowFiles</key>
        <value>false</value>
      </entry>
      <!--
        FlowFile content. The embedded markup is entity-escaped so that it stays
        character data of <value>: a raw XML declaration is not allowed inside a
        document, and raw tags would be parsed as child elements of <value>.
        The text starts immediately after <value> and its continuation lines are
        not indented, because every character here becomes FlowFile content and an
        XML declaration must be the very first thing in the generated document.
      -->
      <entry>
        <key>generate-ff-custom-text</key>
        <value>&lt;?xml version="1.0" encoding="UTF-8"?&gt;
&lt;name&gt;
&lt;first&gt;foo&lt;/first&gt;
&lt;last&gt;bar&lt;/last&gt;
&lt;/name&gt;</value>
      </entry>
      <entry>
        <key>character-set</key>
        <value>UTF-8</value>
      </entry>
      <!--
        Avro schema describing one "name" record. The JSON is the literal property
        value, so its lines are kept unindented rather than aligned with the
        surrounding markup.
      -->
      <entry>
        <key>avro.schema</key>
        <value>{
"type": "record",
"name": "name",
"namespace": "example.com",
"fields": [
{
"name": "first",
"type": "string"
},
{
"name": "last",
"type": "string"
}
]
}</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>1d</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <executionNodeRestricted>false</executionNodeRestricted>
  <name>GenerateFlowFile</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <state>RUNNING</state>
  <style/>
  <type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
</processors>
</contents> | |
<name>MergeXMLRecord</name> | |
<variables/> | |
</processGroups> | |
</snippet> | |
<timestamp>11/29/2018 10:26:50 JST</timestamp> | |
</template> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment