Skip to content

Instantly share code, notes, and snippets.

@alopresto
Created September 3, 2018 20:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alopresto/6ff0edbf913c0ef667d757ce13c02362 to your computer and use it in GitHub Desktop.
Save alopresto/6ff0edbf913c0ef667d757ce13c02362 to your computer and use it in GitHub Desktop.
Apache NiFi template that replaces JSON keys that contain '.' as a delimiter with '_' while leaving JSON values unmodified.
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<template encoding-version="1.2">
<!--
  Template metadata. The snippet that follows wires five processors together:
    * two GenerateFlowFile processors ("Create original JSON" and
      "Create JSON with split lines") each send success to ReplaceText;
    * ReplaceText sends success to RouteOnContent;
    * RouteOnContent sends key_with_dot back to ReplaceText (a loop) and
      unmatched to LogAttribute.
  NOTE(review): groupId presumably identifies the process group the template
  was created from; confirm before relying on it.
-->
<description>Replaces JSON keys that contain '.' as a delimiter with '_' while leaving JSON values unmodified. </description>
<groupId>a0f3dfb7-0165-1000-8310-4fb72628adaa</groupId>
<name>Replace dots in JSON keys with underscores</name>
<snippet>
<connections>
  <!-- Edge: success of processor 66a41999 ("Create original JSON") feeds
       processor 5fc55bfd (ReplaceText). The connection itself is unnamed. -->
  <id>359b6ca9-02ac-37c3-0000-000000000000</id>
  <parentGroupId>e223d59f-ac39-3ea4-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>e223d59f-ac39-3ea4-0000-000000000000</groupId>
    <id>5fc55bfd-96ac-33a8-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name/>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>e223d59f-ac39-3ea4-0000-000000000000</groupId>
    <id>66a41999-4351-33f7-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<connections>
  <!-- Loop-back edge: the key_with_dot relationship of processor dc2d6202
       (RouteOnContent) returns flow files to processor 5fc55bfd (ReplaceText).
       The two bend points only shape how the line is drawn on the canvas. -->
  <id>6c087193-b8a7-3be0-0000-000000000000</id>
  <parentGroupId>e223d59f-ac39-3ea4-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <bends>
    <x>502.0</x>
    <y>542.0</y>
  </bends>
  <bends>
    <x>499.0</x>
    <y>311.0</y>
  </bends>
  <destination>
    <groupId>e223d59f-ac39-3ea4-0000-000000000000</groupId>
    <id>5fc55bfd-96ac-33a8-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>0</labelIndex>
  <name/>
  <selectedRelationships>key_with_dot</selectedRelationships>
  <source>
    <groupId>e223d59f-ac39-3ea4-0000-000000000000</groupId>
    <id>dc2d6202-63f0-3875-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<connections>
  <!-- Edge: success of processor 354cfd00 ("Create JSON with split lines")
       feeds processor 5fc55bfd (ReplaceText). -->
  <id>a95477cd-adb0-3d91-0000-000000000000</id>
  <parentGroupId>e223d59f-ac39-3ea4-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>e223d59f-ac39-3ea4-0000-000000000000</groupId>
    <id>5fc55bfd-96ac-33a8-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name/>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>e223d59f-ac39-3ea4-0000-000000000000</groupId>
    <id>354cfd00-c682-35f6-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<connections>
  <!-- Exit edge: the unmatched relationship of processor dc2d6202
       (RouteOnContent) goes to processor 7f34ff95 (LogAttribute). -->
  <id>d6c96cbb-38b5-37ef-0000-000000000000</id>
  <parentGroupId>e223d59f-ac39-3ea4-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>e223d59f-ac39-3ea4-0000-000000000000</groupId>
    <id>7f34ff95-61eb-375f-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name/>
  <selectedRelationships>unmatched</selectedRelationships>
  <source>
    <groupId>e223d59f-ac39-3ea4-0000-000000000000</groupId>
    <id>dc2d6202-63f0-3875-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<connections>
  <!-- Edge: success of processor 5fc55bfd (ReplaceText) feeds processor
       dc2d6202 (RouteOnContent), which decides whether another pass is needed. -->
  <id>e9b508f4-9807-399e-0000-000000000000</id>
  <parentGroupId>e223d59f-ac39-3ea4-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>e223d59f-ac39-3ea4-0000-000000000000</groupId>
    <id>dc2d6202-63f0-3875-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name/>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>e223d59f-ac39-3ea4-0000-000000000000</groupId>
    <id>5fc55bfd-96ac-33a8-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<processors>
  <!-- GenerateFlowFile "Create JSON with split lines": emits a fixed JSON
       sample as custom text every 10 sec. In this sample the two dotted
       spark_conf keys are broken across lines (key, colon and value are not
       all on one line). -->
  <id>354cfd00-c682-35f6-0000-000000000000</id>
  <parentGroupId>e223d59f-ac39-3ea4-0000-000000000000</parentGroupId>
  <position>
    <x>460.0</x>
    <y>1.0</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.8.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>File Size</key>
        <value>
          <name>File Size</name>
        </value>
      </entry>
      <entry>
        <key>Batch Size</key>
        <value>
          <name>Batch Size</name>
        </value>
      </entry>
      <entry>
        <key>Data Format</key>
        <value>
          <name>Data Format</name>
        </value>
      </entry>
      <entry>
        <key>Unique FlowFiles</key>
        <value>
          <name>Unique FlowFiles</name>
        </value>
      </entry>
      <entry>
        <key>generate-ff-custom-text</key>
        <value>
          <name>generate-ff-custom-text</name>
        </value>
      </entry>
      <entry>
        <key>character-set</key>
        <value>
          <name>character-set</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>File Size</key>
        <value>0B</value>
      </entry>
      <entry>
        <key>Batch Size</key>
        <value>1</value>
      </entry>
      <entry>
        <key>Data Format</key>
        <value>Text</value>
      </entry>
      <entry>
        <key>Unique FlowFiles</key>
        <value>false</value>
      </entry>
      <!-- The text of the next value is the flow file content; its line
           breaks and lack of indentation are data and must not be reformatted. -->
      <entry>
        <key>generate-ff-custom-text</key>
        <value>{
"events": [
{
"cluster_id": "0717-035521-puny598",
"timestamp": 1531896847915,
"type": "EDITED",
"details": {
"previous_attributes": {
"cluster_name": "Kylo",
"spark_version": "4.1.x-scala2.11",
"spark_conf": {
"spark.databricks.acl.dfAclsEnabled":
"true",
"spark.databricks.repl.allowedLanguages"
: "python,sql"
},
"node_type_id": "Standard_DS3_v2",
"driver_node_type_id": "Standard_DS3_v2",
"autotermination_minutes": 10,
"enable_elastic_disk": true,
"cluster_source": "UI"
},
"attributes": {
"cluster_name": "Kylo",
"spark_version": "4.1.x-scala2.11",
"node_type_id": "Standard_DS3_v2",
"driver_node_type_id": "Standard_DS3_v2",
"autotermination_minutes": 10,
"enable_elastic_disk": true,
"cluster_source": "UI"
},
"previous_cluster_size": {
"autoscale": {
"min_workers": 1,
"max_workers": 8
}
},
"cluster_size": {
"autoscale": {
"min_workers": 1,
"max_workers": 8
}
},
"user": ""
}
},
{
"cluster_id": "0717-035521-puny598",
"timestamp": 1535540053785,
"type": "TERMINATING",
"details": {
"reason": {
"code": "INACTIVITY",
"parameters": {
"inactivity_duration_min": "15"
}
}
}
},
{
"cluster_id": "0717-035521-puny598",
"timestamp": 1535537117300,
"type": "EXPANDED_DISK",
"details": {
"previous_disk_size": 29454626816,
"disk_size": 136828809216,
"free_space": 17151311872,
"instance_id": "6cea5c332af94d7f85aff23e5d8cea37"
}
}
]
}</value>
      </entry>
      <entry>
        <key>character-set</key>
        <value>UTF-8</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>10 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <executionNodeRestricted>false</executionNodeRestricted>
  <name>Create JSON with split lines</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <state>STOPPED</state>
  <style/>
  <type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
</processors>
<processors>
  <!-- ReplaceText: rewrites one '.' to '_' in every dotted JSON key per pass.
       Its success output goes to RouteOnContent, which sends the flow file
       back here while any dotted key remains. The failure relationship is
       auto-terminated. -->
  <id>5fc55bfd-96ac-33a8-0000-000000000000</id>
  <parentGroupId>e223d59f-ac39-3ea4-0000-000000000000</parentGroupId>
  <position>
    <x>7.0</x>
    <y>246.0</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.8.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>Regular Expression</key>
        <value>
          <name>Regular Expression</name>
        </value>
      </entry>
      <entry>
        <key>Replacement Value</key>
        <value>
          <name>Replacement Value</name>
        </value>
      </entry>
      <entry>
        <key>Character Set</key>
        <value>
          <name>Character Set</name>
        </value>
      </entry>
      <entry>
        <key>Maximum Buffer Size</key>
        <value>
          <name>Maximum Buffer Size</name>
        </value>
      </entry>
      <entry>
        <key>Replacement Strategy</key>
        <value>
          <name>Replacement Strategy</name>
        </value>
      </entry>
      <entry>
        <key>Evaluation Mode</key>
        <value>
          <name>Evaluation Mode</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <!-- Pattern anatomy: a lookbehind for a double quote, group 1 (characters
           that are neither a quote nor a dot), a literal dot, group 2
           (characters that are not a quote), then a lookahead for the closing
           quote, optional whitespace and a colon. Because neither group can
           contain a quote, a match always lies inside one quoted string that
           is followed by a colon, i.e. an object key.
           The earlier pattern used lazy "any character" groups, which could
           run across quotes on a single line: for {"a":"1.2","b":"x"} it
           rewrote the dot inside the value 1.2.
           Group 2 may still contain dots, so only the first dot of a key is
           replaced per pass; the RouteOnContent loop handles the rest.
           This pattern must stay identical to the key_with_dot property of
           RouteOnContent, otherwise the loop may not terminate.
           Known limitation: keys that contain escaped quotes are not handled. -->
      <entry>
        <key>Regular Expression</key>
        <value>(?&lt;=")([^".]*)\.([^"]*)(?="\s*:)</value>
      </entry>
      <entry>
        <key>Replacement Value</key>
        <value>$1_$2</value>
      </entry>
      <entry>
        <key>Character Set</key>
        <value>UTF-8</value>
      </entry>
      <entry>
        <key>Maximum Buffer Size</key>
        <value>1 MB</value>
      </entry>
      <entry>
        <key>Replacement Strategy</key>
        <value>Regex Replace</value>
      </entry>
      <entry>
        <key>Evaluation Mode</key>
        <value>Entire text</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <executionNodeRestricted>false</executionNodeRestricted>
  <name>ReplaceText</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>failure</name>
  </relationships>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <state>STOPPED</state>
  <style/>
  <type>org.apache.nifi.processors.standard.ReplaceText</type>
</processors>
<processors>
  <!-- GenerateFlowFile "Create original JSON": emits a fixed JSON sample as
       custom text every 10 sec. Here each dotted spark_conf key sits on one
       line together with its colon and value. -->
  <id>66a41999-4351-33f7-0000-000000000000</id>
  <parentGroupId>e223d59f-ac39-3ea4-0000-000000000000</parentGroupId>
  <position>
    <x>7.0</x>
    <y>0.0</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.8.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>File Size</key>
        <value>
          <name>File Size</name>
        </value>
      </entry>
      <entry>
        <key>Batch Size</key>
        <value>
          <name>Batch Size</name>
        </value>
      </entry>
      <entry>
        <key>Data Format</key>
        <value>
          <name>Data Format</name>
        </value>
      </entry>
      <entry>
        <key>Unique FlowFiles</key>
        <value>
          <name>Unique FlowFiles</name>
        </value>
      </entry>
      <entry>
        <key>generate-ff-custom-text</key>
        <value>
          <name>generate-ff-custom-text</name>
        </value>
      </entry>
      <entry>
        <key>character-set</key>
        <value>
          <name>character-set</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>File Size</key>
        <value>0B</value>
      </entry>
      <entry>
        <key>Batch Size</key>
        <value>1</value>
      </entry>
      <entry>
        <key>Data Format</key>
        <value>Text</value>
      </entry>
      <entry>
        <key>Unique FlowFiles</key>
        <value>false</value>
      </entry>
      <!-- The text of the next value is the flow file content; its line
           breaks and lack of indentation are data and must not be reformatted. -->
      <entry>
        <key>generate-ff-custom-text</key>
        <value>{
"events": [
{
"cluster_id": "0717-035521-puny598",
"timestamp": 1531896847915,
"type": "EDITED",
"details": {
"previous_attributes": {
"cluster_name": "Kylo",
"spark_version": "4.1.x-scala2.11",
"spark_conf": {
"spark.databricks.acl.dfAclsEnabled": "true",
"spark.databricks.repl.allowedLanguages": "python,sql"
},
"node_type_id": "Standard_DS3_v2",
"driver_node_type_id": "Standard_DS3_v2",
"autotermination_minutes": 10,
"enable_elastic_disk": true,
"cluster_source": "UI"
},
"attributes": {
"cluster_name": "Kylo",
"spark_version": "4.1.x-scala2.11",
"node_type_id": "Standard_DS3_v2",
"driver_node_type_id": "Standard_DS3_v2",
"autotermination_minutes": 10,
"enable_elastic_disk": true,
"cluster_source": "UI"
},
"previous_cluster_size": {
"autoscale": {
"min_workers": 1,
"max_workers": 8
}
},
"cluster_size": {
"autoscale": {
"min_workers": 1,
"max_workers": 8
}
},
"user": ""
}
},
{
"cluster_id": "0717-035521-puny598",
"timestamp": 1535540053785,
"type": "TERMINATING",
"details": {
"reason": {
"code": "INACTIVITY",
"parameters": {
"inactivity_duration_min": "15"
}
}
}
},
{
"cluster_id": "0717-035521-puny598",
"timestamp": 1535537117300,
"type": "EXPANDED_DISK",
"details": {
"previous_disk_size": 29454626816,
"disk_size": 136828809216,
"free_space": 17151311872,
"instance_id": "6cea5c332af94d7f85aff23e5d8cea37"
}
}
]
}</value>
      </entry>
      <entry>
        <key>character-set</key>
        <value>UTF-8</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>10 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <executionNodeRestricted>false</executionNodeRestricted>
  <name>Create original JSON</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <state>STOPPED</state>
  <style/>
  <type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
</processors>
<processors>
  <!-- LogAttribute: end of the flow. Receives the unmatched output of
       RouteOnContent and logs at info level with Log Payload enabled; its
       only relationship, success, is auto-terminated. Property entries that
       have a key but no value element are left unset on purpose. -->
  <id>7f34ff95-61eb-375f-0000-000000000000</id>
  <parentGroupId>e223d59f-ac39-3ea4-0000-000000000000</parentGroupId>
  <position>
    <x>0.0</x>
    <y>671.0</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.8.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>Log Level</key>
        <value>
          <name>Log Level</name>
        </value>
      </entry>
      <entry>
        <key>Log Payload</key>
        <value>
          <name>Log Payload</name>
        </value>
      </entry>
      <entry>
        <key>Attributes to Log</key>
        <value>
          <name>Attributes to Log</name>
        </value>
      </entry>
      <entry>
        <key>attributes-to-log-regex</key>
        <value>
          <name>attributes-to-log-regex</name>
        </value>
      </entry>
      <entry>
        <key>Attributes to Ignore</key>
        <value>
          <name>Attributes to Ignore</name>
        </value>
      </entry>
      <entry>
        <key>attributes-to-ignore-regex</key>
        <value>
          <name>attributes-to-ignore-regex</name>
        </value>
      </entry>
      <entry>
        <key>Log prefix</key>
        <value>
          <name>Log prefix</name>
        </value>
      </entry>
      <entry>
        <key>character-set</key>
        <value>
          <name>character-set</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>Log Level</key>
        <value>info</value>
      </entry>
      <entry>
        <key>Log Payload</key>
        <value>true</value>
      </entry>
      <entry>
        <key>Attributes to Log</key>
      </entry>
      <entry>
        <key>attributes-to-log-regex</key>
        <value>.*</value>
      </entry>
      <entry>
        <key>Attributes to Ignore</key>
      </entry>
      <entry>
        <key>attributes-to-ignore-regex</key>
      </entry>
      <entry>
        <key>Log prefix</key>
      </entry>
      <entry>
        <key>character-set</key>
        <value>UTF-8</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <executionNodeRestricted>false</executionNodeRestricted>
  <name>LogAttribute</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>success</name>
  </relationships>
  <state>STOPPED</state>
  <style/>
  <type>org.apache.nifi.processors.standard.LogAttribute</type>
</processors>
<processors>
  <!-- RouteOnContent: loop guard for ReplaceText. Content that still contains
       a dotted key is routed to key_with_dot (connected back to ReplaceText);
       everything else goes to unmatched (connected to LogAttribute). -->
  <id>dc2d6202-63f0-3875-0000-000000000000</id>
  <parentGroupId>e223d59f-ac39-3ea4-0000-000000000000</parentGroupId>
  <position>
    <x>11.0</x>
    <y>472.0</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.8.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>Match Requirement</key>
        <value>
          <name>Match Requirement</name>
        </value>
      </entry>
      <entry>
        <key>Character Set</key>
        <value>
          <name>Character Set</name>
        </value>
      </entry>
      <entry>
        <key>Content Buffer Size</key>
        <value>
          <name>Content Buffer Size</name>
        </value>
      </entry>
      <entry>
        <key>key_with_dot</key>
        <value>
          <name>key_with_dot</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>Match Requirement</key>
        <value>content must contain match</value>
      </entry>
      <entry>
        <key>Character Set</key>
        <value>UTF-8</value>
      </entry>
      <entry>
        <key>Content Buffer Size</key>
        <value>1 MB</value>
      </entry>
      <!-- The property name doubles as the relationship name. The pattern is
           deliberately the same as the Regular Expression of ReplaceText: it
           only matches a dot inside a quoted string that is followed by a
           colon, so dots in values never keep the loop alive. Keep the two
           patterns in sync. -->
      <entry>
        <key>key_with_dot</key>
        <value>(?&lt;=")([^".]*)\.([^"]*)(?="\s*:)</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <executionNodeRestricted>false</executionNodeRestricted>
  <name>RouteOnContent</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>key_with_dot</name>
  </relationships>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>unmatched</name>
  </relationships>
  <state>STOPPED</state>
  <style/>
  <type>org.apache.nifi.processors.standard.RouteOnContent</type>
</processors>
</snippet>
<timestamp>09/03/2018 13:49:11 PDT</timestamp>
</template>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment