Skip to content

Instantly share code, notes, and snippets.

@YolandaMDavis
Created June 2, 2017 14:15
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save YolandaMDavis/607c6511b6899510158fe34fcbf92057 to your computer and use it in GitHub Desktop.
Save YolandaMDavis/607c6511b6899510158fe34fcbf92057 to your computer and use it in GitHub Desktop.
<?xml version="1.0" ?>
<template encoding-version="1.1">
<description>Example of using Expression Language with Jolt. Twitter data is obtained
and transformed then stored in a file as well as a Mongo repo.</description>
<groupId>142ee790-015c-1000-9a0d-a31aac7e0eb3</groupId>
<name>TwitterJoltEl_PutFile_Mongo</name>
<snippet>
<!-- Connection: routes the "success" relationship of the UpdateAttribute processor
     (18dc55cb) to the JoltTransformJSON processor (5e74cd11). -->
<connections>
  <id>dc4efe01-905c-39bb-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>e5a17068-5da9-3260-0000-000000000000</groupId>
    <id>5e74cd11-b091-337a-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>e5a17068-5da9-3260-0000-000000000000</groupId>
    <id>18dc55cb-cb4b-3aa5-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- Connection: routes the "success" relationship of the GetTwitter processor
     (95e97293) to the UpdateAttribute processor (18dc55cb). -->
<connections>
  <id>fa04f5b8-c1b6-3d0e-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>e5a17068-5da9-3260-0000-000000000000</groupId>
    <id>18dc55cb-cb4b-3aa5-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>e5a17068-5da9-3260-0000-000000000000</groupId>
    <id>95e97293-a576-3592-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- Connection: routes the "success" relationship of the JoltTransformJSON processor
     (5e74cd11) to the PutMongo processor (56381546). -->
<connections>
  <id>07c988da-6734-3e80-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>e5a17068-5da9-3260-0000-000000000000</groupId>
    <id>56381546-0b2e-3924-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>e5a17068-5da9-3260-0000-000000000000</groupId>
    <id>5e74cd11-b091-337a-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- Connection: routes the "success" relationship of the JoltTransformJSON processor
     (5e74cd11) to the PutFile processor (f7e5f138). Together with the connection to
     PutMongo, the transformed output is delivered to both destinations. -->
<connections>
  <id>2c8b71b3-9ad0-358e-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>e5a17068-5da9-3260-0000-000000000000</groupId>
    <id>f7e5f138-c92c-37ab-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>e5a17068-5da9-3260-0000-000000000000</groupId>
    <id>5e74cd11-b091-337a-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- Processor: GetTwitter. Entry point of the flow; configured for the "Filter Endpoint"
     with the filter term "Baltimore" and scheduled every 5 sec.
     All four Twitter credentials (Consumer Key, Consumer Secret, Access Token,
     Access Token Secret) are intentionally left without a value so that no account
     credentials are shipped with this shared template. Supply your own values after
     importing the template. -->
<processors>
  <id>95e97293-a576-3592-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <position>
    <x>0.0</x>
    <y>0.0</y>
  </position>
  <bundle>
    <artifact>nifi-social-media-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.3.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>Twitter Endpoint</key>
        <value>
          <name>Twitter Endpoint</name>
        </value>
      </entry>
      <entry>
        <key>Consumer Key</key>
        <value>
          <name>Consumer Key</name>
        </value>
      </entry>
      <entry>
        <key>Consumer Secret</key>
        <value>
          <name>Consumer Secret</name>
        </value>
      </entry>
      <entry>
        <key>Access Token</key>
        <value>
          <name>Access Token</name>
        </value>
      </entry>
      <entry>
        <key>Access Token Secret</key>
        <value>
          <name>Access Token Secret</name>
        </value>
      </entry>
      <entry>
        <key>Languages</key>
        <value>
          <name>Languages</name>
        </value>
      </entry>
      <entry>
        <key>Terms to Filter On</key>
        <value>
          <name>Terms to Filter On</name>
        </value>
      </entry>
      <entry>
        <key>IDs to Follow</key>
        <value>
          <name>IDs to Follow</name>
        </value>
      </entry>
      <entry>
        <key>Locations to Filter On</key>
        <value>
          <name>Locations to Filter On</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>Twitter Endpoint</key>
        <value>Filter Endpoint</value>
      </entry>
      <!-- Credential: unset on purpose; configure after import. -->
      <entry>
        <key>Consumer Key</key>
      </entry>
      <entry>
        <key>Consumer Secret</key>
      </entry>
      <!-- Credential: unset on purpose; configure after import. -->
      <entry>
        <key>Access Token</key>
      </entry>
      <entry>
        <key>Access Token Secret</key>
      </entry>
      <entry>
        <key>Languages</key>
      </entry>
      <entry>
        <key>Terms to Filter On</key>
        <value>Baltimore</value>
      </entry>
      <entry>
        <key>IDs to Follow</key>
      </entry>
      <entry>
        <key>Locations to Filter On</key>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>5 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>GetTwitter</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style></style>
  <type>org.apache.nifi.processors.twitter.GetTwitter</type>
</processors>
<!-- Processor: PutFile. Writes incoming flow files to the configured Directory, creating
     missing directories, with Conflict Resolution Strategy "fail". Both the "failure" and
     "success" relationships are auto-terminated.
     NOTE(review): the Directory value is an absolute path on the author's machine; it
     presumably needs to be changed after import. -->
<processors>
  <id>f7e5f138-c92c-37ab-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <position>
    <x>734.6169738769531</x>
    <y>444.4746144064619</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.3.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>Directory</key>
        <value>
          <name>Directory</name>
        </value>
      </entry>
      <entry>
        <key>Conflict Resolution Strategy</key>
        <value>
          <name>Conflict Resolution Strategy</name>
        </value>
      </entry>
      <entry>
        <key>Create Missing Directories</key>
        <value>
          <name>Create Missing Directories</name>
        </value>
      </entry>
      <entry>
        <key>Maximum File Count</key>
        <value>
          <name>Maximum File Count</name>
        </value>
      </entry>
      <entry>
        <key>Last Modified Time</key>
        <value>
          <name>Last Modified Time</name>
        </value>
      </entry>
      <entry>
        <key>Permissions</key>
        <value>
          <name>Permissions</name>
        </value>
      </entry>
      <entry>
        <key>Owner</key>
        <value>
          <name>Owner</name>
        </value>
      </entry>
      <entry>
        <key>Group</key>
        <value>
          <name>Group</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>Directory</key>
        <value>/Users/ydavis/dev/tools/twitter-stream/transformed</value>
      </entry>
      <entry>
        <key>Conflict Resolution Strategy</key>
        <value>fail</value>
      </entry>
      <entry>
        <key>Create Missing Directories</key>
        <value>true</value>
      </entry>
      <entry>
        <key>Maximum File Count</key>
      </entry>
      <entry>
        <key>Last Modified Time</key>
      </entry>
      <entry>
        <key>Permissions</key>
      </entry>
      <entry>
        <key>Owner</key>
      </entry>
      <entry>
        <key>Group</key>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>PutFile</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>failure</name>
  </relationships>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>success</name>
  </relationships>
  <style></style>
  <type>org.apache.nifi.processors.standard.PutFile</type>
</processors>
<!-- Processor: UpdateAttribute. Defines the user-added property "id.var" with the value
     "id". The jolt-spec of the JoltTransformJSON processor in this template refers to it
     as ${id.var}. State storage is disabled ("Do not store state"). -->
<processors>
  <id>18dc55cb-cb4b-3aa5-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <position>
    <x>179.8592529296875</x>
    <y>448.0442156791687</y>
  </position>
  <bundle>
    <artifact>nifi-update-attribute-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.3.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>Delete Attributes Expression</key>
        <value>
          <name>Delete Attributes Expression</name>
        </value>
      </entry>
      <entry>
        <key>Store State</key>
        <value>
          <name>Store State</name>
        </value>
      </entry>
      <entry>
        <key>Stateful Variables Initial Value</key>
        <value>
          <name>Stateful Variables Initial Value</name>
        </value>
      </entry>
      <entry>
        <key>id.var</key>
        <value>
          <name>id.var</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>Delete Attributes Expression</key>
      </entry>
      <entry>
        <key>Store State</key>
        <value>Do not store state</value>
      </entry>
      <entry>
        <key>Stateful Variables Initial Value</key>
      </entry>
      <entry>
        <key>id.var</key>
        <value>id</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>UpdateAttribute</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style></style>
  <type>org.apache.nifi.processors.attributes.UpdateAttribute</type>
</processors>
<!-- Processor: PutMongo. Configured with Mode "insert" against mongodb://localhost:27017,
     database "twitter_dump", collection "subset-tweets". No SSL context service is set.
     Both the "failure" and "success" relationships are auto-terminated. -->
<processors>
  <id>56381546-0b2e-3924-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <position>
    <x>1390.8270568847656</x>
    <y>6.534800052642794</y>
  </position>
  <bundle>
    <artifact>nifi-mongodb-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.3.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>Mongo URI</key>
        <value>
          <name>Mongo URI</name>
        </value>
      </entry>
      <entry>
        <key>Mongo Database Name</key>
        <value>
          <name>Mongo Database Name</name>
        </value>
      </entry>
      <entry>
        <key>Mongo Collection Name</key>
        <value>
          <name>Mongo Collection Name</name>
        </value>
      </entry>
      <entry>
        <key>ssl-context-service</key>
        <value>
          <identifiesControllerService>org.apache.nifi.ssl.SSLContextService</identifiesControllerService>
          <name>ssl-context-service</name>
        </value>
      </entry>
      <entry>
        <key>ssl-client-auth</key>
        <value>
          <name>ssl-client-auth</name>
        </value>
      </entry>
      <entry>
        <key>Mode</key>
        <value>
          <name>Mode</name>
        </value>
      </entry>
      <entry>
        <key>Upsert</key>
        <value>
          <name>Upsert</name>
        </value>
      </entry>
      <entry>
        <key>Update Query Key</key>
        <value>
          <name>Update Query Key</name>
        </value>
      </entry>
      <entry>
        <key>Write Concern</key>
        <value>
          <name>Write Concern</name>
        </value>
      </entry>
      <entry>
        <key>Character Set</key>
        <value>
          <name>Character Set</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>Mongo URI</key>
        <value>mongodb://localhost:27017</value>
      </entry>
      <entry>
        <key>Mongo Database Name</key>
        <value>twitter_dump</value>
      </entry>
      <entry>
        <key>Mongo Collection Name</key>
        <value>subset-tweets</value>
      </entry>
      <entry>
        <key>ssl-context-service</key>
      </entry>
      <entry>
        <key>ssl-client-auth</key>
        <value>REQUIRED</value>
      </entry>
      <entry>
        <key>Mode</key>
        <value>insert</value>
      </entry>
      <entry>
        <key>Upsert</key>
        <value>false</value>
      </entry>
      <entry>
        <key>Update Query Key</key>
        <value>_id</value>
      </entry>
      <entry>
        <key>Write Concern</key>
        <value>ACKNOWLEDGED</value>
      </entry>
      <entry>
        <key>Character Set</key>
        <value>UTF-8</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>PutMongo</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>failure</name>
  </relationships>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>success</name>
  </relationships>
  <style></style>
  <type>org.apache.nifi.processors.mongodb.PutMongo</type>
</processors>
<!-- Processor: JoltTransformJSON. Applies a chained Jolt specification
     (jolt-transform-chain) that uses NiFi Expression Language inside the spec.
     The "failure" relationship is auto-terminated; "success" is routed onward. -->
<processors>
  <id>5e74cd11-b091-337a-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <position>
    <x>736.8592529296875</x>
    <y>13.544215679168673</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.3.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>jolt-transform</key>
        <value>
          <name>jolt-transform</name>
        </value>
      </entry>
      <entry>
        <key>jolt-custom-class</key>
        <value>
          <name>jolt-custom-class</name>
        </value>
      </entry>
      <entry>
        <key>jolt-custom-modules</key>
        <value>
          <name>jolt-custom-modules</name>
        </value>
      </entry>
      <entry>
        <key>jolt-spec</key>
        <value>
          <name>jolt-spec</name>
        </value>
      </entry>
      <entry>
        <key>Transform Cache Size</key>
        <value>
          <name>Transform Cache Size</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>jolt-transform</key>
        <value>jolt-transform-chain</value>
      </entry>
      <entry>
        <key>jolt-custom-class</key>
      </entry>
      <entry>
        <key>jolt-custom-modules</key>
      </entry>
      <!-- jolt-spec, four chained operations:
           1. shift: maps the field named by ${id.var} to "tweet_id", "text" to
              "tweet_text", and keeps every "in_reply_to_*" field under its own name.
           2. modify-overwrite-beta: lower-cases "tweet_text".
           3. modify-default-beta: supplies defaults for the in_reply_to_* fields. Numeric
              ids default to 0 and their "_str" string counterparts default to "".
           4. default: adds "flow_file_id" set to ${uuid}.
           The spec text is kept unindented because it is the literal property value. -->
      <entry>
        <key>jolt-spec</key>
        <value>[{
"operation": "shift",
"spec": {
"${id.var}": "tweet_id",
"text": "tweet_text",
"in_reply_to_*": "&amp;"
}
},{
"operation": "modify-overwrite-beta",
"spec": {
"tweet_text": "=toLower"
}
},{
"operation": "modify-default-beta",
"spec": {
"~in_reply_to_status_id": 0,
"~in_reply_to_status_id_str": "",
"~in_reply_to_user_id": 0,
"~in_reply_to_user_id_str": "",
"~in_reply_to_screen_name": ""
}
},{
"operation": "default",
"spec":{
"flow_file_id" : "${uuid}"
}
}]</value>
      </entry>
      <entry>
        <key>Transform Cache Size</key>
        <value>10000</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>JoltTransformJSON</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>failure</name>
  </relationships>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style></style>
  <type>org.apache.nifi.processors.standard.JoltTransformJSON</type>
</processors>
</snippet>
<timestamp>06/02/2017 10:09:29 EDT</timestamp>
</template>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment