Created
June 2, 2017 14:15
-
-
Save YolandaMDavis/607c6511b6899510158fe34fcbf92057 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" ?> | |
<template encoding-version="1.1"> | |
<description>Example of using Expression Language with Jolt. Twitter data is obtained
and transformed then stored in a file as well as a Mongo repo.</description>
<groupId>142ee790-015c-1000-9a0d-a31aac7e0eb3</groupId> | |
<name>TwitterJoltEl_PutFile_Mongo</name> | |
<snippet> | |
<!-- Connection: "success" of UpdateAttribute (18dc55cb) feeds JoltTransformJSON (5e74cd11). -->
<connections>
  <id>dc4efe01-905c-39bb-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>e5a17068-5da9-3260-0000-000000000000</groupId>
    <id>5e74cd11-b091-337a-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name/>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>e5a17068-5da9-3260-0000-000000000000</groupId>
    <id>18dc55cb-cb4b-3aa5-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- Connection: "success" of GetTwitter (95e97293) feeds UpdateAttribute (18dc55cb). -->
<connections>
  <id>fa04f5b8-c1b6-3d0e-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>e5a17068-5da9-3260-0000-000000000000</groupId>
    <id>18dc55cb-cb4b-3aa5-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name/>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>e5a17068-5da9-3260-0000-000000000000</groupId>
    <id>95e97293-a576-3592-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- Connection: "success" of JoltTransformJSON (5e74cd11) feeds PutMongo (56381546). -->
<connections>
  <id>07c988da-6734-3e80-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>e5a17068-5da9-3260-0000-000000000000</groupId>
    <id>56381546-0b2e-3924-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name/>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>e5a17068-5da9-3260-0000-000000000000</groupId>
    <id>5e74cd11-b091-337a-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- Connection: "success" of JoltTransformJSON (5e74cd11) feeds PutFile (f7e5f138). -->
<connections>
  <id>2c8b71b3-9ad0-358e-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>e5a17068-5da9-3260-0000-000000000000</groupId>
    <id>f7e5f138-c92c-37ab-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name/>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>e5a17068-5da9-3260-0000-000000000000</groupId>
    <id>5e74cd11-b091-337a-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!--
  GetTwitter: entry point of the flow. Uses the "Filter Endpoint" with the
  filter term "Baltimore" and runs every 5 sec (timer driven).

  Credentials are deliberately NOT stored in this template. Consumer Key,
  Consumer Secret, Access Token and Access Token Secret are all left unset
  and must be supplied on the processor after the template is imported.
  Never commit real API keys or tokens to a shared template.
-->
<processors>
  <id>95e97293-a576-3592-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <position>
    <x>0.0</x>
    <y>0.0</y>
  </position>
  <bundle>
    <artifact>nifi-social-media-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.3.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>Twitter Endpoint</key>
        <value>
          <name>Twitter Endpoint</name>
        </value>
      </entry>
      <entry>
        <key>Consumer Key</key>
        <value>
          <name>Consumer Key</name>
        </value>
      </entry>
      <entry>
        <key>Consumer Secret</key>
        <value>
          <name>Consumer Secret</name>
        </value>
      </entry>
      <entry>
        <key>Access Token</key>
        <value>
          <name>Access Token</name>
        </value>
      </entry>
      <entry>
        <key>Access Token Secret</key>
        <value>
          <name>Access Token Secret</name>
        </value>
      </entry>
      <entry>
        <key>Languages</key>
        <value>
          <name>Languages</name>
        </value>
      </entry>
      <entry>
        <key>Terms to Filter On</key>
        <value>
          <name>Terms to Filter On</name>
        </value>
      </entry>
      <entry>
        <key>IDs to Follow</key>
        <value>
          <name>IDs to Follow</name>
        </value>
      </entry>
      <entry>
        <key>Locations to Filter On</key>
        <value>
          <name>Locations to Filter On</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>Twitter Endpoint</key>
        <value>Filter Endpoint</value>
      </entry>
      <!-- Unset on purpose: supply your own Consumer Key after import. -->
      <entry>
        <key>Consumer Key</key>
      </entry>
      <entry>
        <key>Consumer Secret</key>
      </entry>
      <!-- Unset on purpose: supply your own Access Token after import. -->
      <entry>
        <key>Access Token</key>
      </entry>
      <entry>
        <key>Access Token Secret</key>
      </entry>
      <entry>
        <key>Languages</key>
      </entry>
      <entry>
        <key>Terms to Filter On</key>
        <value>Baltimore</value>
      </entry>
      <entry>
        <key>IDs to Follow</key>
      </entry>
      <entry>
        <key>Locations to Filter On</key>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>5 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>GetTwitter</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style/>
  <type>org.apache.nifi.processors.twitter.GetTwitter</type>
</processors>
<!--
  PutFile: writes the transformed FlowFiles to the configured Directory.
  Conflict resolution is "fail" and missing directories are created.
  Both "failure" and "success" are auto-terminated.
  NOTE: the Directory value below is an absolute path under a specific
  user's home directory; adjust it for your machine after importing.
-->
<processors>
  <id>f7e5f138-c92c-37ab-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <position>
    <x>734.6169738769531</x>
    <y>444.4746144064619</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.3.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>Directory</key>
        <value>
          <name>Directory</name>
        </value>
      </entry>
      <entry>
        <key>Conflict Resolution Strategy</key>
        <value>
          <name>Conflict Resolution Strategy</name>
        </value>
      </entry>
      <entry>
        <key>Create Missing Directories</key>
        <value>
          <name>Create Missing Directories</name>
        </value>
      </entry>
      <entry>
        <key>Maximum File Count</key>
        <value>
          <name>Maximum File Count</name>
        </value>
      </entry>
      <entry>
        <key>Last Modified Time</key>
        <value>
          <name>Last Modified Time</name>
        </value>
      </entry>
      <entry>
        <key>Permissions</key>
        <value>
          <name>Permissions</name>
        </value>
      </entry>
      <entry>
        <key>Owner</key>
        <value>
          <name>Owner</name>
        </value>
      </entry>
      <entry>
        <key>Group</key>
        <value>
          <name>Group</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>Directory</key>
        <value>/Users/ydavis/dev/tools/twitter-stream/transformed</value>
      </entry>
      <entry>
        <key>Conflict Resolution Strategy</key>
        <value>fail</value>
      </entry>
      <entry>
        <key>Create Missing Directories</key>
        <value>true</value>
      </entry>
      <entry>
        <key>Maximum File Count</key>
      </entry>
      <entry>
        <key>Last Modified Time</key>
      </entry>
      <entry>
        <key>Permissions</key>
      </entry>
      <entry>
        <key>Owner</key>
      </entry>
      <entry>
        <key>Group</key>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>PutFile</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>failure</name>
  </relationships>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>success</name>
  </relationships>
  <style/>
  <type>org.apache.nifi.processors.standard.PutFile</type>
</processors>
<!--
  UpdateAttribute: sets the FlowFile attribute "id.var" to the literal "id".
  The jolt-spec of the JoltTransformJSON processor in this template
  references that attribute as ${id.var}. State storage is disabled.
-->
<processors>
  <id>18dc55cb-cb4b-3aa5-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <position>
    <x>179.8592529296875</x>
    <y>448.0442156791687</y>
  </position>
  <bundle>
    <artifact>nifi-update-attribute-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.3.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>Delete Attributes Expression</key>
        <value>
          <name>Delete Attributes Expression</name>
        </value>
      </entry>
      <entry>
        <key>Store State</key>
        <value>
          <name>Store State</name>
        </value>
      </entry>
      <entry>
        <key>Stateful Variables Initial Value</key>
        <value>
          <name>Stateful Variables Initial Value</name>
        </value>
      </entry>
      <entry>
        <key>id.var</key>
        <value>
          <name>id.var</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>Delete Attributes Expression</key>
      </entry>
      <entry>
        <key>Store State</key>
        <value>Do not store state</value>
      </entry>
      <entry>
        <key>Stateful Variables Initial Value</key>
      </entry>
      <entry>
        <key>id.var</key>
        <value>id</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>UpdateAttribute</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style/>
  <type>org.apache.nifi.processors.attributes.UpdateAttribute</type>
</processors>
<!--
  PutMongo: inserts each FlowFile into collection "subset-tweets" of database
  "twitter_dump" at mongodb://localhost:27017 (Mode=insert, Upsert=false,
  Write Concern=ACKNOWLEDGED, Character Set=UTF-8).
  No ssl-context-service is configured. Both "failure" and "success" are
  auto-terminated.
-->
<processors>
  <id>56381546-0b2e-3924-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <position>
    <x>1390.8270568847656</x>
    <y>6.534800052642794</y>
  </position>
  <bundle>
    <artifact>nifi-mongodb-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.3.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>Mongo URI</key>
        <value>
          <name>Mongo URI</name>
        </value>
      </entry>
      <entry>
        <key>Mongo Database Name</key>
        <value>
          <name>Mongo Database Name</name>
        </value>
      </entry>
      <entry>
        <key>Mongo Collection Name</key>
        <value>
          <name>Mongo Collection Name</name>
        </value>
      </entry>
      <entry>
        <key>ssl-context-service</key>
        <value>
          <identifiesControllerService>org.apache.nifi.ssl.SSLContextService</identifiesControllerService>
          <name>ssl-context-service</name>
        </value>
      </entry>
      <entry>
        <key>ssl-client-auth</key>
        <value>
          <name>ssl-client-auth</name>
        </value>
      </entry>
      <entry>
        <key>Mode</key>
        <value>
          <name>Mode</name>
        </value>
      </entry>
      <entry>
        <key>Upsert</key>
        <value>
          <name>Upsert</name>
        </value>
      </entry>
      <entry>
        <key>Update Query Key</key>
        <value>
          <name>Update Query Key</name>
        </value>
      </entry>
      <entry>
        <key>Write Concern</key>
        <value>
          <name>Write Concern</name>
        </value>
      </entry>
      <entry>
        <key>Character Set</key>
        <value>
          <name>Character Set</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>Mongo URI</key>
        <value>mongodb://localhost:27017</value>
      </entry>
      <entry>
        <key>Mongo Database Name</key>
        <value>twitter_dump</value>
      </entry>
      <entry>
        <key>Mongo Collection Name</key>
        <value>subset-tweets</value>
      </entry>
      <entry>
        <key>ssl-context-service</key>
      </entry>
      <entry>
        <key>ssl-client-auth</key>
        <value>REQUIRED</value>
      </entry>
      <entry>
        <key>Mode</key>
        <value>insert</value>
      </entry>
      <entry>
        <key>Upsert</key>
        <value>false</value>
      </entry>
      <entry>
        <key>Update Query Key</key>
        <value>_id</value>
      </entry>
      <entry>
        <key>Write Concern</key>
        <value>ACKNOWLEDGED</value>
      </entry>
      <entry>
        <key>Character Set</key>
        <value>UTF-8</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>PutMongo</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>failure</name>
  </relationships>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>success</name>
  </relationships>
  <style/>
  <type>org.apache.nifi.processors.standard.PutMongo</type>
</processors>
<!--
  JoltTransformJSON: applies a chained Jolt spec (jolt-transform-chain) to
  each tweet. "failure" is auto-terminated; "success" is routed onward.

  The spec uses NiFi Expression Language in two places: ${id.var} (the key to
  shift into "tweet_id") and ${uuid} (the value of "flow_file_id").

  Steps of the chain:
    1. shift                 : ${id.var} to tweet_id, text to tweet_text,
                               in_reply_to_* copied under the same names.
    2. modify-overwrite-beta : tweet_text lower-cased via =toLower.
    3. modify-default-beta   : defaults for the in_reply_to_* fields. Numeric
                               id fields default to 0 and their *_str / name
                               counterparts default to "".
    4. default               : adds flow_file_id = ${uuid}.

  The "&" in the shift spec is written as the entity &amp; because a bare
  ampersand is not allowed in XML element text; the parser hands a plain "&"
  to the processor.
-->
<processors>
  <id>5e74cd11-b091-337a-0000-000000000000</id>
  <parentGroupId>e5a17068-5da9-3260-0000-000000000000</parentGroupId>
  <position>
    <x>736.8592529296875</x>
    <y>13.544215679168673</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.3.0-SNAPSHOT</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>jolt-transform</key>
        <value>
          <name>jolt-transform</name>
        </value>
      </entry>
      <entry>
        <key>jolt-custom-class</key>
        <value>
          <name>jolt-custom-class</name>
        </value>
      </entry>
      <entry>
        <key>jolt-custom-modules</key>
        <value>
          <name>jolt-custom-modules</name>
        </value>
      </entry>
      <entry>
        <key>jolt-spec</key>
        <value>
          <name>jolt-spec</name>
        </value>
      </entry>
      <entry>
        <key>Transform Cache Size</key>
        <value>
          <name>Transform Cache Size</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>jolt-transform</key>
        <value>jolt-transform-chain</value>
      </entry>
      <entry>
        <key>jolt-custom-class</key>
      </entry>
      <entry>
        <key>jolt-custom-modules</key>
      </entry>
      <entry>
        <key>jolt-spec</key>
        <value>[{
  "operation": "shift",
  "spec": {
    "${id.var}": "tweet_id",
    "text": "tweet_text",
    "in_reply_to_*": "&amp;"
  }
},{
  "operation": "modify-overwrite-beta",
  "spec": {
    "tweet_text": "=toLower"
  }
},{
  "operation": "modify-default-beta",
  "spec": {
    "~in_reply_to_status_id": 0,
    "~in_reply_to_status_id_str": "",
    "~in_reply_to_user_id": 0,
    "~in_reply_to_user_id_str": "",
    "~in_reply_to_screen_name": ""
  }
},{
  "operation": "default",
  "spec": {
    "flow_file_id": "${uuid}"
  }
}]</value>
      </entry>
      <entry>
        <key>Transform Cache Size</key>
        <value>10000</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>JoltTransformJSON</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>failure</name>
  </relationships>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style/>
  <type>org.apache.nifi.processors.standard.JoltTransformJSON</type>
</processors>
</snippet> | |
<timestamp>06/02/2017 10:09:29 EDT</timestamp> | |
</template> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment