Created July 10, 2016 23:37
-
-
Save YolandaMDavis/eb9f9d5f21175fbf576c0639fb690b5f to your computer and use it in GitHub Desktop.
NiFi Template for Twitter Streams Transformed with Jolt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!--
  NiFi flow template "JoltTransformJSON_Twitter".

  Flow wiring (see the three connections below):
    GetTwitter (success) to "Put Raw File" (PutFile)          : connection "Store Raw Tweets"
    GetTwitter (success) to JoltTransformJSON                 : connection "Transform Tweets"
    JoltTransformJSON (success) to PutTransformedFile (PutFile): connection "Store Transformed Tweets"

  Before starting the flow after import:
    * enter your own Twitter Consumer Key/Secret and Access Token/Secret on GetTwitter
      (no credentials are stored in this template);
    * set the required Directory property on both PutFile processors (exported empty).
-->
<template>
  <description></description>
  <name>JoltTransformJSON_Twitter</name>
  <snippet>
    <!-- GetTwitter to JoltTransformJSON -->
    <connections>
      <id>4b7692b8-a3f2-4efc-9105-dc10a1a32571</id>
      <parentGroupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</parentGroupId>
      <backPressureDataSizeThreshold>10 MB</backPressureDataSizeThreshold>
      <backPressureObjectThreshold>100</backPressureObjectThreshold>
      <destination>
        <groupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</groupId>
        <id>694eaedb-5a0b-47ec-8fcb-64548799da78</id>
        <type>PROCESSOR</type>
      </destination>
      <flowFileExpiration>0 sec</flowFileExpiration>
      <labelIndex>1</labelIndex>
      <name>Transform Tweets</name>
      <selectedRelationships>success</selectedRelationships>
      <source>
        <groupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</groupId>
        <id>8e566f04-31d3-4932-aea1-ba1aa445ec45</id>
        <type>PROCESSOR</type>
      </source>
      <zIndex>0</zIndex>
    </connections>
    <!-- JoltTransformJSON to PutTransformedFile -->
    <connections>
      <id>acf4bde3-2252-421a-9e69-69b6cc52e382</id>
      <parentGroupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</parentGroupId>
      <backPressureDataSizeThreshold>10 MB</backPressureDataSizeThreshold>
      <backPressureObjectThreshold>100</backPressureObjectThreshold>
      <destination>
        <groupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</groupId>
        <id>4e1ad780-bd99-44b2-9884-47bfc6654cbc</id>
        <type>PROCESSOR</type>
      </destination>
      <flowFileExpiration>0 sec</flowFileExpiration>
      <labelIndex>1</labelIndex>
      <name>Store Transformed Tweets</name>
      <selectedRelationships>success</selectedRelationships>
      <source>
        <groupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</groupId>
        <id>694eaedb-5a0b-47ec-8fcb-64548799da78</id>
        <type>PROCESSOR</type>
      </source>
      <zIndex>0</zIndex>
    </connections>
    <!-- GetTwitter to Put Raw File; both back-pressure thresholds are 0 on this connection -->
    <connections>
      <id>e77f7bdd-a8c2-4bf6-bd1c-9ee031e1ef28</id>
      <parentGroupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</parentGroupId>
      <backPressureDataSizeThreshold>0 MB</backPressureDataSizeThreshold>
      <backPressureObjectThreshold>0</backPressureObjectThreshold>
      <destination>
        <groupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</groupId>
        <id>b3c15f88-ce09-44eb-98fc-f3ac7b8fc73b</id>
        <type>PROCESSOR</type>
      </destination>
      <flowFileExpiration>0 sec</flowFileExpiration>
      <labelIndex>1</labelIndex>
      <name>Store Raw Tweets</name>
      <selectedRelationships>success</selectedRelationships>
      <source>
        <groupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</groupId>
        <id>8e566f04-31d3-4932-aea1-ba1aa445ec45</id>
        <type>PROCESSOR</type>
      </source>
      <zIndex>0</zIndex>
    </connections>
    <!-- PutFile: writes the Jolt-transformed tweets to disk -->
    <processors>
      <id>4e1ad780-bd99-44b2-9884-47bfc6654cbc</id>
      <parentGroupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</parentGroupId>
      <position>
        <x>3743.8715805215184</x>
        <y>642.4559968579285</y>
      </position>
      <config>
        <bulletinLevel>WARN</bulletinLevel>
        <comments></comments>
        <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
        <defaultConcurrentTasks>
          <entry><key>TIMER_DRIVEN</key><value>1</value></entry>
          <entry><key>EVENT_DRIVEN</key><value>0</value></entry>
          <entry><key>CRON_DRIVEN</key><value>1</value></entry>
        </defaultConcurrentTasks>
        <defaultSchedulingPeriod>
          <entry><key>TIMER_DRIVEN</key><value>0 sec</value></entry>
          <entry><key>CRON_DRIVEN</key><value>* * * * * ?</value></entry>
        </defaultSchedulingPeriod>
        <descriptors>
          <entry><key>Directory</key><value><description>The directory to which files should be written. You may use expression language such as /aa/bb/${path}</description><displayName>Directory</displayName><dynamic>false</dynamic><name>Directory</name><required>true</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
          <entry><key>Conflict Resolution Strategy</key><value><allowableValues><displayName>replace</displayName><value>replace</value></allowableValues><allowableValues><displayName>ignore</displayName><value>ignore</value></allowableValues><allowableValues><displayName>fail</displayName><value>fail</value></allowableValues><defaultValue>fail</defaultValue><description>Indicates what should happen when a file with the same name already exists in the output directory</description><displayName>Conflict Resolution Strategy</displayName><dynamic>false</dynamic><name>Conflict Resolution Strategy</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Create Missing Directories</key><value><allowableValues><displayName>true</displayName><value>true</value></allowableValues><allowableValues><displayName>false</displayName><value>false</value></allowableValues><defaultValue>true</defaultValue><description>If true, then missing destination directories will be created. If false, flowfiles are penalized and sent to failure.</description><displayName>Create Missing Directories</displayName><dynamic>false</dynamic><name>Create Missing Directories</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Maximum File Count</key><value><description>Specifies the maximum number of files that can exist in the output directory</description><displayName>Maximum File Count</displayName><dynamic>false</dynamic><name>Maximum File Count</name><required>false</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Last Modified Time</key><value><description>Sets the lastModifiedTime on the output file to the value of this attribute. Format must be yyyy-MM-dd'T'HH:mm:ssZ. You may also use expression language such as ${file.lastModifiedTime}.</description><displayName>Last Modified Time</displayName><dynamic>false</dynamic><name>Last Modified Time</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
          <entry><key>Permissions</key><value><description>Sets the permissions on the output file to the value of this attribute. Format must be either UNIX rwxrwxrwx with a - in place of denied permissions (e.g. rw-r--r--) or an octal number (e.g. 644). You may also use expression language such as ${file.permissions}.</description><displayName>Permissions</displayName><dynamic>false</dynamic><name>Permissions</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
          <entry><key>Owner</key><value><description>Sets the owner on the output file to the value of this attribute. You may also use expression language such as ${file.owner}.</description><displayName>Owner</displayName><dynamic>false</dynamic><name>Owner</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
          <entry><key>Group</key><value><description>Sets the group on the output file to the value of this attribute. You may also use expression language such as ${file.group}.</description><displayName>Group</displayName><dynamic>false</dynamic><name>Group</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
        </descriptors>
        <lossTolerant>false</lossTolerant>
        <penaltyDuration>30 sec</penaltyDuration>
        <properties>
          <!-- Directory is a required property but is exported empty: set it after import. -->
          <entry><key>Directory</key><value></value></entry>
          <entry><key>Conflict Resolution Strategy</key><value>fail</value></entry>
          <entry><key>Create Missing Directories</key><value>true</value></entry>
          <entry><key>Maximum File Count</key></entry>
          <entry><key>Last Modified Time</key></entry>
          <entry><key>Permissions</key></entry>
          <entry><key>Owner</key></entry>
          <entry><key>Group</key></entry>
        </properties>
        <runDurationMillis>0</runDurationMillis>
        <schedulingPeriod>0 sec</schedulingPeriod>
        <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
        <yieldDuration>1 sec</yieldDuration>
      </config>
      <name>PutTransformedFile</name>
      <relationships>
        <autoTerminate>true</autoTerminate>
        <description>Files that could not be written to the output directory for some reason are transferred to this relationship</description>
        <name>failure</name>
      </relationships>
      <relationships>
        <autoTerminate>true</autoTerminate>
        <description>Files that have been successfully written to the output directory are transferred to this relationship</description>
        <name>success</name>
      </relationships>
      <state>STOPPED</state>
      <style/>
      <supportsEventDriven>false</supportsEventDriven>
      <supportsParallelProcessing>true</supportsParallelProcessing>
      <type>org.apache.nifi.processors.standard.PutFile</type>
    </processors>
    <!-- GetTwitter: source of the flow, configured for the Filter Endpoint -->
    <processors>
      <id>8e566f04-31d3-4932-aea1-ba1aa445ec45</id>
      <parentGroupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</parentGroupId>
      <position>
        <x>2804.8313382772876</x>
        <y>430.9118108097517</y>
      </position>
      <config>
        <bulletinLevel>WARN</bulletinLevel>
        <comments></comments>
        <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
        <defaultConcurrentTasks>
          <entry><key>TIMER_DRIVEN</key><value>1</value></entry>
          <entry><key>EVENT_DRIVEN</key><value>0</value></entry>
          <entry><key>CRON_DRIVEN</key><value>1</value></entry>
        </defaultConcurrentTasks>
        <defaultSchedulingPeriod>
          <entry><key>TIMER_DRIVEN</key><value>0 sec</value></entry>
          <entry><key>CRON_DRIVEN</key><value>* * * * * ?</value></entry>
        </defaultSchedulingPeriod>
        <descriptors>
          <entry><key>Twitter Endpoint</key><value><allowableValues><description>The endpoint that provides public data, aka a 'garden hose'</description><displayName>Sample Endpoint</displayName><value>Sample Endpoint</value></allowableValues><allowableValues><description>The endpoint that provides access to all tweets</description><displayName>Firehose Endpoint</displayName><value>Firehose Endpoint</value></allowableValues><allowableValues><description>Endpoint that allows the stream to be filtered by specific terms or User IDs</description><displayName>Filter Endpoint</displayName><value>Filter Endpoint</value></allowableValues><defaultValue>Sample Endpoint</defaultValue><description>Specifies which endpoint data should be pulled from</description><displayName>Twitter Endpoint</displayName><dynamic>false</dynamic><name>Twitter Endpoint</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Consumer Key</key><value><description>The Consumer Key provided by Twitter</description><displayName>Consumer Key</displayName><dynamic>false</dynamic><name>Consumer Key</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Consumer Secret</key><value><description>The Consumer Secret provided by Twitter</description><displayName>Consumer Secret</displayName><dynamic>false</dynamic><name>Consumer Secret</name><required>true</required><sensitive>true</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Access Token</key><value><description>The Access Token provided by Twitter</description><displayName>Access Token</displayName><dynamic>false</dynamic><name>Access Token</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Access Token Secret</key><value><description>The Access Token Secret provided by Twitter</description><displayName>Access Token Secret</displayName><dynamic>false</dynamic><name>Access Token Secret</name><required>true</required><sensitive>true</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Languages</key><value><description>A comma-separated list of languages for which tweets should be fetched</description><displayName>Languages</displayName><dynamic>false</dynamic><name>Languages</name><required>false</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Terms to Filter On</key><value><description>A comma-separated list of terms to filter on. Ignored unless Endpoint is set to 'Filter Endpoint'. The filter works such that if any term matches, the status update will be retrieved; multiple terms separated by a space function as an 'AND'. I.e., 'it was, hello' will retrieve status updates that have either 'hello' or both 'it' AND 'was'</description><displayName>Terms to Filter On</displayName><dynamic>false</dynamic><name>Terms to Filter On</name><required>false</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>IDs to Follow</key><value><description>A comma-separated list of Twitter User ID's to follow. Ignored unless Endpoint is set to 'Filter Endpoint'.</description><displayName>IDs to Follow</displayName><dynamic>false</dynamic><name>IDs to Follow</name><required>false</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Locations to Filter On</key><value><description>A comma-separated list of coordinates specifying one or more bounding boxes to filter on.Each bounding box is specified by a pair of coordinates in the format: swLon,swLat,neLon,neLat. Multiple bounding boxes can be specified as such: swLon1,swLat1,neLon1,neLat1,swLon2,swLat2,neLon2,neLat2.Ignored unless Endpoint is set to 'Filter Endpoint'.</description><displayName>Locations to Filter On</displayName><dynamic>false</dynamic><name>Locations to Filter On</name><required>false</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
        </descriptors>
        <lossTolerant>false</lossTolerant>
        <penaltyDuration>30 sec</penaltyDuration>
        <properties>
          <entry><key>Twitter Endpoint</key><value>Filter Endpoint</value></entry>
          <!-- All four credential properties are deliberately unset: a shared template must
               not carry account credentials. Enter your own values after import. -->
          <entry><key>Consumer Key</key></entry>
          <entry><key>Consumer Secret</key></entry>
          <entry><key>Access Token</key></entry>
          <entry><key>Access Token Secret</key></entry>
          <entry><key>Languages</key></entry>
          <entry><key>Terms to Filter On</key><value>Baltimore, Maryland</value></entry>
          <entry><key>IDs to Follow</key></entry>
          <entry><key>Locations to Filter On</key></entry>
        </properties>
        <runDurationMillis>0</runDurationMillis>
        <schedulingPeriod>0 sec</schedulingPeriod>
        <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
        <yieldDuration>1 sec</yieldDuration>
      </config>
      <name>GetTwitter</name>
      <relationships>
        <autoTerminate>false</autoTerminate>
        <description>All status updates will be routed to this relationship</description>
        <name>success</name>
      </relationships>
      <state>STOPPED</state>
      <style/>
      <supportsEventDriven>false</supportsEventDriven>
      <supportsParallelProcessing>true</supportsParallelProcessing>
      <type>org.apache.nifi.processors.twitter.GetTwitter</type>
    </processors>
    <!-- JoltTransformJSON: applies the chain spec in the jolt-spec property to each tweet -->
    <processors>
      <id>694eaedb-5a0b-47ec-8fcb-64548799da78</id>
      <parentGroupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</parentGroupId>
      <position>
        <x>3192.269005454541</x>
        <y>634.8750842950883</y>
      </position>
      <config>
        <bulletinLevel>WARN</bulletinLevel>
        <comments></comments>
        <concurrentlySchedulableTaskCount>5</concurrentlySchedulableTaskCount>
        <defaultConcurrentTasks>
          <entry><key>TIMER_DRIVEN</key><value>1</value></entry>
          <entry><key>EVENT_DRIVEN</key><value>0</value></entry>
          <entry><key>CRON_DRIVEN</key><value>1</value></entry>
        </defaultConcurrentTasks>
        <defaultSchedulingPeriod>
          <entry><key>TIMER_DRIVEN</key><value>0 sec</value></entry>
          <entry><key>CRON_DRIVEN</key><value>* * * * * ?</value></entry>
        </defaultSchedulingPeriod>
        <descriptors>
          <entry><key>jolt-transform</key><value><allowableValues><description>Change the cardinality of input elements to create the output JSON.</description><displayName>Cardinality</displayName><value>jolt-transform-card</value></allowableValues><allowableValues><description>Execute list of Jolt transformations.</description><displayName>Chain</displayName><value>jolt-transform-chain</value></allowableValues><allowableValues><description> Apply default values to the output JSON.</description><displayName>Default</displayName><value>jolt-transform-default</value></allowableValues><allowableValues><description> Remove values from input data to create the output JSON.</description><displayName>Remove</displayName><value>jolt-transform-remove</value></allowableValues><allowableValues><description>Shift input JSON/data to create the output JSON.</description><displayName>Shift</displayName><value>jolt-transform-shift</value></allowableValues><allowableValues><description>Sort input json key values alphabetically. Any specification set is ignored.</description><displayName>Sort</displayName><value>jolt-transform-sort</value></allowableValues><defaultValue>jolt-transform-chain</defaultValue><description>Specifies the Jolt Transformation that should be used with the provided specification.</description><displayName>Jolt Transformation DSL</displayName><dynamic>false</dynamic><name>jolt-transform</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>jolt-spec</key><value><description>Jolt Specification for transform of JSON data. This value is ignored if the Jolt Sort Transformation is selected.</description><displayName>Jolt Specification</displayName><dynamic>false</dynamic><name>jolt-spec</name><required>false</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
        </descriptors>
        <lossTolerant>false</lossTolerant>
        <penaltyDuration>30 sec</penaltyDuration>
        <properties>
          <entry><key>jolt-transform</key><value>jolt-transform-chain</value></entry>
          <!-- Chain of two operations: a "shift" that renames created_at, id, text and user.id,
               followed by a "default" that supplies "chainr-rating": 4.
               The value must be valid JSON; whitespace inside it is not significant. -->
          <entry><key>jolt-spec</key><value>[{
  "operation": "shift",
  "spec": {
    "created_at": "created_date_time",
    "id": "tweet_id",
    "text": "tweet_text",
    "user": {
      "id": "user_id"
    }
  }
},
{
  "operation": "default",
  "spec": {
    "chainr-rating": 4
  }
}
]</value></entry>
        </properties>
        <runDurationMillis>0</runDurationMillis>
        <schedulingPeriod>0 sec</schedulingPeriod>
        <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
        <yieldDuration>1 sec</yieldDuration>
      </config>
      <name>JoltTransformJSON</name>
      <relationships>
        <autoTerminate>true</autoTerminate>
        <description>If a FlowFile fails processing for any reason (for example, the FlowFile is not valid JSON), it will be routed to this relationship</description>
        <name>failure</name>
      </relationships>
      <relationships>
        <autoTerminate>false</autoTerminate>
        <description>The FlowFile with transformed content will be routed to this relationship</description>
        <name>success</name>
      </relationships>
      <state>STOPPED</state>
      <style/>
      <supportsEventDriven>true</supportsEventDriven>
      <supportsParallelProcessing>true</supportsParallelProcessing>
      <type>org.apache.nifi.processors.standard.JoltTransformJSON</type>
    </processors>
    <!-- PutFile: writes the untransformed tweets to disk -->
    <processors>
      <id>b3c15f88-ce09-44eb-98fc-f3ac7b8fc73b</id>
      <parentGroupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</parentGroupId>
      <position>
        <x>3744.258003841074</x>
        <y>436.263202619321</y>
      </position>
      <config>
        <bulletinLevel>WARN</bulletinLevel>
        <comments></comments>
        <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
        <defaultConcurrentTasks>
          <entry><key>TIMER_DRIVEN</key><value>1</value></entry>
          <entry><key>EVENT_DRIVEN</key><value>0</value></entry>
          <entry><key>CRON_DRIVEN</key><value>1</value></entry>
        </defaultConcurrentTasks>
        <defaultSchedulingPeriod>
          <entry><key>TIMER_DRIVEN</key><value>0 sec</value></entry>
          <entry><key>CRON_DRIVEN</key><value>* * * * * ?</value></entry>
        </defaultSchedulingPeriod>
        <descriptors>
          <entry><key>Directory</key><value><description>The directory to which files should be written. You may use expression language such as /aa/bb/${path}</description><displayName>Directory</displayName><dynamic>false</dynamic><name>Directory</name><required>true</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
          <entry><key>Conflict Resolution Strategy</key><value><allowableValues><displayName>replace</displayName><value>replace</value></allowableValues><allowableValues><displayName>ignore</displayName><value>ignore</value></allowableValues><allowableValues><displayName>fail</displayName><value>fail</value></allowableValues><defaultValue>fail</defaultValue><description>Indicates what should happen when a file with the same name already exists in the output directory</description><displayName>Conflict Resolution Strategy</displayName><dynamic>false</dynamic><name>Conflict Resolution Strategy</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Create Missing Directories</key><value><allowableValues><displayName>true</displayName><value>true</value></allowableValues><allowableValues><displayName>false</displayName><value>false</value></allowableValues><defaultValue>true</defaultValue><description>If true, then missing destination directories will be created. If false, flowfiles are penalized and sent to failure.</description><displayName>Create Missing Directories</displayName><dynamic>false</dynamic><name>Create Missing Directories</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Maximum File Count</key><value><description>Specifies the maximum number of files that can exist in the output directory</description><displayName>Maximum File Count</displayName><dynamic>false</dynamic><name>Maximum File Count</name><required>false</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Last Modified Time</key><value><description>Sets the lastModifiedTime on the output file to the value of this attribute. Format must be yyyy-MM-dd'T'HH:mm:ssZ. You may also use expression language such as ${file.lastModifiedTime}.</description><displayName>Last Modified Time</displayName><dynamic>false</dynamic><name>Last Modified Time</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
          <entry><key>Permissions</key><value><description>Sets the permissions on the output file to the value of this attribute. Format must be either UNIX rwxrwxrwx with a - in place of denied permissions (e.g. rw-r--r--) or an octal number (e.g. 644). You may also use expression language such as ${file.permissions}.</description><displayName>Permissions</displayName><dynamic>false</dynamic><name>Permissions</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
          <entry><key>Owner</key><value><description>Sets the owner on the output file to the value of this attribute. You may also use expression language such as ${file.owner}.</description><displayName>Owner</displayName><dynamic>false</dynamic><name>Owner</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
          <entry><key>Group</key><value><description>Sets the group on the output file to the value of this attribute. You may also use expression language such as ${file.group}.</description><displayName>Group</displayName><dynamic>false</dynamic><name>Group</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
        </descriptors>
        <lossTolerant>false</lossTolerant>
        <penaltyDuration>30 sec</penaltyDuration>
        <properties>
          <!-- Directory is a required property but is exported empty: set it after import. -->
          <entry><key>Directory</key><value></value></entry>
          <entry><key>Conflict Resolution Strategy</key><value>fail</value></entry>
          <entry><key>Create Missing Directories</key><value>true</value></entry>
          <entry><key>Maximum File Count</key></entry>
          <entry><key>Last Modified Time</key></entry>
          <entry><key>Permissions</key></entry>
          <entry><key>Owner</key></entry>
          <entry><key>Group</key></entry>
        </properties>
        <runDurationMillis>0</runDurationMillis>
        <schedulingPeriod>0 sec</schedulingPeriod>
        <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
        <yieldDuration>1 sec</yieldDuration>
      </config>
      <name>Put Raw File</name>
      <relationships>
        <autoTerminate>true</autoTerminate>
        <description>Files that could not be written to the output directory for some reason are transferred to this relationship</description>
        <name>failure</name>
      </relationships>
      <relationships>
        <autoTerminate>true</autoTerminate>
        <description>Files that have been successfully written to the output directory are transferred to this relationship</description>
        <name>success</name>
      </relationships>
      <state>STOPPED</state>
      <style/>
      <supportsEventDriven>false</supportsEventDriven>
      <supportsParallelProcessing>true</supportsParallelProcessing>
      <type>org.apache.nifi.processors.standard.PutFile</type>
    </processors>
  </snippet>
  <timestamp>07/10/2016 19:34:33 EDT</timestamp>
</template>
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.