Skip to content

Instantly share code, notes, and snippets.

@YolandaMDavis
Created July 10, 2016 23:37
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save YolandaMDavis/eb9f9d5f21175fbf576c0639fb690b5f to your computer and use it in GitHub Desktop.
Save YolandaMDavis/eb9f9d5f21175fbf576c0639fb690b5f to your computer and use it in GitHub Desktop.
NiFi Template for Twitter Streams Transformed with Jolt
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!--
  Apache NiFi template: JoltTransformJSON_Twitter

  Flow layout (see the connections below):
    GetTwitter (success) to Put Raw File
    GetTwitter (success) to JoltTransformJSON (success) to PutTransformedFile

  Values you must supply after importing this template:
    * GetTwitter: Consumer Key, Consumer Secret, Access Token, Access Token Secret.
      No credentials are shipped in this file; never commit real ones to a shared template.
    * Both PutFile processors: Directory (required, left empty here).

  Whitespace is only added between elements; text nodes (property keys, values,
  sizes, the timestamp) are kept on a single line because their whitespace is data.
-->
<template>
  <description></description>
  <name>JoltTransformJSON_Twitter</name>
  <snippet>

    <!-- Connection: GetTwitter (success) to JoltTransformJSON -->
    <connections>
      <id>4b7692b8-a3f2-4efc-9105-dc10a1a32571</id>
      <parentGroupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</parentGroupId>
      <backPressureDataSizeThreshold>10 MB</backPressureDataSizeThreshold>
      <backPressureObjectThreshold>100</backPressureObjectThreshold>
      <destination><groupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</groupId><id>694eaedb-5a0b-47ec-8fcb-64548799da78</id><type>PROCESSOR</type></destination>
      <flowFileExpiration>0 sec</flowFileExpiration>
      <labelIndex>1</labelIndex>
      <name>Transform Tweets</name>
      <selectedRelationships>success</selectedRelationships>
      <source><groupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</groupId><id>8e566f04-31d3-4932-aea1-ba1aa445ec45</id><type>PROCESSOR</type></source>
      <zIndex>0</zIndex>
    </connections>

    <!-- Connection: JoltTransformJSON (success) to PutTransformedFile -->
    <connections>
      <id>acf4bde3-2252-421a-9e69-69b6cc52e382</id>
      <parentGroupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</parentGroupId>
      <backPressureDataSizeThreshold>10 MB</backPressureDataSizeThreshold>
      <backPressureObjectThreshold>100</backPressureObjectThreshold>
      <destination><groupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</groupId><id>4e1ad780-bd99-44b2-9884-47bfc6654cbc</id><type>PROCESSOR</type></destination>
      <flowFileExpiration>0 sec</flowFileExpiration>
      <labelIndex>1</labelIndex>
      <name>Store Transformed Tweets</name>
      <selectedRelationships>success</selectedRelationships>
      <source><groupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</groupId><id>694eaedb-5a0b-47ec-8fcb-64548799da78</id><type>PROCESSOR</type></source>
      <zIndex>0</zIndex>
    </connections>

    <!-- Connection: GetTwitter (success) to Put Raw File.
         NOTE(review): both thresholds are 0 here, unlike the other two connections; confirm this is intended. -->
    <connections>
      <id>e77f7bdd-a8c2-4bf6-bd1c-9ee031e1ef28</id>
      <parentGroupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</parentGroupId>
      <backPressureDataSizeThreshold>0 MB</backPressureDataSizeThreshold>
      <backPressureObjectThreshold>0</backPressureObjectThreshold>
      <destination><groupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</groupId><id>b3c15f88-ce09-44eb-98fc-f3ac7b8fc73b</id><type>PROCESSOR</type></destination>
      <flowFileExpiration>0 sec</flowFileExpiration>
      <labelIndex>1</labelIndex>
      <name>Store Raw Tweets</name>
      <selectedRelationships>success</selectedRelationships>
      <source><groupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</groupId><id>8e566f04-31d3-4932-aea1-ba1aa445ec45</id><type>PROCESSOR</type></source>
      <zIndex>0</zIndex>
    </connections>

    <!-- Processor: PutTransformedFile (PutFile). Writes the Jolt-transformed tweets; Directory must be set after import. -->
    <processors>
      <id>4e1ad780-bd99-44b2-9884-47bfc6654cbc</id>
      <parentGroupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</parentGroupId>
      <position><x>3743.8715805215184</x><y>642.4559968579285</y></position>
      <config>
        <bulletinLevel>WARN</bulletinLevel>
        <comments></comments>
        <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
        <defaultConcurrentTasks>
          <entry><key>TIMER_DRIVEN</key><value>1</value></entry>
          <entry><key>EVENT_DRIVEN</key><value>0</value></entry>
          <entry><key>CRON_DRIVEN</key><value>1</value></entry>
        </defaultConcurrentTasks>
        <defaultSchedulingPeriod>
          <entry><key>TIMER_DRIVEN</key><value>0 sec</value></entry>
          <entry><key>CRON_DRIVEN</key><value>* * * * * ?</value></entry>
        </defaultSchedulingPeriod>
        <descriptors>
          <entry><key>Directory</key><value><description>The directory to which files should be written. You may use expression language such as /aa/bb/${path}</description><displayName>Directory</displayName><dynamic>false</dynamic><name>Directory</name><required>true</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
          <entry><key>Conflict Resolution Strategy</key><value><allowableValues><displayName>replace</displayName><value>replace</value></allowableValues><allowableValues><displayName>ignore</displayName><value>ignore</value></allowableValues><allowableValues><displayName>fail</displayName><value>fail</value></allowableValues><defaultValue>fail</defaultValue><description>Indicates what should happen when a file with the same name already exists in the output directory</description><displayName>Conflict Resolution Strategy</displayName><dynamic>false</dynamic><name>Conflict Resolution Strategy</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Create Missing Directories</key><value><allowableValues><displayName>true</displayName><value>true</value></allowableValues><allowableValues><displayName>false</displayName><value>false</value></allowableValues><defaultValue>true</defaultValue><description>If true, then missing destination directories will be created. If false, flowfiles are penalized and sent to failure.</description><displayName>Create Missing Directories</displayName><dynamic>false</dynamic><name>Create Missing Directories</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Maximum File Count</key><value><description>Specifies the maximum number of files that can exist in the output directory</description><displayName>Maximum File Count</displayName><dynamic>false</dynamic><name>Maximum File Count</name><required>false</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Last Modified Time</key><value><description>Sets the lastModifiedTime on the output file to the value of this attribute. Format must be yyyy-MM-dd'T'HH:mm:ssZ. You may also use expression language such as ${file.lastModifiedTime}.</description><displayName>Last Modified Time</displayName><dynamic>false</dynamic><name>Last Modified Time</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
          <entry><key>Permissions</key><value><description>Sets the permissions on the output file to the value of this attribute. Format must be either UNIX rwxrwxrwx with a - in place of denied permissions (e.g. rw-r--r--) or an octal number (e.g. 644). You may also use expression language such as ${file.permissions}.</description><displayName>Permissions</displayName><dynamic>false</dynamic><name>Permissions</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
          <entry><key>Owner</key><value><description>Sets the owner on the output file to the value of this attribute. You may also use expression language such as ${file.owner}.</description><displayName>Owner</displayName><dynamic>false</dynamic><name>Owner</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
          <entry><key>Group</key><value><description>Sets the group on the output file to the value of this attribute. You may also use expression language such as ${file.group}.</description><displayName>Group</displayName><dynamic>false</dynamic><name>Group</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
        </descriptors>
        <lossTolerant>false</lossTolerant>
        <penaltyDuration>30 sec</penaltyDuration>
        <properties>
          <!-- Directory is required but intentionally empty: set it after importing the template. -->
          <entry><key>Directory</key><value></value></entry>
          <entry><key>Conflict Resolution Strategy</key><value>fail</value></entry>
          <entry><key>Create Missing Directories</key><value>true</value></entry>
          <entry><key>Maximum File Count</key></entry>
          <entry><key>Last Modified Time</key></entry>
          <entry><key>Permissions</key></entry>
          <entry><key>Owner</key></entry>
          <entry><key>Group</key></entry>
        </properties>
        <runDurationMillis>0</runDurationMillis>
        <schedulingPeriod>0 sec</schedulingPeriod>
        <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
        <yieldDuration>1 sec</yieldDuration>
      </config>
      <name>PutTransformedFile</name>
      <relationships><autoTerminate>true</autoTerminate><description>Files that could not be written to the output directory for some reason are transferred to this relationship</description><name>failure</name></relationships>
      <relationships><autoTerminate>true</autoTerminate><description>Files that have been successfully written to the output directory are transferred to this relationship</description><name>success</name></relationships>
      <state>STOPPED</state>
      <style/>
      <supportsEventDriven>false</supportsEventDriven>
      <supportsParallelProcessing>true</supportsParallelProcessing>
      <type>org.apache.nifi.processors.standard.PutFile</type>
    </processors>

    <!-- Processor: GetTwitter. Source of the flow; uses the Filter Endpoint with the terms configured below. -->
    <processors>
      <id>8e566f04-31d3-4932-aea1-ba1aa445ec45</id>
      <parentGroupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</parentGroupId>
      <position><x>2804.8313382772876</x><y>430.9118108097517</y></position>
      <config>
        <bulletinLevel>WARN</bulletinLevel>
        <comments></comments>
        <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
        <defaultConcurrentTasks>
          <entry><key>TIMER_DRIVEN</key><value>1</value></entry>
          <entry><key>EVENT_DRIVEN</key><value>0</value></entry>
          <entry><key>CRON_DRIVEN</key><value>1</value></entry>
        </defaultConcurrentTasks>
        <defaultSchedulingPeriod>
          <entry><key>TIMER_DRIVEN</key><value>0 sec</value></entry>
          <entry><key>CRON_DRIVEN</key><value>* * * * * ?</value></entry>
        </defaultSchedulingPeriod>
        <descriptors>
          <entry><key>Twitter Endpoint</key><value><allowableValues><description>The endpoint that provides public data, aka a 'garden hose'</description><displayName>Sample Endpoint</displayName><value>Sample Endpoint</value></allowableValues><allowableValues><description>The endpoint that provides access to all tweets</description><displayName>Firehose Endpoint</displayName><value>Firehose Endpoint</value></allowableValues><allowableValues><description>Endpoint that allows the stream to be filtered by specific terms or User IDs</description><displayName>Filter Endpoint</displayName><value>Filter Endpoint</value></allowableValues><defaultValue>Sample Endpoint</defaultValue><description>Specifies which endpoint data should be pulled from</description><displayName>Twitter Endpoint</displayName><dynamic>false</dynamic><name>Twitter Endpoint</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Consumer Key</key><value><description>The Consumer Key provided by Twitter</description><displayName>Consumer Key</displayName><dynamic>false</dynamic><name>Consumer Key</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Consumer Secret</key><value><description>The Consumer Secret provided by Twitter</description><displayName>Consumer Secret</displayName><dynamic>false</dynamic><name>Consumer Secret</name><required>true</required><sensitive>true</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Access Token</key><value><description>The Access Token provided by Twitter</description><displayName>Access Token</displayName><dynamic>false</dynamic><name>Access Token</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Access Token Secret</key><value><description>The Access Token Secret provided by Twitter</description><displayName>Access Token Secret</displayName><dynamic>false</dynamic><name>Access Token Secret</name><required>true</required><sensitive>true</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Languages</key><value><description>A comma-separated list of languages for which tweets should be fetched</description><displayName>Languages</displayName><dynamic>false</dynamic><name>Languages</name><required>false</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Terms to Filter On</key><value><description>A comma-separated list of terms to filter on. Ignored unless Endpoint is set to 'Filter Endpoint'. The filter works such that if any term matches, the status update will be retrieved; multiple terms separated by a space function as an 'AND'. I.e., 'it was, hello' will retrieve status updates that have either 'hello' or both 'it' AND 'was'</description><displayName>Terms to Filter On</displayName><dynamic>false</dynamic><name>Terms to Filter On</name><required>false</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>IDs to Follow</key><value><description>A comma-separated list of Twitter User ID's to follow. Ignored unless Endpoint is set to 'Filter Endpoint'.</description><displayName>IDs to Follow</displayName><dynamic>false</dynamic><name>IDs to Follow</name><required>false</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Locations to Filter On</key><value><description>A comma-separated list of coordinates specifying one or more bounding boxes to filter on.Each bounding box is specified by a pair of coordinates in the format: swLon,swLat,neLon,neLat. Multiple bounding boxes can be specified as such: swLon1,swLat1,neLon1,neLat1,swLon2,swLat2,neLon2,neLat2.Ignored unless Endpoint is set to 'Filter Endpoint'.</description><displayName>Locations to Filter On</displayName><dynamic>false</dynamic><name>Locations to Filter On</name><required>false</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
        </descriptors>
        <lossTolerant>false</lossTolerant>
        <penaltyDuration>30 sec</penaltyDuration>
        <properties>
          <entry><key>Twitter Endpoint</key><value>Filter Endpoint</value></entry>
          <!-- SECURITY: Consumer Key and Access Token are not flagged sensitive in the descriptors above,
               so an exported template can carry them in clear text. They are deliberately left unset here,
               like the two secret properties; enter all four credentials in the processor after importing. -->
          <entry><key>Consumer Key</key></entry>
          <entry><key>Consumer Secret</key></entry>
          <entry><key>Access Token</key></entry>
          <entry><key>Access Token Secret</key></entry>
          <entry><key>Languages</key></entry>
          <entry><key>Terms to Filter On</key><value>Baltimore, Maryland</value></entry>
          <entry><key>IDs to Follow</key></entry>
          <entry><key>Locations to Filter On</key></entry>
        </properties>
        <runDurationMillis>0</runDurationMillis>
        <schedulingPeriod>0 sec</schedulingPeriod>
        <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
        <yieldDuration>1 sec</yieldDuration>
      </config>
      <name>GetTwitter</name>
      <relationships><autoTerminate>false</autoTerminate><description>All status updates will be routed to this relationship</description><name>success</name></relationships>
      <state>STOPPED</state>
      <style/>
      <supportsEventDriven>false</supportsEventDriven>
      <supportsParallelProcessing>true</supportsParallelProcessing>
      <type>org.apache.nifi.processors.twitter.GetTwitter</type>
    </processors>

    <!-- Processor: JoltTransformJSON. Applies a chain spec: a shift that renames tweet fields,
         then a default that adds "chainr-rating". Failures are auto-terminated. -->
    <processors>
      <id>694eaedb-5a0b-47ec-8fcb-64548799da78</id>
      <parentGroupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</parentGroupId>
      <position><x>3192.269005454541</x><y>634.8750842950883</y></position>
      <config>
        <bulletinLevel>WARN</bulletinLevel>
        <comments></comments>
        <concurrentlySchedulableTaskCount>5</concurrentlySchedulableTaskCount>
        <defaultConcurrentTasks>
          <entry><key>TIMER_DRIVEN</key><value>1</value></entry>
          <entry><key>EVENT_DRIVEN</key><value>0</value></entry>
          <entry><key>CRON_DRIVEN</key><value>1</value></entry>
        </defaultConcurrentTasks>
        <defaultSchedulingPeriod>
          <entry><key>TIMER_DRIVEN</key><value>0 sec</value></entry>
          <entry><key>CRON_DRIVEN</key><value>* * * * * ?</value></entry>
        </defaultSchedulingPeriod>
        <descriptors>
          <entry><key>jolt-transform</key><value><allowableValues><description>Change the cardinality of input elements to create the output JSON.</description><displayName>Cardinality</displayName><value>jolt-transform-card</value></allowableValues><allowableValues><description>Execute list of Jolt transformations.</description><displayName>Chain</displayName><value>jolt-transform-chain</value></allowableValues><allowableValues><description> Apply default values to the output JSON.</description><displayName>Default</displayName><value>jolt-transform-default</value></allowableValues><allowableValues><description> Remove values from input data to create the output JSON.</description><displayName>Remove</displayName><value>jolt-transform-remove</value></allowableValues><allowableValues><description>Shift input JSON/data to create the output JSON.</description><displayName>Shift</displayName><value>jolt-transform-shift</value></allowableValues><allowableValues><description>Sort input json key values alphabetically. Any specification set is ignored.</description><displayName>Sort</displayName><value>jolt-transform-sort</value></allowableValues><defaultValue>jolt-transform-chain</defaultValue><description>Specifies the Jolt Transformation that should be used with the provided specification.</description><displayName>Jolt Transformation DSL</displayName><dynamic>false</dynamic><name>jolt-transform</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>jolt-spec</key><value><description>Jolt Specification for transform of JSON data. This value is ignored if the Jolt Sort Transformation is selected.</description><displayName>Jolt Specification</displayName><dynamic>false</dynamic><name>jolt-spec</name><required>false</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
        </descriptors>
        <lossTolerant>false</lossTolerant>
        <penaltyDuration>30 sec</penaltyDuration>
        <properties>
          <entry><key>jolt-transform</key><value>jolt-transform-chain</value></entry>
          <!-- The spec below is text content: its line breaks are part of the value, so it is not re-indented. -->
          <entry><key>jolt-spec</key><value> [{
&quot;operation&quot;: &quot;shift&quot;,
&quot;spec&quot;: {
&quot;created_at&quot;: &quot;created_date_time&quot;,
&quot;id&quot;: &quot;tweet_id&quot;,
&quot;text&quot;: &quot;tweet_text&quot;,
&quot;user&quot;: {
&quot;id&quot;: &quot;user_id&quot;
}
}
},
{
&quot;operation&quot;: &quot;default&quot;,
&quot;spec&quot;:{
&quot;chainr-rating&quot; : 4
}
}
]</value></entry>
        </properties>
        <runDurationMillis>0</runDurationMillis>
        <schedulingPeriod>0 sec</schedulingPeriod>
        <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
        <yieldDuration>1 sec</yieldDuration>
      </config>
      <name>JoltTransformJSON</name>
      <relationships><autoTerminate>true</autoTerminate><description>If a FlowFile fails processing for any reason (for example, the FlowFile is not valid JSON), it will be routed to this relationship</description><name>failure</name></relationships>
      <relationships><autoTerminate>false</autoTerminate><description>The FlowFile with transformed content will be routed to this relationship</description><name>success</name></relationships>
      <state>STOPPED</state>
      <style/>
      <supportsEventDriven>true</supportsEventDriven>
      <supportsParallelProcessing>true</supportsParallelProcessing>
      <type>org.apache.nifi.processors.standard.JoltTransformJSON</type>
    </processors>

    <!-- Processor: Put Raw File (PutFile). Writes the untransformed tweets; Directory must be set after import. -->
    <processors>
      <id>b3c15f88-ce09-44eb-98fc-f3ac7b8fc73b</id>
      <parentGroupId>1c00c635-485b-4e88-89b0-b84da3fd3e05</parentGroupId>
      <position><x>3744.258003841074</x><y>436.263202619321</y></position>
      <config>
        <bulletinLevel>WARN</bulletinLevel>
        <comments></comments>
        <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
        <defaultConcurrentTasks>
          <entry><key>TIMER_DRIVEN</key><value>1</value></entry>
          <entry><key>EVENT_DRIVEN</key><value>0</value></entry>
          <entry><key>CRON_DRIVEN</key><value>1</value></entry>
        </defaultConcurrentTasks>
        <defaultSchedulingPeriod>
          <entry><key>TIMER_DRIVEN</key><value>0 sec</value></entry>
          <entry><key>CRON_DRIVEN</key><value>* * * * * ?</value></entry>
        </defaultSchedulingPeriod>
        <descriptors>
          <entry><key>Directory</key><value><description>The directory to which files should be written. You may use expression language such as /aa/bb/${path}</description><displayName>Directory</displayName><dynamic>false</dynamic><name>Directory</name><required>true</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
          <entry><key>Conflict Resolution Strategy</key><value><allowableValues><displayName>replace</displayName><value>replace</value></allowableValues><allowableValues><displayName>ignore</displayName><value>ignore</value></allowableValues><allowableValues><displayName>fail</displayName><value>fail</value></allowableValues><defaultValue>fail</defaultValue><description>Indicates what should happen when a file with the same name already exists in the output directory</description><displayName>Conflict Resolution Strategy</displayName><dynamic>false</dynamic><name>Conflict Resolution Strategy</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Create Missing Directories</key><value><allowableValues><displayName>true</displayName><value>true</value></allowableValues><allowableValues><displayName>false</displayName><value>false</value></allowableValues><defaultValue>true</defaultValue><description>If true, then missing destination directories will be created. If false, flowfiles are penalized and sent to failure.</description><displayName>Create Missing Directories</displayName><dynamic>false</dynamic><name>Create Missing Directories</name><required>true</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Maximum File Count</key><value><description>Specifies the maximum number of files that can exist in the output directory</description><displayName>Maximum File Count</displayName><dynamic>false</dynamic><name>Maximum File Count</name><required>false</required><sensitive>false</sensitive><supportsEl>false</supportsEl></value></entry>
          <entry><key>Last Modified Time</key><value><description>Sets the lastModifiedTime on the output file to the value of this attribute. Format must be yyyy-MM-dd'T'HH:mm:ssZ. You may also use expression language such as ${file.lastModifiedTime}.</description><displayName>Last Modified Time</displayName><dynamic>false</dynamic><name>Last Modified Time</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
          <entry><key>Permissions</key><value><description>Sets the permissions on the output file to the value of this attribute. Format must be either UNIX rwxrwxrwx with a - in place of denied permissions (e.g. rw-r--r--) or an octal number (e.g. 644). You may also use expression language such as ${file.permissions}.</description><displayName>Permissions</displayName><dynamic>false</dynamic><name>Permissions</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
          <entry><key>Owner</key><value><description>Sets the owner on the output file to the value of this attribute. You may also use expression language such as ${file.owner}.</description><displayName>Owner</displayName><dynamic>false</dynamic><name>Owner</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
          <entry><key>Group</key><value><description>Sets the group on the output file to the value of this attribute. You may also use expression language such as ${file.group}.</description><displayName>Group</displayName><dynamic>false</dynamic><name>Group</name><required>false</required><sensitive>false</sensitive><supportsEl>true</supportsEl></value></entry>
        </descriptors>
        <lossTolerant>false</lossTolerant>
        <penaltyDuration>30 sec</penaltyDuration>
        <properties>
          <!-- Directory is required but intentionally empty: set it after importing the template. -->
          <entry><key>Directory</key><value></value></entry>
          <entry><key>Conflict Resolution Strategy</key><value>fail</value></entry>
          <entry><key>Create Missing Directories</key><value>true</value></entry>
          <entry><key>Maximum File Count</key></entry>
          <entry><key>Last Modified Time</key></entry>
          <entry><key>Permissions</key></entry>
          <entry><key>Owner</key></entry>
          <entry><key>Group</key></entry>
        </properties>
        <runDurationMillis>0</runDurationMillis>
        <schedulingPeriod>0 sec</schedulingPeriod>
        <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
        <yieldDuration>1 sec</yieldDuration>
      </config>
      <name>Put Raw File</name>
      <relationships><autoTerminate>true</autoTerminate><description>Files that could not be written to the output directory for some reason are transferred to this relationship</description><name>failure</name></relationships>
      <relationships><autoTerminate>true</autoTerminate><description>Files that have been successfully written to the output directory are transferred to this relationship</description><name>success</name></relationships>
      <state>STOPPED</state>
      <style/>
      <supportsEventDriven>false</supportsEventDriven>
      <supportsParallelProcessing>true</supportsParallelProcessing>
      <type>org.apache.nifi.processors.standard.PutFile</type>
    </processors>

  </snippet>
  <timestamp>07/10/2016 19:34:33 EDT</timestamp>
</template>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment