Skip to content

Instantly share code, notes, and snippets.

@arpieb
Created May 8, 2018 21:17
Show Gist options
  • Save arpieb/e812f48ef49b5f16057e7246607f9c69 to your computer and use it in GitHub Desktop.
Save arpieb/e812f48ef49b5f16057e7246607f9c69 to your computer and use it in GitHub Desktop.
Apache NiFi Template - GDELT 2.0 Data Download
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<template encoding-version="1.2">
<description></description>
<groupId>98771fb2-0162-1000-65a1-1228bdb238d4</groupId>
<name>GDELT 2.0 Data Download</name>
<snippet>
<connections>
<id>99327f17-9239-34b6-0000-000000000000</id>
<parentGroupId>5eca6977-2945-3990-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>5eca6977-2945-3990-0000-000000000000</groupId>
<id>1253949f-17b6-3aee-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>Response</selectedRelationships>
<source>
<groupId>5eca6977-2945-3990-0000-000000000000</groupId>
<id>9892f3ab-2005-34de-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>a939802c-1ec6-3828-0000-000000000000</id>
<parentGroupId>5eca6977-2945-3990-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<bends>
<x>829.4226962795478</x>
<y>957.6170600254031</y>
</bends>
<bends>
<x>829.4226962795478</x>
<y>1007.6170600254031</y>
</bends>
<destination>
<groupId>5eca6977-2945-3990-0000-000000000000</groupId>
<id>9892f3ab-2005-34de-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>Retry</selectedRelationships>
<source>
<groupId>5eca6977-2945-3990-0000-000000000000</groupId>
<id>9892f3ab-2005-34de-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>ab03714d-c7d6-3882-0000-000000000000</id>
<parentGroupId>5eca6977-2945-3990-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>5eca6977-2945-3990-0000-000000000000</groupId>
<id>c254210d-44ae-35be-0000-000000000000</id>
<type>FUNNEL</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>5eca6977-2945-3990-0000-000000000000</groupId>
<id>27065e5a-8193-37f1-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>eca60c53-90cd-32c2-0000-000000000000</id>
<parentGroupId>5eca6977-2945-3990-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>5eca6977-2945-3990-0000-000000000000</groupId>
<id>b2983d3e-72b8-3391-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>5eca6977-2945-3990-0000-000000000000</groupId>
<id>1253949f-17b6-3aee-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>009a306e-5376-3af7-0000-000000000000</id>
<parentGroupId>5eca6977-2945-3990-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>5eca6977-2945-3990-0000-000000000000</groupId>
<id>9892f3ab-2005-34de-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>matched</selectedRelationships>
<source>
<groupId>5eca6977-2945-3990-0000-000000000000</groupId>
<id>6e24142e-c9f5-3a9d-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>6147492b-9d34-3396-0000-000000000000</id>
<parentGroupId>5eca6977-2945-3990-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>5eca6977-2945-3990-0000-000000000000</groupId>
<id>54f6611d-e7af-3fd5-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<source>
<groupId>5eca6977-2945-3990-0000-000000000000</groupId>
<id>c254210d-44ae-35be-0000-000000000000</id>
<type>FUNNEL</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>6be5f8c1-67a1-3ef4-0000-000000000000</id>
<parentGroupId>5eca6977-2945-3990-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>5eca6977-2945-3990-0000-000000000000</groupId>
<id>c254210d-44ae-35be-0000-000000000000</id>
<type>FUNNEL</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>5eca6977-2945-3990-0000-000000000000</groupId>
<id>2ab60b2a-144f-3b76-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>733d9115-5d49-3533-0000-000000000000</id>
<parentGroupId>5eca6977-2945-3990-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>5eca6977-2945-3990-0000-000000000000</groupId>
<id>6e24142e-c9f5-3a9d-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>splits</selectedRelationships>
<source>
<groupId>5eca6977-2945-3990-0000-000000000000</groupId>
<id>54f6611d-e7af-3fd5-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<funnels>
<id>c254210d-44ae-35be-0000-000000000000</id>
<parentGroupId>5eca6977-2945-3990-0000-000000000000</parentGroupId>
<position>
<x>525.4226962795478</x>
<y>231.89324870662216</y>
</position>
</funnels>
<processors>
<id>9892f3ab-2005-34de-0000-000000000000</id>
<parentGroupId>5eca6977-2945-3990-0000-000000000000</parentGroupId>
<position>
<x>374.42269627954784</x>
<y>917.6170600254031</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.6.0</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>HTTP Method</key>
<value>
<name>HTTP Method</name>
</value>
</entry>
<entry>
<key>Remote URL</key>
<value>
<name>Remote URL</name>
</value>
</entry>
<entry>
<key>SSL Context Service</key>
<value>
<identifiesControllerService>org.apache.nifi.ssl.SSLContextService</identifiesControllerService>
<name>SSL Context Service</name>
</value>
</entry>
<entry>
<key>Connection Timeout</key>
<value>
<name>Connection Timeout</name>
</value>
</entry>
<entry>
<key>Read Timeout</key>
<value>
<name>Read Timeout</name>
</value>
</entry>
<entry>
<key>Include Date Header</key>
<value>
<name>Include Date Header</name>
</value>
</entry>
<entry>
<key>Follow Redirects</key>
<value>
<name>Follow Redirects</name>
</value>
</entry>
<entry>
<key>Attributes to Send</key>
<value>
<name>Attributes to Send</name>
</value>
</entry>
<entry>
<key>Basic Authentication Username</key>
<value>
<name>Basic Authentication Username</name>
</value>
</entry>
<entry>
<key>Basic Authentication Password</key>
<value>
<name>Basic Authentication Password</name>
</value>
</entry>
<entry>
<key>Proxy Host</key>
<value>
<name>Proxy Host</name>
</value>
</entry>
<entry>
<key>Proxy Port</key>
<value>
<name>Proxy Port</name>
</value>
</entry>
<entry>
<key>Proxy Type</key>
<value>
<name>Proxy Type</name>
</value>
</entry>
<entry>
<key>invokehttp-proxy-user</key>
<value>
<name>invokehttp-proxy-user</name>
</value>
</entry>
<entry>
<key>invokehttp-proxy-password</key>
<value>
<name>invokehttp-proxy-password</name>
</value>
</entry>
<entry>
<key>Put Response Body In Attribute</key>
<value>
<name>Put Response Body In Attribute</name>
</value>
</entry>
<entry>
<key>Max Length To Put In Attribute</key>
<value>
<name>Max Length To Put In Attribute</name>
</value>
</entry>
<entry>
<key>Digest Authentication</key>
<value>
<name>Digest Authentication</name>
</value>
</entry>
<entry>
<key>Always Output Response</key>
<value>
<name>Always Output Response</name>
</value>
</entry>
<entry>
<key>Trusted Hostname</key>
<value>
<name>Trusted Hostname</name>
</value>
</entry>
<entry>
<key>Add Response Headers to Request</key>
<value>
<name>Add Response Headers to Request</name>
</value>
</entry>
<entry>
<key>Content-Type</key>
<value>
<name>Content-Type</name>
</value>
</entry>
<entry>
<key>send-message-body</key>
<value>
<name>send-message-body</name>
</value>
</entry>
<entry>
<key>Use Chunked Encoding</key>
<value>
<name>Use Chunked Encoding</name>
</value>
</entry>
<entry>
<key>Penalize on "No Retry"</key>
<value>
<name>Penalize on "No Retry"</name>
</value>
</entry>
<entry>
<key>use-etag</key>
<value>
<name>use-etag</name>
</value>
</entry>
<entry>
<key>etag-max-cache-size</key>
<value>
<name>etag-max-cache-size</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>HTTP Method</key>
<value>GET</value>
</entry>
<entry>
<key>Remote URL</key>
<value>${regex.3}</value>
</entry>
<entry>
<key>SSL Context Service</key>
</entry>
<entry>
<key>Connection Timeout</key>
<value>5 secs</value>
</entry>
<entry>
<key>Read Timeout</key>
<value>15 secs</value>
</entry>
<entry>
<key>Include Date Header</key>
<value>True</value>
</entry>
<entry>
<key>Follow Redirects</key>
<value>True</value>
</entry>
<entry>
<key>Attributes to Send</key>
</entry>
<entry>
<key>Basic Authentication Username</key>
</entry>
<entry>
<key>Basic Authentication Password</key>
</entry>
<entry>
<key>Proxy Host</key>
</entry>
<entry>
<key>Proxy Port</key>
</entry>
<entry>
<key>Proxy Type</key>
<value>http</value>
</entry>
<entry>
<key>invokehttp-proxy-user</key>
</entry>
<entry>
<key>invokehttp-proxy-password</key>
</entry>
<entry>
<key>Put Response Body In Attribute</key>
</entry>
<entry>
<key>Max Length To Put In Attribute</key>
<value>256</value>
</entry>
<entry>
<key>Digest Authentication</key>
<value>false</value>
</entry>
<entry>
<key>Always Output Response</key>
<value>false</value>
</entry>
<entry>
<key>Trusted Hostname</key>
</entry>
<entry>
<key>Add Response Headers to Request</key>
<value>false</value>
</entry>
<entry>
<key>Content-Type</key>
<value>${mime.type}</value>
</entry>
<entry>
<key>send-message-body</key>
<value>true</value>
</entry>
<entry>
<key>Use Chunked Encoding</key>
<value>false</value>
</entry>
<entry>
<key>Penalize on "No Retry"</key>
<value>false</value>
</entry>
<entry>
<key>use-etag</key>
<value>false</value>
</entry>
<entry>
<key>etag-max-cache-size</key>
<value>10MB</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>Get GDELT Data from URL</name>
<relationships>
<autoTerminate>true</autoTerminate>
<name>Failure</name>
</relationships>
<relationships>
<autoTerminate>true</autoTerminate>
<name>No Retry</name>
</relationships>
<relationships>
<autoTerminate>true</autoTerminate>
<name>Original</name>
</relationships>
<relationships>
<autoTerminate>false</autoTerminate>
<name>Response</name>
</relationships>
<relationships>
<autoTerminate>false</autoTerminate>
<name>Retry</name>
</relationships>
<state>RUNNING</state>
<style/>
<type>org.apache.nifi.processors.standard.InvokeHTTP</type>
</processors>
<processors>
<id>b2983d3e-72b8-3391-0000-000000000000</id>
<parentGroupId>5eca6977-2945-3990-0000-000000000000</parentGroupId>
<position>
<x>374.42269627954784</x>
<y>1451.6170600254031</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.6.0</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Directory</key>
<value>
<name>Directory</name>
</value>
</entry>
<entry>
<key>Conflict Resolution Strategy</key>
<value>
<name>Conflict Resolution Strategy</name>
</value>
</entry>
<entry>
<key>Create Missing Directories</key>
<value>
<name>Create Missing Directories</name>
</value>
</entry>
<entry>
<key>Maximum File Count</key>
<value>
<name>Maximum File Count</name>
</value>
</entry>
<entry>
<key>Last Modified Time</key>
<value>
<name>Last Modified Time</name>
</value>
</entry>
<entry>
<key>Permissions</key>
<value>
<name>Permissions</name>
</value>
</entry>
<entry>
<key>Owner</key>
<value>
<name>Owner</name>
</value>
</entry>
<entry>
<key>Group</key>
<value>
<name>Group</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Directory</key>
<value>/data/rbates/gdelt</value>
</entry>
<entry>
<key>Conflict Resolution Strategy</key>
<value>fail</value>
</entry>
<entry>
<key>Create Missing Directories</key>
<value>true</value>
</entry>
<entry>
<key>Maximum File Count</key>
</entry>
<entry>
<key>Last Modified Time</key>
</entry>
<entry>
<key>Permissions</key>
<value>644</value>
</entry>
<entry>
<key>Owner</key>
<value>rbates</value>
</entry>
<entry>
<key>Group</key>
<value>rbates</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>Save GDELT Data</name>
<relationships>
<autoTerminate>true</autoTerminate>
<name>failure</name>
</relationships>
<relationships>
<autoTerminate>true</autoTerminate>
<name>success</name>
</relationships>
<state>RUNNING</state>
<style/>
<type>org.apache.nifi.processors.standard.PutFile</type>
</processors>
<processors>
<id>1253949f-17b6-3aee-0000-000000000000</id>
<parentGroupId>5eca6977-2945-3990-0000-000000000000</parentGroupId>
<position>
<x>374.42269627954784</x>
<y>1177.531140868199</y>
</position>
<bundle>
<artifact>nifi-update-attribute-nar</artifact>
<group>org.apache.nifi</group>
<version>1.6.0</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Delete Attributes Expression</key>
<value>
<name>Delete Attributes Expression</name>
</value>
</entry>
<entry>
<key>Store State</key>
<value>
<name>Store State</name>
</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
<value>
<name>Stateful Variables Initial Value</name>
</value>
</entry>
<entry>
<key>filename</key>
<value>
<name>filename</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Delete Attributes Expression</key>
</entry>
<entry>
<key>Store State</key>
<value>Do not store state</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
</entry>
<entry>
<key>filename</key>
<value>${regex.4}</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>Update Flowfile Name from URL</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>RUNNING</state>
<style/>
<type>org.apache.nifi.processors.attributes.UpdateAttribute</type>
</processors>
<processors>
<id>27065e5a-8193-37f1-0000-000000000000</id>
<parentGroupId>5eca6977-2945-3990-0000-000000000000</parentGroupId>
<position>
<x>0.0</x>
<y>0.0</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.6.0</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>URL</key>
<value>
<name>URL</name>
</value>
</entry>
<entry>
<key>Filename</key>
<value>
<name>Filename</name>
</value>
</entry>
<entry>
<key>SSL Context Service</key>
<value>
<identifiesControllerService>org.apache.nifi.ssl.SSLContextService</identifiesControllerService>
<name>SSL Context Service</name>
</value>
</entry>
<entry>
<key>Username</key>
<value>
<name>Username</name>
</value>
</entry>
<entry>
<key>Password</key>
<value>
<name>Password</name>
</value>
</entry>
<entry>
<key>Connection Timeout</key>
<value>
<name>Connection Timeout</name>
</value>
</entry>
<entry>
<key>Data Timeout</key>
<value>
<name>Data Timeout</name>
</value>
</entry>
<entry>
<key>User Agent</key>
<value>
<name>User Agent</name>
</value>
</entry>
<entry>
<key>Accept Content-Type</key>
<value>
<name>Accept Content-Type</name>
</value>
</entry>
<entry>
<key>Follow Redirects</key>
<value>
<name>Follow Redirects</name>
</value>
</entry>
<entry>
<key>redirect-cookie-policy</key>
<value>
<name>redirect-cookie-policy</name>
</value>
</entry>
<entry>
<key>Proxy Host</key>
<value>
<name>Proxy Host</name>
</value>
</entry>
<entry>
<key>Proxy Port</key>
<value>
<name>Proxy Port</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>URL</key>
<value>http://data.gdeltproject.org/gdeltv2/lastupdate.txt</value>
</entry>
<entry>
<key>Filename</key>
<value>${now():format('yyyy-dd-MM')}-lastupdate.txt</value>
</entry>
<entry>
<key>SSL Context Service</key>
</entry>
<entry>
<key>Username</key>
</entry>
<entry>
<key>Password</key>
</entry>
<entry>
<key>Connection Timeout</key>
<value>30 sec</value>
</entry>
<entry>
<key>Data Timeout</key>
<value>30 sec</value>
</entry>
<entry>
<key>User Agent</key>
</entry>
<entry>
<key>Accept Content-Type</key>
</entry>
<entry>
<key>Follow Redirects</key>
<value>false</value>
</entry>
<entry>
<key>redirect-cookie-policy</key>
<value>default</value>
</entry>
<entry>
<key>Proxy Host</key>
</entry>
<entry>
<key>Proxy Port</key>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>15 min</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>Last 15m – English</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>RUNNING</state>
<style/>
<type>org.apache.nifi.processors.standard.GetHTTP</type>
</processors>
<processors>
<id>2ab60b2a-144f-3b76-0000-000000000000</id>
<parentGroupId>5eca6977-2945-3990-0000-000000000000</parentGroupId>
<position>
<x>747.1084899902344</x>
<y>0.0</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.6.0</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>URL</key>
<value>
<name>URL</name>
</value>
</entry>
<entry>
<key>Filename</key>
<value>
<name>Filename</name>
</value>
</entry>
<entry>
<key>SSL Context Service</key>
<value>
<identifiesControllerService>org.apache.nifi.ssl.SSLContextService</identifiesControllerService>
<name>SSL Context Service</name>
</value>
</entry>
<entry>
<key>Username</key>
<value>
<name>Username</name>
</value>
</entry>
<entry>
<key>Password</key>
<value>
<name>Password</name>
</value>
</entry>
<entry>
<key>Connection Timeout</key>
<value>
<name>Connection Timeout</name>
</value>
</entry>
<entry>
<key>Data Timeout</key>
<value>
<name>Data Timeout</name>
</value>
</entry>
<entry>
<key>User Agent</key>
<value>
<name>User Agent</name>
</value>
</entry>
<entry>
<key>Accept Content-Type</key>
<value>
<name>Accept Content-Type</name>
</value>
</entry>
<entry>
<key>Follow Redirects</key>
<value>
<name>Follow Redirects</name>
</value>
</entry>
<entry>
<key>redirect-cookie-policy</key>
<value>
<name>redirect-cookie-policy</name>
</value>
</entry>
<entry>
<key>Proxy Host</key>
<value>
<name>Proxy Host</name>
</value>
</entry>
<entry>
<key>Proxy Port</key>
<value>
<name>Proxy Port</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>URL</key>
<value>http://data.gdeltproject.org/gdeltv2/lastupdate-translation.txt</value>
</entry>
<entry>
<key>Filename</key>
<value>${now():format('yyyy-dd-MM')}-lastupdate-translation.txt</value>
</entry>
<entry>
<key>SSL Context Service</key>
</entry>
<entry>
<key>Username</key>
</entry>
<entry>
<key>Password</key>
</entry>
<entry>
<key>Connection Timeout</key>
<value>30 sec</value>
</entry>
<entry>
<key>Data Timeout</key>
<value>30 sec</value>
</entry>
<entry>
<key>User Agent</key>
</entry>
<entry>
<key>Accept Content-Type</key>
</entry>
<entry>
<key>Follow Redirects</key>
<value>false</value>
</entry>
<entry>
<key>redirect-cookie-policy</key>
<value>default</value>
</entry>
<entry>
<key>Proxy Host</key>
</entry>
<entry>
<key>Proxy Port</key>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>15 min</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>Last 15m – Translingual</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>RUNNING</state>
<style/>
<type>org.apache.nifi.processors.standard.GetHTTP</type>
</processors>
<processors>
<id>54f6611d-e7af-3fd5-0000-000000000000</id>
<parentGroupId>5eca6977-2945-3990-0000-000000000000</parentGroupId>
<position>
<x>374.42269627954784</x>
<y>399.3157196044922</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.6.0</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Line Split Count</key>
<value>
<name>Line Split Count</name>
</value>
</entry>
<entry>
<key>Maximum Fragment Size</key>
<value>
<name>Maximum Fragment Size</name>
</value>
</entry>
<entry>
<key>Header Line Count</key>
<value>
<name>Header Line Count</name>
</value>
</entry>
<entry>
<key>Header Line Marker Characters</key>
<value>
<name>Header Line Marker Characters</name>
</value>
</entry>
<entry>
<key>Remove Trailing Newlines</key>
<value>
<name>Remove Trailing Newlines</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Line Split Count</key>
<value>1</value>
</entry>
<entry>
<key>Maximum Fragment Size</key>
</entry>
<entry>
<key>Header Line Count</key>
<value>0</value>
</entry>
<entry>
<key>Header Line Marker Characters</key>
</entry>
<entry>
<key>Remove Trailing Newlines</key>
<value>true</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>Separate lines for URL listings</name>
<relationships>
<autoTerminate>true</autoTerminate>
<name>failure</name>
</relationships>
<relationships>
<autoTerminate>true</autoTerminate>
<name>original</name>
</relationships>
<relationships>
<autoTerminate>false</autoTerminate>
<name>splits</name>
</relationships>
<state>RUNNING</state>
<style/>
<type>org.apache.nifi.processors.standard.SplitText</type>
</processors>
<processors>
<id>6e24142e-c9f5-3a9d-0000-000000000000</id>
<parentGroupId>5eca6977-2945-3990-0000-000000000000</parentGroupId>
<position>
<x>374.42269627954784</x>
<y>652.6176053343637</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.6.0</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Character Set</key>
<value>
<name>Character Set</name>
</value>
</entry>
<entry>
<key>Maximum Buffer Size</key>
<value>
<name>Maximum Buffer Size</name>
</value>
</entry>
<entry>
<key>Maximum Capture Group Length</key>
<value>
<name>Maximum Capture Group Length</name>
</value>
</entry>
<entry>
<key>Enable Canonical Equivalence</key>
<value>
<name>Enable Canonical Equivalence</name>
</value>
</entry>
<entry>
<key>Enable Case-insensitive Matching</key>
<value>
<name>Enable Case-insensitive Matching</name>
</value>
</entry>
<entry>
<key>Permit Whitespace and Comments in Pattern</key>
<value>
<name>Permit Whitespace and Comments in Pattern</name>
</value>
</entry>
<entry>
<key>Enable DOTALL Mode</key>
<value>
<name>Enable DOTALL Mode</name>
</value>
</entry>
<entry>
<key>Enable Literal Parsing of the Pattern</key>
<value>
<name>Enable Literal Parsing of the Pattern</name>
</value>
</entry>
<entry>
<key>Enable Multiline Mode</key>
<value>
<name>Enable Multiline Mode</name>
</value>
</entry>
<entry>
<key>Enable Unicode-aware Case Folding</key>
<value>
<name>Enable Unicode-aware Case Folding</name>
</value>
</entry>
<entry>
<key>Enable Unicode Predefined Character Classes</key>
<value>
<name>Enable Unicode Predefined Character Classes</name>
</value>
</entry>
<entry>
<key>Enable Unix Lines Mode</key>
<value>
<name>Enable Unix Lines Mode</name>
</value>
</entry>
<entry>
<key>Include Capture Group 0</key>
<value>
<name>Include Capture Group 0</name>
</value>
</entry>
<entry>
<key>extract-text-enable-repeating-capture-group</key>
<value>
<name>extract-text-enable-repeating-capture-group</name>
</value>
</entry>
<entry>
<key>regex</key>
<value>
<name>regex</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Character Set</key>
<value>UTF-8</value>
</entry>
<entry>
<key>Maximum Buffer Size</key>
<value>1 MB</value>
</entry>
<entry>
<key>Maximum Capture Group Length</key>
<value>1024</value>
</entry>
<entry>
<key>Enable Canonical Equivalence</key>
<value>false</value>
</entry>
<entry>
<key>Enable Case-insensitive Matching</key>
<value>false</value>
</entry>
<entry>
<key>Permit Whitespace and Comments in Pattern</key>
<value>false</value>
</entry>
<entry>
<key>Enable DOTALL Mode</key>
<value>false</value>
</entry>
<entry>
<key>Enable Literal Parsing of the Pattern</key>
<value>false</value>
</entry>
<entry>
<key>Enable Multiline Mode</key>
<value>false</value>
</entry>
<entry>
<key>Enable Unicode-aware Case Folding</key>
<value>false</value>
</entry>
<entry>
<key>Enable Unicode Predefined Character Classes</key>
<value>false</value>
</entry>
<entry>
<key>Enable Unix Lines Mode</key>
<value>false</value>
</entry>
<entry>
<key>Include Capture Group 0</key>
<value>true</value>
</entry>
<entry>
<key>extract-text-enable-repeating-capture-group</key>
<value>false</value>
</entry>
<entry>
<key>regex</key>
<value>(\d+)\s+([0-9a-f]+)\s+((?:http|https):\/\/(?:[^\/]+\/)*(.*(?:export|mentions).*))</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>Extract URL from line</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>matched</name>
</relationships>
<relationships>
<autoTerminate>true</autoTerminate>
<name>unmatched</name>
</relationships>
<state>RUNNING</state>
<style/>
<type>org.apache.nifi.processors.standard.ExtractText</type>
</processors>
</snippet>
<timestamp>05/08/2018 17:13:58 EDT</timestamp>
</template>
@arpieb
Copy link
Author

arpieb commented May 8, 2018

Apache NiFi 1.6.0 Template for downloading event and mention 15m updates, in English and translated, from the GDELT 2.0 data service and storing them to disk.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment