Skip to content

Instantly share code, notes, and snippets.

@alopresto
Created February 18, 2017 00:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alopresto/2f26863afee6cc3109c1a4c5d33c1234 to your computer and use it in GitHub Desktop.
Save alopresto/2f26863afee6cc3109c1a4c5d33c1234 to your computer and use it in GitHub Desktop.
Lists files from a directory, then updates the attributes so a processor can run a command against "pairs" of files (e.g., sam.txt and sam.txt.gz) from a single flowfile.
<?xml version="1.0" encoding="UTF-8"?>
<!-- Flow template; the encoding is declared explicitly so consumers do not have to infer it. -->
<template encoding-version="1.0">
<description>Lists files from a directory, then updates the attributes so a processor can run a command against "pairs" of files (i.e. sam.txt and sam.txt.gz) from a single flowfile. </description>
<groupId>4e3e7f99-015a-1000-b05e-27a45fe36d70</groupId>
<name>ListFiles</name>
<snippet>
<!-- Connection 015a1012: carries the "success" relationship from processor
     4e6fdfe4-015a-1000-0000-000000000000 (named LogAttribute in this template)
     to processor 015a1011-dfe4-1e6f-0000-000000000000 (named UpdateAttribute). -->
<connections>
  <id>015a1012-dfe4-1e6f-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>

  <!-- Back-pressure thresholds for the queue on this connection. -->
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>

  <!-- Receiving end of the connection. -->
  <destination>
    <groupId>4e3e7f99-015a-1000-0000-000000000000</groupId>
    <id>015a1011-dfe4-1e6f-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>

  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <!-- The connection is unnamed. -->
  <name/>
  <selectedRelationships>success</selectedRelationships>

  <!-- Sending end of the connection. -->
  <source>
    <groupId>4e3e7f99-015a-1000-0000-000000000000</groupId>
    <id>4e6fdfe4-015a-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>

  <zIndex>0</zIndex>
</connections>
<!-- Connection 015a1029: carries the "success" relationship from processor
     015a1011-dfe4-1e6f-0000-000000000000 (named UpdateAttribute in this template)
     to processor 015a1028-dfe4-1e6f-0000-000000000000 (named ExecuteStreamCommand). -->
<connections>
  <id>015a1029-dfe4-1e6f-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>

  <!-- Back-pressure thresholds for the queue on this connection. -->
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>

  <!-- Receiving end of the connection. -->
  <destination>
    <groupId>4e3e7f99-015a-1000-0000-000000000000</groupId>
    <id>015a1028-dfe4-1e6f-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>

  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <!-- The connection is unnamed. -->
  <name/>
  <selectedRelationships>success</selectedRelationships>

  <!-- Sending end of the connection. -->
  <source>
    <groupId>4e3e7f99-015a-1000-0000-000000000000</groupId>
    <id>015a1011-dfe4-1e6f-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>

  <zIndex>0</zIndex>
</connections>
<!-- Connection 015a102a: carries the "output stream" relationship from processor
     015a1028-dfe4-1e6f-0000-000000000000 (named ExecuteStreamCommand in this template)
     to processor 015a1005-dfe4-1e6f-0000-000000000000 (named LogAttribute). -->
<connections>
  <id>015a102a-dfe4-1e6f-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>

  <!-- Back-pressure thresholds for the queue on this connection. -->
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>

  <!-- Receiving end of the connection. -->
  <destination>
    <groupId>4e3e7f99-015a-1000-0000-000000000000</groupId>
    <id>015a1005-dfe4-1e6f-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>

  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <!-- The connection is unnamed. -->
  <name/>
  <selectedRelationships>output stream</selectedRelationships>

  <!-- Sending end of the connection. -->
  <source>
    <groupId>4e3e7f99-015a-1000-0000-000000000000</groupId>
    <id>015a1028-dfe4-1e6f-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>

  <zIndex>0</zIndex>
</connections>
<!-- Connection 4e7020c6: carries the "success" relationship from processor
     4e6f0fcf-015a-1000-0000-000000000000 (named "List Images (Not Gzipped)" in this template)
     to processor 4e6fdfe4-015a-1000-0000-000000000000 (named LogAttribute). -->
<connections>
  <id>4e7020c6-015a-1000-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>

  <!-- Back-pressure thresholds for the queue on this connection. -->
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>

  <!-- Receiving end of the connection. -->
  <destination>
    <groupId>4e3e7f99-015a-1000-0000-000000000000</groupId>
    <id>4e6fdfe4-015a-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>

  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <!-- The connection is unnamed. -->
  <name/>
  <selectedRelationships>success</selectedRelationships>

  <!-- Sending end of the connection. -->
  <source>
    <groupId>4e3e7f99-015a-1000-0000-000000000000</groupId>
    <id>4e6f0fcf-015a-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>

  <zIndex>0</zIndex>
</connections>
<!-- Canvas label holding a free-text note for readers of the flow. -->
<labels>
  <id>015a102b-dfe4-1e6f-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>

  <!-- Placement and size of the label on the canvas. -->
  <position>
    <x>0.0</x>
    <y>571.6000076293946</y>
  </position>
  <height>224.3999481201172</height>

  <!-- Text shown in the label. -->
  <label>You could also fetch the files here, etc.</label>

  <style>
    <entry>
      <key>font-size</key>
      <value>12px</value>
    </entry>
  </style>
  <width>539.9999389648438</width>
</labels>
<!-- LogAttribute processor 015a1005: destination of the "output stream" connection
     in this template. Its own "success" relationship is auto-terminated. -->
<processors>
  <id>015a1005-dfe4-1e6f-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>
  <position>
    <x>157.1999954223635</x>
    <y>856.799934387207</y>
  </position>

  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>

    <!-- Property descriptors: one entry per property name used under <properties>. -->
    <descriptors>
      <entry>
        <key>Log Level</key>
        <value>
          <name>Log Level</name>
        </value>
      </entry>
      <entry>
        <key>Log Payload</key>
        <value>
          <name>Log Payload</name>
        </value>
      </entry>
      <entry>
        <key>Attributes to Log</key>
        <value>
          <name>Attributes to Log</name>
        </value>
      </entry>
      <entry>
        <key>Attributes to Ignore</key>
        <value>
          <name>Attributes to Ignore</name>
        </value>
      </entry>
      <entry>
        <key>Log prefix</key>
        <value>
          <name>Log prefix</name>
        </value>
      </entry>
    </descriptors>

    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>

    <!-- Property values; an entry with a key but no value leaves that property unset here. -->
    <properties>
      <entry>
        <key>Log Level</key>
        <value>info</value>
      </entry>
      <entry>
        <key>Log Payload</key>
        <value>true</value>
      </entry>
      <entry>
        <key>Attributes to Log</key>
      </entry>
      <entry>
        <key>Attributes to Ignore</key>
      </entry>
      <entry>
        <key>Log prefix</key>
      </entry>
    </properties>

    <!-- Scheduling settings. -->
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>

  <name>LogAttribute</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>success</name>
  </relationships>
  <style/>
  <type>org.apache.nifi.processors.standard.LogAttribute</type>
</processors>
<!-- UpdateAttribute processor 015a1011: defines two custom properties,
     image_filename and gzip_filename, both built from ${filename}. -->
<processors>
  <id>015a1011-dfe4-1e6f-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>
  <position>
    <x>157.1999954223635</x>
    <y>405.59998321533203</y>
  </position>

  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>

    <!-- Property descriptors: one entry per property name used under <properties>. -->
    <descriptors>
      <entry>
        <key>Delete Attributes Expression</key>
        <value>
          <name>Delete Attributes Expression</name>
        </value>
      </entry>
      <entry>
        <key>Store State</key>
        <value>
          <name>Store State</name>
        </value>
      </entry>
      <entry>
        <key>Stateful Variables Initial Value</key>
        <value>
          <name>Stateful Variables Initial Value</name>
        </value>
      </entry>
      <entry>
        <key>gzip_filename</key>
        <value>
          <name>gzip_filename</name>
        </value>
      </entry>
      <entry>
        <key>image_filename</key>
        <value>
          <name>image_filename</name>
        </value>
      </entry>
    </descriptors>

    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>

    <!-- Property values; an entry with a key but no value leaves that property unset here. -->
    <properties>
      <entry>
        <key>Delete Attributes Expression</key>
      </entry>
      <entry>
        <key>Store State</key>
        <value>Do not store state</value>
      </entry>
      <entry>
        <key>Stateful Variables Initial Value</key>
      </entry>
      <!-- The "pair": the gzip name is the listed file name with ".gz" appended. -->
      <!-- NOTE(review): ${filename} presumably holds only the file name without its
           directory; confirm whether the downstream command needs a full path. -->
      <entry>
        <key>gzip_filename</key>
        <value>${filename}.gz</value>
      </entry>
      <entry>
        <key>image_filename</key>
        <value>${filename}</value>
      </entry>
    </properties>

    <!-- Scheduling settings. -->
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>

  <name>UpdateAttribute</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style/>
  <type>org.apache.nifi.processors.attributes.UpdateAttribute</type>
</processors>
<!-- ExecuteStreamCommand processor 015a1028: runs the configured command ("echo")
     with arguments built from the image_filename and gzip_filename attributes.
     The "original" relationship is auto-terminated; "output stream" is not. -->
<processors>
  <id>015a1028-dfe4-1e6f-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>
  <position>
    <x>157.1999954223635</x>
    <y>624.0000076293945</y>
  </position>

  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>

    <!-- Property descriptors: one entry per property name used under <properties>. -->
    <descriptors>
      <entry>
        <key>Command Arguments</key>
        <value>
          <name>Command Arguments</name>
        </value>
      </entry>
      <entry>
        <key>Command Path</key>
        <value>
          <name>Command Path</name>
        </value>
      </entry>
      <entry>
        <key>Ignore STDIN</key>
        <value>
          <name>Ignore STDIN</name>
        </value>
      </entry>
      <entry>
        <key>Working Directory</key>
        <value>
          <name>Working Directory</name>
        </value>
      </entry>
      <entry>
        <key>Argument Delimiter</key>
        <value>
          <name>Argument Delimiter</name>
        </value>
      </entry>
      <entry>
        <key>Output Destination Attribute</key>
        <value>
          <name>Output Destination Attribute</name>
        </value>
      </entry>
      <entry>
        <key>Max Attribute Length</key>
        <value>
          <name>Max Attribute Length</name>
        </value>
      </entry>
    </descriptors>

    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>

    <!-- Property values; an entry with a key but no value leaves that property unset here. -->
    <properties>
      <!-- The arguments are separated with ";" so that they agree with the
           Argument Delimiter property below. Each flag and each file name is then
           passed to the command as its own argument, instead of the whole string
           arriving as a single argument. With "echo" the printed text is unchanged. -->
      <entry>
        <key>Command Arguments</key>
        <value>--image;${image_filename};--gzip;${gzip_filename}</value>
      </entry>
      <entry>
        <key>Command Path</key>
        <value>echo</value>
      </entry>
      <entry>
        <key>Ignore STDIN</key>
        <value>false</value>
      </entry>
      <entry>
        <key>Working Directory</key>
      </entry>
      <!-- Character that separates the individual entries of Command Arguments. -->
      <entry>
        <key>Argument Delimiter</key>
        <value>;</value>
      </entry>
      <entry>
        <key>Output Destination Attribute</key>
      </entry>
      <entry>
        <key>Max Attribute Length</key>
        <value>256</value>
      </entry>
    </properties>

    <!-- Scheduling settings. -->
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>

  <name>ExecuteStreamCommand</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>original</name>
  </relationships>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>output stream</name>
  </relationships>
  <style/>
  <type>org.apache.nifi.processors.standard.ExecuteStreamCommand</type>
</processors>
<!-- ListFile processor 4e6f0fcf, named "List Images (Not Gzipped)": lists files
     under the configured Input Directory that pass the File Filter. It is the
     source of the first connection in this template. -->
<processors>
  <id>4e6f0fcf-015a-1000-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>
  <position>
    <x>157.1999954223635</x>
    <y>0.0</y>
  </position>

  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>

    <!-- Property descriptors: one entry per property name used under <properties>. -->
    <descriptors>
      <entry>
        <key>Input Directory</key>
        <value>
          <name>Input Directory</name>
        </value>
      </entry>
      <entry>
        <key>Recurse Subdirectories</key>
        <value>
          <name>Recurse Subdirectories</name>
        </value>
      </entry>
      <entry>
        <key>Input Directory Location</key>
        <value>
          <name>Input Directory Location</name>
        </value>
      </entry>
      <entry>
        <key>File Filter</key>
        <value>
          <name>File Filter</name>
        </value>
      </entry>
      <entry>
        <key>Path Filter</key>
        <value>
          <name>Path Filter</name>
        </value>
      </entry>
      <entry>
        <key>Minimum File Age</key>
        <value>
          <name>Minimum File Age</name>
        </value>
      </entry>
      <entry>
        <key>Maximum File Age</key>
        <value>
          <name>Maximum File Age</name>
        </value>
      </entry>
      <entry>
        <key>Minimum File Size</key>
        <value>
          <name>Minimum File Size</name>
        </value>
      </entry>
      <entry>
        <key>Maximum File Size</key>
        <value>
          <name>Maximum File Size</name>
        </value>
      </entry>
      <entry>
        <key>Ignore Hidden Files</key>
        <value>
          <name>Ignore Hidden Files</name>
        </value>
      </entry>
    </descriptors>

    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>

    <!-- Property values; an entry with a key but no value leaves that property unset here. -->
    <properties>
      <!-- Machine-specific absolute path; adjust it after importing the template. -->
      <entry>
        <key>Input Directory</key>
        <value>/Users/alopresto/Workspace/scratch/listfiles</value>
      </entry>
      <entry>
        <key>Recurse Subdirectories</key>
        <value>true</value>
      </entry>
      <entry>
        <key>Input Directory Location</key>
        <value>Local</value>
      </entry>
      <!-- Regex: any run of non-dot characters followed by ".txt".
           NOTE(review): presumably matched against the whole file name, which
           would exclude names such as sam.txt.gz; confirm against the ListFile docs. -->
      <entry>
        <key>File Filter</key>
        <value>[^\.]*\.txt</value>
      </entry>
      <entry>
        <key>Path Filter</key>
      </entry>
      <entry>
        <key>Minimum File Age</key>
        <value>0 sec</value>
      </entry>
      <entry>
        <key>Maximum File Age</key>
      </entry>
      <entry>
        <key>Minimum File Size</key>
        <value>0 B</value>
      </entry>
      <entry>
        <key>Maximum File Size</key>
      </entry>
      <entry>
        <key>Ignore Hidden Files</key>
        <value>true</value>
      </entry>
    </properties>

    <!-- Scheduling settings. -->
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>

  <name>List Images (Not Gzipped)</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style/>
  <type>org.apache.nifi.processors.standard.ListFile</type>
</processors>
<!-- LogAttribute processor 4e6fdfe4: sits between the ListFile processor and the
     UpdateAttribute processor in this template. Its "success" relationship is
     not auto-terminated. -->
<processors>
  <id>4e6fdfe4-015a-1000-0000-000000000000</id>
  <parentGroupId>4e3e7f99-015a-1000-0000-000000000000</parentGroupId>
  <position>
    <x>157.1999954223635</x>
    <y>202.79996490478516</y>
  </position>

  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments/>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>

    <!-- Property descriptors: one entry per property name used under <properties>. -->
    <descriptors>
      <entry>
        <key>Log Level</key>
        <value>
          <name>Log Level</name>
        </value>
      </entry>
      <entry>
        <key>Log Payload</key>
        <value>
          <name>Log Payload</name>
        </value>
      </entry>
      <entry>
        <key>Attributes to Log</key>
        <value>
          <name>Attributes to Log</name>
        </value>
      </entry>
      <entry>
        <key>Attributes to Ignore</key>
        <value>
          <name>Attributes to Ignore</name>
        </value>
      </entry>
      <entry>
        <key>Log prefix</key>
        <value>
          <name>Log prefix</name>
        </value>
      </entry>
    </descriptors>

    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>

    <!-- Property values; an entry with a key but no value leaves that property unset here. -->
    <properties>
      <entry>
        <key>Log Level</key>
        <value>info</value>
      </entry>
      <entry>
        <key>Log Payload</key>
        <value>true</value>
      </entry>
      <entry>
        <key>Attributes to Log</key>
      </entry>
      <entry>
        <key>Attributes to Ignore</key>
      </entry>
      <entry>
        <key>Log prefix</key>
      </entry>
    </properties>

    <!-- Scheduling settings. -->
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>

  <name>LogAttribute</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style/>
  <type>org.apache.nifi.processors.standard.LogAttribute</type>
</processors>
</snippet>
<timestamp>02/17/2017 16:07:39 PST</timestamp>
</template>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment