Skip to content

Instantly share code, notes, and snippets.

@mattyb149
Created July 11, 2018 16:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mattyb149/ee134bbe63a1315eb643a8141ed9ae13 to your computer and use it in GitHub Desktop.
Save mattyb149/ee134bbe63a1315eb643a8141ed9ae13 to your computer and use it in GitHub Desktop.
NiFi SplitRecord example that converts CSV to Avro while splitting files
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<template encoding-version="1.2">
<description></description>
<groupId>89f8c6b8-0164-1000-d937-82c55b6fdffd</groupId>
<name>SplitRecord_w_Conversion</name>
<snippet>
<controllerServices>
<id>350009ab-3f44-32bf-0000-000000000000</id>
<parentGroupId>fa0118f1-ff03-3030-0000-000000000000</parentGroupId>
<bundle>
<artifact>nifi-record-serialization-services-nar</artifact>
<group>org.apache.nifi</group>
<version>1.8.0-SNAPSHOT</version>
</bundle>
<comments></comments>
<descriptors>
<entry>
<key>schema-access-strategy</key>
<value>
<name>schema-access-strategy</name>
</value>
</entry>
<entry>
<key>schema-registry</key>
<value>
<identifiesControllerService>org.apache.nifi.schemaregistry.services.SchemaRegistry</identifiesControllerService>
<name>schema-registry</name>
</value>
</entry>
<entry>
<key>schema-name</key>
<value>
<name>schema-name</name>
</value>
</entry>
<entry>
<key>schema-version</key>
<value>
<name>schema-version</name>
</value>
</entry>
<entry>
<key>schema-branch</key>
<value>
<name>schema-branch</name>
</value>
</entry>
<entry>
<key>schema-text</key>
<value>
<name>schema-text</name>
</value>
</entry>
<entry>
<key>csv-reader-csv-parser</key>
<value>
<name>csv-reader-csv-parser</name>
</value>
</entry>
<entry>
<key>Date Format</key>
<value>
<name>Date Format</name>
</value>
</entry>
<entry>
<key>Time Format</key>
<value>
<name>Time Format</name>
</value>
</entry>
<entry>
<key>Timestamp Format</key>
<value>
<name>Timestamp Format</name>
</value>
</entry>
<entry>
<key>CSV Format</key>
<value>
<name>CSV Format</name>
</value>
</entry>
<entry>
<key>Value Separator</key>
<value>
<name>Value Separator</name>
</value>
</entry>
<entry>
<key>Skip Header Line</key>
<value>
<name>Skip Header Line</name>
</value>
</entry>
<entry>
<key>ignore-csv-header</key>
<value>
<name>ignore-csv-header</name>
</value>
</entry>
<entry>
<key>Quote Character</key>
<value>
<name>Quote Character</name>
</value>
</entry>
<entry>
<key>Escape Character</key>
<value>
<name>Escape Character</name>
</value>
</entry>
<entry>
<key>Comment Marker</key>
<value>
<name>Comment Marker</name>
</value>
</entry>
<entry>
<key>Null String</key>
<value>
<name>Null String</name>
</value>
</entry>
<entry>
<key>Trim Fields</key>
<value>
<name>Trim Fields</name>
</value>
</entry>
<entry>
<key>csvutils-character-set</key>
<value>
<name>csvutils-character-set</name>
</value>
</entry>
</descriptors>
<name>CSVReader</name>
<persistsState>false</persistsState>
<properties>
<entry>
<key>schema-access-strategy</key>
<value>schema-text-property</value>
</entry>
<entry>
<key>schema-registry</key>
</entry>
<entry>
<key>schema-name</key>
</entry>
<entry>
<key>schema-version</key>
</entry>
<entry>
<key>schema-branch</key>
</entry>
<entry>
<key>schema-text</key>
<value>${inferred.avro.schema}</value>
</entry>
<entry>
<key>csv-reader-csv-parser</key>
<value>jackson-csv</value>
</entry>
<entry>
<key>Date Format</key>
</entry>
<entry>
<key>Time Format</key>
</entry>
<entry>
<key>Timestamp Format</key>
</entry>
<entry>
<key>CSV Format</key>
</entry>
<entry>
<key>Value Separator</key>
</entry>
<entry>
<key>Skip Header Line</key>
<value>true</value>
</entry>
<entry>
<key>ignore-csv-header</key>
<value>true</value>
</entry>
<entry>
<key>Quote Character</key>
</entry>
<entry>
<key>Escape Character</key>
</entry>
<entry>
<key>Comment Marker</key>
</entry>
<entry>
<key>Null String</key>
</entry>
<entry>
<key>Trim Fields</key>
</entry>
<entry>
<key>csvutils-character-set</key>
</entry>
</properties>
<state>ENABLED</state>
<type>org.apache.nifi.csv.CSVReader</type>
</controllerServices>
<controllerServices>
<id>8cd2c752-582f-327b-0000-000000000000</id>
<parentGroupId>fa0118f1-ff03-3030-0000-000000000000</parentGroupId>
<bundle>
<artifact>nifi-record-serialization-services-nar</artifact>
<group>org.apache.nifi</group>
<version>1.8.0-SNAPSHOT</version>
</bundle>
<comments></comments>
<descriptors>
<entry>
<key>Schema Write Strategy</key>
<value>
<name>Schema Write Strategy</name>
</value>
</entry>
<entry>
<key>schema-access-strategy</key>
<value>
<name>schema-access-strategy</name>
</value>
</entry>
<entry>
<key>schema-registry</key>
<value>
<identifiesControllerService>org.apache.nifi.schemaregistry.services.SchemaRegistry</identifiesControllerService>
<name>schema-registry</name>
</value>
</entry>
<entry>
<key>schema-name</key>
<value>
<name>schema-name</name>
</value>
</entry>
<entry>
<key>schema-version</key>
<value>
<name>schema-version</name>
</value>
</entry>
<entry>
<key>schema-branch</key>
<value>
<name>schema-branch</name>
</value>
</entry>
<entry>
<key>schema-text</key>
<value>
<name>schema-text</name>
</value>
</entry>
<entry>
<key>compression-format</key>
<value>
<name>compression-format</name>
</value>
</entry>
</descriptors>
<name>AvroRecordSetWriter</name>
<persistsState>false</persistsState>
<properties>
<entry>
<key>Schema Write Strategy</key>
</entry>
<entry>
<key>schema-access-strategy</key>
</entry>
<entry>
<key>schema-registry</key>
</entry>
<entry>
<key>schema-name</key>
</entry>
<entry>
<key>schema-version</key>
</entry>
<entry>
<key>schema-branch</key>
</entry>
<entry>
<key>schema-text</key>
</entry>
<entry>
<key>compression-format</key>
</entry>
</properties>
<state>ENABLED</state>
<type>org.apache.nifi.avro.AvroRecordSetWriter</type>
</controllerServices>
<processors>
<id>8acee69c-23ad-315a-0000-000000000000</id>
<parentGroupId>fa0118f1-ff03-3030-0000-000000000000</parentGroupId>
<position>
<x>0.0</x>
<y>0.0</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.8.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Record Reader</key>
<value>
<identifiesControllerService>org.apache.nifi.serialization.RecordReaderFactory</identifiesControllerService>
<name>Record Reader</name>
</value>
</entry>
<entry>
<key>Record Writer</key>
<value>
<identifiesControllerService>org.apache.nifi.serialization.RecordSetWriterFactory</identifiesControllerService>
<name>Record Writer</name>
</value>
</entry>
<entry>
<key>Records Per Split</key>
<value>
<name>Records Per Split</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Record Reader</key>
<value>350009ab-3f44-32bf-0000-000000000000</value>
</entry>
<entry>
<key>Record Writer</key>
<value>8cd2c752-582f-327b-0000-000000000000</value>
</entry>
<entry>
<key>Records Per Split</key>
<value>10000</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<executionNodeRestricted>false</executionNodeRestricted>
<name>SplitRecord</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>failure</name>
</relationships>
<relationships>
<autoTerminate>true</autoTerminate>
<name>original</name>
</relationships>
<relationships>
<autoTerminate>false</autoTerminate>
<name>splits</name>
</relationships>
<state>STOPPED</state>
<style/>
<type>org.apache.nifi.processors.standard.SplitRecord</type>
</processors>
</snippet>
<timestamp>07/11/2018 12:20:51 EDT</timestamp>
</template>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment