Skip to content

Instantly share code, notes, and snippets.

@jfrazee
Last active July 28, 2016 14:40
Show Gist options
  • Save jfrazee/a501336826aa77d0daa74e8e0aa3afd0 to your computer and use it in GitHub Desktop.
Save jfrazee/a501336826aa77d0daa74e8e0aa3afd0 to your computer and use it in GitHub Desktop.
NIFI-2142 Cache compiled XSLT in TransformXml
<?xml version="1.0" ?>
<template encoding-version="1.0">
<description>Example of running TransformXml with and without caching improvements added in NIFI-2142</description>
<groupId>d5323c9a-890e-4205-b824-c45be4fa5953</groupId>
<name>TransformXml with Caching Example</name>
<snippet>
<connections>
  <!-- Routes the "success" relationship of "Transform XML to JSON w/o cache" (31c3537a)
       to "Set mime.type to application/json" (31c5ddbb). -->
  <id>31c5f25a-0156-1000-0000-000000000000</id>
  <parentGroupId>d5323c9a-890e-4205-b824-c45be4fa5953</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>d5323c9a-890e-4205-b824-c45be4fa5953</groupId>
    <id>31c5ddbb-0156-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>d5323c9a-890e-4205-b824-c45be4fa5953</groupId>
    <id>31c3537a-0156-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<connections>
  <!-- Routes the "success" relationship of "Transform XML to JSON w/ cache" (01561000)
       to "Set mime.type to application/json" (31c5ddbb). -->
  <id>31c6054f-0156-1000-0000-000000000000</id>
  <parentGroupId>d5323c9a-890e-4205-b824-c45be4fa5953</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>d5323c9a-890e-4205-b824-c45be4fa5953</groupId>
    <id>31c5ddbb-0156-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>d5323c9a-890e-4205-b824-c45be4fa5953</groupId>
    <id>01561000-537a-11c3-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<connections>
  <!-- Routes the "success" relationship of "Generate 1 FlowFile per sec" (31c1d738)
       to "Replace text w/ XML" (31c904f9). -->
  <id>31c9b4b9-0156-1000-0000-000000000000</id>
  <parentGroupId>d5323c9a-890e-4205-b824-c45be4fa5953</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>d5323c9a-890e-4205-b824-c45be4fa5953</groupId>
    <id>31c904f9-0156-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>d5323c9a-890e-4205-b824-c45be4fa5953</groupId>
    <id>31c1d738-0156-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<connections>
  <!-- Routes the "success" relationship of "Replace text w/ XML" (31c904f9)
       to "Set mime.type to application/xml" (31cd4eac). -->
  <id>31cde82d-0156-1000-0000-000000000000</id>
  <parentGroupId>d5323c9a-890e-4205-b824-c45be4fa5953</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>d5323c9a-890e-4205-b824-c45be4fa5953</groupId>
    <id>31cd4eac-0156-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>d5323c9a-890e-4205-b824-c45be4fa5953</groupId>
    <id>31c904f9-0156-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<connections>
  <!-- Routes the "success" relationship of "Duplicate FlowFile x 1000" (31ddb93b)
       to "Transform XML to JSON w/o cache" (31c3537a). -->
  <id>31ddd9e3-0156-1000-0000-000000000000</id>
  <parentGroupId>d5323c9a-890e-4205-b824-c45be4fa5953</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>d5323c9a-890e-4205-b824-c45be4fa5953</groupId>
    <id>31c3537a-0156-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>d5323c9a-890e-4205-b824-c45be4fa5953</groupId>
    <id>31ddb93b-0156-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<connections>
  <!-- Routes the "success" relationship of "Duplicate FlowFile x 1000" (31ddb93b)
       to "Transform XML to JSON w/ cache" (01561000). -->
  <id>31dde503-0156-1000-0000-000000000000</id>
  <parentGroupId>d5323c9a-890e-4205-b824-c45be4fa5953</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>d5323c9a-890e-4205-b824-c45be4fa5953</groupId>
    <id>01561000-537a-11c3-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>d5323c9a-890e-4205-b824-c45be4fa5953</groupId>
    <id>31ddb93b-0156-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<connections>
  <!-- Routes the "success" relationship of "Set mime.type to application/xml" (31cd4eac)
       to "Duplicate FlowFile x 1000" (31ddb93b). -->
  <id>31ddfdaf-0156-1000-0000-000000000000</id>
  <parentGroupId>d5323c9a-890e-4205-b824-c45be4fa5953</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>d5323c9a-890e-4205-b824-c45be4fa5953</groupId>
    <id>31ddb93b-0156-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>d5323c9a-890e-4205-b824-c45be4fa5953</groupId>
    <id>31cd4eac-0156-1000-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<processors>
  <!-- TransformXml instance for the cached side of the comparison: applies
       /tmp/xml2json.xsl with cache-size 100 and cache-ttl-after-last-access 60 secs.
       "failure" is auto-terminated; "success" is not. -->
  <id>01561000-537a-11c3-0000-000000000000</id>
  <parentGroupId>d5323c9a-890e-4205-b824-c45be4fa5953</parentGroupId>
  <position>
    <x>1107.5198669433594</x>
    <y>512.7399761199952</y>
  </position>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>XSLT file name</key>
        <value>/tmp/xml2json.xsl</value>
      </entry>
      <entry>
        <key>indent-output</key>
        <value>true</value>
      </entry>
      <entry>
        <key>cache-size</key>
        <value>100</value>
      </entry>
      <entry>
        <key>cache-ttl-after-last-access</key>
        <value>60 secs</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>Transform XML to JSON w/ cache</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>failure</name>
  </relationships>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style></style>
  <type>org.apache.nifi.processors.standard.TransformXml</type>
</processors>
<processors>
  <!-- GenerateFlowFile at the head of the flow: timer-driven with a 1 sec scheduling
       period, Batch Size 1 and File Size 0B. -->
  <id>31c1d738-0156-1000-0000-000000000000</id>
  <parentGroupId>d5323c9a-890e-4205-b824-c45be4fa5953</parentGroupId>
  <position>
    <x>0.0</x>
    <y>0.0</y>
  </position>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>File Size</key>
        <value>0B</value>
      </entry>
      <entry>
        <key>Batch Size</key>
        <value>1</value>
      </entry>
      <entry>
        <key>Data Format</key>
        <value>Text</value>
      </entry>
      <entry>
        <key>Unique FlowFiles</key>
        <value>false</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>1 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>Generate 1 FlowFile per sec</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style></style>
  <type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
</processors>
<processors>
  <!-- TransformXml instance for the uncached side of the comparison: applies
       /tmp/xml2json.xsl with cache-size 0 (presumably this turns the stylesheet cache
       off, as the processor name indicates; confirm against the TransformXml docs).
       "failure" is auto-terminated; "success" is not. -->
  <id>31c3537a-0156-1000-0000-000000000000</id>
  <parentGroupId>d5323c9a-890e-4205-b824-c45be4fa5953</parentGroupId>
  <position>
    <x>365.19978637695317</x>
    <y>512.4999980926514</y>
  </position>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>XSLT file name</key>
        <value>/tmp/xml2json.xsl</value>
      </entry>
      <entry>
        <key>indent-output</key>
        <value>true</value>
      </entry>
      <entry>
        <key>cache-size</key>
        <value>0</value>
      </entry>
      <entry>
        <key>cache-ttl-after-last-access</key>
        <value>60 secs</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>Transform XML to JSON w/o cache</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>failure</name>
  </relationships>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style></style>
  <type>org.apache.nifi.processors.standard.TransformXml</type>
</processors>
<processors>
  <!-- UpdateAttribute that sets mime.extension to ".json" and mime.type to
       "application/json". The "Delete Attributes Expression" entry carries a key but no
       value element. "success" is auto-terminated here. -->
  <id>31c5ddbb-0156-1000-0000-000000000000</id>
  <parentGroupId>d5323c9a-890e-4205-b824-c45be4fa5953</parentGroupId>
  <position>
    <x>738.6398986816406</x>
    <y>760.6600353240967</y>
  </position>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>Delete Attributes Expression</key>
      </entry>
      <entry>
        <key>mime.extension</key>
        <value>.json</value>
      </entry>
      <entry>
        <key>mime.type</key>
        <value>application/json</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>Set mime.type to application/json</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>success</name>
  </relationships>
  <style></style>
  <type>org.apache.nifi.processors.attributes.UpdateAttribute</type>
</processors>
<processors>
  <!-- ReplaceText configured as a Regex Replace over the entire text: the pattern
       (?s:^.*$) is replaced with a fixed sample "messages" XML document.
       The Replacement Value below is character data, so its continuation lines are
       deliberately left unindented: any added whitespace would become part of the value. -->
  <id>31c904f9-0156-1000-0000-000000000000</id>
  <parentGroupId>d5323c9a-890e-4205-b824-c45be4fa5953</parentGroupId>
  <position>
    <x>726.7799145507812</x>
    <y>1.5600074768066463</y>
  </position>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>Regular Expression</key>
        <value>(?s:^.*$)</value>
      </entry>
      <entry>
        <key>Replacement Value</key>
        <value>&lt;?xml version="1.0" encoding="UTF-8"?&gt;
&lt;messages&gt;
&lt;message id="1"&gt;
&lt;text&gt;lions&lt;/text&gt;
&lt;code&gt;L&lt;/code&gt;
&lt;/message&gt;
&lt;message id="2"&gt;
&lt;text&gt;tigers&lt;/text&gt;
&lt;code&gt;T&lt;/code&gt;
&lt;/message&gt;
&lt;message id="3"&gt;
&lt;text&gt;bears&lt;/text&gt;
&lt;code&gt;B&lt;/code&gt;
&lt;/message&gt;
&lt;message id="4"&gt;
&lt;text&gt;oh, my!&lt;/text&gt;
&lt;code&gt;O&lt;/code&gt;
&lt;/message&gt;
&lt;/messages&gt;</value>
      </entry>
      <entry>
        <key>Character Set</key>
        <value>UTF-8</value>
      </entry>
      <entry>
        <key>Maximum Buffer Size</key>
        <value>1 MB</value>
      </entry>
      <entry>
        <key>Replacement Strategy</key>
        <value>Regex Replace</value>
      </entry>
      <entry>
        <key>Evaluation Mode</key>
        <value>Entire text</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>Replace text w/ XML</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>failure</name>
  </relationships>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style></style>
  <type>org.apache.nifi.processors.standard.ReplaceText</type>
</processors>
<processors>
  <!-- UpdateAttribute that sets mime.extension to ".xml" and mime.type to
       "application/xml". The "Delete Attributes Expression" entry carries a key but no
       value element. "success" is not auto-terminated. -->
  <id>31cd4eac-0156-1000-0000-000000000000</id>
  <parentGroupId>d5323c9a-890e-4205-b824-c45be4fa5953</parentGroupId>
  <position>
    <x>2.460002441406118</x>
    <y>255.0000304412842</y>
  </position>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>Delete Attributes Expression</key>
      </entry>
      <entry>
        <key>mime.extension</key>
        <value>.xml</value>
      </entry>
      <entry>
        <key>mime.type</key>
        <value>application/xml</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>Set mime.type to application/xml</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style></style>
  <type>org.apache.nifi.processors.attributes.UpdateAttribute</type>
</processors>
<processors>
  <!-- DuplicateFlowFile with Number of Copies set to 1000; its "success" relationship
       is not auto-terminated. -->
  <id>31ddb93b-0156-1000-0000-000000000000</id>
  <parentGroupId>d5323c9a-890e-4205-b824-c45be4fa5953</parentGroupId>
  <position>
    <x>732.9799719238281</x>
    <y>251.12004936218256</y>
  </position>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>Number of Copies</key>
        <value>1000</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>Duplicate FlowFile x 1000</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <style></style>
  <type>org.apache.nifi.processors.standard.DuplicateFlowFile</type>
</processors>
</snippet>
<timestamp>07/28/2016 09:29:04 CDT</timestamp>
</template>
<?xml version="1.0" encoding="UTF-8" ?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- The result is serialized as plain text in UTF-8; the templates in this stylesheet
     write the JSON punctuation themselves as literal text. -->
<xsl:output method="text" encoding="utf-8"/>
<!-- Entry point: matches the document element when it has at least one child node and
     wraps whatever mode="detect" emits for that element in the outermost JSON braces. -->
<xsl:template match="/*[node()]">
  <xsl:text>{</xsl:text>
  <xsl:apply-templates mode="detect" select="."/>
  <xsl:text>}</xsl:text>
</xsl:template>
<!--
  mode="detect": emits the JSON member for one element, picking the shape from the
  names of its immediate siblings and from its own content:
    1. last element of a run of same-named siblings: object, then the closing "]"
    2. middle element of such a run: object followed by ", "
    3. first element of such a run: "name" : [ then the object and ", "
    4. element with child elements or attributes: "name" : { ... }
    5. element with neither: "name" : "text"
  Branches 4 and 5 append ", " when any following sibling element exists.

  Branch 5 applies templates to the element's text() children instead of to the
  element itself. Re-applying templates to "." in the default mode works for nested
  elements (the built-in rule descends to the text), but for the document element it
  re-enters the match="/*[node()]" template, which calls this mode again, so a
  document whose root holds only text recursed without end.
-->
<xsl:template match="*" mode="detect">
  <xsl:choose>
    <!-- 1. Previous sibling has the same name, next one does not (or does not exist). -->
    <xsl:when test="name(preceding-sibling::*[1]) = name(current()) and name(following-sibling::*[1]) != name(current())">
      <xsl:apply-templates select="." mode="obj-content" />
      <xsl:text>]</xsl:text>
      <xsl:if test="count(following-sibling::*[name() != name(current())]) &gt; 0">, </xsl:if>
    </xsl:when>
    <!-- 2. Previous sibling has the same name and so does the next one. -->
    <xsl:when test="name(preceding-sibling::*[1]) = name(current())">
      <xsl:apply-templates select="." mode="obj-content" />
      <!-- name() of a node-set uses its first node in document order, i.e. the
           immediate following sibling; [1] states that explicitly. -->
      <xsl:if test="name(following-sibling::*[1]) = name(current())">, </xsl:if>
    </xsl:when>
    <!-- 3. First of a run: open the array. -->
    <xsl:when test="following-sibling::*[1][name() = name(current())]">
      <xsl:text>"</xsl:text><xsl:value-of select="name()"/><xsl:text>" : [</xsl:text>
      <xsl:apply-templates select="." mode="obj-content" /><xsl:text>, </xsl:text>
    </xsl:when>
    <!-- 4. Stand-alone element with structure of its own. -->
    <xsl:when test="count(./child::*) &gt; 0 or count(@*) &gt; 0">
      <xsl:text>"</xsl:text><xsl:value-of select="name()"/><xsl:text>" : </xsl:text>
      <xsl:apply-templates select="." mode="obj-content" />
      <xsl:if test="count(following-sibling::*) &gt; 0">, </xsl:if>
    </xsl:when>
    <!-- 5. Stand-alone leaf: plain string value. -->
    <xsl:when test="count(./child::*) = 0">
      <xsl:text>"</xsl:text><xsl:value-of select="name()"/><xsl:text>" : "</xsl:text>
      <xsl:apply-templates select="text()"/>
      <xsl:text>"</xsl:text>
      <xsl:if test="count(following-sibling::*) &gt; 0">, </xsl:if>
    </xsl:when>
  </xsl:choose>
</xsl:template>
<!--
  mode="obj-content": writes one element as a JSON object. Attributes come first (via
  mode="attr"), then child elements (via mode="detect"). An element that has text but
  no child elements contributes its text as "text" when it also has attributes, or
  under its own element name when it has none.
-->
<xsl:template match="*" mode="obj-content">
  <xsl:variable name="hasAttrs" select="boolean(@*)"/>
  <xsl:variable name="hasChildElems" select="boolean(*)"/>
  <xsl:variable name="hasText" select="boolean(text())"/>
  <xsl:text>{</xsl:text>
  <xsl:apply-templates select="@*" mode="attr"/>
  <!-- Separator between the attribute members and whatever follows them. -->
  <xsl:if test="$hasAttrs and ($hasChildElems or $hasText)">
    <xsl:text>, </xsl:text>
  </xsl:if>
  <xsl:apply-templates select="*" mode="detect"/>
  <xsl:if test="not($hasChildElems) and $hasText">
    <xsl:choose>
      <xsl:when test="$hasAttrs">
        <xsl:text>"text" : "</xsl:text>
        <xsl:value-of select="text()"/>
        <xsl:text>"</xsl:text>
      </xsl:when>
      <xsl:otherwise>
        <xsl:text>"</xsl:text>
        <xsl:value-of select="name()"/>
        <xsl:text>" : "</xsl:text>
        <xsl:value-of select="text()"/>
        <xsl:text>"</xsl:text>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:if>
  <xsl:text>}</xsl:text>
  <xsl:if test="position() &lt; last()">
    <xsl:text>, </xsl:text>
  </xsl:if>
</xsl:template>
<!-- mode="attr": writes one attribute as a "name" : "value" member. Every attribute
     except the last in the current node list is followed by a bare comma (no space). -->
<xsl:template match="@*" mode="attr">
  <xsl:text>"</xsl:text>
  <xsl:value-of select="name()"/>
  <xsl:text>" : "</xsl:text>
  <xsl:value-of select="."/>
  <xsl:text>"</xsl:text>
  <xsl:if test="position() != last()">
    <xsl:text>,</xsl:text>
  </xsl:if>
</xsl:template>
<!--
  removeBreaks: outputs a string with each line break replaced by a single space.
  It is used both as the match template for text nodes (and node/@TEXT attributes) and
  as a named template that calls itself on the remainder of the string.

  Parameter pText: the string to process; defaults to normalize-space(.) of the
  context node, which already contains no line breaks, so the default path simply
  outputs the normalized text.

  The guard and the split now agree on the line feed as the delimiter. Previously the
  guard looked for a line feed while the split looked for carriage return + line feed,
  so a pText containing a lone line feed made substring-before and substring-after
  both return the empty string and the text was reduced to a single space. A carriage
  return directly in front of the line feed is dropped, which keeps the output for
  CR+LF input the same as before.
-->
<xsl:template match="node/@TEXT | text()" name="removeBreaks">
  <xsl:param name="pText" select="normalize-space(.)"/>
  <xsl:choose>
    <xsl:when test="not(contains($pText, '&#xA;'))">
      <xsl:value-of select="$pText"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:variable name="line" select="substring-before($pText, '&#xA;')"/>
      <xsl:choose>
        <!-- Strip exactly one trailing carriage return (the CR of a CR+LF pair). -->
        <xsl:when test="substring($line, string-length($line)) = '&#xD;'">
          <xsl:value-of select="substring($line, 1, string-length($line) - 1)"/>
        </xsl:when>
        <xsl:otherwise>
          <xsl:value-of select="$line"/>
        </xsl:otherwise>
      </xsl:choose>
      <xsl:text> </xsl:text>
      <xsl:call-template name="removeBreaks">
        <xsl:with-param name="pText" select="substring-after($pText, '&#xA;')"/>
      </xsl:call-template>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
</xsl:stylesheet>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment