public

Streaming XML standardiser implementation in Groovy using NUX. Allows feeds to be processed via database configuration rather than XSL.

  • Download Gist
streaming-xml-standardiser.groovy
Groovy
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
package richmarr.xml.example
import org.xml.sax.InputSource
import nu.xom.*
import nux.xom.io.*
import nux.xom.xquery.*
 
// Feed definition. `item` is the location path handed to the streaming filter
// to select each record element; `fields` maps every output element name to
// the query that is evaluated against a selected record.
// Hard-coded here; this would normally be pulled from a database.
def config = [
    item  : '/xpath/to/item/elements',
    fields: [
        id         : './id/text()',
        title      : './title/text()',
        description: './desc/text()'
    ]
]
 
// The feed name selects both the raw input file and the standardised output file.
def feedName = 'testA'
def rawFile = new File("target/raw/${feedName}.xml")
def standardisedFile = new File("target/standardised/${feedName}.xml")
InputStream inputStream = new FileInputStream(rawFile)
OutputStream outputStream = new FileOutputStream(standardisedFile)

// Streaming serializer writing UTF-8 XML to the output file. Emit the XML
// declaration and open the root <items> element; the matching end tag is
// written once the whole feed has been parsed.
StreamingSerializer out = new StreamingSerializerFactory().createXMLSerializer(outputStream, "UTF-8")
out.writeXMLDeclaration()
out.writeStartTag(new Element("items"))
 
// Transform applied to each record element selected by the streaming filter.
// For every record it writes one <item> element to the serializer, containing
// one child element per configured field whose query yields at least one
// result; the child's text is the string value of the first result.
StreamingTransform itemTransform = new StreamingTransform() {
    public Nodes transform( Element item ) {
        out.writeStartTag(new Element("item"))
        config.fields.each { fieldName, query ->
            Nodes matches = XQueryUtil.xquery( item, query )
            // Nothing matched for this field: leave the element out of the output.
            if (matches.size() == 0) return

            String value = matches.get(0).getValue()
            out.writeStartTag(new Element(fieldName))
            out.write(new Text( value ))
            out.writeEndTag()
        }
        out.writeEndTag()
        // The record has already been streamed out, so hand back an empty
        // node list (presumably so NUX does not retain it in the tree being
        // built -- see the StreamingTransform contract).
        return new Nodes()
    }
}
 
// Parse the raw feed with a filtering Builder: the node factory created from
// the StreamingPathFilter passes elements matching config.item to itemTransform.
Builder builder = new Builder(new StreamingPathFilter( config.item, null ).createNodeFactory( null, itemTransform ))
try {
    // Use Builder's public build(InputStream) overload rather than wrapping
    // the stream in a SAX InputSource first.
    builder.build(inputStream)
    // Close the root <items> element, then finish the output document.
    out.writeEndTag()
    out.writeEndDocument()
} finally {
    // Release both file handles whether or not parsing succeeded; the nested
    // finally ensures the output stream is closed even if closing the input fails.
    try {
        inputStream.close()
    } finally {
        outputStream.close()
    }
}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.