Create a gist now

Instantly share code, notes, and snippets.

Streaming XML standardiser implementation in Groovy using NUX. Allows feeds to be processed via DB configuration rather than XSL
package richmarr.xml.example
import org.xml.sax.InputSource
import nu.xom.*
import nux.xom.io.*
import nux.xom.xquery.*
// Feed-specific standardisation rules. Hard-coded here for the example;
// this would normally be pulled from a database.
//   item   - path selecting each element to be treated as one feed item
//   fields - output element name -> query evaluated relative to that item
def config = [
    'item'  : "/xpath/to/item/elements",
    'fields': [
        'id'         : "./id/text()",
        'title'      : "./title/text()",
        'description': "./desc/text()"
    ]
]
// Streams the raw feed through a path filter, rewriting every matched item
// into the standard <items><item>...</item></items> layout without building
// the whole input document in memory.
def feedName = "testA"

InputStream inputStream = null
OutputStream outputStream = null
try {
    inputStream = new FileInputStream("target/raw/${feedName}.xml")
    outputStream = new FileOutputStream("target/standardised/${feedName}.xml")

    StreamingSerializerFactory factory = new StreamingSerializerFactory()
    StreamingSerializer out = factory.createXMLSerializer(outputStream, "UTF-8")
    out.writeXMLDeclaration()
    out.writeStartTag(new Element("items"))

    // Invoked once per element matched by config.item. Writes the standardised
    // <item> straight to the serializer and returns an empty Nodes so the
    // matched subtree is not retained in the document being built.
    StreamingTransform itemTransform = new StreamingTransform() {
        public Nodes transform(Element item) {
            out.writeStartTag(new Element("item"))
            config.fields.each { fieldEntry ->
                // Evaluate the configured query relative to the current item.
                Nodes results = XQueryUtil.xquery(item, fieldEntry.value)
                // Fields with no match are omitted; only the first match is used.
                if (results.size() > 0) {
                    out.writeStartTag(new Element(fieldEntry.key))
                    out.write(new Text(results.get(0).getValue()))
                    out.writeEndTag()
                }
            }
            out.writeEndTag()
            return new Nodes()
        }
    }

    // parse document with a filtering Builder
    Builder builder = new Builder(new StreamingPathFilter(config.item, null).createNodeFactory(null, itemTransform))
    // Use the public build(InputStream) overload instead of passing a SAX
    // InputSource, which is not part of Builder's documented public API.
    builder.build(inputStream)

    out.writeEndTag()
    out.writeEndDocument()
} finally {
    // Release both file handles on success and on failure; the nested
    // try/finally ensures the input is closed even if closing the output throws.
    try {
        outputStream?.close()
    } finally {
        inputStream?.close()
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment