Skip to content

Instantly share code, notes, and snippets.

@tmclnk
Last active December 18, 2017 22:10
Show Gist options
  • Save tmclnk/716e8cbea1da32cbef5234d636a63df9 to your computer and use it in GitHub Desktop.
Save tmclnk/716e8cbea1da32cbef5234d636a63df9 to your computer and use it in GitHub Desktop.
XML Wrapper in Java to reduce boilerplate XPath, DocumentBuilder, and Transformer work
package be.shouldyou;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.StringJoiner;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Stream;
import javax.xml.bind.DatatypeConverter;
import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * A stateful wrapper class around {@link Node}s that removes the usual
 * {@link XPath}, {@link DocumentBuilder} and {@link Transformer} boilerplate.
 * All {@link XPathExpressionException}s are rethrown as unchecked exceptions.
 * Type conversions are done with {@link DatatypeConverter}.
 * <br/>
 * Examples:
 * <code>
 * <pre>
 * XML xml = new XML(payloadXML);
 * String submissionId = xml.asString("IRSDataForStateSubmission/SubmissionId");
 * BigInteger eid = xml.toBigInteger("IRSDataForStateSubmission/EID");
 * Calendar recvd = xml.toDateTime("IRSDataForStateSubmission/ReceivedAtIRSTs");
 * </pre>
 * </code>
 *
 * Instances are stateful, so the scope of operations can be changed using
 * {@link #with(String)}. State will be restored to the original context
 * using {@link #reset()}.
 * <code>
 * <pre>
 * XML xml = new XML(stateXML);
 * xml.with("ReturnState/ReturnDataState/FormNE1120SN");
 * BigInteger neIncTaxWithNonResAmt = xml.toBigInteger("NeIncTaxWithNonResAmt");
 * BigInteger f3800NCreditOrRecaptureAmt = xml.toBigInteger("F3800NCreditOrRecaptureAmt");
 * BigInteger taxDepositedExtOrEstPytAmt = xml.toBigInteger("TaxDepositedExtOrEstPytAmt");
 * xml.reset(); // set back to the original stateXML
 * </pre>
 * </code>
 *
 * Because the current context node is a mutable field with no
 * synchronization, an instance should not be shared between threads.
 *
 * @see <a href="https://gist.github.com/tmcoma/716e8cbea1da32cbef5234d636a63df9">Gist</a>
 */
public class XML {
    @SuppressWarnings("unused")
    private static final Logger logger = LoggerFactory.getLogger(XML.class);

    /** Evaluator used for every expression handled by this instance. */
    private final XPath delegate = XPathFactory.newInstance().newXPath();

    /** The node this instance was created with; {@link #reset()} returns to it. */
    private final Node node;

    /**
     * The current node being examined; initially {@link #node}, but can
     * be changed via {@link #with(String)}.
     */
    private Node withNode;

    /** Whether or not to indent {@link #transform()} output. */
    private boolean indent = false;

    /**
     * Attempt to parse a {@link Document} off the classpath; if that fails
     * try using {@link Paths#get(String, String...)}. The stream opened here
     * is always closed before this method returns.
     *
     * @param path classpath resource name or filesystem path
     * @param nsAware whether the parser should be namespace aware
     * @return the parsed {@link Document}
     * @throws IOException if the path can be found in neither place, or if
     *         reading it fails
     * @throws RuntimeException if the parser cannot be configured or the
     *         content is not well-formed XML
     */
    public static Document parseDocument(String path, boolean nsAware) throws IOException {
        InputStream located = XML.class.getClassLoader().getResourceAsStream(path);
        if (located == null) {
            Path p = Paths.get(path);
            if (!Files.exists(p)) {
                throw new IOException("Failed to locate " + path + " on classpath or in filesystem");
            }
            located = Files.newInputStream(p);
        }
        // We opened the stream, so we are responsible for closing it.
        try (InputStream in = located) {
            return parseDocument(in, nsAware);
        }
    }

    /**
     * Make a {@link Document} out of the given {@link InputStream}. The
     * stream is not closed by this method itself.
     *
     * @param in stream containing an XML document
     * @param nsAware whether the parser should be namespace aware; this
     *        determines whether later xpath expressions need namespaces
     * @return the parsed {@link Document}
     * @throws IOException if reading the stream fails
     * @throws RuntimeException if the parser cannot be configured or the
     *         content is not well-formed XML
     */
    public static Document parseDocument(InputStream in, boolean nsAware) throws IOException {
        try {
            // NOTE(review): the factory is used with its default features; DTDs and
            // external entities are not explicitly disabled here. Confirm that
            // callers only pass trusted XML, or harden the factory.
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            dbf.setNamespaceAware(nsAware);
            return dbf.newDocumentBuilder().parse(in);
        } catch (ParserConfigurationException | SAXException e) {
            throw new RuntimeException("Document creation failed", e);
        }
    }

    /**
     * Parse the given {@link InputStream} into a schemaless, namespace-unaware
     * DOM and wrap it.
     *
     * @param in stream containing an XML document
     * @return a new {@link XML} whose context is the parsed {@link Document}
     * @throws RuntimeException if reading, parser configuration or parsing fails
     */
    public static XML parse(InputStream in) {
        try {
            return new XML(parseDocument(in, false));
        } catch (IOException e) {
            throw new RuntimeException("Document creation failed", e);
        }
    }

    /**
     * @param node {@link Node} to which all later xpath expressions will
     * apply. The namespaciness of this node determines the types of
     * expressions that should be used.
     */
    public XML(Node node) {
        this.node = node;
        this.withNode = node;
    }

    /**
     * Transform the current {@link #withNode} into a {@link Result} using an
     * identity transformation. Output is indented when {@link #isIndent()}
     * is true.
     *
     * <pre>
     * xml.transform(new {@link StreamResult}(System.out));
     * </pre>
     *
     * @param result {@link Result} to write to
     * @throws RuntimeException wrapping any {@link TransformerException}
     */
    public void transform(Result result) {
        TransformerFactory factory = TransformerFactory.newInstance();
        try {
            Transformer transformer = factory.newTransformer();
            if (this.indent) {
                transformer.setOutputProperty(OutputKeys.INDENT, "yes");
                // Implementation-specific property; it is namespace-qualified, so
                // transformers that do not know it are expected to ignore it.
                transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
            }
            Source xmlSource = new DOMSource(this.withNode);
            transformer.transform(xmlSource, result);
        } catch (TransformerException e) {
            throw new RuntimeException("Transformation failure", e);
        }
    }

    /**
     * Serialize the current {@link #withNode} to a String.
     *
     * The output is written to a character stream rather than decoded from
     * bytes, so the result does not depend on the platform default charset.
     *
     * @return the serialized XML
     * @throws RuntimeException wrapping any {@link TransformerException}
     */
    public String transform() {
        StringWriter out = new StringWriter();
        this.transform(new StreamResult(out));
        return out.toString();
    }

    /**
     * Apply an XSLT stylesheet to the current {@link #withNode} and write the
     * output to a {@link Result}. The {@link #isIndent()} flag is not applied
     * here; output formatting is left to the stylesheet.
     *
     * <pre>
     * InputStream myXsl = getClass().getClassLoader().getResourceAsStream("myfile.xsl");
     * xml.transform(new StreamSource(myXsl), new StreamResult(System.out));
     * </pre>
     *
     * @param xslSource XSLT stylesheet
     * @param result {@link Result} to write to
     * @throws RuntimeException wrapping any {@link TransformerException}
     */
    public void transform(Source xslSource, Result result) {
        TransformerFactory factory = TransformerFactory.newInstance();
        try {
            Transformer transformer = factory.newTransformer(xslSource);
            Source xmlSource = new DOMSource(this.withNode);
            transformer.transform(xmlSource, result);
        } catch (TransformerException e) {
            throw new RuntimeException("Transformation failure", e);
        }
    }

    /**
     * A convenience method which will perform all subsequent evaluations
     * against the node selected by the given expression.
     *
     * The "with" idiom is derived from the language construct found
     * in other languages.
     * The expression is always evaluated against the original node passed to
     * the constructor, not against a node selected by an earlier call.
     * This can be reset using {@code with("")} or {@code with(null)}.
     * If the expression selects nothing, the current node becomes
     * {@code null} until the next call to this method or {@link #reset()}.
     *
     * @param expression xpath expression selecting the new context node;
     *        null or empty restores the original node
     * @return the new current node, possibly null
     * @throws RuntimeException if the expression cannot be evaluated; the
     *         current node is left unchanged in that case
     */
    public Node with(String expression) {
        if (expression == null || expression.isEmpty()) {
            withNode = node;
        } else {
            withNode = (Node) eval(expression, this.node, XPathConstants.NODE);
        }
        return withNode;
    }

    /**
     * Reset the {@link #withNode} value back to the original {@link #node}.
     */
    public void reset() {
        with(null);
    }

    /**
     * Evaluate an xpath expression against the current {@link #withNode} and
     * return the result as the specified type.
     *
     * <p>{@code returnType} is expected to be one of the types defined in
     * {@link XPathConstants} (
     * {@link XPathConstants#NUMBER NUMBER},
     * {@link XPathConstants#STRING STRING},
     * {@link XPathConstants#BOOLEAN BOOLEAN},
     * {@link XPathConstants#NODE NODE} or
     * {@link XPathConstants#NODESET NODESET}); argument validation is
     * performed by {@link XPath#evaluate(String, Object, QName)}.</p>
     *
     * @param expression The XPath expression.
     * @param returnType The desired return type.
     *
     * @return Result of evaluating the expression as an <code>Object</code> of <code>returnType</code>.
     *
     * @throws RuntimeException wrapping the {@link XPathExpressionException}
     *         if <code>expression</code> cannot be evaluated.
     * @see XPath#evaluate(String, Object, QName)
     */
    public Object evaluate(String expression, QName returnType) {
        return eval(expression, withNode, returnType);
    }

    /**
     * Evaluates the expression as a Boolean. This can be
     * checking for the presence of a node, e.g. "/Path/To/CheckboxInd".
     * It may also use explicit xpath boolean functions. Note that
     * this is *different* from parsing a text value like "true"!
     *
     * <pre>
     * // true if an address element exists
     * xml.asBoolean("/person/address");
     * </pre>
     *
     * Any
     * {@link XPathExpressionException}s thrown will be wrapped
     * as unchecked exceptions.
     * @param expression xpath expression
     * @return the xpath boolean value of the expression
     */
    public boolean asBoolean(String expression) {
        return (Boolean) eval(expression, withNode, XPathConstants.BOOLEAN);
    }

    /**
     * Parses the {@code xsd:boolean} value from the given expression.
     * This is much different than {@link #asBoolean(String)}!
     *
     * <pre>
     * // returns true if &lt;citizen>true&lt;/citizen> or
     * // &lt;citizen>1&lt;/citizen>
     * xml.toBoolean("/person/citizen");
     * </pre>
     * @param expression xpath expression which can be evaluated to a String
     * @return whether or not the expression parsed into true or false,
     * based on the definition of {@code xsd:boolean}. Values are
     * {@code 0, 1, false, true}.
     * @see DatatypeConverter#parseBoolean(String)
     */
    public boolean toBoolean(String expression) {
        return DatatypeConverter.parseBoolean(asString(expression));
    }

    /**
     * Evaluates the given expression to a NodeList. Any
     * {@link XPathExpressionException}s thrown will be wrapped
     * as unchecked exceptions.
     * @param expression which will return a {@link XPathConstants#NODESET}
     * @return NodeList (possibly empty)
     */
    public NodeList asNodeList(String expression) {
        return (NodeList) eval(expression, withNode, XPathConstants.NODESET);
    }

    /**
     * A list consisting of the {@link Node#getTextContent()} of each matching
     * node.
     * <pre>
     * List&lt;String> cities = xml.asList("/person/address/city");
     * </pre>
     *
     * @param expression which returns a list of nodes (whose {@link Node#getTextContent()} will be used)
     * @return {@link List} of {@link String}
     */
    public List<String> asList(String expression) {
        return asList(expression, Node::getTextContent);
    }

    /**
     * Evaluates the given expression into a {@link List}. Each
     * node in the list will have the given callback applied.
     *
     * <pre>
     * // all cities, UPPERCASE
     * List&lt;String> cities = xml.asList("/person/address/city", node -> node.getTextContent().toUpperCase());
     * </pre>
     * @param expression xpath expression which will return a {@link NodeList}
     * @param callback applied to each matching {@link Node}, in document order
     *        as returned by the evaluator
     * @return a new mutable {@link List} of the callback results (possibly empty)
     */
    public <T> List<T> asList(String expression, Function<Node, T> callback) {
        NodeList matches = asNodeList(expression);
        List<T> list = new ArrayList<>(matches.getLength());
        for (int i = 0; i < matches.getLength(); i++) {
            list.add(callback.apply(matches.item(i)));
        }
        return list;
    }

    /**
     * Apply a callback to every {@link Node} matched by the expression.
     *
     * @param expression xpath expression
     * @param callback for each {@link Node} found
     */
    public void forEach(String expression, Consumer<Node> callback) {
        NodeList matches = asNodeList(expression);
        for (int i = 0; i < matches.getLength(); i++) {
            callback.accept(matches.item(i));
        }
    }

    /**
     * Evaluates the given expression into a {@link Stream} of {@link Node}.
     *
     * <pre>
     * //print all zip codes anywhere in the document tree
     * xml.stream("//zip").forEach(node -> System.out.println(node.getTextContent()));
     * </pre>
     * @param expression xpath expression which will return a {@link NodeList}
     * @return {@link Stream} of {@link Node}
     */
    public Stream<Node> stream(String expression) {
        NodeList matches = asNodeList(expression);
        Stream.Builder<Node> builder = Stream.builder();
        for (int i = 0; i < matches.getLength(); i++) {
            builder.accept(matches.item(i));
        }
        return builder.build();
    }

    /**
     * Evaluate the given expression against the current {@link #withNode}.
     * For each {@link Node} in the result, apply the given callback. The
     * callback is applied eagerly, before the stream is returned.
     * <br/>
     *
     * <pre>
     * //print a person's ZIP codes
     * xml.map("/person/address", node -> new XML(node).asString("zip"))
     *    .forEach(System.out::println);
     * </pre>
     *
     * @param expression xpath expression which will return a {@link NodeList}
     * @param callback {@link Function} which takes a {@link Node} as an argument
     * @return a {@link Stream}, with callback applied to each item
     */
    public <T> Stream<T> map(String expression, Function<Node, T> callback) {
        NodeList matches = asNodeList(expression);
        Stream.Builder<T> builder = Stream.builder();
        for (int i = 0; i < matches.getLength(); i++) {
            builder.accept(callback.apply(matches.item(i)));
        }
        return builder.build();
    }

    /**
     * Evaluate the given expression against the current {@link #withNode}.
     * Join the {@link Node#getTextContent()} of each using the given
     * delimiter.
     * <br/>
     *
     * <pre>
     * // 68123, 68142, 68144
     * String zips = xml.join("/person/address/zip", ", ");
     * </pre>
     *
     * @param expression xpath expression which will return a {@link NodeList}
     * @param delimiter text placed between consecutive values
     * @return the joined text, or "" if nothing matched
     */
    public String join(String expression, String delimiter) {
        return String.join(delimiter, asList(expression));
    }

    /**
     * Evaluates the given expression to a Node. Any
     * {@link XPathExpressionException}s thrown will be wrapped
     * as unchecked exceptions.
     * @param expression xpath expression selecting a node
     * @return Node or {@code null}
     */
    public Node asNode(String expression) {
        return (Node) eval(expression, withNode, XPathConstants.NODE);
    }

    /**
     * Evaluate the {@code expression} as a {@link Calendar}. The result
     * should be in the same format as {@code xsd:date}. Any
     * {@link XPathExpressionException}s thrown will be wrapped
     * as unchecked exceptions.
     * @param expression xpath expression which can be evaluated to a String
     * @return {@link Calendar}
     * @throws IllegalArgumentException if the value isn't an xsd:date
     * @see DatatypeConverter#parseDate(String)
     */
    public Calendar toDate(String expression) {
        return DatatypeConverter.parseDate(asString(expression));
    }

    /**
     * Parses the {@code xsd:date} returned by the expression and
     * formats it using a {@link SimpleDateFormat}.
     *
     * NOTE(review): the formatter is left on the JVM default time zone, so a
     * value that carries an explicit zone offset may render as a different
     * calendar day; confirm whether that is intended.
     *
     * @param expression xpath expression which can be evaluated to a String
     * @param format {@link SimpleDateFormat} pattern
     * @return formatted Date
     */
    public String formatDate(String expression, String format) {
        Date date = toDate(expression).getTime();
        return new SimpleDateFormat(format).format(date);
    }

    /**
     * Parses the {@code xsd:dateTime} returned by the expression and
     * formats it using a {@link SimpleDateFormat}. The formatter is left on
     * the JVM default time zone.
     * @param expression xpath expression which can be evaluated to a String
     * @param format {@link SimpleDateFormat} pattern
     * @return formatted Date
     */
    public String formatDateTime(String expression, String format) {
        Date date = toDateTime(expression).getTime();
        return new SimpleDateFormat(format).format(date);
    }

    /**
     * Format using {@link DecimalFormat}. Examples of format.
     *
     * <pre>
     * ####.##
     * -###.##
     * -000.00
     * ##%
     * </pre>
     *
     * The {@link BigDecimal} is handed to the formatter as-is rather than
     * being narrowed to a {@code double} first, so no digits are lost.
     *
     * @param expression xpath expression which can be evaluated to a String
     * @param pattern {@link DecimalFormat} pattern
     * @return the formatted value; an empty or missing value formats as zero
     */
    public String formatDecimal(String expression, String pattern) {
        BigDecimal val = toBigDecimal(expression);
        return new DecimalFormat(pattern).format(val);
    }

    /**
     * Evaluate the {@code expression} as a {@link Calendar}. The result
     * should be in the same format as {@code xsd:dateTime}. Any
     * {@link XPathExpressionException}s thrown will be wrapped
     * as unchecked exceptions.
     * @param expression xpath expression which can be evaluated to a String
     * @return {@link Calendar}
     * @throws IllegalArgumentException if the value isn't an xsd:dateTime
     * @see DatatypeConverter#parseDateTime(String)
     */
    public Calendar toDateTime(String expression) {
        return DatatypeConverter.parseDateTime(asString(expression));
    }

    /**
     * Evaluate the {@code expression} as a {@link BigDecimal}. Any
     * {@link XPathExpressionException}s thrown will be wrapped
     * as unchecked exceptions.
     * @param expression xpath expression which can be evaluated to a String
     * @return {@link BigDecimal} or {@link BigDecimal#ZERO} if no value
     * is returned
     * @throws IllegalArgumentException if the expression can't be parsed
     * as {@link BigDecimal} or isn't an empty string.
     * @see DatatypeConverter#parseDecimal(String)
     */
    public BigDecimal toBigDecimal(String expression) {
        String s = asString(expression);
        return s.isEmpty() ? BigDecimal.ZERO : DatatypeConverter.parseDecimal(s);
    }

    /**
     * Evaluate the expression to a single {@link Node} and convert it with
     * the given callback.
     *
     * @param expression xpath expression selecting a node
     * @param callback conversion to apply; it receives {@code null} when the
     *        expression selects nothing
     * @return whatever the callback returns
     */
    public <T> T toObject(String expression, Function<Node, T> callback) {
        return callback.apply(asNode(expression));
    }

    /**
     * Evaluate the {@code expression} as a {@link BigInteger}. Any
     * {@link XPathExpressionException}s thrown will be wrapped
     * as unchecked exceptions.
     * @param expression xpath expression which can be evaluated to a String
     * @return {@link BigInteger} or {@link BigInteger#ZERO} if no value
     * is returned
     * @throws IllegalArgumentException if the expression can't be parsed
     * as {@link BigInteger} or isn't an empty string.
     * @see DatatypeConverter#parseInteger(String)
     */
    public BigInteger toBigInteger(String expression) {
        String s = asString(expression);
        return s.isEmpty() ? BigInteger.ZERO : DatatypeConverter.parseInteger(s);
    }

    /**
     * @param expression xpath expression which can be evaluated to a String
     * @return {@link Integer} or 0 if no value
     * is returned
     * @throws IllegalArgumentException if the expression can't be parsed
     * as an integer, or if the value is outside the range of {@link Integer}
     * (instead of silently wrapping around).
     * @see DatatypeConverter#parseInteger(String)
     */
    public Integer toInteger(String expression) {
        BigInteger value = toBigInteger(expression);
        try {
            return value.intValueExact();
        } catch (ArithmeticException e) {
            throw new IllegalArgumentException(
                    "Value of " + expression + " does not fit in an Integer: " + value, e);
        }
    }

    /**
     * Evaluates xpath expression to a String, wrapping any
     * {@link XPathExpressionException}s as unchecked exceptions.
     * @param expression string-valued xpath expression
     * @return the String value of the result, or "" if nothing is found
     * @see XPath#evaluate(String, Object)
     */
    public String asString(String expression) {
        try {
            return delegate.evaluate(expression, withNode);
        } catch (XPathExpressionException e) {
            throw new RuntimeException("Failed to evaluate " + expression, e);
        }
    }

    /**
     * Will this instance indent output when performing {@link #transform()}
     * operations?
     * @return true if identity-transform output is indented
     */
    public boolean isIndent() {
        return indent;
    }

    /**
     * Tells {@link Transformer} instances to indent output (or not).
     * @param ident whether or not to indent transformation output
     */
    public void setIndent(boolean ident) {
        this.indent = ident;
    }

    /**
     * Single place where typed XPath evaluation is performed and where
     * {@link XPathExpressionException} is converted to an unchecked exception.
     */
    private Object eval(String expression, Object context, QName returnType) {
        try {
            return delegate.evaluate(expression, context, returnType);
        } catch (XPathExpressionException e) {
            throw new RuntimeException("Failed to evaluate " + expression, e);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment