Skip to content

Instantly share code, notes, and snippets.

@Crydust
Last active October 20, 2022 13:39
Show Gist options
  • Save Crydust/99ce9fb9b9f67bf61419 to your computer and use it in GitHub Desktop.
Save Crydust/99ce9fb9b9f67bf61419 to your computer and use it in GitHub Desktop.
Encapsulates org.w3c.dom.Document and javax.xml.xpath.XPath to parse a small XML document (needs more testing, not thread-safe, ...)
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import javax.xml.xpath.XPathFactoryConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * Small convenience wrapper around a DOM {@link Document} that evaluates XPath
 * expressions and returns the results as text.
 *
 * <p>Documents are parsed with a namespace-aware parser that is hardened against
 * XML External Entity (XXE) attacks; in particular, documents containing a
 * {@code DOCTYPE} declaration are rejected.
 *
 * <p>Compiled XPath expressions are cached per instance in an unsynchronized map,
 * so a single {@code XmlDocument} instance must not be shared between threads
 * without external synchronization. The static {@code parse} methods hold no
 * shared state and may be called concurrently.
 */
public class XmlDocument {

    /**
     * Parses the XML file at the given path.
     *
     * @param path location of the XML file
     * @return the parsed document
     * @throws RuntimeException if the file cannot be read or is not acceptable XML
     */
    public static XmlDocument parse(Path path) {
        try (InputStream inputStream = Files.newInputStream(path)) {
            return parse(inputStream);
        } catch (IOException e) {
            throw new RuntimeException("parse failed: " + path, e);
        }
    }

    /**
     * Parses XML from the given stream. The stream is not closed by this method.
     *
     * @param inputStream source of the XML content
     * @return the parsed document
     * @throws RuntimeException if the parser cannot be configured, the stream
     *         cannot be read, or the content is not acceptable XML
     */
    public static XmlDocument parse(InputStream inputStream) {
        try {
            // A DocumentBuilder is not thread-safe, so each parse gets its own
            // instance instead of sharing one through a static field.
            return new XmlDocument(newSecureDocumentBuilder().parse(inputStream));
        } catch (ParserConfigurationException | SAXException | IOException ex) {
            throw new RuntimeException("parse failed", ex);
        }
    }

    /** Creates a namespace-aware DocumentBuilder with XXE protections enabled. */
    private static DocumentBuilder newSecureDocumentBuilder() throws ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // begin avoid XXE
        // https://stackoverflow.com/questions/56777287/how-to-fix-disable-xml-external-entity-xxe-processing-vulnerabilities-in-jav
        // https://stackoverflow.com/questions/40649152/how-to-prevent-xxe-attack
        // https://owasp.org/www-project-cheat-sheets/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#jaxp-documentbuilderfactory-saxparserfactory-and-dom4j
        // https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#jaxp-documentbuilderfactory-saxparserfactory-and-dom4j
        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
        factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
        factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        factory.setXIncludeAware(false);
        factory.setExpandEntityReferences(false);
        // end avoid XXE
        factory.setNamespaceAware(true);
        return factory.newDocumentBuilder();
    }

    private final Document doc;
    private final MapNamespaceContext namespaceContext = new MapNamespaceContext();
    /** Created lazily on the first query; non-null once any expression was compiled. */
    private XPath xpath = null;
    /** Cache of compiled expressions keyed by their source text. */
    private final Map<String, XPathExpression> xPathExpressions = new HashMap<>();

    /**
     * Wraps an already parsed DOM document.
     *
     * @param doc the document that XPath expressions are evaluated against
     */
    public XmlDocument(Document doc) {
        this.doc = doc;
    }

    /**
     * Binds a prefix to a namespace URI for use in XPath expressions. All
     * bindings must be registered before the first call to {@link #getText} or
     * {@link #getTexts}.
     *
     * @param prefix the prefix used in XPath expressions
     * @param namespaceURI the namespace URI the prefix stands for
     * @throws IllegalStateException if an XPath expression was already evaluated
     */
    public void addNamespaceURI(String prefix, String namespaceURI) {
        if (xpath != null) {
            throw new IllegalStateException("Can't addNamespaceURI after getText/getTexts");
        }
        namespaceContext.addNamespaceURI(prefix, namespaceURI);
    }

    /**
     * Evaluates the expression against the document and returns the result
     * converted to a string.
     *
     * @param xpathExpression the XPath expression to evaluate
     * @return the string value of the result
     * @throws XPathExpressionException if the expression cannot be compiled or evaluated
     */
    public String getText(String xpathExpression) throws XPathExpressionException {
        return (String) compile(xpathExpression).evaluate(doc, XPathConstants.STRING);
    }

    /**
     * Evaluates the expression as a node set and returns the text content of
     * every selected node, in the order the nodes are returned.
     *
     * @param xpathExpression the XPath expression to evaluate
     * @return the text content of each selected node; empty if nothing matched
     * @throws XPathExpressionException if the expression cannot be compiled or
     *         does not evaluate to a node set
     */
    public String[] getTexts(String xpathExpression) throws XPathExpressionException {
        NodeList nodes = (NodeList) compile(xpathExpression).evaluate(doc, XPathConstants.NODESET);
        String[] texts = new String[nodes.getLength()];
        for (int i = 0; i < texts.length; i++) {
            Node node = nodes.item(i);
            texts[i] = node.getTextContent();
        }
        return texts;
    }

    /** Returns the compiled form of the expression, compiling and caching it on first use. */
    private XPathExpression compile(String xpathExpression) throws XPathExpressionException {
        XPathExpression expr = xPathExpressions.get(xpathExpression);
        if (expr == null) {
            expr = getOrCreateXPath().compile(xpathExpression);
            xPathExpressions.put(xpathExpression, expr);
        }
        return expr;
    }

    /** Returns this instance's XPath evaluator, creating and configuring it on first use. */
    private XPath getOrCreateXPath() {
        if (xpath == null) {
            try {
                // The factory stays local and the field is only assigned after
                // configuration succeeded, so a failed setFeature can never leave
                // behind an evaluator without secure processing.
                XPathFactory factory = XPathFactory.newInstance();
                factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
                XPath created = factory.newXPath();
                created.setNamespaceContext(namespaceContext);
                xpath = created;
            } catch (XPathFactoryConfigurationException ex) {
                throw new RuntimeException("createXPath failed", ex);
            }
        }
        return xpath;
    }

    /**
     * Map-backed {@link NamespaceContext}. The reserved {@code xml} and
     * {@code xmlns} prefixes are always bound to their fixed namespace URIs.
     */
    private static final class MapNamespaceContext implements NamespaceContext {
        private final Map<String, String> uris = new HashMap<>();

        MapNamespaceContext() {
        }

        /** Registers (or replaces) the binding for the given prefix. */
        void addNamespaceURI(String prefix, String namespaceURI) {
            uris.put(prefix, namespaceURI);
        }

        /**
         * @return the URI bound to the prefix, or {@link XMLConstants#NULL_NS_URI}
         *         if the prefix is unbound
         * @throws IllegalArgumentException if {@code prefix} is null
         */
        @Override
        public String getNamespaceURI(String prefix) {
            if (prefix == null) {
                throw new IllegalArgumentException("prefix must not be null");
            }
            if (XMLConstants.XML_NS_PREFIX.equals(prefix)) {
                return XMLConstants.XML_NS_URI;
            }
            if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix)) {
                return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
            }
            String namespace = uris.get(prefix);
            return namespace != null ? namespace : XMLConstants.NULL_NS_URI;
        }

        /**
         * @return one prefix bound to the URI, or {@code null} if none is bound
         * @throws IllegalArgumentException if {@code namespaceURI} is null
         */
        @Override
        public String getPrefix(String namespaceURI) {
            Iterator<String> prefixes = getPrefixes(namespaceURI);
            return prefixes.hasNext() ? prefixes.next() : null;
        }

        /**
         * @return a read-only iterator over every prefix bound to the URI
         * @throws IllegalArgumentException if {@code namespaceURI} is null
         */
        @Override
        public Iterator<String> getPrefixes(String namespaceURI) {
            if (namespaceURI == null) {
                throw new IllegalArgumentException("namespaceURI must not be null");
            }
            if (XMLConstants.XML_NS_URI.equals(namespaceURI)) {
                return Collections.singletonList(XMLConstants.XML_NS_PREFIX).iterator();
            }
            if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI)) {
                return Collections.singletonList(XMLConstants.XMLNS_ATTRIBUTE).iterator();
            }
            List<String> prefixes = new ArrayList<>();
            for (Map.Entry<String, String> binding : uris.entrySet()) {
                if (namespaceURI.equals(binding.getValue())) {
                    prefixes.add(binding.getKey());
                }
            }
            return Collections.unmodifiableList(prefixes).iterator();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment