Last active
October 20, 2022 13:39
-
-
Save Crydust/99ce9fb9b9f67bf61419 to your computer and use it in GitHub Desktop.
Encapsulates org.w3c.dom.Document and javax.xml.xpath.XPath to parse a small XML document (needs more testing, not thread-safe, etc.).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import javax.xml.xpath.XPathFactoryConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * Small convenience wrapper around a DOM {@link Document} that evaluates XPath expressions and
 * returns the results as strings.
 *
 * <p>Typical use: {@link #parse(Path)} or {@link #parse(InputStream)}, then optionally
 * {@link #addNamespaceURI(String, String)} for every prefix used in queries, then
 * {@link #getText(String)} / {@link #getTexts(String)}.
 *
 * <p>Instances are not thread-safe: the {@link XPath} object and the cache of compiled
 * expressions are created lazily without synchronization. The static {@code parse} methods may
 * be called from several threads because each call parses with its own {@link DocumentBuilder}.
 */
public class XmlDocument {

    /** Hardened parser factory, created on first use; only touched while holding the class lock. */
    private static DocumentBuilderFactory factory = null;

    /**
     * Reads and parses the XML file at {@code path}.
     *
     * @param path location of the XML file
     * @return a wrapper around the parsed document
     * @throws UncheckedIOException if the file cannot be opened or closed
     * @throws RuntimeException if the content cannot be parsed (see {@link #parse(InputStream)})
     */
    public static XmlDocument parse(Path path) {
        try (InputStream inputStream = Files.newInputStream(path)) {
            return parse(inputStream);
        } catch (IOException e) {
            // Only open/close failures arrive here; parse(InputStream) wraps its own errors.
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Parses XML from {@code inputStream} with a namespace-aware parser configured against XXE.
     * The stream is not closed by this method.
     *
     * @param inputStream source of the XML content
     * @return a wrapper around the parsed document
     * @throws RuntimeException with message {@code "parse failed"} wrapping the underlying
     *         {@link ParserConfigurationException}, {@link SAXException} or {@link IOException}
     */
    public static XmlDocument parse(InputStream inputStream) {
        try {
            return new XmlDocument(newDocumentBuilder().parse(inputStream));
        } catch (ParserConfigurationException | SAXException | IOException ex) {
            throw new RuntimeException("parse failed", ex);
        }
    }

    /**
     * Returns a fresh builder from the shared factory. A builder is never shared between calls,
     * so concurrent parses cannot interfere with each other.
     */
    private static synchronized DocumentBuilder newDocumentBuilder()
            throws ParserConfigurationException {
        if (factory == null) {
            // Assigned only after every setting succeeded, so a half-configured factory is
            // never cached.
            factory = createSecureFactory();
        }
        return factory.newDocumentBuilder();
    }

    /** Creates a namespace-aware factory with DTDs, external entities and XInclude disabled. */
    private static DocumentBuilderFactory createSecureFactory()
            throws ParserConfigurationException {
        DocumentBuilderFactory secured = DocumentBuilderFactory.newInstance();
        // begin avoid XXE
        // https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#jaxp-documentbuilderfactory-saxparserfactory-and-dom4j
        // https://stackoverflow.com/questions/56777287/how-to-fix-disable-xml-external-entity-xxe-processing-vulnerabilities-in-jav
        // https://stackoverflow.com/questions/40649152/how-to-prevent-xxe-attack
        secured.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        secured.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
        secured.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
        secured.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        secured.setFeature("http://xml.org/sax/features/external-general-entities", false);
        secured.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        secured.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        secured.setXIncludeAware(false);
        secured.setExpandEntityReferences(false);
        // end avoid XXE
        secured.setNamespaceAware(true);
        return secured;
    }

    private final Document doc;
    private final MapNamespaceContext namespaceContext = new MapNamespaceContext();
    /** Created on the first query; once set, no further namespace bindings are accepted. */
    private XPath xpath = null;
    /** Compiled expressions keyed by their source text. */
    private final Map<String, XPathExpression> xPathExpressions = new HashMap<>();

    /**
     * Wraps an already parsed document.
     *
     * @param doc the document that all XPath expressions are evaluated against
     */
    public XmlDocument(Document doc) {
        this.doc = doc;
    }

    /**
     * Binds {@code prefix} to {@code namespaceURI} for use in XPath expressions. All bindings
     * must be registered before the first call to {@link #getText} or {@link #getTexts}.
     *
     * @param prefix the prefix as it will appear in XPath expressions
     * @param namespaceURI the namespace URI the prefix stands for
     * @throws IllegalStateException if an XPath expression has already been evaluated
     */
    public void addNamespaceURI(String prefix, String namespaceURI) {
        if (xpath != null) {
            throw new IllegalStateException("Can't addNamespaceURI after getText or getTexts");
        }
        namespaceContext.addNamespaceURI(prefix, namespaceURI);
    }

    /**
     * Evaluates {@code xpathExpression} against the document and returns the result converted
     * to a string (XPath {@code STRING} result type).
     *
     * @param xpathExpression the XPath expression to evaluate
     * @return the string value of the result
     * @throws XPathExpressionException if the expression cannot be compiled or evaluated
     * @throws RuntimeException with message {@code "createXPath failed"} if the XPath factory
     *         cannot be configured
     */
    public String getText(String xpathExpression) throws XPathExpressionException {
        return (String) compile(xpathExpression).evaluate(doc, XPathConstants.STRING);
    }

    /**
     * Evaluates {@code xpathExpression} as a node set and returns the text content of every
     * selected node, in the order the nodes are returned by the evaluation.
     *
     * @param xpathExpression the XPath expression to evaluate; must select nodes
     * @return one entry per selected node (each entry is that node's
     *         {@link Node#getTextContent()}); empty if nothing is selected
     * @throws XPathExpressionException if the expression cannot be compiled or evaluated
     * @throws RuntimeException with message {@code "createXPath failed"} if the XPath factory
     *         cannot be configured
     */
    public String[] getTexts(String xpathExpression) throws XPathExpressionException {
        NodeList nodes = (NodeList) compile(xpathExpression).evaluate(doc, XPathConstants.NODESET);
        int count = nodes.getLength();
        List<String> texts = new ArrayList<>(count);
        for (int i = 0; i < count; i++) {
            texts.add(nodes.item(i).getTextContent());
        }
        return texts.toArray(new String[0]);
    }

    /** Returns the compiled form of {@code xpathExpression}, compiling and caching it if needed. */
    private XPathExpression compile(String xpathExpression) throws XPathExpressionException {
        XPathExpression expr = xPathExpressions.get(xpathExpression);
        if (expr == null) {
            expr = xpath().compile(xpathExpression);
            xPathExpressions.put(xpathExpression, expr);
        }
        return expr;
    }

    /** Returns this instance's XPath object, creating it with secure processing on first use. */
    private XPath xpath() {
        if (xpath == null) {
            try {
                // The factory stays local: if hardening fails, nothing is retained that a later
                // call could pick up and use without secure processing.
                XPathFactory xpathFactory = XPathFactory.newInstance();
                xpathFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
                XPath created = xpathFactory.newXPath();
                created.setNamespaceContext(namespaceContext);
                xpath = created;
            } catch (XPathFactoryConfigurationException ex) {
                throw new RuntimeException("createXPath failed", ex);
            }
        }
        return xpath;
    }

    /**
     * {@link NamespaceContext} backed by a map from prefix to namespace URI. The reserved
     * prefixes {@code xml} and {@code xmlns} always resolve to their fixed namespace URIs.
     */
    private static class MapNamespaceContext implements NamespaceContext {

        private final Map<String, String> uris = new HashMap<>();

        public MapNamespaceContext() {
        }

        /** Binds {@code prefix} to {@code namespaceURI}, replacing any earlier binding. */
        public void addNamespaceURI(String prefix, String namespaceURI) {
            uris.put(prefix, namespaceURI);
        }

        /**
         * @return the URI bound to {@code prefix}, or {@link XMLConstants#NULL_NS_URI} if the
         *         prefix is unbound
         * @throws IllegalArgumentException if {@code prefix} is {@code null}
         */
        @Override
        public String getNamespaceURI(String prefix) {
            if (prefix == null) {
                throw new IllegalArgumentException("prefix must not be null");
            }
            if (XMLConstants.XML_NS_PREFIX.equals(prefix)) {
                return XMLConstants.XML_NS_URI;
            }
            if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix)) {
                return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
            }
            String namespace = uris.get(prefix);
            return namespace != null ? namespace : XMLConstants.NULL_NS_URI;
        }

        /**
         * @return one prefix bound to {@code namespaceURI} (which one is unspecified when
         *         several are bound), or {@code null} if none is
         * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
         */
        @Override
        public String getPrefix(String namespaceURI) {
            Iterator<String> prefixes = getPrefixes(namespaceURI);
            return prefixes.hasNext() ? prefixes.next() : null;
        }

        /**
         * @return a read-only iterator over every prefix bound to {@code namespaceURI}
         * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
         */
        @Override
        public Iterator<String> getPrefixes(String namespaceURI) {
            if (namespaceURI == null) {
                throw new IllegalArgumentException("namespaceURI must not be null");
            }
            List<String> prefixes = new ArrayList<>();
            if (XMLConstants.XML_NS_URI.equals(namespaceURI)) {
                prefixes.add(XMLConstants.XML_NS_PREFIX);
            } else if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI)) {
                prefixes.add(XMLConstants.XMLNS_ATTRIBUTE);
            } else {
                for (Map.Entry<String, String> binding : uris.entrySet()) {
                    if (namespaceURI.equals(binding.getValue())) {
                        prefixes.add(binding.getKey());
                    }
                }
            }
            // Wrapped so that Iterator.remove() is rejected.
            return Collections.unmodifiableList(prefixes).iterator();
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment