Created
November 26, 2023 02:27
-
-
Save orellabac/c6fa22b8fe2c408f8c5fa1e55f9fc3cc to your computer and use it in GitHub Desktop.
Snowpark Java UDF to get a value from an XML using XPATH
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
XPATHGET Function | |
============== | |
Description: | |
The XPATHGET function is designed to extract values from XML documents using XPath expressions. It takes as input an XML string, an XPath expression, an optional instance parameter, and an optional prefixes parameter. | |
Parameters: | |
============ | |
xml (String): The XML string from which the function extracts values. | |
xpath (String): The XPath expression used to navigate and locate elements in the XML document. | |
instance (NUMBER, default 0): An optional parameter specifying the instance of the XPath result to retrieve. If not provided, the default is set to 0. | |
prefixes (Object, default null): An optional parameter allowing the specification of namespace prefixes. If not provided, the default is set to null. | |
Return Value: | |
The function returns the value(s) extracted from the XML document based on the provided XPath expression. The result can be a single value or a collection of values, depending on the XPath expression and the structure of the XML document. | |
Example Usage: | |
sql | |
Copy code | |
SELECT XPATHGET('<book><title>Introduction to SQL</title></book>', '/book/title') as book_title; | |
-- Output: <title>Introduction to SQL</title> | |
Note: | |
Ensure that the XML string is well-formed, and the XPath expression accurately reflects the structure of the XML document for proper extraction. | |
REMEMBER to ADJUST STAGE for your environment in targetpath | |
*/ | |
create or replace function XPATHGET(xml String, xpath String,instance NUMBER DEFAULT 0, prefixes object DEFAULT null) | |
returns varchar | |
language java | |
handler='XmlProcessor.xpathget' | |
target_path='@MYSTAGE/XPATHGET.jar' | |
as | |
$$ | |
import java.io.ByteArrayInputStream; | |
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.StringWriter; | |
import java.util.ArrayList; | |
import java.util.Iterator; | |
import javax.xml.XMLConstants; | |
import javax.xml.namespace.NamespaceContext; | |
import javax.xml.parsers.DocumentBuilder; | |
import javax.xml.parsers.DocumentBuilderFactory; | |
import javax.xml.transform.OutputKeys; | |
import javax.xml.transform.Transformer; | |
import javax.xml.transform.TransformerFactory; | |
import javax.xml.transform.dom.DOMSource; | |
import javax.xml.transform.stream.StreamResult; | |
import javax.xml.xpath.XPath; | |
import javax.xml.xpath.XPathConstants; | |
import javax.xml.xpath.XPathExpression; | |
import javax.xml.xpath.XPathFactory; | |
import org.w3c.dom.Document; | |
import org.w3c.dom.NodeList; | |
import org.w3c.dom.Element; | |
import org.w3c.dom.Node; | |
import java.util.Map; | |
import java.util.Spliterator; | |
import java.util.Spliterators; | |
import java.util.stream.Stream; | |
import java.util.stream.StreamSupport; | |
class OutputRow { | |
public String node; | |
public OutputRow(String node) { this.node = node;} | |
} | |
class CustomNamespaceContext implements NamespaceContext { | |
private final Map<String, String> namespaceMap; | |
public CustomNamespaceContext(Map<String, String> namespaceMap) { this.namespaceMap = namespaceMap; } | |
@Override | |
public String getNamespaceURI(String prefix) | |
{ return (namespaceMap!=null)?namespaceMap.get(prefix):null; } | |
@Override | |
public String getPrefix(String namespaceURI) { return null; } | |
@Override | |
public Iterator<String> getPrefixes(String namespaceURI) { return null; } | |
} | |
class XmlProcessor | |
{ | |
static TransformerFactory TRANSFORMERFACTORY = TransformerFactory.newInstance(); | |
static Transformer TRANSFORMER = null; | |
static { | |
try { | |
TRANSFORMER = TRANSFORMERFACTORY.newTransformer(); | |
TRANSFORMER.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); | |
} | |
catch (Exception ex) {} | |
} | |
public static String elementToString(Node element) { | |
try { | |
DOMSource source = new DOMSource(element); | |
StreamResult result = new StreamResult(new StringWriter()); | |
TRANSFORMER.transform(source, result); | |
return result.getWriter().toString(); | |
} catch (Exception e) | |
{ return null; } | |
} | |
private static class NodeListIterator implements Iterator<OutputRow> { | |
private final NodeList nodeList; | |
private int currentIndex; | |
NodeListIterator(NodeList nodeList) { this.nodeList = nodeList;this.currentIndex = 0;} | |
@Override | |
public boolean hasNext() { return currentIndex < nodeList.getLength(); } | |
@Override | |
public OutputRow next() | |
{ | |
var xml = elementToString(nodeList.item(currentIndex++)); | |
return new OutputRow(xml); | |
} | |
} | |
public static NodeList extractTags(Map<String, String> namespaceMap, String xmlString, String xpathExpression) throws Exception { | |
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); | |
factory.setNamespaceAware(true); | |
DocumentBuilder builder = factory.newDocumentBuilder(); | |
Document doc = builder.parse(new ByteArrayInputStream(xmlString.getBytes())); | |
//Get XPath expression | |
XPathFactory xpathfactory = XPathFactory.newInstance(); | |
XPath xpath = xpathfactory.newXPath(); | |
xpath.setNamespaceContext(new CustomNamespaceContext(namespaceMap)); | |
XPathExpression expr = xpath.compile(xpathExpression); | |
//Search XPath expression | |
Object result = expr.evaluate(doc, XPathConstants.NODESET); | |
//Iterate over results and fetch book names | |
NodeList nodes = (NodeList) result; | |
return nodes; | |
} | |
public static String xpathget( | |
String xml, | |
String xpath, int instance,Map<String, String> prefixes) | |
{ | |
try { | |
NodeList elements = extractTags(prefixes, xml, xpath); | |
if (elements.getLength() > 0 && elements.getLength() > instance) | |
return elementToString(elements.item(0)); | |
} catch (Exception e) { } | |
return null; | |
} | |
} | |
$$; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment