Skip to content

Instantly share code, notes, and snippets.

@orellabac
Created November 26, 2023 02:27
Show Gist options
  • Save orellabac/c6fa22b8fe2c408f8c5fa1e55f9fc3cc to your computer and use it in GitHub Desktop.
Save orellabac/c6fa22b8fe2c408f8c5fa1e55f9fc3cc to your computer and use it in GitHub Desktop.
Snowpark Java UDF to get a value from an XML using XPATH
/*
XPATHGET Function
==============
Description:
The XPATHGET function is designed to extract values from XML documents using XPath expressions. It takes as input an XML string, an XPath expression, an optional instance parameter, and an optional prefixes parameter.
Parameters:
============
xml (String): The XML string from which the function extracts values.
xpath (String): The XPath expression used to navigate and locate elements in the XML document.
instance (NUMBER, default 0): An optional parameter specifying the instance of the XPath result to retrieve. If not provided, the default is set to 0.
prefixes (Object, default null): An optional parameter allowing the specification of namespace prefixes. If not provided, the default is set to null.
Return Value:
The function returns the value(s) extracted from the XML document based on the provided XPath expression. The result can be a single value or a collection of values, depending on the XPath expression and the structure of the XML document.
Example Usage:
sql
Copy code
SELECT XPATHGET('<book><title>Introduction to SQL</title></book>', '/book/title') as book_title;
-- Output: <title>Introduction to SQL</title>
Note:
Ensure that the XML string is well-formed, and the XPath expression accurately reflects the structure of the XML document for proper extraction.
REMEMBER to ADJUST STAGE for your environment in targetpath
*/
create or replace function XPATHGET(xml String, xpath String,instance NUMBER DEFAULT 0, prefixes object DEFAULT null)
returns varchar
language java
handler='XmlProcessor.xpathget'
target_path='@MYSTAGE/XPATHGET.jar'
as
$$
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Iterator;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import java.util.Map;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
class OutputRow {
public String node;
public OutputRow(String node) { this.node = node;}
}
class CustomNamespaceContext implements NamespaceContext {
private final Map<String, String> namespaceMap;
public CustomNamespaceContext(Map<String, String> namespaceMap) { this.namespaceMap = namespaceMap; }
@Override
public String getNamespaceURI(String prefix)
{ return (namespaceMap!=null)?namespaceMap.get(prefix):null; }
@Override
public String getPrefix(String namespaceURI) { return null; }
@Override
public Iterator<String> getPrefixes(String namespaceURI) { return null; }
}
class XmlProcessor
{
static TransformerFactory TRANSFORMERFACTORY = TransformerFactory.newInstance();
static Transformer TRANSFORMER = null;
static {
try {
TRANSFORMER = TRANSFORMERFACTORY.newTransformer();
TRANSFORMER.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
}
catch (Exception ex) {}
}
public static String elementToString(Node element) {
try {
DOMSource source = new DOMSource(element);
StreamResult result = new StreamResult(new StringWriter());
TRANSFORMER.transform(source, result);
return result.getWriter().toString();
} catch (Exception e)
{ return null; }
}
private static class NodeListIterator implements Iterator<OutputRow> {
private final NodeList nodeList;
private int currentIndex;
NodeListIterator(NodeList nodeList) { this.nodeList = nodeList;this.currentIndex = 0;}
@Override
public boolean hasNext() { return currentIndex < nodeList.getLength(); }
@Override
public OutputRow next()
{
var xml = elementToString(nodeList.item(currentIndex++));
return new OutputRow(xml);
}
}
public static NodeList extractTags(Map<String, String> namespaceMap, String xmlString, String xpathExpression) throws Exception {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(new ByteArrayInputStream(xmlString.getBytes()));
//Get XPath expression
XPathFactory xpathfactory = XPathFactory.newInstance();
XPath xpath = xpathfactory.newXPath();
xpath.setNamespaceContext(new CustomNamespaceContext(namespaceMap));
XPathExpression expr = xpath.compile(xpathExpression);
//Search XPath expression
Object result = expr.evaluate(doc, XPathConstants.NODESET);
//Iterate over results and fetch book names
NodeList nodes = (NodeList) result;
return nodes;
}
public static String xpathget(
String xml,
String xpath, int instance,Map<String, String> prefixes)
{
try {
NodeList elements = extractTags(prefixes, xml, xpath);
if (elements.getLength() > 0 && elements.getLength() > instance)
return elementToString(elements.item(0));
} catch (Exception e) { }
return null;
}
}
$$;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment