orellabac/XPATHGET.sql

## XPATHGET.sql
/*
XPATHGET Function
==============
Description:
The XPATHGET function is designed to extract values from XML documents using XPath expressions. It takes as input an XML string, an XPath expression, an optional instance parameter, and an optional prefixes parameter.

Parameters:
============
xml                    (String): The XML string from which the function extracts values.
xpath                  (String): The XPath expression used to navigate and locate elements in the XML document.
instance    (NUMBER, default 0): An optional parameter specifying the instance of the XPath result to retrieve. If not provided, the default is set to 0.
prefixes (Object, default null): An optional parameter allowing the specification of namespace prefixes. If not provided, the default is set to null.

Return Value:
The function returns the value(s) extracted from the XML document based on the provided XPath expression. The result can be a single value or a collection of values, depending on the XPath expression and the structure of the XML document.

Example Usage:

sql
Copy code
SELECT XPATHGET('<book><title>Introduction to SQL</title></book>', '/book/title') as book_title;
-- Output: <title>Introduction to SQL</title>
Note:
Ensure that the XML string is well-formed, and the XPath expression accurately reflects the structure of the XML document for proper extraction.

REMEMBER to ADJUST STAGE for your environment in targetpath
*/
create or replace function XPATHGET(xml String, xpath String,instance NUMBER DEFAULT 0, prefixes object DEFAULT null)
returns varchar
language java
handler='XmlProcessor.xpathget'
target_path='@MYSTAGE/XPATHGET.jar'
as
$$
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Iterator;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import java.util.Map;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

class OutputRow {
    public String node;
    public OutputRow(String node) { this.node = node;}
}

class CustomNamespaceContext implements NamespaceContext {
  private final Map<String, String> namespaceMap;
  public CustomNamespaceContext(Map<String, String> namespaceMap) { this.namespaceMap = namespaceMap; }
  @Override
  public String getNamespaceURI(String prefix)
  { return (namespaceMap!=null)?namespaceMap.get(prefix):null; }
  @Override
  public String getPrefix(String namespaceURI) { return null; }
  @Override
  public Iterator<String> getPrefixes(String namespaceURI) { return null; }
}

class XmlProcessor
{
    static TransformerFactory TRANSFORMERFACTORY = TransformerFactory.newInstance();
    static Transformer TRANSFORMER = null;
    static {
        try {
            TRANSFORMER = TRANSFORMERFACTORY.newTransformer();
            TRANSFORMER.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        }
        catch (Exception ex) {}
    }

    public static String elementToString(Node element) {
        try {
            DOMSource source = new DOMSource(element);
            StreamResult result = new StreamResult(new StringWriter());
            TRANSFORMER.transform(source, result);
            return result.getWriter().toString();
        } catch (Exception e)
        { return null; }
    }

    private static class NodeListIterator implements Iterator<OutputRow> {
        private final NodeList nodeList;
        private int currentIndex;
        NodeListIterator(NodeList nodeList) { this.nodeList = nodeList;this.currentIndex = 0;}
        @Override
        public boolean hasNext() { return currentIndex < nodeList.getLength(); }
        @Override
        public OutputRow next()
        {
            var xml = elementToString(nodeList.item(currentIndex++));
            return new OutputRow(xml);
        }
    }

    public static NodeList extractTags(Map<String, String> namespaceMap, String xmlString, String xpathExpression) throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.parse(new ByteArrayInputStream(xmlString.getBytes()));
        //Get XPath expression
        XPathFactory xpathfactory = XPathFactory.newInstance();
        XPath xpath = xpathfactory.newXPath();
        xpath.setNamespaceContext(new CustomNamespaceContext(namespaceMap));
        XPathExpression expr = xpath.compile(xpathExpression);
        //Search XPath expression
        Object result = expr.evaluate(doc, XPathConstants.NODESET);
        //Iterate over results and fetch book names
        NodeList nodes = (NodeList) result;
        return nodes;
  }

    public static String xpathget(
      String xml,
      String xpath, int instance,Map<String, String> prefixes)
    {
        try {
            NodeList elements = extractTags(prefixes, xml, xpath);
            if (elements.getLength() > 0 && elements.getLength() > instance)
                return elementToString(elements.item(0));
        } catch (Exception e) { }
        return null;
    }
}
$$;
	/*
	XPATHGET Function
	==============
	Description:
	The XPATHGET function is designed to extract values from XML documents using XPath expressions. It takes as input an XML string, an XPath expression, an optional instance parameter, and an optional prefixes parameter.

	Parameters:
	============
	xml (String): The XML string from which the function extracts values.
	xpath (String): The XPath expression used to navigate and locate elements in the XML document.
	instance (NUMBER, default 0): An optional parameter specifying the instance of the XPath result to retrieve. If not provided, the default is set to 0.
	prefixes (Object, default null): An optional parameter allowing the specification of namespace prefixes. If not provided, the default is set to null.

	Return Value:
	The function returns the value(s) extracted from the XML document based on the provided XPath expression. The result can be a single value or a collection of values, depending on the XPath expression and the structure of the XML document.

	Example Usage:

	sql
	Copy code
	SELECT XPATHGET('<book><title>Introduction to SQL</title></book>', '/book/title') as book_title;
	-- Output: <title>Introduction to SQL</title>
	Note:
	Ensure that the XML string is well-formed, and the XPath expression accurately reflects the structure of the XML document for proper extraction.

	REMEMBER to ADJUST STAGE for your environment in targetpath
	*/
	create or replace function XPATHGET(xml String, xpath String,instance NUMBER DEFAULT 0, prefixes object DEFAULT null)
	returns varchar
	language java
	handler='XmlProcessor.xpathget'
	target_path='@MYSTAGE/XPATHGET.jar'
	as
	$$
	import java.io.ByteArrayInputStream;
	import java.io.File;
	import java.io.FileInputStream;
	import java.io.StringWriter;
	import java.util.ArrayList;
	import java.util.Iterator;
	import javax.xml.XMLConstants;
	import javax.xml.namespace.NamespaceContext;
	import javax.xml.parsers.DocumentBuilder;
	import javax.xml.parsers.DocumentBuilderFactory;
	import javax.xml.transform.OutputKeys;
	import javax.xml.transform.Transformer;
	import javax.xml.transform.TransformerFactory;
	import javax.xml.transform.dom.DOMSource;
	import javax.xml.transform.stream.StreamResult;
	import javax.xml.xpath.XPath;
	import javax.xml.xpath.XPathConstants;
	import javax.xml.xpath.XPathExpression;
	import javax.xml.xpath.XPathFactory;
	import org.w3c.dom.Document;
	import org.w3c.dom.NodeList;
	import org.w3c.dom.Element;
	import org.w3c.dom.Node;
	import java.util.Map;
	import java.util.Spliterator;
	import java.util.Spliterators;
	import java.util.stream.Stream;
	import java.util.stream.StreamSupport;

	class OutputRow {
	public String node;
	public OutputRow(String node) { this.node = node;}
	}

	class CustomNamespaceContext implements NamespaceContext {
	private final Map<String, String> namespaceMap;
	public CustomNamespaceContext(Map<String, String> namespaceMap) { this.namespaceMap = namespaceMap; }
	@Override
	public String getNamespaceURI(String prefix)
	{ return (namespaceMap!=null)?namespaceMap.get(prefix):null; }
	@Override
	public String getPrefix(String namespaceURI) { return null; }
	@Override
	public Iterator<String> getPrefixes(String namespaceURI) { return null; }
	}

	class XmlProcessor
	{
	static TransformerFactory TRANSFORMERFACTORY = TransformerFactory.newInstance();
	static Transformer TRANSFORMER = null;
	static {
	try {
	TRANSFORMER = TRANSFORMERFACTORY.newTransformer();
	TRANSFORMER.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
	}
	catch (Exception ex) {}
	}

	public static String elementToString(Node element) {
	try {
	DOMSource source = new DOMSource(element);
	StreamResult result = new StreamResult(new StringWriter());
	TRANSFORMER.transform(source, result);
	return result.getWriter().toString();
	} catch (Exception e)
	{ return null; }
	}

	private static class NodeListIterator implements Iterator<OutputRow> {
	private final NodeList nodeList;
	private int currentIndex;
	NodeListIterator(NodeList nodeList) { this.nodeList = nodeList;this.currentIndex = 0;}
	@Override
	public boolean hasNext() { return currentIndex < nodeList.getLength(); }
	@Override
	public OutputRow next()
	{
	var xml = elementToString(nodeList.item(currentIndex++));
	return new OutputRow(xml);
	}
	}

	public static NodeList extractTags(Map<String, String> namespaceMap, String xmlString, String xpathExpression) throws Exception {
	DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
	factory.setNamespaceAware(true);
	DocumentBuilder builder = factory.newDocumentBuilder();
	Document doc = builder.parse(new ByteArrayInputStream(xmlString.getBytes()));
	//Get XPath expression
	XPathFactory xpathfactory = XPathFactory.newInstance();
	XPath xpath = xpathfactory.newXPath();
	xpath.setNamespaceContext(new CustomNamespaceContext(namespaceMap));
	XPathExpression expr = xpath.compile(xpathExpression);
	//Search XPath expression
	Object result = expr.evaluate(doc, XPathConstants.NODESET);
	//Iterate over results and fetch book names
	NodeList nodes = (NodeList) result;
	return nodes;
	}

	public static String xpathget(
	String xml,
	String xpath, int instance,Map<String, String> prefixes)
	{
	try {
	NodeList elements = extractTags(prefixes, xml, xpath);
	if (elements.getLength() > 0 && elements.getLength() > instance)
	return elementToString(elements.item(0));
	} catch (Exception e) { }
	return null;
	}
	}
	$$;