orellabac/XPATHGET_TO_TABLE.sql

## XPATHGET_TO_TABLE.sql
/*
XPATHGET_TO_TABLE Function

Description:
The XPATHGET_TO_TABLE function is designed to extract and return XML nodes as a table based on the provided XPath expression. It takes as input an XML string, an XPath expression, and an object specifying namespace prefixes.

Parameters:

xml (String): The XML string from which the function extracts nodes.
xpath (String): The XPath expression used to navigate and locate nodes in the XML document.
prefixes (Object): An object specifying namespace prefixes for the XML document.
Return Value:
The function returns a table where each row represents a distinct node selected by the XPath expression. The column node contains the value of each selected node.

Example Usage:

sql
Copy code
SELECT * FROM TABLE(XPATHGET_TO_TABLE('<bookstore><book><title>Introduction to SQL</title></book><book><title>Advanced SQL</title></book></bookstore>', '//book/title', object_construct()));
-- Output:
--<title>Introduction to SQL</title>
--<title>Advanced SQL</title>

Note:
Ensure that the XML string is well-formed, and the XPath expression accurately reflects the structure of the XML document for proper extraction. The prefixes parameter allows handling namespaces in the XPath expression.
*/

create or replace function XPATHGET_TO_TABLE(xml String, xpath String, prefixes object)
returns table(node varchar)
language java
handler='XmlExtractor'
target_path='@MYSTAGE/XPATHGET_TO_TABLE.jar'
as
$$
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Iterator;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import java.util.Map;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

class OutputRow {
    public String node;
    public OutputRow(String node) { this.node = node;}
}

class CustomNamespaceContext implements NamespaceContext {
  private final Map<String, String> namespaceMap;
  public CustomNamespaceContext(Map<String, String> namespaceMap) { this.namespaceMap = namespaceMap; }
  @Override
  public String getNamespaceURI(String prefix)
  { return (namespaceMap!=null)?namespaceMap.get(prefix):null; }
  @Override
  public String getPrefix(String namespaceURI) { return null; }
  @Override
  public Iterator<String> getPrefixes(String namespaceURI) { return null; }
}

class XmlProcessor
{
    static TransformerFactory TRANSFORMERFACTORY = TransformerFactory.newInstance();
    static Transformer TRANSFORMER = null;
    static {
        try {
            TRANSFORMER = TRANSFORMERFACTORY.newTransformer();
            TRANSFORMER.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        }
        catch (Exception ex) {}
    }

    public static String elementToString(Node element) {
        try {
            DOMSource source = new DOMSource(element);
            StreamResult result = new StreamResult(new StringWriter());
            TRANSFORMER.transform(source, result);
            return result.getWriter().toString();
        } catch (Exception e)
        { return null; }
    }

    private static class NodeListIterator implements Iterator<OutputRow> {
        private final NodeList nodeList;
        private int currentIndex;
        NodeListIterator(NodeList nodeList) { this.nodeList = nodeList;this.currentIndex = 0;}
        @Override
        public boolean hasNext() { return currentIndex < nodeList.getLength(); }
        @Override
        public OutputRow next()
        {
            var xml = elementToString(nodeList.item(currentIndex++));
            return new OutputRow(xml);
        }
    }

    public static NodeList extractTags(Map<String, String> namespaceMap, String xmlString, String xpathExpression) throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.parse(new ByteArrayInputStream(xmlString.getBytes()));
        //Get XPath expression
        XPathFactory xpathfactory = XPathFactory.newInstance();
        XPath xpath = xpathfactory.newXPath();
        xpath.setNamespaceContext(new CustomNamespaceContext(namespaceMap));
        XPathExpression expr = xpath.compile(xpathExpression);
        //Search XPath expression
        Object result = expr.evaluate(doc, XPathConstants.NODESET);
        //Iterate over results and fetch book names
        NodeList nodes = (NodeList) result;
        return nodes;
  }

  public static Stream<OutputRow> processXmlData(Map<String, String> prefixToNamespaceMap, String xmlString, String xpathExpression) {
        try {
            NodeList elements = extractTags(prefixToNamespaceMap, xmlString, xpathExpression);
            Iterator<OutputRow> elementIterator = new NodeListIterator(elements);
            return StreamSupport.stream(
                    Spliterators.spliteratorUnknownSize(elementIterator, Spliterator.ORDERED),
                    false
                );
        } catch (Exception e)
        { return Stream.empty();}
    }
}

public class XmlExtractor {
    public static Class getOutputClass() { return OutputRow.class; }
    public Stream<OutputRow> process(String xml, String xpath,Map<String,String> prefixes)
    { return XmlProcessor.processXmlData(prefixes, xml, xpath); }
    public Stream<OutputRow> endPartition() { return Stream.empty(); }
}
$$;
	/*
	XPATHGET_TO_TABLE Function

	Description:
	The XPATHGET_TO_TABLE function is designed to extract and return XML nodes as a table based on the provided XPath expression. It takes as input an XML string, an XPath expression, and an object specifying namespace prefixes.

	Parameters:

	xml (String): The XML string from which the function extracts nodes.
	xpath (String): The XPath expression used to navigate and locate nodes in the XML document.
	prefixes (Object): An object specifying namespace prefixes for the XML document.
	Return Value:
	The function returns a table where each row represents a distinct node selected by the XPath expression. The column node contains the value of each selected node.

	Example Usage:

	sql
	Copy code
	SELECT * FROM TABLE(XPATHGET_TO_TABLE('<bookstore><book><title>Introduction to SQL</title></book><book><title>Advanced SQL</title></book></bookstore>', '//book/title', object_construct()));
	-- Output:
	--<title>Introduction to SQL</title>
	--<title>Advanced SQL</title>

	Note:
	Ensure that the XML string is well-formed, and the XPath expression accurately reflects the structure of the XML document for proper extraction. The prefixes parameter allows handling namespaces in the XPath expression.
	*/

	create or replace function XPATHGET_TO_TABLE(xml String, xpath String, prefixes object)
	returns table(node varchar)
	language java
	handler='XmlExtractor'
	target_path='@MYSTAGE/XPATHGET_TO_TABLE.jar'
	as
	$$
	import java.io.ByteArrayInputStream;
	import java.io.File;
	import java.io.FileInputStream;
	import java.io.StringWriter;
	import java.util.ArrayList;
	import java.util.Iterator;
	import javax.xml.XMLConstants;
	import javax.xml.namespace.NamespaceContext;
	import javax.xml.parsers.DocumentBuilder;
	import javax.xml.parsers.DocumentBuilderFactory;
	import javax.xml.transform.OutputKeys;
	import javax.xml.transform.Transformer;
	import javax.xml.transform.TransformerFactory;
	import javax.xml.transform.dom.DOMSource;
	import javax.xml.transform.stream.StreamResult;
	import javax.xml.xpath.XPath;
	import javax.xml.xpath.XPathConstants;
	import javax.xml.xpath.XPathExpression;
	import javax.xml.xpath.XPathFactory;
	import org.w3c.dom.Document;
	import org.w3c.dom.NodeList;
	import org.w3c.dom.Element;
	import org.w3c.dom.Node;
	import java.util.Map;
	import java.util.Spliterator;
	import java.util.Spliterators;
	import java.util.stream.Stream;
	import java.util.stream.StreamSupport;

	class OutputRow {
	public String node;
	public OutputRow(String node) { this.node = node;}
	}

	class CustomNamespaceContext implements NamespaceContext {
	private final Map<String, String> namespaceMap;
	public CustomNamespaceContext(Map<String, String> namespaceMap) { this.namespaceMap = namespaceMap; }
	@Override
	public String getNamespaceURI(String prefix)
	{ return (namespaceMap!=null)?namespaceMap.get(prefix):null; }
	@Override
	public String getPrefix(String namespaceURI) { return null; }
	@Override
	public Iterator<String> getPrefixes(String namespaceURI) { return null; }
	}

	class XmlProcessor
	{
	static TransformerFactory TRANSFORMERFACTORY = TransformerFactory.newInstance();
	static Transformer TRANSFORMER = null;
	static {
	try {
	TRANSFORMER = TRANSFORMERFACTORY.newTransformer();
	TRANSFORMER.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
	}
	catch (Exception ex) {}
	}

	public static String elementToString(Node element) {
	try {
	DOMSource source = new DOMSource(element);
	StreamResult result = new StreamResult(new StringWriter());
	TRANSFORMER.transform(source, result);
	return result.getWriter().toString();
	} catch (Exception e)
	{ return null; }
	}

	private static class NodeListIterator implements Iterator<OutputRow> {
	private final NodeList nodeList;
	private int currentIndex;
	NodeListIterator(NodeList nodeList) { this.nodeList = nodeList;this.currentIndex = 0;}
	@Override
	public boolean hasNext() { return currentIndex < nodeList.getLength(); }
	@Override
	public OutputRow next()
	{
	var xml = elementToString(nodeList.item(currentIndex++));
	return new OutputRow(xml);
	}
	}

	public static NodeList extractTags(Map<String, String> namespaceMap, String xmlString, String xpathExpression) throws Exception {
	DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
	factory.setNamespaceAware(true);
	DocumentBuilder builder = factory.newDocumentBuilder();
	Document doc = builder.parse(new ByteArrayInputStream(xmlString.getBytes()));
	//Get XPath expression
	XPathFactory xpathfactory = XPathFactory.newInstance();
	XPath xpath = xpathfactory.newXPath();
	xpath.setNamespaceContext(new CustomNamespaceContext(namespaceMap));
	XPathExpression expr = xpath.compile(xpathExpression);
	//Search XPath expression
	Object result = expr.evaluate(doc, XPathConstants.NODESET);
	//Iterate over results and fetch book names
	NodeList nodes = (NodeList) result;
	return nodes;
	}

	public static Stream<OutputRow> processXmlData(Map<String, String> prefixToNamespaceMap, String xmlString, String xpathExpression) {
	try {
	NodeList elements = extractTags(prefixToNamespaceMap, xmlString, xpathExpression);
	Iterator<OutputRow> elementIterator = new NodeListIterator(elements);
	return StreamSupport.stream(
	Spliterators.spliteratorUnknownSize(elementIterator, Spliterator.ORDERED),
	false
	);
	} catch (Exception e)
	{ return Stream.empty();}
	}
	}

	public class XmlExtractor {
	public static Class getOutputClass() { return OutputRow.class; }
	public Stream<OutputRow> process(String xml, String xpath,Map<String,String> prefixes)
	{ return XmlProcessor.processXmlData(prefixes, xml, xpath); }
	public Stream<OutputRow> endPartition() { return Stream.empty(); }
	}
	$$;