gkhays/JrxmlParser.java

## JrxmlParser.java
package org.gkh.test;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Namespace;
import org.jdom2.filter.Filters;
import org.jdom2.input.SAXBuilder;
import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory;

/**
 * Small demonstration of reading a JasperReports JRXML file with JDOM2 and
 * locating nodes in it with namespace-aware XPath expressions.
 */
public class JrxmlParser {

	/** Namespace prefix used in the XPath expressions built by this class. */
	private static final String JR_PREFIX = "jr";

	/** Namespace URI the {@link #JR_PREFIX} prefix is bound to. */
	private static final String JR_URI =
			"http://jasperreports.sourceforge.net/jasperreports";

	/** Length in bytes of the UTF-8 byte order mark (0xEF 0xBB 0xBF). */
	private static final int BOM_LENGTH = 3;

	/**
	 * Evaluates an XPath expression and returns the first matching element.
	 *
	 * @param doc the document to search
	 * @param expression the compiled XPath expression
	 * @return the first match; callers must be prepared for {@code null}
	 *         when nothing matches
	 */
	public static Element getFirstNode(Document doc,
			XPathExpression<Element> expression) {
		return expression.evaluateFirst(doc);
	}

	/**
	 * Evaluates an XPath expression and returns every matching element.
	 *
	 * @param doc the document to search
	 * @param expression the compiled XPath expression
	 * @return the matching elements as returned by the expression
	 */
	public static List<Element> getNodes(Document doc,
			XPathExpression<Element> expression) {
		return expression.evaluate(doc);
	}

	/**
	 * Compiles an element-selecting XPath expression with a single namespace
	 * binding.
	 *
	 * @param xpathString the XPath expression text
	 * @param ns the namespace prefix used inside {@code xpathString}
	 * @param uri the namespace URI the prefix is bound to
	 * @return the compiled expression
	 */
	private static XPathExpression<Element> getXPathExpression(
			String xpathString, String ns, String uri) {
		return XPathFactory.instance().compile(xpathString,
				Filters.element(), null, Namespace.getNamespace(ns, uri));
	}

	/**
	 * Locates the {@code queryString} node of a JasperReports document. This
	 * is pretty specific to Jasper Reports, but it does show how to use XPath
	 * to locate an XML node.
	 * <p>
	 * The processing of the query text is still a placeholder: the returned
	 * set is always empty and {@code regex} is not used yet.
	 *
	 * @param doc the parsed report document
	 * @param regex currently unused
	 * @return an empty, insertion-ordered set
	 */
	public static Set<String> parseQuery(Document doc, String regex) {
		XPathExpression<Element> xpath = getXPathExpression(
				"/jr:jasperReport/jr:queryString", JR_PREFIX, JR_URI);
		Element queryNode = getFirstNode(doc, xpath);

		Set<String> sortedResults = new LinkedHashSet<String>();

		if (queryNode != null) {
			String queryText = queryNode.getText();
			// Do something with the text we just found...
		}

		return sortedResults;
	}

	/**
	 * Parses an XML file into a JDOM {@link Document}.
	 * <p>
	 * There can be issues when parsing a UTF-8 encoded XML document. A
	 * colleague solved it by looking at the first 3 bytes of the XML file,
	 * which is what this method does when {@code isUtf} is {@code true}: if
	 * those bytes are the UTF-8 byte order mark they are skipped before the
	 * stream is handed to the parser. It can also be solved by setting the
	 * encoding on an input source before passing it to a SAX parser:
	 *
	 * <pre>{@code
	 * File file = new File("c:\\file-utf.xml");
	 * InputStream inputStream= new FileInputStream(file);
	 * Reader reader = new InputStreamReader(inputStream,"UTF-8");
	 *
	 * InputSource is = new InputSource(reader);
	 * is.setEncoding("UTF-8");
	 *
	 * saxParser.parse(is, handler);
	 * }</pre>
	 *
	 * The stream opened on {@code source} is always closed before this
	 * method returns, whether parsing succeeds or fails.
	 *
	 * @see <a href="http://www.mkyong.com/java/how-to-read-utf-8-xml-file-in-java-sax-parser/">UTF-8 in XML file using SAX Parser</a>
	 * @see <a href="http://stackoverflow.com/questions/11397678/why-using-inputsource-fixes-sax-parser-when-file-contains-special-utf-8-characte">Stack Overflow - UTF-8 SAX Parser</a>
	 * @see <a href="http://stackoverflow.com/questions/17118585/how-to-convert-inputstream-to-inputsource">More information on input sources: Convert InputStream to InputSource</a>
	 *
	 * @param source the XML file to read
	 * @param isUtf {@code true} to skip a leading UTF-8 byte order mark, if
	 *            one is present, before parsing
	 * @return the parsed document
	 * @throws IOException if the file cannot be opened or read
	 * @throws JDOMException if the builder reports a parsing error
	 */
	public static Document parseXml(File source, boolean isUtf)
			throws IOException, JDOMException {
		BufferedInputStream in = new BufferedInputStream(
				new FileInputStream(source));
		try {
			if (isUtf) {
				skipUtf8Bom(in);
			}
			return new SAXBuilder().build(in);
		} finally {
			// Release the file handle on success and on failure alike.
			in.close();
		}
	}

	/**
	 * Consumes a leading UTF-8 byte order mark from the stream, leaving the
	 * stream positioned at its start when no such mark is present.
	 *
	 * @see <a href="http://www.rgagnon.com/javadetails/java-handle-utf8-file-with-bom.html">Handle UTF-8 file with BOM</a>
	 */
	private static void skipUtf8Bom(BufferedInputStream in)
			throws IOException {
		byte[] head = new byte[BOM_LENGTH];
		// The read limit must cover the bytes read before reset(); a limit
		// of 0 does not guarantee the mark is still valid afterwards.
		in.mark(BOM_LENGTH);

		int count = in.read(head);
		boolean hasBom = count == BOM_LENGTH
				&& head[0] == (byte) 0xEF
				&& head[1] == (byte) 0xBB
				&& head[2] == (byte) 0xBF;
		if (!hasBom) {
			// Not a BOM (or fewer than 3 bytes available): rewind so the
			// parser sees the content from the first byte.
			in.reset();
		}
	}

	/**
	 * Prints one {@code "Found ..."} line to standard output per element,
	 * showing its {@code name} attribute and optionally one more attribute.
	 *
	 * @param elementList the elements to print
	 * @param addAttribute name of an additional attribute to append after
	 *            {@code ": "}, or {@code null} to print only the name
	 */
	public static void printElements(List<Element> elementList, String addAttribute) {
		StringBuilder sb = new StringBuilder();
		for (Element e : elementList) {
			sb.append(e.getAttributeValue("name"));
			if (addAttribute != null) {
				sb.append(": ").append(e.getAttributeValue(addAttribute));
			}
			System.out.println("Found " + sb.toString());
			sb.setLength(0);
		}
	}

	/**
	 * Parses the JRXML file named by the first argument and prints its field
	 * nodes, its sort field nodes and the text of its query node.
	 *
	 * @param args command line arguments; {@code args[0]} is the path of the
	 *            JRXML file
	 * @throws Exception if the file cannot be read or parsed
	 */
	public static void main(String[] args) throws Exception {
		if (args.length < 1) {
			System.err.println("Usage: JrxmlParser <path-to-jrxml-file>");
			return;
		}

		File xmlFile = new File(args[0]);
		Document doc = parseXml(xmlFile, true);

		// (1) Get all the field nodes, e.g. <field name="taxonomy_group" class="java.lang.String"/>.
		XPathExpression<Element> xpath = getXPathExpression(
				"/jr:jasperReport/jr:field", JR_PREFIX, JR_URI);
		List<Element> fieldNodes = getNodes(doc, xpath);
		printElements(fieldNodes, "class");

		// (2) Get all the sort field nodes, e.g. <sortField name="target_user_domain"/>
		System.out.println("\nSort Fields");
		xpath = getXPathExpression("/jr:jasperReport/jr:sortField",
				JR_PREFIX, JR_URI);
		fieldNodes = getNodes(doc, xpath);
		printElements(fieldNodes, null);

		// (3) Get the queryString node.
		System.out.println("\nQuery Node");
		xpath = getXPathExpression("/jr:jasperReport/jr:queryString",
				JR_PREFIX, JR_URI);
		Element queryNode = getFirstNode(doc, xpath);
		if (queryNode == null) {
			// A report without a queryString element is not an error here.
			System.out.println("No queryString element found");
		} else {
			System.out.println(queryNode.getText());
		}
	}
}
	package org.gkh.test;

	import java.io.BufferedInputStream;
	import java.io.File;
	import java.io.FileInputStream;
	import java.io.IOException;
	import java.util.LinkedHashSet;
	import java.util.List;
	import java.util.Set;

	import org.jdom2.Document;
	import org.jdom2.Element;
	import org.jdom2.JDOMException;
	import org.jdom2.Namespace;
	import org.jdom2.filter.Filters;
	import org.jdom2.input.SAXBuilder;
	import org.jdom2.xpath.XPathExpression;
	import org.jdom2.xpath.XPathFactory;

	/**
	 * Small demonstration of reading a JasperReports JRXML file with JDOM2 and
	 * locating nodes in it with namespace-aware XPath expressions.
	 */
	public class JrxmlParser {

		/** Namespace prefix used in the XPath expressions built by this class. */
		private static final String JR_PREFIX = "jr";

		/** Namespace URI the {@link #JR_PREFIX} prefix is bound to. */
		private static final String JR_URI =
				"http://jasperreports.sourceforge.net/jasperreports";

		/** Length in bytes of the UTF-8 byte order mark (0xEF 0xBB 0xBF). */
		private static final int BOM_LENGTH = 3;

		/**
		 * Evaluates an XPath expression and returns the first matching element.
		 *
		 * @param doc the document to search
		 * @param expression the compiled XPath expression
		 * @return the first match; callers must be prepared for {@code null}
		 *         when nothing matches
		 */
		public static Element getFirstNode(Document doc,
				XPathExpression<Element> expression) {
			return expression.evaluateFirst(doc);
		}

		/**
		 * Evaluates an XPath expression and returns every matching element.
		 *
		 * @param doc the document to search
		 * @param expression the compiled XPath expression
		 * @return the matching elements as returned by the expression
		 */
		public static List<Element> getNodes(Document doc,
				XPathExpression<Element> expression) {
			return expression.evaluate(doc);
		}

		/**
		 * Compiles an element-selecting XPath expression with a single
		 * namespace binding.
		 *
		 * @param xpathString the XPath expression text
		 * @param ns the namespace prefix used inside {@code xpathString}
		 * @param uri the namespace URI the prefix is bound to
		 * @return the compiled expression
		 */
		private static XPathExpression<Element> getXPathExpression(
				String xpathString, String ns, String uri) {
			return XPathFactory.instance().compile(xpathString,
					Filters.element(), null, Namespace.getNamespace(ns, uri));
		}

		/**
		 * Locates the {@code queryString} node of a JasperReports document.
		 * This is pretty specific to Jasper Reports, but it does show how to
		 * use XPath to locate an XML node.
		 * <p>
		 * The processing of the query text is still a placeholder: the
		 * returned set is always empty and {@code regex} is not used yet.
		 *
		 * @param doc the parsed report document
		 * @param regex currently unused
		 * @return an empty, insertion-ordered set
		 */
		public static Set<String> parseQuery(Document doc, String regex) {
			XPathExpression<Element> xpath = getXPathExpression(
					"/jr:jasperReport/jr:queryString", JR_PREFIX, JR_URI);
			Element queryNode = getFirstNode(doc, xpath);

			Set<String> sortedResults = new LinkedHashSet<String>();

			if (queryNode != null) {
				String queryText = queryNode.getText();
				// Do something with the text we just found...
			}

			return sortedResults;
		}

		/**
		 * Parses an XML file into a JDOM {@link Document}.
		 * <p>
		 * There can be issues when parsing a UTF-8 encoded XML document. A
		 * colleague solved it by looking at the first 3 bytes of the XML file,
		 * which is what this method does when {@code isUtf} is {@code true}:
		 * if those bytes are the UTF-8 byte order mark they are skipped before
		 * the stream is handed to the parser. It can also be solved by setting
		 * the encoding on an input source before passing it to a SAX parser:
		 *
		 * <pre>{@code
		 * File file = new File("c:\\file-utf.xml");
		 * InputStream inputStream= new FileInputStream(file);
		 * Reader reader = new InputStreamReader(inputStream,"UTF-8");
		 *
		 * InputSource is = new InputSource(reader);
		 * is.setEncoding("UTF-8");
		 *
		 * saxParser.parse(is, handler);
		 * }</pre>
		 *
		 * The stream opened on {@code source} is always closed before this
		 * method returns, whether parsing succeeds or fails.
		 *
		 * @see <a href="http://www.mkyong.com/java/how-to-read-utf-8-xml-file-in-java-sax-parser/">UTF-8 in XML file using SAX Parser</a>
		 * @see <a href="http://stackoverflow.com/questions/11397678/why-using-inputsource-fixes-sax-parser-when-file-contains-special-utf-8-characte">Stack Overflow - UTF-8 SAX Parser</a>
		 * @see <a href="http://stackoverflow.com/questions/17118585/how-to-convert-inputstream-to-inputsource">More information on input sources: Convert InputStream to InputSource</a>
		 *
		 * @param source the XML file to read
		 * @param isUtf {@code true} to skip a leading UTF-8 byte order mark,
		 *            if one is present, before parsing
		 * @return the parsed document
		 * @throws IOException if the file cannot be opened or read
		 * @throws JDOMException if the builder reports a parsing error
		 */
		public static Document parseXml(File source, boolean isUtf)
				throws IOException, JDOMException {
			BufferedInputStream in = new BufferedInputStream(
					new FileInputStream(source));
			try {
				if (isUtf) {
					skipUtf8Bom(in);
				}
				return new SAXBuilder().build(in);
			} finally {
				// Release the file handle on success and on failure alike.
				in.close();
			}
		}

		/**
		 * Consumes a leading UTF-8 byte order mark from the stream, leaving
		 * the stream positioned at its start when no such mark is present.
		 *
		 * @see <a href="http://www.rgagnon.com/javadetails/java-handle-utf8-file-with-bom.html">Handle UTF-8 file with BOM</a>
		 */
		private static void skipUtf8Bom(BufferedInputStream in)
				throws IOException {
			byte[] head = new byte[BOM_LENGTH];
			// The read limit must cover the bytes read before reset(); a
			// limit of 0 does not guarantee the mark is still valid afterwards.
			in.mark(BOM_LENGTH);

			int count = in.read(head);
			boolean hasBom = count == BOM_LENGTH
					&& head[0] == (byte) 0xEF
					&& head[1] == (byte) 0xBB
					&& head[2] == (byte) 0xBF;
			if (!hasBom) {
				// Not a BOM (or fewer than 3 bytes available): rewind so the
				// parser sees the content from the first byte.
				in.reset();
			}
		}

		/**
		 * Prints one {@code "Found ..."} line to standard output per element,
		 * showing its {@code name} attribute and optionally one more attribute.
		 *
		 * @param elementList the elements to print
		 * @param addAttribute name of an additional attribute to append after
		 *            {@code ": "}, or {@code null} to print only the name
		 */
		public static void printElements(List<Element> elementList, String addAttribute) {
			StringBuilder sb = new StringBuilder();
			for (Element e : elementList) {
				sb.append(e.getAttributeValue("name"));
				if (addAttribute != null) {
					sb.append(": ").append(e.getAttributeValue(addAttribute));
				}
				System.out.println("Found " + sb.toString());
				sb.setLength(0);
			}
		}

		/**
		 * Parses the JRXML file named by the first argument and prints its
		 * field nodes, its sort field nodes and the text of its query node.
		 *
		 * @param args command line arguments; {@code args[0]} is the path of
		 *            the JRXML file
		 * @throws Exception if the file cannot be read or parsed
		 */
		public static void main(String[] args) throws Exception {
			if (args.length < 1) {
				System.err.println("Usage: JrxmlParser <path-to-jrxml-file>");
				return;
			}

			File xmlFile = new File(args[0]);
			Document doc = parseXml(xmlFile, true);

			// (1) Get all the field nodes, e.g. <field name="taxonomy_group" class="java.lang.String"/>.
			XPathExpression<Element> xpath = getXPathExpression(
					"/jr:jasperReport/jr:field", JR_PREFIX, JR_URI);
			List<Element> fieldNodes = getNodes(doc, xpath);
			printElements(fieldNodes, "class");

			// (2) Get all the sort field nodes, e.g. <sortField name="target_user_domain"/>
			System.out.println("\nSort Fields");
			xpath = getXPathExpression("/jr:jasperReport/jr:sortField",
					JR_PREFIX, JR_URI);
			fieldNodes = getNodes(doc, xpath);
			printElements(fieldNodes, null);

			// (3) Get the queryString node.
			System.out.println("\nQuery Node");
			xpath = getXPathExpression("/jr:jasperReport/jr:queryString",
					JR_PREFIX, JR_URI);
			Element queryNode = getFirstNode(doc, xpath);
			if (queryNode == null) {
				// A report without a queryString element is not an error here.
				System.out.println("No queryString element found");
			} else {
				System.out.println(queryNode.getText());
			}
		}
	}