Created
July 7, 2014 19:40
-
-
Save gkhays/1fa3b7a9ee705863dee1 to your computer and use it in GitHub Desktop.
A simple JasperReports parser that also demonstrates issues with UTF-8 encoded XML files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.gkh.test; | |
import java.io.BufferedInputStream; | |
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.IOException; | |
import java.util.LinkedHashSet; | |
import java.util.List; | |
import java.util.Set; | |
import org.jdom2.Document; | |
import org.jdom2.Element; | |
import org.jdom2.JDOMException; | |
import org.jdom2.Namespace; | |
import org.jdom2.filter.Filters; | |
import org.jdom2.input.SAXBuilder; | |
import org.jdom2.xpath.XPathExpression; | |
import org.jdom2.xpath.XPathFactory; | |
public class JrxmlParser { | |
public static Element getFirstNode(Document doc, | |
XPathExpression<Element> expression) { | |
return expression.evaluateFirst(doc); | |
} | |
public static List<Element> getNodes(Document doc, | |
XPathExpression<Element> expression) { | |
List<Element> foundNodes = expression.evaluate(doc); | |
return foundNodes; | |
} | |
private static XPathExpression<Element> getXPathExpression( | |
String xpathString, String ns, String uri) { | |
XPathFactory xpf = XPathFactory.instance(); | |
XPathExpression<Element> xpath = xpf.compile(xpathString, | |
Filters.element(), null, Namespace.getNamespace(ns, uri)); | |
return xpath; | |
} | |
// This is pretty specific to Jasper Reports, but it does show how to use | |
// XPath to locate an XML node. | |
public static Set<String> parseQuery(Document doc, String regex) { | |
XPathFactory xpf = XPathFactory.instance(); | |
XPathExpression<Element> xpath = xpf.compile( | |
"/jr:jasperReport/jr:queryString", Filters.element(), null, | |
Namespace.getNamespace("jr", | |
"http://jasperreports.sourceforge.net/jasperreports")); | |
Element queryNode = xpath.evaluateFirst(doc); | |
Set<String> sortedResults = new LinkedHashSet<String>(); | |
if (queryNode != null) { | |
String queryText = queryNode.getText(); | |
// Do something with the text we just found... | |
} | |
return sortedResults; | |
} | |
/** | |
* There can be issues when parsing a UTF-8 encoded XML document. A colleague | |
* solved it by looking at the first 3 bytes of the XML file. It can also be | |
* solved by setting the encoding on an input source before passing it to a | |
* SAX parser. | |
* | |
* {@code | |
* File file = new File("c:\\file-utf.xml"); | |
* InputStream inputStream= new FileInputStream(file); | |
* Reader reader = new InputStreamReader(inputStream,"UTF-8"); | |
* | |
* InputSource is = new InputSource(reader); | |
* is.setEncoding("UTF-8"); | |
* | |
* saxParser.parse(is, handler); | |
* } | |
* | |
* @see <a href="http://www.mkyong.com/java/how-to-read-utf-8-xml-file-in-java-sax-parser/">UTF-8 in XML file using SAX Parser</a> | |
* @see <a href="http://stackoverflow.com/questions/11397678/why-using-inputsource-fixes-sax-parser-when-file-contains-special-utf-8-characte">Stack Overflow - UTF-8 SAX Parser</a> | |
* | |
* More information on input sources | |
* @see <a href="http://stackoverflow.com/questions/17118585/how-to-convert-inputstream-to-inputsource">Convert InputStream to InputSource</a> | |
* | |
* @param source | |
* @param isUtf | |
* @return | |
* @throws IOException | |
* @throws JDOMException | |
*/ | |
public static Document parseXml(File source, boolean isUtf) | |
throws IOException, JDOMException { | |
if (isUtf) { | |
// http://www.rgagnon.com/javadetails/java-handle-utf8-file-with-bom.html | |
byte[] buffer = new byte[3]; | |
BufferedInputStream b = new BufferedInputStream( | |
new FileInputStream(source)); | |
b.mark(0); | |
int count = b.read(buffer); | |
if (count == 3) { | |
// see if they are the BOM | |
// [-17][-69][-65] | |
if (!(buffer[0] == -17 && buffer[1] == -69 && buffer[2] == -65)) | |
b.reset(); | |
} else | |
b.reset(); | |
return new SAXBuilder().build(b); | |
} else { | |
// InputSource is = new InputSource(new InputStreamReader( | |
// new FileInputStream(source))); | |
// return new SAXBuilder().build(is); | |
return new SAXBuilder().build(new FileInputStream(source)); | |
} | |
} | |
public static void printElements(List<Element> elementList, String addAttribute) { | |
StringBuilder sb = new StringBuilder(); | |
for (Element e : elementList) { | |
sb.append(e.getAttributeValue("name")); | |
if (addAttribute != null) { | |
sb.append(": " + e.getAttributeValue(addAttribute)); | |
} | |
System.out.println("Found " + sb.toString()); | |
sb.setLength(0); | |
} | |
} | |
/** | |
* @param args | |
* @throws Exception | |
*/ | |
public static void main(String[] args) throws Exception { | |
File xmlFile = new File(args[0]); | |
Document doc = parseXml(xmlFile, true); | |
String ns = "jr"; | |
String uri = "http://jasperreports.sourceforge.net/jasperreports"; | |
// (1) Get all the field nodes, e.g. <field name="taxonomy_group" class="java.lang.String"/>. | |
String xpathString = "/jr:jasperReport/jr:field"; | |
XPathExpression<Element> xpath = getXPathExpression(xpathString, ns, | |
uri); | |
List<Element> fieldNodes = getNodes(doc, xpath); | |
printElements(fieldNodes, "class"); | |
// (2) Get all the sort field nodes, e.g. <sortField name="target_user_domain"/> | |
System.out.println("\nSort Fields"); | |
xpathString = "/jr:jasperReport/jr:sortField"; | |
xpath = getXPathExpression(xpathString, ns, uri); | |
fieldNodes = getNodes(doc, xpath); | |
printElements(fieldNodes, null); | |
// (3) Get the queryString node. | |
System.out.println("\nQuery Node"); | |
xpathString = "/jr:jasperReport/jr:queryString"; | |
xpath = getXPathExpression(xpathString, ns, uri); | |
Element queryNode = getFirstNode(doc, xpath); | |
System.out.println(queryNode.getText()); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment