Created
June 1, 2010 14:08
-
-
Save dolmen/420975 to your computer and use it in GitHub Desktop.
XSLT processor in Java, with a URI resolver
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import javax.xml.transform.URIResolver; | |
import javax.xml.transform.Source; | |
import javax.xml.transform.TransformerException; | |
import javax.xml.transform.dom.DOMSource; | |
import org.htmlcleaner.HtmlCleaner; | |
import org.htmlcleaner.CleanerProperties; | |
import org.htmlcleaner.TagNode; | |
import org.htmlcleaner.DomSerializer; | |
import org.htmlcleaner.PrettyXmlSerializer; | |
class CleanHtmlURIResolver implements URIResolver | |
{ | |
private final URIResolver baseResolver; | |
public final HtmlCleaner cleaner = new HtmlCleaner(); | |
public final CleanerProperties properties = cleaner.getProperties(); | |
public CleanHtmlURIResolver(final URIResolver r) | |
{ | |
baseResolver = r; | |
} | |
public Source resolve(String href, String base) throws TransformerException | |
{ | |
//System.out.println("href="+href+" base="+base); | |
Source s = null; | |
String systemId; | |
if (base == null || base.length() == 0 || baseResolver == null) { | |
systemId = href; | |
} else { | |
s = baseResolver.resolve(href, base); | |
systemId = s.getSystemId(); | |
} | |
try { | |
//System.out.println("systemId="+systemId); | |
final TagNode rootTagNode = cleaner.clean(new java.net.URL(systemId)); | |
//new PrettyXmlSerializer(props).writeXmlToFile(rootTagNode, "tmp.xml"); | |
org.w3c.dom.Document document = new DomSerializer(properties).createDOM(rootTagNode); | |
s = new javax.xml.transform.dom.DOMSource(document, systemId); | |
} catch (Exception e) { | |
} | |
return s; | |
} | |
} | |
// vim: set ts=4 sw=4 sts=4 : |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import javax.xml.transform.TransformerFactory; | |
import javax.xml.transform.Transformer; | |
import javax.xml.transform.URIResolver; | |
import javax.xml.transform.stream.StreamResult; | |
import javax.xml.transform.stream.StreamSource; | |
import java.io.FileReader; | |
class XSLT { | |
public static void main(String[] args) throws Exception | |
{ | |
TransformerFactory factory = TransformerFactory.newInstance(); | |
URIResolver resolver = new CleanHtmlURIResolver(factory.getURIResolver()); | |
factory.setURIResolver(resolver); | |
Transformer transformer = factory.newTransformer(new StreamSource(new FileReader(args[0]))); | |
// http://osdir.com/ml/text.xml.resin.user/2003-06/msg00027.html | |
transformer.setURIResolver(resolver); | |
transformer.transform(resolver.resolve(args[1], ""), new StreamResult(System.out)); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
CleanHtmlURIResolver swallows exceptions (empty catch block). Is there a better way to handle exceptions? Should they be wrapped in a TransformerException?