Created
April 26, 2012 17:42
-
-
Save kaja47/2501227 to your computer and use it in GitHub Desktop.
Tagsoup Xpath
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.net.URL
import javax.xml.transform
import javax.xml.xpath.{XPathConstants, XPathFactory}
import org.ccil.cowan.tagsoup.Parser
import org.w3c.dom.NodeList
import org.xml.sax.InputSource
// Script: download an HTML page, parse it with TagSoup into a DOM tree, and
// print the text content of every <table> element found via XPath.
val url = new URL("http://boards.4chan.org/b/")

// TagSoup's Parser is a SAX XMLReader for real-world (possibly malformed) HTML.
// Both namespace features are disabled; presumably this keeps element names
// unqualified so the unprefixed XPath "//table" below can match them.
val reader = new Parser
reader.setFeature(Parser.namespacesFeature, false)
reader.setFeature(Parser.namespacePrefixesFeature, false)

// A transformer created without a stylesheet performs an identity transform,
// which is used here to copy the SAX event stream into a DOM result.
val transformer = transform.TransformerFactory.newInstance().newTransformer()
val result = new transform.dom.DOMResult

// Open the stream explicitly so it is always closed, even if parsing throws.
val pageStream = url.openStream()
try transformer.transform(new transform.sax.SAXSource(reader, new InputSource(pageStream)), result)
finally pageStream.close()

val doc = result.getNode
val xpath = XPathFactory.newInstance().newXPath()
val expr = xpath.compile("//table")

// evaluate(...) is declared to return Object; with XPathConstants.NODESET the
// runtime value is a NodeList, hence the cast.
val ns = expr.evaluate(doc, XPathConstants.NODESET).asInstanceOf[NodeList]

// NodeList is not a Scala collection, so iterate by index.
for (i <- 0 until ns.getLength) println(ns.item(i).getTextContent)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment