// October 10, 2010
// g100pon #83 webスクレイピング(nekohtmlを内部で使っているHtmlUnit使用)
// HtmlUnit uses NekoHTML under the hood.
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
// Proxy settings: leave proxyHost as null for a direct connection, or set it
// to the proxy host name when running behind a firewall.
proxyHost = null
proxyPort = 18080

// Create the HtmlUnit client, going through the proxy only when one is configured.
if (proxyHost != null) {
    c = new WebClient(BrowserVersion.FIREFOX_3, proxyHost, proxyPort)
} else {
    c = new WebClient(BrowserVersion.FIREFOX_3)
}
// Line reader over standard input. It is the fallback for the credential
// prompts below and is also what later command input is read from.
ins = new BufferedReader(new InputStreamReader(System.in))

// Ask for the Gmail credentials. Prefer the system console, which can read
// the password without echoing it; fall back to plain stdin when no console
// is attached.
userid = null
password = null
def console = System.console()
if (console != null) {
    userid = console.readLine("Gmail Mail Address: ")
    // readPassword may return null at end of input; keep password null in
    // that case instead of failing while building the String.
    def secret = console.readPassword("Password: ")
    password = (secret != null) ? new String(secret) : null
} else {
    print "Gmail Mail Address: "
    userid = ins.readLine()
    // No console available: the password is read as an ordinary line.
    print "Password: "
    password = ins.readLine()
}
// Open the login page, fill in the "Email" and "Passwd" fields with the
// collected credentials, then click the "signIn" element. The page returned
// by that click is kept as the current thread-list page.
// NOTE(review): url is an empty string here, which new URL(...) will not
// accept -- the real login address must be supplied before this can run.
url = ""
loginPage = c.getPage(new URL(url))

def emailField = loginPage.getElementById("Email")
emailField.setValueAttribute(userid)

def passwordField = loginPage.getElementById("Passwd")
passwordField.setValueAttribute(password)

def signInButton = loginPage.getElementByName("signIn")
threadsPage = signInButton.click()
// Interactive loop: list the subject links found on the current page and
// execute one command per iteration until the user quits.
//   <number>  open the listed thread with that index and print its text
//   p         follow the link with id 'thn', if present
//   n         follow the link with id 'tho', if present
//   q         leave the loop
def running = true
while (running) {
    // Anchors whose id starts with 'subj' are shown as the selectable threads.
    threads = threadsPage.getByXPath("//a[starts-with(@id,'subj')]")
    threads.eachWithIndex { link, idx ->
        println "[$idx]" + link.asText()
    }
    println "[p]rev [n]ext [q]uit"
    print "> "
    def cmd = ins.readLine()
    if (cmd == null) {
        // End of input: no further command can arrive, so stop instead of
        // redrawing the list forever.
        break
    }
    switch (cmd) {
        case ~/([0-9]+)/:
            // The case pattern matched the whole command, so group 1 of the
            // last matcher is the requested index.
            def i = java.util.regex.Matcher.getLastMatcher().group(1) as int
            if (i < threads.size()) {
                def thread = threads[i].click()
                println thread.asText()
            } else {
                // Indexing past the end of the list would yield null.
                println "No such thread: [$i]"
            }
            break
        case ~/p/:
            def prevLinks = threadsPage.getByXPath("//a[@id='thn']")
            if (prevLinks.size() != 0) {
                threadsPage = prevLinks[0].click()
            }
            break
        case ~/n/:
            def nextLinks = threadsPage.getByXPath("//a[@id='tho']")
            if (nextLinks.size() != 0) {
                threadsPage = nextLinks[0].click()
            }
            break
        case ~/q/:
            running = false
            break
    }
}
