Created
November 5, 2011 12:23
-
-
Save nobusue/1341448 to your computer and use it in GitHub Desktop.
Wikipedia scraping with a local file cache (キャッシュ機能付き). Usage: groovy wikipedia.groovy <keyword>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@Grab(group='org.jsoup', module='jsoup', version='1.6.1') | |
import org.jsoup.* | |
// Article title to look up: the first command-line argument when one is given, otherwise 'Groovy'.
def keyword = (args && args[0]) ? args[0] : 'Groovy'
// Cached article text is kept under <user.home>/tmp.
def tmp = System.getProperty('user.home') + '/tmp'
println getOrRestore(keyword, tmp)
/**
 * Returns the text of the Japanese Wikipedia page for {@code name}, backed by a file cache.
 *
 * If {@code <tmp>/<name>Exp.txt} already exists its content is returned as-is and no
 * network request is made. Otherwise the page is fetched over HTTP with jsoup, its text
 * is written to that file, and the text is returned.
 *
 * @param name article title; URL-encoded as UTF-8 when building the request URL
 * @param tmp  directory holding the cache files; created if it does not exist yet
 * @return the cached or freshly fetched page text
 */
def getOrRestore(name, tmp){
    // '/' or '\' in the keyword (e.g. "AC/DC") would otherwise address a different directory,
    // so they are replaced in the cache file name only; the URL still uses the original keyword.
    def safeName = name.toString().replaceAll('[/\\\\]', '_')
    def file = new File("${tmp}/${safeName}Exp.txt")
    if(file.exists()){
        // Explicit charset: the platform default may not be able to represent Japanese text.
        return file.getText('UTF-8')
    }
    def exp = Jsoup.connect("http://ja.wikipedia.org/wiki/${URLEncoder.encode(name,'UTF-8')}").get().text()
    // Create the cache directory on first use; withWriter fails if the parent is missing.
    file.parentFile.mkdirs()
    file.withWriter('UTF-8'){ it.print exp }
    return exp
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment