Skip to content

Instantly share code, notes, and snippets.

@fumokmm
Created October 18, 2009 01:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fumokmm/212517 to your computer and use it in GitHub Desktop.
Save fumokmm/212517 to your computer and use it in GitHub Desktop.
はてなハイクキーワードサーチ用
// Run the regular-expression implementation and print every entry it found.
def regexKeywords = getKeywordList1('', 3000)
println "${regexKeywords.size()}件"
regexKeywords.each { entry ->
    println "[${entry.title}](${entry.entryCount}) ${entry.url}"
}
// Run the XmlSlurper implementation and print every entry it found.
def slurperKeywords = getKeywordList2('', 3000)
println "${slurperKeywords.size()}件"
slurperKeywords.each { entry ->
    println "[${entry.title}](${entry.entryCount}) ${entry.url}"
}
/**
 * Regular-expression version: downloads one page of the keyword list from
 * h.hatena.ne.jp and extracts the entries with a regex.
 *
 * @param word search term placed in the "word" query parameter; pass it
 *             unencoded, it is URL-encoded (UTF-8) here. null is treated as ''.
 * @param page page number placed in the "page" query parameter
 * @return a list of maps, one per matched <li>, with the keys
 *         'url'        - 'http://h.hatena.ne.jp' followed by the link's href,
 *         'title'      - the link text,
 *         'entryCount' - the digits captured from the entry-count span (a String),
 *                        or the Integer 0 when the <li> has no such span
 */
def getKeywordList1(String word = '', int page = 1) {
    // Encode the search term so that non-ASCII text, spaces, '&' or '#' cannot
    // corrupt the query string.
    def query = "word=${URLEncoder.encode(word ?: '', 'UTF-8')}&page=${page}"
    def text = new URL("http://h.hatena.ne.jp/keywords.body?${query}").getText('UTF-8')
    // The optional entry-count group carries its own lazy skip that is not allowed
    // to run past </li>. With a plain lazy ".*?" in front of an optional group the
    // skip always matches empty first, so the count would only be captured when
    // the <span> directly follows </a>.
    def entryPattern = /(?s)<li>.*?<a href="(.+?)" class="keyword">(.+?)<\/a>(?:(?:(?!<\/li>).)*?<span class="entry-count">\((\d+?)\)<\/span>)?.*?<\/li>/
    // findAll passes the whole match plus each capture group to the closure and
    // returns the list of closure results; an unmatched group arrives as null.
    text.findAll(entryPattern) { whole, path, title, count ->
        [
            'url'       : 'http://h.hatena.ne.jp' + path,
            'title'     : title,
            'entryCount': count ?: 0
        ]
    }
}
/**
 * XmlSlurper version: downloads one page of the keyword list from
 * h.hatena.ne.jp and extracts the entries by parsing the response as XML.
 *
 * @param word search term placed in the "word" query parameter; pass it
 *             unencoded, it is URL-encoded (UTF-8) here. null is treated as ''.
 * @param page page number placed in the "page" query parameter
 * @return a list of maps, one per top-level <li>, with the keys
 *         'url'        - 'http://h.hatena.ne.jp' followed by the href of the <a> child,
 *         'title'      - the text of the <a> child,
 *         'entryCount' - the text of the <span> child (a String), or the
 *                        Integer 0 when the <li> has no <span>
 */
def getKeywordList2(String word = '', int page = 1) {
    // Encode the search term so that non-ASCII text, spaces, '&' or '#' cannot
    // corrupt the query string.
    def query = "word=${URLEncoder.encode(word ?: '', 'UTF-8')}&page=${page}"
    def body = new URL("http://h.hatena.ne.jp/keywords.body?${query}").getText('UTF-8')
    // Wrap the response in a single root element so it can be parsed as one XML
    // document; the <li> elements are then looked up as direct children of it.
    // NOTE(review): this requires the response body to be well-formed XML.
    def xml = """<?xml version="1.0" encoding="utf-8" ?>
<keyword_list>
${body}
</keyword_list>
"""
    def root = new XmlSlurper().parseText(xml)
    root.li.collect { li ->
        [
            'url'       : 'http://h.hatena.ne.jp' + li.a.@href,
            'title'     : li.a.text(),
            'entryCount': li.span.size() ? li.span.text() : 0
        ]
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment