Created
May 3, 2013 13:04
-
-
Save bouzuya/5509002 to your computer and use it in GitHub Desktop.
はてなブックマークからブックマーク数をスクレイピング
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package info.bouzuya.hateb; | |
import java.io.IOException; | |
import java.util.List; | |
import org.jsoup.Jsoup; | |
import org.jsoup.nodes.Document; | |
import org.jsoup.nodes.Element; | |
import org.jsoup.nodes.TextNode; | |
import org.jsoup.select.Elements; | |
public class HatenaBookmark { | |
private final HatenaAuth auth; | |
public HatenaBookmark() { | |
this(null); | |
} | |
public HatenaBookmark(HatenaAuth auth) { | |
this.auth = auth; | |
} | |
public int getCount() { | |
if (auth == null) { | |
throw new IllegalStateException("need login"); | |
} | |
return getCount(this.auth.getUsername()); | |
} | |
public int getCount(String username) { | |
if (username == null || username.isEmpty()) { | |
throw new IllegalArgumentException("username is null or empty."); | |
} | |
String url = "http://b.hatena.ne.jp/" + username + "/"; | |
String cssQuery = ".sidebar .hatena-module-profile ul li"; | |
Document document = getDocument(url); | |
Elements elements = document.select(cssQuery); | |
for (Element element : elements) { | |
Element label = element.select("span.label").first(); | |
String labelText = (label == null ? "" : label.text()); | |
if (!labelText.equals("ブックマーク数")) { | |
continue; | |
} | |
List<TextNode> textNodes = element.textNodes(); | |
StringBuilder sb = new StringBuilder(); | |
for (TextNode textNode : textNodes) { | |
sb.append(textNode.text()); | |
} | |
String countString = sb.toString().trim(); | |
return Integer.parseInt(countString); | |
} | |
throw new IllegalStateException("not found"); | |
} | |
private Document getDocument(String url) { | |
try { | |
return Jsoup.connect(url).get(); | |
} catch (IOException e) { | |
throw new RuntimeException(e); | |
} | |
} | |
public static void main(String[] args) { | |
HatenaBookmark hateb = new HatenaBookmark(); | |
System.out.println(hateb.getCount("bouzuya")); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Jsoup の使用例を兼ねている。