Skip to content

Instantly share code, notes, and snippets.

@bouzuya
Created May 3, 2013 13:04
Show Gist options
  • Save bouzuya/5509002 to your computer and use it in GitHub Desktop.
Save bouzuya/5509002 to your computer and use it in GitHub Desktop.
はてなブックマークからブックマーク数をスクレイピング
package info.bouzuya.hateb;
import java.io.IOException;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
/**
 * Scrapes a user's bookmark count from their Hatena Bookmark profile page.
 *
 * <p>The page at {@code http://b.hatena.ne.jp/<username>/} is downloaded with
 * jsoup and the profile sidebar entry labelled {@code ブックマーク数} is read.
 */
public class HatenaBookmark {

    /** Base URL of the service; the username is appended as a path segment. */
    private static final String BASE_URL = "http://b.hatena.ne.jp/";

    /** Selects the list items of the profile module in the sidebar. */
    private static final String PROFILE_ITEMS_QUERY = ".sidebar .hatena-module-profile ul li";

    /** Label text that marks the list item holding the bookmark count. */
    private static final String COUNT_LABEL = "ブックマーク数";

    /** Credentials used by {@link #getCount()}; {@code null} when anonymous. */
    private final HatenaAuth auth;

    /** Creates an anonymous client; only {@link #getCount(String)} is usable. */
    public HatenaBookmark() {
        this(null);
    }

    /**
     * Creates a client bound to the given credentials.
     *
     * @param auth the credentials whose username {@link #getCount()} looks up;
     *             may be {@code null}
     */
    public HatenaBookmark(HatenaAuth auth) {
        this.auth = auth;
    }

    /**
     * Returns the bookmark count of the user named by the configured credentials.
     *
     * @return the bookmark count
     * @throws IllegalStateException if no credentials were supplied, or the
     *                               count could not be located on the page
     */
    public int getCount() {
        if (auth == null) {
            throw new IllegalStateException("need login");
        }
        return getCount(this.auth.getUsername());
    }

    /**
     * Returns the bookmark count shown on the given user's profile page.
     *
     * @param username the Hatena user name; must be non-null and non-empty
     * @return the bookmark count
     * @throws IllegalArgumentException if {@code username} is null or empty
     * @throws IllegalStateException    if no list item carries the expected label
     * @throws NumberFormatException    if the text next to the label is not an
     *                                  integer once commas are removed
     * @throws RuntimeException         wrapping the {@link IOException} if the
     *                                  page cannot be fetched
     */
    public int getCount(String username) {
        if (username == null || username.isEmpty()) {
            throw new IllegalArgumentException("username is null or empty.");
        }
        Document document = getDocument(BASE_URL + username + "/");
        Element countItem = findCountItem(document.select(PROFILE_ITEMS_QUERY));
        if (countItem == null) {
            throw new IllegalStateException("not found");
        }
        return parseCount(ownText(countItem));
    }

    /** Returns the first item whose {@code span.label} reads the count label, or {@code null}. */
    private static Element findCountItem(Elements items) {
        for (Element item : items) {
            Element label = item.select("span.label").first();
            if (label != null && COUNT_LABEL.equals(label.text())) {
                return item;
            }
        }
        return null;
    }

    /** Concatenates the element's direct text nodes, leaving out text nested in child elements. */
    private static String ownText(Element element) {
        StringBuilder sb = new StringBuilder();
        for (TextNode textNode : element.textNodes()) {
            sb.append(textNode.text());
        }
        return sb.toString();
    }

    /** Parses the scraped count, tolerating surrounding whitespace and comma grouping. */
    private static int parseCount(String rawCount) {
        // Integer.parseInt rejects grouped numbers such as "1,234".
        // NOTE(review): assumes the page may render large counts with commas — confirm.
        String digits = rawCount.trim().replace(",", "");
        return Integer.parseInt(digits);
    }

    /**
     * Downloads and parses the page at {@code url}.
     *
     * @throws RuntimeException wrapping the {@link IOException} raised by jsoup
     */
    private Document getDocument(String url) {
        try {
            return Jsoup.connect(url).get();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Demo entry point: prints the bookmark count of the user {@code bouzuya}. */
    public static void main(String[] args) {
        HatenaBookmark hateb = new HatenaBookmark();
        System.out.println(hateb.getCount("bouzuya"));
    }
}
@bouzuya
Copy link
Author

bouzuya commented May 3, 2013

Jsoup の使用例を兼ねている。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment