Skip to content

Instantly share code, notes, and snippets.

@bouzuya
Last active December 17, 2015 00:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bouzuya/5520460 to your computer and use it in GitHub Desktop.
Save bouzuya/5520460 to your computer and use it in GitHub Desktop.
ブクログから「読み終えた」冊数を取得するスクレイパーをJavaで
package info.bouzuya.booklog;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
/**
 * Scrapes a user's page on booklog.jp and extracts the per-status book
 * counts listed under the "読書状況" heading.
 *
 * <p>The parsing relies on the page markup: a {@code div.box h5} heading whose
 * text is "読書状況", immediately followed by a list element whose children
 * each contain a label element plus a count written as {@code (N)} in the
 * child's own text. Any deviation from that shape is reported as an
 * {@link IllegalStateException} rather than a bare NullPointerException.
 */
public class Booklog {

    /** Base of the user page URL; the username is appended verbatim. */
    private static final String USER_URL_PREFIX = "http://booklog.jp/users/";

    /** Text of the heading that precedes the list of per-status counts. */
    private static final String READING_STATUS_HEADING = "読書状況";

    /**
     * Matches a count wrapped in parentheses with optional surrounding
     * whitespace, e.g. {@code " (12) "}; group 1 holds the digits.
     * Compiled once here instead of once per list item.
     */
    private static final Pattern COUNT_PATTERN =
            Pattern.compile("^\\s*\\((\\d+)\\)\\s*$");

    /**
     * Returns the count associated with {@code Status.READ} on the given
     * user's page.
     *
     * @param username the Booklog user name; must not be null or empty
     * @return the count parsed for {@code Status.READ}
     * @throws IllegalArgumentException if {@code username} is null or empty
     * @throws IllegalStateException if the page does not contain a count for
     *         {@code Status.READ} or its markup is not in the expected shape
     * @throws RuntimeException if the page cannot be fetched (wraps the
     *         underlying {@link IOException})
     */
    public int getReadCount(String username) {
        if (username == null || username.isEmpty()) {
            throw new IllegalArgumentException("username must not be null or empty");
        }
        String url = USER_URL_PREFIX + username;
        Document document = getDocument(url);
        Map<Status, Integer> counts = getCounts(document);
        Integer readCount = counts.get(Status.READ);
        if (readCount == null) {
            // Without this check the unboxing below would fail with an
            // uninformative NullPointerException.
            throw new IllegalStateException("read count not found for user: " + username);
        }
        return readCount.intValue();
    }

    /**
     * Fetches and parses the document at {@code url} with jsoup.
     *
     * @param url the URL to fetch
     * @return the parsed document
     * @throws RuntimeException wrapping the {@link IOException} raised by jsoup
     */
    private Document getDocument(String url) {
        try {
            return Jsoup.connect(url).get();
        } catch (IOException e) {
            // Keep the cause and say which URL failed.
            throw new RuntimeException("failed to fetch " + url, e);
        }
    }

    /**
     * Collects the per-status counts found under every {@code div.box h5}
     * heading whose text equals "読書状況".
     *
     * @param document the parsed user page
     * @return a map from status to count; empty if no matching heading exists
     * @throws IllegalStateException if a matching heading has no following
     *         sibling element, or a list item cannot be parsed
     */
    private Map<Status, Integer> getCounts(Document document) {
        Map<Status, Integer> counts = new HashMap<Status, Integer>();
        Elements headings = document.select("div.box h5");
        for (Element heading : headings) {
            if (!READING_STATUS_HEADING.equals(heading.text())) {
                continue;
            }
            // nextElementSibling() returns null when the heading is the last element.
            Element list = heading.nextElementSibling();
            if (list == null) {
                throw new IllegalStateException("no list element follows the status heading");
            }
            for (Element item : list.children()) {
                counts.put(parseStatus(item), parseCount(item));
            }
        }
        return counts;
    }

    /** Reads the status from the text of the item's first child element. */
    private Status parseStatus(Element item) {
        // first() returns null when the item has no child elements.
        Element label = item.children().first();
        if (label == null) {
            throw new IllegalStateException("status item has no label element");
        }
        return Status.parse(label.text());
    }

    /** Parses the {@code (N)} count from the item's own (direct) text nodes. */
    private Integer parseCount(Element item) {
        // Only direct text nodes are joined, so the label element's text is excluded.
        StringBuilder ownText = new StringBuilder();
        for (TextNode textNode : item.textNodes()) {
            ownText.append(textNode.text());
        }
        Matcher matcher = COUNT_PATTERN.matcher(ownText.toString());
        if (!matcher.matches()) {
            throw new IllegalStateException("no match found");
        }
        return Integer.valueOf(matcher.group(1));
    }

    /** Prints the read count for the user "bouzuya" to standard output. */
    public static void main(String[] args) {
        Booklog booklog = new Booklog();
        System.out.println(booklog.getReadCount("bouzuya"));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment