Skip to content

Instantly share code, notes, and snippets.

@lacucaracha-jp
Created February 20, 2016 03:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lacucaracha-jp/fd0e7d857d6041ed53c4 to your computer and use it in GitHub Desktop.
Save lacucaracha-jp/fd0e7d857d6041ed53c4 to your computer and use it in GitHub Desktop.
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.math.BigDecimal;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import net.arnx.jsonic.JSON;
import org.apache.xmlrpc.client.XmlRpcClient;
import org.apache.xmlrpc.client.XmlRpcClientConfigImpl;
import org.jsoup.Jsoup;
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
/**
 * Command-line crawler that copies Hatena Blog / Hatena Bookmark data into a
 * SQLite database.
 *
 * <p>Arguments: {@code args[0]} is the Hatena ID to start from, {@code args[1]}
 * is the number of blogs to process, {@code args[2]} is the SQLite database
 * file. The tables {@code bookmarks}, {@code pages}, {@code hatena_id} and
 * {@code blog_subscriber_id} are emptied at start-up.
 *
 * <p>All methods share the single connection held in {@link #sconn}, which runs
 * with auto-commit disabled; the class is written for single-threaded use.
 *
 * @author Amano
 */
public class HatenaBookmark {

    /** Connect and read timeout, in milliseconds, used for every outbound HTTP request. */
    private static final int TIMEOUT_MILLIS = 100000;

    // Legacy scratch fields. The methods below now work with local variables so that
    // statements, result sets and streams can be closed deterministically; the
    // declarations are retained only so that code referring to them keeps compiling.
    static InputStream in = null;
    static HttpURLConnection hconn = null;
    static String url = null;
    static InputStream is = null;
    static String str = null;
    static org.jsoup.nodes.Document document;
    static PreparedStatement pstmt;
    static ResultSet rs;

    /** Shared database connection, opened by {@link #main(String[])}. */
    static java.sql.Connection sconn = null;

    /** Parser for the {@code datetime} attribute of {@code <time>} elements. */
    static SimpleDateFormat bookmarkformat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXXX", Locale.ENGLISH);

    /**
     * Entry point: resets the tables, registers the seed Hatena ID and then
     * processes up to {@code args[1]} blogs chosen by {@link #recentUpdate()}.
     *
     * @param args seed Hatena ID, iteration count, SQLite database file
     */
    public static void main(String[] args) {
        if (args.length < 3) {
            System.out.println("usage: HatenaBookmark <hatena-id> <iterations> <sqlite-db-file>");
            return;
        }
        try {
            // Parse before touching the database so a malformed count cannot wipe the tables.
            int rounds = Integer.parseInt(args[1]);
            String dbUrl = "jdbc:sqlite:" + args[2];
            // NOTE(review): setLoginTimeout is expressed in seconds, so 5000 is roughly
            // 83 minutes; the value is kept unchanged.
            DriverManager.setLoginTimeout(5000);
            Class.forName("org.sqlite.JDBC");
            sconn = DriverManager.getConnection(dbUrl);
            sconn.setAutoCommit(false);

            String[] resetStatements = {
                "delete from bookmarks",
                "delete from pages",
                "delete from hatena_id",
                "delete from blog_subscriber_id"
            };
            for (String sql : resetStatements) {
                try (PreparedStatement reset = sconn.prepareStatement(sql)) {
                    reset.executeUpdate();
                }
            }
            sconn.commit();

            System.out.println("処理開始");
            getHatenaIdData(args[0]);
            for (int i = 0; i < rounds; i++) {
                String next = recentUpdate();
                System.out.println(i + ":" + next);
                if (next == null || next.isEmpty()) {
                    // No unprocessed blog is left; further iterations would only repeat the same failure.
                    break;
                }
                getBlogSubscriber(next);
            }
        } catch (Exception e) {
            System.out.println(e);
        } finally {
            closeConnection();
        }
    }

    /**
     * Walks the Hatena Bookmark "hot entries" listing of a site and stores every
     * entry that is not yet present in {@code PAGES}, together with its bookmarks.
     * Failures are printed and do not stop the remaining entries or pages.
     *
     * @param burl    site URL passed as the {@code url} query parameter of the listing
     * @param pageCnt number of listing pages to read (20 entries per page)
     */
    static void insertBookmarkData(String burl, int pageCnt) {
        for (int pageNo = 0; pageNo < pageCnt; pageNo++) {
            try {
                String listUrl = "http://b.hatena.ne.jp/entrylist?sort=hot&layout=headline&url=" + burl + "&of=" + pageNo * 20;
                org.jsoup.nodes.Document listing = Jsoup.connect(listUrl).timeout(TIMEOUT_MILLIS).get();
                for (org.jsoup.nodes.Element entry : listing.select("li.entrylist-unit")) {
                    try {
                        storeEntry(entry);
                    } catch (Exception e) {
                        System.out.println(e.toString());
                    }
                }
            } catch (Exception e) {
                System.out.println(e.toString());
            }
        }
    }

    /**
     * Fetches the jsonlite record of one listing entry and, unless its eid is
     * already in {@code PAGES}, inserts its bookmarks and the page row, then commits.
     */
    private static void storeEntry(org.jsoup.nodes.Element entry) throws Exception {
        String target = URLEncoder.encode(entry.getElementsByTag("a").attr("href"), "utf-8");
        Page page = JSON.decode(fetchBody("http://b.hatena.ne.jp/entry/jsonlite/?url=" + target), Page.class);
        if (page == null || page.getBookmarks() == null) {
            // Nothing usable was decoded; skip the entry instead of failing on a null dereference.
            return;
        }
        if (rowExists("select * from PAGES where eid = ?", page.getEid())) {
            return;
        }
        try (PreparedStatement insertBookmark = sconn.prepareStatement(
                "insert into BOOKMARKS(URL,EID,BOOKMARKUSER,STARCOUNT,TIMESTAMP,COMMENT,TAG) values(?,?,?,?,?,?,?) ")) {
            for (Bookmark bookmark : page.getBookmarks()) {
                System.out.println(page.getTitle() + "," + bookmark.getUser() + "," + bookmark.getComment());
                insertBookmark.setString(1, page.getUrl());
                insertBookmark.setString(2, page.getEid());
                insertBookmark.setString(3, bookmark.getUser());
                insertBookmark.setInt(4, 0);
                insertBookmark.setDate(5, new java.sql.Date(bookmark.getTimestamp().getTime()));
                insertBookmark.setString(6, bookmark.getComment());
                insertBookmark.setString(7, bookmark.getTags());
                insertBookmark.executeUpdate();
            }
        }
        try (PreparedStatement insertPage = sconn.prepareStatement(
                "insert into PAGES(DATE,CATEGORY,EID,ENTRYRANK,COUNT,URL,TITLE) values(?,?,?,?,?,?,?) ")) {
            insertPage.setDate(1, null);
            insertPage.setString(2, entry.select("li.category").text());
            insertPage.setString(3, page.getEid());
            insertPage.setBigDecimal(4, new BigDecimal(0));
            insertPage.setBigDecimal(5, new BigDecimal(page.getCount()));
            insertPage.setString(6, page.getUrl());
            insertPage.setString(7, page.getTitle());
            insertPage.executeUpdate();
        }
        sconn.commit();
    }

    /**
     * Processes one blog: records the readers shown on its {@code /about} page
     * (registering each of them through {@link #getHatenaIdData(String)}), reads
     * the reader count and the feedly subscriber count, stores the blog's
     * bookmarked entries and finally updates the blog's {@code HATENA_ID} row.
     * Any failure is printed and ends the processing of this blog.
     *
     * @param blogurl base URL of the blog
     */
    static void getBlogSubscriber(String blogurl) {
        try {
            // The about page is parsed once and reused for both the reader icons and the count.
            org.jsoup.nodes.Document about = Jsoup.connect(blogurl + "/about").timeout(TIMEOUT_MILLIS).get();

            try (PreparedStatement insertReader = sconn.prepareStatement(
                    "insert into BLOG_SUBSCRIBER_ID(URL,SUBSCRIBER_ID) values(?,?) ")) {
                for (org.jsoup.nodes.Element icon : about.select("div.info").select("img.profile-icon")) {
                    String readerId = icon.getElementsByTag("img").attr("title");
                    insertReader.setString(1, blogurl);
                    insertReader.setString(2, readerId);
                    insertReader.executeUpdate();
                    getHatenaIdData(readerId);
                }
            }

            int subscriber;
            try {
                subscriber = Integer.parseInt(about.select("span.about-subscription-count").text()
                        .replaceAll(" 人", "").replaceAll(" people", ""));
            } catch (NumberFormatException ignored) {
                // The counter is missing or not numeric; treated as zero readers.
                subscriber = 0;
            }

            int feedlycnt = 0;
            try {
                String feedUrl = "http://cloud.feedly.com/v3/search/feeds?query=" + URLEncoder.encode(blogurl, "utf-8");
                String payload = fetchBody(feedUrl);
                // Keep only what sits between "subscribers": and ,"lastUpdated" in the response.
                payload = payload.replaceAll(".*\"subscribers\":", "").replaceAll(",\"lastUpdated\".*", "");
                feedlycnt = Integer.parseInt(payload);
            } catch (Exception e) {
                System.out.println(e.toString());
            }

            insertBookmarkData(blogurl, 5);

            try (PreparedStatement update = sconn.prepareStatement(
                    "update HATENA_ID set BLOG_COUNT=?,FEEDLY_COUNT=? where BLOG_URL=?")) {
                update.setInt(1, subscriber);
                update.setInt(2, feedlycnt);
                update.setString(3, blogurl);
                update.executeUpdate();
            }
            sconn.commit();
        } catch (Exception e) {
            System.out.println(e.toString());
        }
    }

    /**
     * Registers a Hatena ID in {@code HATENA_ID} if it is not there yet. The
     * blog URL, title, time of the latest post and bookmark count are looked up
     * on a best-effort basis; when the lookup fails the row is still inserted
     * with whatever was obtained up to that point.
     *
     * @param HatenaId Hatena user ID
     */
    static void getHatenaIdData(String HatenaId) {
        try {
            if (rowExists("select * from HATENA_ID where ID = ?", HatenaId)) {
                return;
            }
            String blogurl = null;
            String blogtitle = null;
            java.sql.Time postdate = null;
            int hatenacnt = 0;
            try {
                org.jsoup.nodes.Document profile =
                        Jsoup.connect("http://blog.hatena.ne.jp/" + HatenaId).timeout(TIMEOUT_MILLIS).get();
                blogurl = profile.getElementsByTag("HTML").attr("data-blogs-uri-base");
                blogtitle = profile.getElementsByTag("TITLE").text();
                org.jsoup.nodes.Element latest = profile.getElementsByTag("TIME").first();
                if (latest != null) {
                    // The bookmark count is only looked up when a post time was found and parsed.
                    postdate = new java.sql.Time(bookmarkformat.parse(latest.attr("datetime")).getTime());
                    hatenacnt = getCount(blogurl);
                }
            } catch (Exception e) {
                // Best effort: report the problem and store the partial data below.
                System.out.println(e.toString());
            }
            try (PreparedStatement insert = sconn.prepareStatement(
                    "insert into HATENA_ID(ID,BLOG_URL,LAST_POST,BLOG_TITLE,BLOG_COUNT,FEEDLY_COUNT,HATENA_COUNT,LAST_UPDATE)"
                    + " values(?,?,?,?,?,?,?,?) ")) {
                insert.setString(1, HatenaId);  // ID
                insert.setString(2, blogurl);   // BLOG_URL
                insert.setTime(3, postdate);    // LAST_POST
                insert.setString(4, blogtitle); // BLOG_TITLE
                insert.setInt(5, 0);            // BLOG_COUNT
                insert.setInt(6, 0);            // FEEDLY_COUNT
                insert.setInt(7, hatenacnt);    // HATENA_COUNT
                insert.setTime(8, null);        // LAST_UPDATE
                insert.executeUpdate();
            }
            sconn.commit();
            System.out.println(blogtitle + ":" + blogurl);
        } catch (Exception e) {
            System.out.println(e.toString());
        }
    }

    /**
     * Reads a stream to its end and returns the content decoded as UTF-8.
     * The stream is not closed; that remains the caller's responsibility.
     *
     * @param is stream to read
     * @return the decoded text, empty when the stream has no data
     * @throws IOException if reading fails
     */
    static String convertString(InputStream is) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
        StringBuilder text = new StringBuilder();
        char[] chunk = new char[1024];
        int count;
        while ((count = reader.read(chunk)) >= 0) {
            text.append(chunk, 0, count);
        }
        return text.toString();
    }

    /**
     * Calls the {@code bookmark.getTotalCount} XML-RPC method at
     * {@code http://b.hatena.ne.jp/xmlrpc} for the given URL.
     *
     * @param burl URL passed as the single call parameter
     * @return the result parsed as an int, or 0 when the call or the parsing fails
     */
    static int getCount(String burl) {
        XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
        try {
            config.setServerURL(new URL("http://b.hatena.ne.jp/xmlrpc"));
        } catch (MalformedURLException e) {
            System.out.println(e.toString());
            return 0;
        }
        XmlRpcClient client = new XmlRpcClient();
        client.setConfig(config);
        String[] params = {burl};
        try {
            Object result = client.execute("bookmark.getTotalCount", params);
            if (result != null) {
                return Integer.parseInt(result.toString());
            }
        } catch (Exception e) {
            System.out.println(e.toString());
        }
        return 0;
    }

    /**
     * Picks the next blog to process: the {@code HATENA_ID} row with the highest
     * {@code HATENA_COUNT} among those whose {@code BLOG_COUNT} is 0 and whose
     * {@code LAST_POST} is set.
     *
     * @return the blog URL of that row, or an empty string when there is no such
     *         row or the query fails
     */
    static String recentUpdate() {
        String sql = "select t1.BLOG_URL from HATENA_ID t1 where t1.BLOG_COUNT = 0 and t1.LAST_POST is not null order by HATENA_COUNT desc";
        try (PreparedStatement query = sconn.prepareStatement(sql);
                ResultSet rows = query.executeQuery()) {
            if (rows.next()) {
                return rows.getString("BLOG_URL");
            }
        } catch (Exception e) {
            System.out.println(e.toString());
        }
        return "";
    }

    /** Performs an HTTP GET and returns the response body decoded as UTF-8, releasing the connection afterwards. */
    private static String fetchBody(String address) throws IOException {
        HttpURLConnection conn = (HttpURLConnection) new URL(address).openConnection();
        try {
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(TIMEOUT_MILLIS);
            // Without a read timeout a stalled server would block the crawl indefinitely.
            conn.setReadTimeout(TIMEOUT_MILLIS);
            try (InputStream body = conn.getInputStream()) {
                return convertString(body);
            }
        } finally {
            conn.disconnect();
        }
    }

    /** Runs a single-parameter query on the shared connection and reports whether it yields at least one row. */
    private static boolean rowExists(String sql, String key) throws java.sql.SQLException {
        try (PreparedStatement query = sconn.prepareStatement(sql)) {
            query.setString(1, key);
            try (ResultSet rows = query.executeQuery()) {
                return rows.next();
            }
        }
    }

    /** Closes the shared connection if it was opened; a failure to close is only reported. */
    private static void closeConnection() {
        if (sconn == null) {
            return;
        }
        try {
            sconn.close();
        } catch (java.sql.SQLException e) {
            System.out.println(e.toString());
        }
    }
}
/**
 * One bookmark attached to a {@link Page}: who bookmarked it, when, and with
 * which comment and tag text. A plain mutable bean; every property is
 * {@code null} until its setter has been called.
 */
class Bookmark {

    private Date timestamp;
    private String comment;
    private String user;
    private String tag;

    /** @return the time of the bookmark, or {@code null} if unset */
    public Date getTimestamp() {
        return this.timestamp;
    }

    /** @param timestamp the time of the bookmark */
    public void setTimestamp(Date timestamp) {
        this.timestamp = timestamp;
    }

    /** @return the bookmark comment, or {@code null} if unset */
    public String getComment() {
        return this.comment;
    }

    /** @param comment the bookmark comment */
    public void setComment(String comment) {
        this.comment = comment;
    }

    /** @return the bookmarking user, or {@code null} if unset */
    public String getUser() {
        return this.user;
    }

    /** @param user the bookmarking user */
    public void setUser(String user) {
        this.user = user;
    }

    /** @return the tag text, or {@code null} if unset */
    public String getTags() {
        return this.tag;
    }

    /** @param tag the tag text */
    public void setTags(String tag) {
        this.tag = tag;
    }
}
/**
 * A bookmarked page: its bookmark count, URLs, entry id, title, screenshot
 * reference and the list of its {@link Bookmark}s. A plain mutable bean whose
 * scalar properties are reachable both as public fields and through accessors.
 */
class Page {

    public int count;
    public String url;
    public String eid;
    public String title;
    public String screenshot;
    public String entry_url;
    private List<Bookmark> bookmarkList;

    /** @return the bookmark count */
    public int getCount() {
        return this.count;
    }

    /** @param count the bookmark count */
    public void setCount(int count) {
        this.count = count;
    }

    /** @return the page URL */
    public String getUrl() {
        return this.url;
    }

    /** @param url the page URL */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the entry id */
    public String getEid() {
        return this.eid;
    }

    /** @param eid the entry id */
    public void setEid(String eid) {
        this.eid = eid;
    }

    /** @return the page title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the page title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the screenshot value */
    public String getScreenshot() {
        return this.screenshot;
    }

    /** @param screenshot the screenshot value */
    public void setScreenshot(String screenshot) {
        this.screenshot = screenshot;
    }

    /** @return the entry URL */
    public String getEntry_url() {
        return this.entry_url;
    }

    /** @param entry_url the entry URL */
    public void setEntry_url(String entry_url) {
        this.entry_url = entry_url;
    }

    /** @return the bookmarks of this page, or {@code null} if none were set */
    public List<Bookmark> getBookmarks() {
        return this.bookmarkList;
    }

    /** @param bookmarkList the bookmarks of this page */
    public void setBookmarks(List<Bookmark> bookmarkList) {
        this.bookmarkList = bookmarkList;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment