Skip to content

Instantly share code, notes, and snippets.

@sh2
Last active December 17, 2015 00:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sh2/5519338 to your computer and use it in GitHub Desktop.
Save sh2/5519338 to your computer and use it in GitHub Desktop.
Wikipedia Route Search program
package wikipedia.searcher;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import wikipedia.WikipediaException;
public class RouteSearcher {
public static final int MAX_DISTANCE = 6;
public static final String JDBC_USER = "wikipedia";
public static final String JDBC_PASS = "";
private static final String SELECT_TITLE_ALL_ID = "SELECT id FROM title WHERE redirect = 0";
private static final String SELECT_TITLE_TITLE = "SELECT title FROM title WHERE id = ?";
private TransparentLinkMap forwardLinkMap;
private TransparentLinkMap backwardLinkMap;
public static void main(String[] args) {
String url = null;
Connection connection = null;
RouteSearcher searcher = null;
PreparedStatement selectTitleTitle = null;
if (args.length != 1 && args.length != 3) {
System.err
.println("java wikipedia.searcher.RouteSearcher server [source] [destination]");
System.exit(1);
}
url = "jdbc:mysql://" + args[0] + "/wikipedia?useServerPrepStmts=true";
try {
connection = DriverManager.getConnection(url, JDBC_USER, JDBC_PASS);
searcher = new RouteSearcher(connection);
selectTitleTitle = connection.prepareStatement(SELECT_TITLE_TITLE);
if (args.length == 3) {
int source = Integer.parseInt(args[1]);
int destination = Integer.parseInt(args[2]);
List<Integer> route = searcher.search(source, destination);
System.out.println("SOURCE : [" + source + "]"
+ searcher.getTitle(selectTitleTitle, source) + ", DESTINATION : ["
+ destination + "]" + searcher.getTitle(selectTitleTitle, destination));
searcher.printRoute(selectTitleTitle, route);
} else {
searcher.randomTest(connection, selectTitleTitle);
}
} catch (SQLException e) {
e.printStackTrace();
} catch (WikipediaException e) {
e.printStackTrace();
} finally {
if (selectTitleTitle != null) {
try {
selectTitleTitle.close();
} catch (SQLException e) {
// 何もしない
}
}
if (searcher != null) {
searcher.close();
}
if (connection != null) {
try {
connection.rollback();
} catch (SQLException e) {
// 何もしない
}
try {
connection.close();
} catch (SQLException e) {
// 何もしない
}
}
}
}
public RouteSearcher(Connection connection) throws WikipediaException {
this.forwardLinkMap = new TransparentLinkMap(connection, true);
this.backwardLinkMap = new TransparentLinkMap(connection, false);
}
public List<Integer> search(int source, int destination) throws WikipediaException {
forwardLinkMap.resetCounter();
backwardLinkMap.resetCounter();
int distance = 0;
int intersection = 0;
boolean isFound = false;
Deque<Integer> deque = new ArrayDeque<Integer>();
Map<Integer, Integer> forwardDestinationMap = new HashMap<Integer, Integer>();
forwardDestinationMap.put(source, 0);
Set<Integer> forwardNextSet = new HashSet<Integer>();
forwardNextSet.add(source);
Map<Integer, Integer> backwardDestinationMap = new HashMap<Integer, Integer>();
backwardDestinationMap.put(destination, 0);
Set<Integer> backwardNextSet = new HashSet<Integer>();
backwardNextSet.add(destination);
if (backwardNextSet.contains(source)) {
// 距離0
intersection = source;
isFound = true;
} else {
SEARCH: for (distance = 1; distance <= MAX_DISTANCE; distance++) {
Set<Integer> workingSet = null;
// 頂点数の少ない方から探索する
if (forwardNextSet.size() < backwardNextSet.size()) {
// 前方探索
workingSet = forwardNextSet;
forwardNextSet = new HashSet<Integer>();
for (int id1 : workingSet) {
for (int id2 : forwardLinkMap.get(id1)) {
if (!forwardDestinationMap.containsKey(id2)) {
forwardDestinationMap.put(id2, id1);
forwardNextSet.add(id2);
}
if (backwardNextSet.contains(id2)) {
// backward側の先端に触れたら探索終了
intersection = id2;
isFound = true;
break SEARCH;
}
}
}
if (forwardNextSet.isEmpty()) {
// 経路が塞がっていたら探索終了
break SEARCH;
}
} else {
// 後方探索
workingSet = backwardNextSet;
backwardNextSet = new HashSet<Integer>();
for (int id2 : workingSet) {
for (int id1 : backwardLinkMap.get(id2)) {
if (!backwardDestinationMap.containsKey(id1)) {
backwardDestinationMap.put(id1, id2);
backwardNextSet.add(id1);
}
if (forwardNextSet.contains(id1)) {
// forward側の先端に触れたら探索終了
intersection = id1;
isFound = true;
break SEARCH;
}
}
}
if (backwardNextSet.isEmpty()) {
// 経路が塞がっていたら探索終了
break SEARCH;
}
}
}
}
if (isFound) {
deque.offerFirst(intersection);
// 前方の経路取得
int parent = forwardDestinationMap.get(intersection);
while (parent != 0) {
deque.offerFirst(parent);
parent = forwardDestinationMap.get(parent);
}
// 後方の経路取得
parent = backwardDestinationMap.get(intersection);
while (parent != 0) {
deque.offerLast(parent);
parent = backwardDestinationMap.get(parent);
}
}
return new ArrayList<Integer>(deque);
}
public void close() {
forwardLinkMap.close();
backwardLinkMap.close();
}
private void randomTest(Connection connection, PreparedStatement selectTitle)
throws WikipediaException {
List<Integer> idList = new ArrayList<Integer>();
Random random = new Random();
Statement selectTitleAllId = null;
ResultSet resultSet = null;
try {
selectTitleAllId = connection.createStatement();
resultSet = selectTitleAllId.executeQuery(SELECT_TITLE_ALL_ID);
while (resultSet.next()) {
idList.add(resultSet.getInt(1));
}
resultSet.close();
selectTitleAllId.close();
while (true) {
int source = idList.get(random.nextInt(idList.size()));
int destination = idList.get(random.nextInt(idList.size()));
List<Integer> route = search(source, destination);
System.out.println("SOURCE : [" + source + "]" + getTitle(selectTitle, source)
+ ", DESTINATION : [" + destination + "]"
+ getTitle(selectTitle, destination));
printRoute(selectTitle, route);
System.out.println();
}
} catch (SQLException e) {
throw new WikipediaException(e);
} finally {
if (resultSet != null) {
try {
resultSet.close();
} catch (SQLException e) {
// 何もしない
}
}
if (selectTitleAllId != null) {
try {
selectTitleAllId.close();
} catch (SQLException e) {
// 何もしない
}
}
}
}
private void printRoute(PreparedStatement selectTitle, List<Integer> route)
throws WikipediaException {
if (route.size() == 0) {
System.out.println("NOT FOUND");
} else {
boolean isFirst = true;
System.out.print("FOUND, DISTANCE : " + (route.size() - 1) + ", ");
for (int id : route) {
if (!isFirst) {
System.out.print(" => ");
}
isFirst = false;
System.out.print("[" + id + "]" + getTitle(selectTitle, id));
}
System.out.println();
}
System.out.println("FORWARD, " + forwardLinkMap.getStatistics());
System.out.println("BACKWARD, " + backwardLinkMap.getStatistics());
}
private String getTitle(PreparedStatement selectTitle, int id) throws WikipediaException {
String title = null;
ResultSet resultSet = null;
try {
selectTitle.clearParameters();
selectTitle.setInt(1, id);
resultSet = selectTitle.executeQuery();
if (resultSet.next()) {
title = resultSet.getString(1);
}
} catch (SQLException e) {
throw new WikipediaException(e);
} finally {
if (resultSet != null) {
try {
resultSet.close();
} catch (SQLException e) {
// 何もしない
}
}
}
return title;
}
}
package wikipedia.searcher;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import wikipedia.WikipediaException;
/**
* linkテーブルをHashMapのように扱うためのクラスです。
* linkテーブルの全データをMapに格納せず必要に応じてデータベースから
* 取得するとともに、高速化のため一定数のデータをキャッシュします。
*
* @author Sadao Hiratsuka
*/
public class TransparentLinkMap {
private static final String SELECT_LINK_FORWARD_ID = "SELECT id2 FROM link WHERE id1 = ?";
private static final String SELECT_LINK_BACKWARD_ID = "SELECT id1 FROM link WHERE id2 = ?";
private Map<Integer, List<Integer>> cache;
private PreparedStatement selectLinkId;
private long getCount;
private long sqlCount;
public TransparentLinkMap(Connection connection, boolean isForward) throws WikipediaException {
this.cache = new LinkCache();
try {
if (isForward) {
this.selectLinkId = connection.prepareStatement(SELECT_LINK_FORWARD_ID);
} else {
this.selectLinkId = connection.prepareStatement(SELECT_LINK_BACKWARD_ID);
}
} catch (SQLException e) {
if (selectLinkId != null) {
try {
selectLinkId.close();
} catch (SQLException e2) {
// 何もしない
}
}
throw new WikipediaException(e);
}
}
public List<Integer> get(Integer key) throws WikipediaException {
getCount++;
if (cache.containsKey(key)) {
return cache.get(key);
} else {
sqlCount++;
ArrayList<Integer> value = new ArrayList<Integer>();
ResultSet resultSet = null;
try {
selectLinkId.clearParameters();
selectLinkId.setInt(1, key);
resultSet = selectLinkId.executeQuery();
while (resultSet.next()) {
value.add(resultSet.getInt(1));
}
value.trimToSize();
cache.put(key, value);
} catch (SQLException e) {
if (selectLinkId != null) {
try {
selectLinkId.close();
} catch (SQLException e2) {
// 何もしない
}
}
throw new WikipediaException(e);
} finally {
if (resultSet != null) {
try {
resultSet.close();
} catch (SQLException e) {
// 何もしない
}
}
}
return value;
}
}
public void resetCounter() {
this.getCount = 0;
this.sqlCount = 0;
}
public String getStatistics() {
int hitRate = 0;
if (getCount != 0) {
hitRate = (int) (100L * (getCount - sqlCount) / getCount);
}
return "GET_COUNT : " + getCount + ", SQL_COUNT : " + sqlCount + ", HIT_RATE : " + hitRate
+ " CACHE_SIZE : " + cache.size();
}
public void close() {
if (selectLinkId != null) {
try {
selectLinkId.close();
} catch (SQLException e) {
// 何もしない
}
}
}
private class LinkCache extends LinkedHashMap<Integer, List<Integer>> {
public static final int CACHE_SIZE = 10000;
private static final long serialVersionUID = 1L;
public LinkCache() {
super(CACHE_SIZE, 1.0f, true);
}
@Override
protected boolean removeEldestEntry(java.util.Map.Entry<Integer, List<Integer>> eldest) {
if (size() > CACHE_SIZE) {
return true;
} else {
return false;
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment