Skip to content

Instantly share code, notes, and snippets.

@mocobeta
Created June 30, 2012 14:40
Show Gist options
  • Save mocobeta/3024041 to your computer and use it in GitHub Desktop.
Save mocobeta/3024041 to your computer and use it in GitHub Desktop.
Lucene入門 4章 インデックス検索プログラム - Lucene 3.6 バージョン
package searcher;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Base class for the sample index searchers.
 *
 * <p>It lazily instantiates an {@link Analyzer} from a class name, opens the
 * index stored under {@link #indexDir}, runs a query, and prints the top hits
 * via the subclass-provided {@link #printDocument(Document)}.
 */
public abstract class BaseSearcher {

    /** Fully-qualified class name used as the default analyzer. */
    protected static final String DEFAULT_ANALYZER =
            "org.apache.lucene.analysis.ja.JapaneseAnalyzer";

    /** Fully-qualified class name of the Gosen analyzer. */
    protected static final String GOSEN_ANALYZER =
            "org.apache.lucene.analysis.gosen.GosenAnalyzer";

    /** Fully-qualified class name of the CJK analyzer. */
    protected static final String CJK_ANALYZER =
            "org.apache.lucene.analysis.cjk.CJKAnalyzer";

    /** Property key for the analyzer setting (not referenced inside this class). */
    protected static final String PROP_ANALYZER = "analyzer";

    /** Lazily created analyzer instance; {@code null} until {@link #getAnalyzer()} is called. */
    protected Analyzer analyzer;

    /** File-system path of the index directory. */
    protected String indexDir;

    /** Fully-qualified class name of the analyzer to instantiate. */
    private final String analyzerClass;

    /**
     * @param indexDir      file-system path of the index to search
     * @param analyzerClass fully-qualified name of an {@link Analyzer} class that has a
     *                      public constructor taking a single {@link Version}
     */
    protected BaseSearcher(String indexDir, String analyzerClass) {
        this.indexDir = indexDir;
        this.analyzerClass = analyzerClass;
    }

    /**
     * Returns the analyzer, creating it reflectively on first use.
     *
     * @return the cached or newly created analyzer
     * @throws SearcherException if the class cannot be loaded, is not an {@link Analyzer},
     *                           or cannot be instantiated with {@code Version.LUCENE_36}
     */
    protected Analyzer getAnalyzer() {
        if (analyzer != null) {
            return analyzer;
        }
        try {
            // asSubclass gives a typed Class and rejects non-Analyzer classes
            // before any instance is created.
            Class<? extends Analyzer> clazz = getClass().getClassLoader()
                    .loadClass(analyzerClass)
                    .asSubclass(Analyzer.class);
            Constructor<? extends Analyzer> constructor = clazz.getConstructor(Version.class);
            analyzer = constructor.newInstance(Version.LUCENE_36);
            System.out.println("* Analyzer : " + analyzerClass);
            return analyzer;
        } catch (Exception e) {
            throw new SearcherException(e);
        }
    }

    /**
     * Opens the directory that holds the index.
     *
     * <p>Every call opens a new {@link Directory}; the caller is responsible for closing it.
     *
     * @return a newly opened directory for {@link #indexDir}
     * @throws SearcherException if the directory cannot be opened
     */
    protected Directory getDirectory() {
        try {
            return FSDirectory.open(new File(indexDir));
        } catch (IOException e) {
            throw new SearcherException(e);
        }
    }

    /**
     * Searches the index with the given query and prints the total hit count
     * followed by the (at most) 10 highest-ranked documents.
     *
     * @param query the query to execute
     * @throws SearcherException if reading the index fails
     */
    protected void searchIndex(Query query) {
        System.out.println("Query = \"" + query + "\"");
        Directory directory = null;
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            // Keep a handle on the directory so it can be closed afterwards.
            directory = getDirectory();
            reader = IndexReader.open(directory);
            searcher = new IndexSearcher(reader);

            // A single top-10 search is enough: TopDocs.totalHits already holds the
            // total number of matching documents, so no separate counting pass is needed.
            TopDocs docs = searcher.search(query, 10);
            ScoreDoc[] hits = docs.scoreDocs;
            System.out.println(docs.totalHits + " 件中 " + hits.length + " 件取得しました。");

            for (ScoreDoc hit : hits) {
                // A ScoreDoc only carries the document id, so the document itself
                // is fetched through the searcher.
                Document doc = searcher.doc(hit.doc);
                printDocument(doc);
                System.out.println(hit.doc + ", スコア = " + hit.score);
            }
        } catch (IOException e) {
            throw new SearcherException(e);
        } finally {
            // Close each resource in its own try block so that a failure in one
            // close() does not prevent the remaining resources from being released.
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    warnCloseFailure("IndexSearcher", e);
                }
            }
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    warnCloseFailure("IndexReader", e);
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    warnCloseFailure("Directory", e);
                }
            }
        }
    }

    /** Reports a best-effort close failure on stderr without interrupting the caller. */
    private static void warnCloseFailure(String resource, IOException e) {
        System.err.println("Failed to close " + resource + ": " + e);
    }

    /**
     * Prints a single hit document; called once per hit by {@link #searchIndex(Query)}.
     *
     * @param doc the stored document of a hit
     */
    protected abstract void printDocument(Document doc);

    /** Unchecked wrapper for failures raised while preparing or running a search. */
    public static class SearcherException extends RuntimeException {
        private static final long serialVersionUID = 1L;

        /**
         * @param e the underlying cause
         */
        public SearcherException(Exception e) {
            super(e);
        }
    }
}
package searcher;
import indexer.BookIndexer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.Version;
/**
 * Sample searcher for the book index: parses a fixed query against the title
 * field and prints the fields of every hit.
 */
public class BookSearcher extends BaseSearcher {

    /** Line printed before each document. */
    private static final String SEPARATOR =
            "=============================================================================";

    protected BookSearcher(String indexDir, String analyzerClass) {
        super(indexDir, analyzerClass);
    }

    /**
     * Searches the "book-index/kuromoji" index for a fixed query using the default analyzer.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final BookSearcher bookSearcher = new BookSearcher("book-index/kuromoji", DEFAULT_ANALYZER);
        final QueryParser parser =
                new QueryParser(Version.LUCENE_36, BookIndexer.F_TITLE, bookSearcher.getAnalyzer());

        final Query parsed;
        try {
            parsed = parser.parse("Java プログラミング");
        } catch (ParseException e) {
            // A malformed query string is reported as an unchecked SearcherException.
            throw new SearcherException(e);
        }
        bookSearcher.searchIndex(parsed);
    }

    /** Prints every book field of the given document, one per line. */
    @Override
    protected void printDocument(Document doc) {
        System.out.println(SEPARATOR);
        printField(doc, BookIndexer.F_PUBLISHER);
        printField(doc, BookIndexer.F_CATEGORY);
        printField(doc, BookIndexer.F_TITLE);
        printField(doc, BookIndexer.F_AUTHOR);
        printDecoded(doc, BookIndexer.F_PAGES, " ページ");
        printField(doc, BookIndexer.F_ISBN);
        printField(doc, BookIndexer.F_DATE);
        printDecoded(doc, BookIndexer.F_PRICE, " 円");
        printField(doc, BookIndexer.F_SUMMARY);
    }

    /** Prints "name = value" for a stored field. */
    private static void printField(Document doc, String fieldName) {
        final String value = doc.get(fieldName);
        System.out.println(fieldName + " = " + value);
    }

    /** Decodes a stored field with NumericUtils.prefixCodedToLong and prints it followed by a unit. */
    private static void printDecoded(Document doc, String fieldName, String unit) {
        final long number = NumericUtils.prefixCodedToLong(doc.get(fieldName));
        System.out.println(number + unit);
    }
}
http://mocobeta-backup.tumblr.com/post/26212093829/lucene-3-6
package searcher;
import indexer.PostIndexer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
/**
 * Sample searcher for the post index: parses a fixed query against the name
 * field and prints the fields of every hit.
 */
public class PostSearcher extends BaseSearcher {

    /** Line printed before each document. */
    private static final String SEPARATOR =
            "=============================================================================";

    protected PostSearcher(String indexDir, String analyzerClass) {
        super(indexDir, analyzerClass);
    }

    /**
     * Searches the "post-index/kuromoji" index for a fixed query using the default analyzer.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final PostSearcher postSearcher = new PostSearcher("post-index/kuromoji", DEFAULT_ANALYZER);
        final QueryParser parser =
                new QueryParser(Version.LUCENE_36, PostIndexer.F_NAME, postSearcher.getAnalyzer());

        final Query parsed;
        try {
            parsed = parser.parse("日立製作所");
        } catch (ParseException e) {
            // A malformed query string is reported as an unchecked SearcherException.
            throw new SearcherException(e);
        }
        postSearcher.searchIndex(parsed);
    }

    /** Prints every post field of the given document as "name = value", one per line. */
    @Override
    protected void printDocument(Document doc) {
        System.out.println(SEPARATOR);
        // Fields are listed in output order.
        final String[] fieldNames = {
            PostIndexer.F_CODE,
            PostIndexer.F_KANA,
            PostIndexer.F_NAME,
            PostIndexer.F_ADDR,
            PostIndexer.F_CONTENT,
            PostIndexer.F_INDZIP,
            PostIndexer.F_ZIP,
            PostIndexer.F_POST,
        };
        for (String fieldName : fieldNames) {
            System.out.println(fieldName + " = " + doc.get(fieldName));
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment