Skip to content

Instantly share code, notes, and snippets.

@maxpert
Created October 8, 2012 16:59
Show Gist options
  • Save maxpert/3853598 to your computer and use it in GitHub Desktop.
Save maxpert/3853598 to your computer and use it in GitHub Desktop.
Lucene Redis example
package mxp;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
/**
 * Thin wrapper around a Lucene {@link IndexWriter} that indexes
 * pipe-delimited dump lines into a supplied {@link Directory}.
 *
 * <p>Typical usage is {@link #open()}, then {@link #parseAndIndex(String)}
 * once per input line, then {@link #close()}.
 */
public class DumpIndexWriter {
    /** Directory the index is written to; supplied by the caller. */
    private final Directory dir;

    /** Underlying Lucene writer; {@code null} until {@link #open()} succeeds. */
    private IndexWriter writer;

    /**
     * Creates a writer bound to the given directory. Nothing is opened until
     * {@link #open()} is called.
     *
     * @param d directory that will hold the index
     */
    public DumpIndexWriter(Directory d) {
        dir = d;
    }

    /**
     * Opens the underlying {@link IndexWriter} using a
     * {@link StandardAnalyzer} configured for {@code Version.LUCENE_36}.
     *
     * @throws CorruptIndexException     propagated from the {@link IndexWriter} constructor
     * @throws LockObtainFailedException propagated from the {@link IndexWriter} constructor
     * @throws IOException               propagated from the {@link IndexWriter} constructor
     */
    public void open() throws CorruptIndexException, LockObtainFailedException, IOException {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        writer = new IndexWriter(dir, writerConfig);
    }

    /**
     * Splits one dump line on {@code |} and indexes it as a document.
     *
     * <p>Example line:
     * {@code 3281929|MacGyver.S01E02.DVDRip.XviD-MEDiEVAL|376764032|1|0|6e43d748d9446e3cddec69ce9b2ababb51bbf827}
     *
     * <p>The second column is stored and analyzed as {@code title}; the sixth
     * column is stored verbatim (not analyzed) as {@code link}. The remaining
     * columns are ignored.
     *
     * @param line one raw line of the dump; may be {@code null}
     * @return {@code true} if the document was added; {@code false} if the
     *         line is {@code null}, has fewer than six columns, or adding the
     *         document failed
     */
    public boolean parseAndIndex(String line) {
        // A null line is treated like any other unparseable input instead of
        // blowing up with a NullPointerException on split().
        if (line == null) {
            return false;
        }

        String[] fields = line.split("\\|");
        if (fields.length < 6) {
            return false;
        }

        Document doc = new Document();
        System.out.printf("%s -> %s \n", fields[1], fields[5]);
        doc.add(new Field("title", fields[1], Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("link", fields[5], Field.Store.YES, Field.Index.NOT_ANALYZED));

        try {
            writer.addDocument(doc);
        } catch (Exception e) {
            // Deliberately best-effort: a single bad document (or a writer
            // that is not open) is reported and skipped so the caller's
            // loop over the remaining lines can continue.
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Closes the underlying {@link IndexWriter}. Does nothing if
     * {@link #open()} was never called or did not succeed.
     *
     * @throws CorruptIndexException propagated from {@link IndexWriter#close()}
     * @throws IOException           propagated from {@link IndexWriter#close()}
     */
    public void close() throws CorruptIndexException, IOException {
        if (writer != null) {
            writer.close();
        }
    }
}
package mxp;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.CRC32;
import mxp.lucene.store.RedisDirectory;
import mxp.lucene.store.RedisFile;
import org.apache.commons.pool.impl.GenericObjectPool;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import redis.clients.jedis.JedisShardInfo;
import redis.clients.jedis.ShardedJedisPool;
/**
 * Demo driver: indexes a pipe-delimited dump file into a Redis-backed Lucene
 * directory, then runs a sample query against it and prints the top hits.
 */
public class Main {
    /** Name passed to {@link RedisDirectory} for the index. */
    private static final String INDEX_NAME = "piratebay";

    /** Location of the dump file that is read line by line. */
    private static final String DUMP_FILE = "/Users/maxpert/labs/complete";

    /**
     * Builds a three-shard Jedis pool on localhost, indexes the dump into a
     * {@link RedisDirectory}, prints the elapsed indexing time, and then
     * searches the {@code title} field for {@code batman}.
     *
     * @param args unused
     * @throws IOException           if reading the dump or accessing the index fails
     * @throws CorruptIndexException propagated from Lucene
     * @throws ParseException        if the sample query cannot be parsed
     * @throws InterruptedException  if the pause between indexing and searching is interrupted
     */
    public static void main(String[] args) throws CorruptIndexException,
            IOException, ParseException, InterruptedException {
        List<JedisShardInfo> shards = new ArrayList<JedisShardInfo>();
        shards.add(new JedisShardInfo("localhost", 6379));
        shards.add(new JedisShardInfo("localhost", 6389));
        shards.add(new JedisShardInfo("localhost", 6399));

        ShardedJedisPool pool = new ShardedJedisPool(new GenericObjectPool.Config(), shards);
        try {
            RedisDirectory redisDir = new RedisDirectory(INDEX_NAME, pool);

            long start = System.currentTimeMillis();
            System.out.println("Indexing in Redis...");
            dumpPirateBay(redisDir);
            System.out.printf("Redis indexing took %d...", System.currentTimeMillis() - start);

            // NOTE(review): the reason for this pause is not evident from the
            // code; presumably it lets Redis settle before querying - confirm.
            Thread.sleep(5000);

            // Test searching ;)
            search(redisDir, "title", "batman");
        } finally {
            // Release the pool even when indexing or searching fails.
            pool.destroy();
        }
    }

    /**
     * Reads the dump file line by line and feeds every line to a
     * {@link DumpIndexWriter} writing into the given directory.
     *
     * @param redisDir directory the index is written to
     * @throws IOException               if the dump cannot be read or the index cannot be written
     * @throws CorruptIndexException     propagated from the index writer
     * @throws LockObtainFailedException propagated from the index writer
     */
    private static void dumpPirateBay(Directory redisDir)
            throws CorruptIndexException, LockObtainFailedException, IOException {
        DumpIndexWriter writer = new DumpIndexWriter(redisDir);
        // NOTE(review): decodes with the platform default charset, as before;
        // confirm the dump's actual encoding.
        BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(DUMP_FILE)));
        try {
            // open() stays outside the inner try so close() is only attempted
            // on a writer that was actually opened.
            writer.open();
            try {
                String strLine;
                while ((strLine = br.readLine()) != null) {
                    writer.parseAndIndex(strLine);
                }
            } finally {
                writer.close();
            }
        } finally {
            // Closing the reader also closes the wrapped file stream.
            br.close();
        }
    }

    /**
     * Parses {@code query} against {@code field} with a
     * {@link StandardAnalyzer}, collects up to ten top-scoring hits from the
     * index, and prints the stored {@code title} of each one.
     *
     * @param index directory containing the index to search
     * @param field default field for the query parser
     * @param query query string in Lucene query-parser syntax
     * @throws ParseException        if {@code query} cannot be parsed
     * @throws CorruptIndexException propagated from Lucene
     * @throws IOException           if the index cannot be read
     */
    private static void search(Directory index, String field, String query)
            throws ParseException, CorruptIndexException, IOException {
        int hitsPerPage = 10;
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        QueryParser qp = new QueryParser(Version.LUCENE_36, field, analyzer);
        Query q = qp.parse(query);

        IndexReader reader = IndexReader.open(index);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
                searcher.search(q, collector);
                ScoreDoc[] hits = collector.topDocs().scoreDocs;

                System.out.println("Found " + hits.length + " hits.");
                for (int i = 0; i < hits.length; ++i) {
                    Document d = searcher.doc(hits[i].doc);
                    System.out.println((i + 1) + ". " + d.get("title"));
                }
            } finally {
                searcher.close();
            }
        } finally {
            // A searcher built from an IndexReader does not close that reader,
            // so it has to be closed explicitly.
            reader.close();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment