maxpert/DumpIndexWriter.java

## DumpIndexWriter.java
package mxp;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;


/**
 * Writes records from a pipe-delimited dump into a Lucene index held in the
 * {@link Directory} supplied at construction time.
 *
 * <p>Intended call order: {@link #open()} once, {@link #parseAndIndex(String)}
 * for every dump line, then {@link #close()}.
 */
public class DumpIndexWriter {
	/** Minimum number of pipe-separated columns a dump line must contain. */
	private static final int MIN_COLUMNS = 6;
	/** Zero-based column stored in the "title" field. */
	private static final int TITLE_COLUMN = 1;
	/** Zero-based column stored in the "link" field. */
	private static final int LINK_COLUMN = 5;

	private final Directory dir;
	/** Non-null only between a successful {@link #open()} and a successful {@link #close()}. */
	private IndexWriter writer;

	/**
	 * @param d directory the index is written to
	 */
	public DumpIndexWriter(Directory d) {
		dir = d;
	}

	/**
	 * Creates the underlying {@link IndexWriter} using a
	 * {@link StandardAnalyzer} configured for Lucene 3.6.
	 *
	 * @throws CorruptIndexException if the existing index is corrupt
	 * @throws LockObtainFailedException if the index write lock cannot be obtained
	 * @throws IOException on any other low-level I/O failure
	 */
	public void open() throws CorruptIndexException, LockObtainFailedException, IOException {
		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
		IndexWriterConfig writerConfig = new IndexWriterConfig(
				Version.LUCENE_36, analyzer);
		writer = new IndexWriter(dir, writerConfig);
	}

	/**
	 * Parses one dump line and adds it to the index as a document with an
	 * analyzed, stored "title" field and a non-analyzed, stored "link" field.
	 *
	 * <p>Example line:
	 * {@code 3281929|MacGyver.S01E02.DVDRip.XviD-MEDiEVAL|376764032|1|0|6e43d748d9446e3cddec69ce9b2ababb51bbf827}
	 *
	 * @param line one raw line of the dump; may be {@code null}
	 * @return {@code true} if a document was added; {@code false} if the line
	 *         is {@code null}, has fewer than six columns, the writer has not
	 *         been opened, or adding the document failed
	 */
	public boolean parseAndIndex(String line){
		if (line == null) {
			return false;
		}

		String[] fields = line.split("\\|");
		if (fields.length < MIN_COLUMNS) {
			return false;
		}

		if (writer == null) {
			// Report the misuse explicitly instead of via a swallowed NullPointerException.
			System.err.println("DumpIndexWriter.parseAndIndex() called before open()");
			return false;
		}

		String title = fields[TITLE_COLUMN];
		String link = fields[LINK_COLUMN];
		System.out.printf("%s -> %s \n", title, link);

		Document doc = new Document();
		doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
		doc.add(new Field("link", link, Field.Store.YES, Field.Index.NOT_ANALYZED));

		try {
			writer.addDocument(doc);
		} catch (Exception e) {
			// Best effort by design: one bad record must not abort the whole dump.
			e.printStackTrace();
			return false;
		}

		return true;
	}

	/**
	 * Closes the underlying writer. Does nothing if the writer was never
	 * opened or has already been closed.
	 *
	 * @throws CorruptIndexException if the index is corrupt
	 * @throws IOException on a low-level I/O failure
	 */
	public void close() throws CorruptIndexException, IOException {
		if (writer == null) {
			return;
		}
		writer.close();
		// Forget the writer only after a clean close so a failed close can be retried.
		writer = null;
	}
}

## Main.java
package mxp;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.CRC32;

import mxp.lucene.store.RedisDirectory;
import mxp.lucene.store.RedisFile;

import org.apache.commons.pool.impl.GenericObjectPool;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

import redis.clients.jedis.JedisShardInfo;
import redis.clients.jedis.ShardedJedisPool;

/**
 * Demo driver: indexes a pipe-delimited dump into a Redis-backed Lucene
 * directory and then runs a sample search against the "title" field.
 */
public class Main {

	/**
	 * Builds a three-shard Redis pool on localhost, indexes the dump into the
	 * "piratebay" Redis directory, prints the elapsed milliseconds, pauses for
	 * five seconds and finally searches the "title" field for "batman".
	 *
	 * @param args unused
	 * @throws IOException on I/O failure while indexing or searching
	 * @throws CorruptIndexException if the index is corrupt
	 * @throws ParseException if the sample query cannot be parsed
	 * @throws InterruptedException if the pause before searching is interrupted
	 */
	public static void main(String[] args) throws CorruptIndexException,
			IOException, ParseException, InterruptedException {
		List<JedisShardInfo> shards = new ArrayList<JedisShardInfo>();
		shards.add(new JedisShardInfo("localhost", 6379));
		shards.add(new JedisShardInfo("localhost", 6389));
		shards.add(new JedisShardInfo("localhost", 6399));

		ShardedJedisPool pool = new ShardedJedisPool(new GenericObjectPool.Config(), shards);
		try {
			RedisDirectory redisDir = new RedisDirectory("piratebay", pool);

			long start = System.currentTimeMillis();
			System.out.println("Indexing in Redis...");
			DumpPirateBay(redisDir);
			// %n terminates the line so the search output does not run onto it.
			System.out.printf("Redis indexing took %d...%n", System.currentTimeMillis() - start);
			// NOTE(review): the reason for this 5 s pause is not evident from this file.
			Thread.sleep(5000);

			//Test searching ;)
			search( redisDir, "title", "batman" );
		} finally {
			// Release the Redis connections even when indexing or searching fails.
			pool.destroy();
		}
	}

	/**
	 * Reads the dump file line by line and feeds every line to a
	 * {@link DumpIndexWriter} backed by the given directory. Lines the writer
	 * rejects are skipped silently.
	 *
	 * @param redisDir directory the index is written to
	 * @throws CorruptIndexException if the index is corrupt
	 * @throws LockObtainFailedException if the index write lock cannot be obtained
	 * @throws IOException if the dump file cannot be read or the index cannot be written
	 */
	private static void DumpPirateBay(Directory redisDir)
		throws CorruptIndexException, LockObtainFailedException, IOException
	{
		// NOTE(review): decodes with the platform default charset, as before;
		// confirm the dump's encoding and pass it explicitly if it is known.
		BufferedReader br = new BufferedReader(
				new InputStreamReader(new FileInputStream("/Users/maxpert/labs/complete")));
		try {
			DumpIndexWriter writer = new DumpIndexWriter(redisDir);
			// Opened outside the inner try: if open() fails there is nothing to close.
			writer.open();
			try {
				String strLine;
				while ((strLine = br.readLine()) != null) {
					writer.parseAndIndex(strLine);
				}
			} finally {
				// Always close the writer, even if reading fails midway.
				writer.close();
			}
		} finally {
			br.close();
		}
	}


	/**
	 * Parses {@code query} against {@code field} with a Lucene 3.6
	 * {@link StandardAnalyzer}, runs it and prints the stored "title" of each
	 * of the top hits. The printed hit count is the number of collected hits,
	 * which is capped at ten.
	 *
	 * @param index directory holding the index to search
	 * @param field default field for the query parser
	 * @param query query string in Lucene query-parser syntax
	 * @throws ParseException if the query string cannot be parsed
	 * @throws CorruptIndexException if the index is corrupt
	 * @throws IOException on a low-level I/O failure
	 */
	private static void search(Directory index, String field, String query)
		throws ParseException, CorruptIndexException, IOException
	{
		int hitsPerPage = 10;
		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
		QueryParser qp = new QueryParser(Version.LUCENE_36, field, analyzer);
		Query q = qp.parse(query);

		IndexReader reader = IndexReader.open(index);
		try {
			IndexSearcher searcher = new IndexSearcher(reader);
			try {
				TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
				searcher.search(q, collector);
				ScoreDoc[] hits = collector.topDocs().scoreDocs;

				System.out.println("Found " + hits.length + " hits.");
				for (int i = 0; i < hits.length; ++i) {
					Document d = searcher.doc(hits[i].doc);
					System.out.println((i + 1) + ". " + d.get("title"));
				}
			} finally {
				searcher.close();
			}
		} finally {
			// A searcher built from an existing reader does not close that reader.
			reader.close();
		}
	}

}
	package mxp;

	import java.io.IOException;

	import org.apache.lucene.analysis.Analyzer;
	import org.apache.lucene.analysis.standard.StandardAnalyzer;
	import org.apache.lucene.document.Document;
	import org.apache.lucene.document.Field;
	import org.apache.lucene.document.NumericField;
	import org.apache.lucene.index.CorruptIndexException;
	import org.apache.lucene.index.IndexWriter;
	import org.apache.lucene.index.IndexWriterConfig;
	import org.apache.lucene.store.Directory;
	import org.apache.lucene.store.LockObtainFailedException;
	import org.apache.lucene.util.Version;


	public class DumpIndexWriter {
	private Directory dir;
	private IndexWriter writer;

	public DumpIndexWriter(Directory d) {
	dir = d;
	}

	public void open() throws CorruptIndexException, LockObtainFailedException, IOException {
	Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
	IndexWriterConfig writerConfig = new IndexWriterConfig(
	Version.LUCENE_36, analyzer);
	writer = new IndexWriter(dir, writerConfig);
	}

	public boolean parseAndIndex(String line){
	//3281929\|MacGyver.S01E02.DVDRip.XviD-MEDiEVAL\|376764032\|1\|0\|6e43d748d9446e3cddec69ce9b2ababb51bbf827
	String[] fields = line.split("\\\|");
	if( fields.length < 6 ) return false;
	Document doc = new Document();

	System.out.printf("%s -> %s \n", fields[1], fields[5]);
	doc.add(new Field("title", fields[1], Field.Store.YES, Field.Index.ANALYZED));
	doc.add(new Field("link", fields[5], Field.Store.YES, Field.Index.NOT_ANALYZED));

	try {
	writer.addDocument(doc);
	} catch (Exception e) {
	e.printStackTrace();
	return false;
	}

	return true;
	}

	public void close() throws CorruptIndexException, IOException {
	writer.close();
	}
	}
	package mxp;

	import java.io.BufferedReader;
	import java.io.DataInputStream;
	import java.io.File;
	import java.io.FileInputStream;
	import java.io.FileNotFoundException;
	import java.io.IOException;
	import java.io.InputStreamReader;
	import java.util.ArrayList;
	import java.util.List;
	import java.util.zip.CRC32;

	import mxp.lucene.store.RedisDirectory;
	import mxp.lucene.store.RedisFile;

	import org.apache.commons.pool.impl.GenericObjectPool;
	import org.apache.lucene.analysis.Analyzer;
	import org.apache.lucene.analysis.standard.StandardAnalyzer;
	import org.apache.lucene.document.Document;
	import org.apache.lucene.document.Field;
	import org.apache.lucene.index.CorruptIndexException;
	import org.apache.lucene.index.IndexReader;
	import org.apache.lucene.index.IndexWriter;
	import org.apache.lucene.index.IndexWriterConfig;
	import org.apache.lucene.queryParser.ParseException;
	import org.apache.lucene.queryParser.QueryParser;
	import org.apache.lucene.search.IndexSearcher;
	import org.apache.lucene.search.Query;
	import org.apache.lucene.search.ScoreDoc;
	import org.apache.lucene.search.TopScoreDocCollector;
	import org.apache.lucene.store.Directory;
	import org.apache.lucene.store.FSDirectory;
	import org.apache.lucene.store.LockObtainFailedException;
	import org.apache.lucene.store.RAMDirectory;
	import org.apache.lucene.util.Version;

	import redis.clients.jedis.JedisShardInfo;
	import redis.clients.jedis.ShardedJedisPool;

	/**
	 * Demo driver: indexes a pipe-delimited dump into a Redis-backed Lucene
	 * directory and then runs a sample search against the "title" field.
	 */
	public class Main {

		/**
		 * Builds a three-shard Redis pool on localhost, indexes the dump into the
		 * "piratebay" Redis directory, prints the elapsed milliseconds, pauses for
		 * five seconds and finally searches the "title" field for "batman".
		 *
		 * @param args unused
		 * @throws IOException on I/O failure while indexing or searching
		 * @throws CorruptIndexException if the index is corrupt
		 * @throws ParseException if the sample query cannot be parsed
		 * @throws InterruptedException if the pause before searching is interrupted
		 */
		public static void main(String[] args) throws CorruptIndexException,
				IOException, ParseException, InterruptedException {
			List<JedisShardInfo> shards = new ArrayList<JedisShardInfo>();
			shards.add(new JedisShardInfo("localhost", 6379));
			shards.add(new JedisShardInfo("localhost", 6389));
			shards.add(new JedisShardInfo("localhost", 6399));

			ShardedJedisPool pool = new ShardedJedisPool(new GenericObjectPool.Config(), shards);
			try {
				RedisDirectory redisDir = new RedisDirectory("piratebay", pool);

				long start = System.currentTimeMillis();
				System.out.println("Indexing in Redis...");
				DumpPirateBay(redisDir);
				// %n terminates the line so the search output does not run onto it.
				System.out.printf("Redis indexing took %d...%n", System.currentTimeMillis() - start);
				// NOTE(review): the reason for this 5 s pause is not evident from this file.
				Thread.sleep(5000);

				//Test searching ;)
				search( redisDir, "title", "batman" );
			} finally {
				// Release the Redis connections even when indexing or searching fails.
				pool.destroy();
			}
		}

		/**
		 * Reads the dump file line by line and feeds every line to a
		 * {@link DumpIndexWriter} backed by the given directory. Lines the writer
		 * rejects are skipped silently.
		 *
		 * @param redisDir directory the index is written to
		 * @throws CorruptIndexException if the index is corrupt
		 * @throws LockObtainFailedException if the index write lock cannot be obtained
		 * @throws IOException if the dump file cannot be read or the index cannot be written
		 */
		private static void DumpPirateBay(Directory redisDir)
			throws CorruptIndexException, LockObtainFailedException, IOException
		{
			// NOTE(review): decodes with the platform default charset, as before;
			// confirm the dump's encoding and pass it explicitly if it is known.
			BufferedReader br = new BufferedReader(
					new InputStreamReader(new FileInputStream("/Users/maxpert/labs/complete")));
			try {
				DumpIndexWriter writer = new DumpIndexWriter(redisDir);
				// Opened outside the inner try: if open() fails there is nothing to close.
				writer.open();
				try {
					String strLine;
					while ((strLine = br.readLine()) != null) {
						writer.parseAndIndex(strLine);
					}
				} finally {
					// Always close the writer, even if reading fails midway.
					writer.close();
				}
			} finally {
				br.close();
			}
		}


		/**
		 * Parses {@code query} against {@code field} with a Lucene 3.6
		 * {@link StandardAnalyzer}, runs it and prints the stored "title" of each
		 * of the top hits. The printed hit count is the number of collected hits,
		 * which is capped at ten.
		 *
		 * @param index directory holding the index to search
		 * @param field default field for the query parser
		 * @param query query string in Lucene query-parser syntax
		 * @throws ParseException if the query string cannot be parsed
		 * @throws CorruptIndexException if the index is corrupt
		 * @throws IOException on a low-level I/O failure
		 */
		private static void search(Directory index, String field, String query)
			throws ParseException, CorruptIndexException, IOException
		{
			int hitsPerPage = 10;
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
			QueryParser qp = new QueryParser(Version.LUCENE_36, field, analyzer);
			Query q = qp.parse(query);

			IndexReader reader = IndexReader.open(index);
			try {
				IndexSearcher searcher = new IndexSearcher(reader);
				try {
					TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
					searcher.search(q, collector);
					ScoreDoc[] hits = collector.topDocs().scoreDocs;

					System.out.println("Found " + hits.length + " hits.");
					for (int i = 0; i < hits.length; ++i) {
						Document d = searcher.doc(hits[i].doc);
						System.out.println((i + 1) + ". " + d.get("title"));
					}
				} finally {
					searcher.close();
				}
			} finally {
				// A searcher built from an existing reader does not close that reader.
				reader.close();
			}
		}

	}