Skip to content

Instantly share code, notes, and snippets.

@gevorghari
Created November 12, 2013 20:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gevorghari/7438048 to your computer and use it in GitHub Desktop.
Save gevorghari/7438048 to your computer and use it in GitHub Desktop.
Simple program that creates a Lucene index out of 4 documents and queries it in 4 different ways (a regular query, a query filtered with a PrefixFilter, a query filtered with a QueryWrapperFilter, and a boolean query with a PrefixQuery added to it). The DefaultSimilarity class has been extended to print out the tf*idf inputs: term frequency, total number of documents, and document frequency.
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import java.io.*;
/**
 * Small Lucene demo: indexes four documents into an in-memory directory,
 * searches the {@code title} field for "black" and prints every hit.
 *
 * <p>Three alternative ways of restricting the search to documents whose
 * {@code type} starts with {@code type.colors} are kept as commented-out
 * {@code search} calls inside {@link #main(String[])}; swap one in for the
 * active call to compare the printed scores.
 */
public class ReducingSearchSpaceWithFilters {

    /**
     * Builds the index, runs the query and prints one line per hit
     * (document id, score, stored {@code type} and stored {@code title}).
     *
     * @param args ignored
     * @throws IOException    if indexing or searching fails
     * @throws ParseException if the query string cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        RAMDirectory ramDirectory = new RAMDirectory();
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_45, analyzer);

        // Plain writer instead of a double-brace anonymous subclass; the
        // finally block guarantees the writer is closed even if a document
        // fails to index.
        IndexWriter writer = new IndexWriter(ramDirectory, iwc);
        try {
            writer.addDocument(createDoc("This is a black dog", "type.colors")); //doc=0
            writer.addDocument(createDoc("This is a black cat", "type.pets")); //doc=1
            writer.addDocument(createDoc("The cat is white", "type.colors")); //doc=2
            writer.addDocument(createDoc("The cat is black", "type.pets")); //doc=3
        } finally {
            writer.close();
        }

        Query blackQuery = new QueryParser(Version.LUCENE_45, "title", analyzer).parse("black");

        // Keep a handle on the reader so it can be closed once searching is done.
        DirectoryReader reader = DirectoryReader.open(ramDirectory);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            // Similarity subclass that prints the tf/idf inputs it is given.
            searcher.setSimilarity(new SimpleDefaultSimilarity(true));

            // Only used by the last commented-out alternative below: the type
            // prefix is a required clause, the "black" query an optional one.
            BooleanQuery booleanQuery = new BooleanQuery();
            booleanQuery.add(new PrefixQuery(new Term("type", "type.colors")), BooleanClause.Occur.MUST);
            booleanQuery.add(blackQuery, BooleanClause.Occur.SHOULD);

            TopDocs results = searcher.search(blackQuery, 5);
            // TopDocs results = searcher.search(blackQuery, new PrefixFilter(new Term("type", "type.colors")), 5);
            // TopDocs results = searcher.search(blackQuery, new QueryWrapperFilter(new PrefixQuery(new Term("type", "type.colors"))), 5);
            // TopDocs results = searcher.search(booleanQuery, 5);

            for (ScoreDoc hit : results.scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                System.out.println("doc=" + hit.doc + " score=" + hit.score + " type=" + doc.get("type") + " title=" + doc.get("title"));
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Creates a document with two stored fields.
     *
     * @param text value of the {@code title} field, added as a {@link TextField}
     * @param type value of the {@code type} field, added as a {@link StringField}
     * @return the populated document
     * @throws IOException declared for callers; nothing in this method throws it directly
     */
    private static Document createDoc(String text, String type) throws IOException {
        Document doc = new Document();
        doc.add(new TextField("title", text, Field.Store.YES));
        doc.add(new StringField("type", type, Field.Store.YES));
        return doc;
    }
}
/**
 * {@link DefaultSimilarity} variant that can echo the arguments of
 * {@link #tf(float)} and {@link #idf(long, long)} to standard output before
 * delegating to the inherited implementation. The returned values are
 * exactly those of the superclass.
 */
class SimpleDefaultSimilarity extends DefaultSimilarity {

    /** When {@code true}, every tf/idf call prints its arguments first. */
    private final boolean verbose;

    /**
     * @param debug {@code true} to print the arguments of each tf/idf call
     */
    public SimpleDefaultSimilarity(boolean debug) {
        verbose = debug;
    }

    /**
     * Optionally prints {@code freq}, then returns the superclass result.
     */
    @Override
    public float tf(float freq) {
        if (verbose) {
            printTrace("freq=" + freq);
        }
        return super.tf(freq);
    }

    /**
     * Optionally prints {@code docFreq} and {@code numDocs}, then returns the
     * superclass result.
     */
    @Override
    public float idf(long docFreq, long numDocs) {
        if (verbose) {
            printTrace("docFreq=" + docFreq + " - numDocs=" + numDocs);
        }
        return super.idf(docFreq, numDocs);
    }

    /** Writes one trace line to standard output. */
    private static void printTrace(String line) {
        System.out.println(line);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment