Skip to content

Instantly share code, notes, and snippets.

@vthacker
Created September 12, 2013 10:08
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vthacker/6535332 to your computer and use it in GitHub Desktop.
Save vthacker/6535332 to your computer and use it in GitHub Desktop.
Perculator
package com.varun.perculator;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.Version;
/**
 * A minimal "percolator": queries are registered up front, and each incoming
 * document is then tested against every registered query to find the ones it
 * matches (the inverse of a normal search).
 *
 * <p>Each document is indexed on its own into a reusable in-memory
 * {@link MemoryIndex} under the {@link #CONTENT} field, and every registered
 * query is run against that single-document index.
 *
 * <p>Registration and matching are both guarded by this instance's monitor, so
 * queries may be added while other threads are percolating documents.
 */
public class Percolator {
    /** Lucene compatibility version handed to the analyzer and query parser. */
    public static final Version VERSION = Version.LUCENE_43;

    /** Name of the single field that documents are indexed under and queries target. */
    public static final String CONTENT = "content";

    /** Registered queries, in registration order. Guarded by {@code this}. */
    private final List<Query> queries = new ArrayList<Query>();

    /** Scratch index holding one document at a time. Guarded by {@code this}. */
    private final MemoryIndex index = new MemoryIndex();

    /** Analyzer shared by query parsing and document indexing so both tokenize alike. */
    private final Analyzer analyzer = new SimpleAnalyzer(VERSION);

    /** Creates a percolator with no registered queries. */
    public Percolator() {
    }

    /**
     * Parses {@code query} with the classic Lucene query syntax (default field
     * {@link #CONTENT}) and registers the result.
     *
     * @param query query string in classic {@link QueryParser} syntax
     * @throws ParseException if the query string cannot be parsed
     */
    public void addQuery(String query) throws ParseException {
        // A fresh parser per call: parsers are cheap and are not shared between threads.
        QueryParser parser = new QueryParser(VERSION, CONTENT, analyzer);
        // Parse outside the lock so a slow parse does not block percolation.
        register(parser.parse(query));
    }

    /**
     * Registers a {@link TermQuery} on the {@link #CONTENT} field for the exact
     * term given. The term is used verbatim; it is NOT run through the analyzer.
     * NOTE(review): documents are tokenized by {@link SimpleAnalyzer}, which is
     * expected to lower-case tokens, so a term containing upper-case letters
     * would presumably never match - confirm before exposing this publicly.
     *
     * @param string the raw term to match
     */
    private void addDirectQuery(String string) {
        register(new TermQuery(new Term(CONTENT, string)));
    }

    /** Appends a query to the registry under the same lock used while matching. */
    private synchronized void register(Query query) {
        queries.add(query);
    }

    /*
     * TODO maybe use automaton / trie fields if list<queries> is large (like 1 million registered queries)
     */
    /**
     * Returns every registered query that matches the given document text.
     *
     * @param doc the document text to percolate
     * @return a new list of the matching queries, in registration order; empty
     *         if none match
     */
    public synchronized List<Query> getMatchingQueries(String doc) {
        // Drop the previously percolated document before indexing the new one.
        index.reset();
        index.addField(CONTENT, doc, analyzer);

        List<Query> matching = new ArrayList<Query>();
        for (Query query : queries) {
            // MemoryIndex.search returns a relevance score; above zero means a hit.
            if (index.search(query) > 0.0f) {
                matching.add(query);
            }
        }
        return matching;
    }

    /**
     * Demo: registers three term queries, percolates four sample documents,
     * prints the matches for each, then prints the elapsed wall-clock
     * milliseconds.
     */
    public static void main(String[] args) throws ParseException {
        long start = System.currentTimeMillis();

        Percolator percolator = new Percolator();
        percolator.addDirectQuery("one");
        percolator.addDirectQuery("two");
        percolator.addDirectQuery("three");

        String[] docs = {
            "one two three",
            "two",
            "three",
            "four"
        };
        for (String doc : docs) {
            System.out.println(doc + " -> " + percolator.getMatchingQueries(doc));
        }

        long end = System.currentTimeMillis();
        System.out.println(end - start);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment