Skip to content

Instantly share code, notes, and snippets.

@bdulac
Last active March 1, 2016 08:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bdulac/2ba47e38b383fb885427 to your computer and use it in GitHub Desktop.
Save bdulac/2ba47e38b383fb885427 to your computer and use it in GitHub Desktop.
A Java map implementation using Apache Lucene 5.0
import java.io.Closeable;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
/**
 * Implementation of a map (possibly persistent) using <em>Apache Lucene</em> 5.0.
 * <p>
 * Every entry is one Lucene document holding two stored fields. The key is
 * indexed verbatim as a single term, so the same term can be used to look
 * an entry up, replace it and delete it exactly. The value is indexed as
 * analyzed text.
 * <p>
 * Null keys and null values are not supported. {@link #keySet()},
 * {@link #values()} and {@link #entrySet()} return independent snapshots,
 * not live views of the index. Each operation opens and closes its own
 * reader or writer; no synchronization is done by this class.
 * <p>
 * NOTE(review): an on-disk index whose key field was written as analyzed
 * text may not resolve keys that are not a single lowercase token; such an
 * index presumably has to be rebuilt — confirm before pointing this class at
 * existing data.
 */
public class LuceneMap implements Map<String, String>, Closeable {

    /** Upper bound on the number of hits inspected when resolving one key. */
    private static final int MAX_KEY_HITS = 1000;

    private final Directory luceneDirectory;
    private final Analyzer analyzer;
    /** Name of the <em>Lucene</em> field used for storing the map keys */
    private final String keyField;
    /** Name of the <em>Lucene</em> field used for storing the map values */
    private final String valueField;

    /**
     * Construction of a map using an index stored in the RAM with the default
     * setup (the <em>Lucene</em> field names used are {@code key} and
     * {@code value}).
     * @throws IOException
     * If the index cannot be initialized
     */
    public LuceneMap() throws IOException {
        this(null);
    }

    /**
     * Construction of a map using an index with the default
     * setup (the <em>Lucene</em> field names used are {@code key} and
     * {@code value}).
     * @param fPath
     * Path of the index in the filesystem, null value to use the RAM.
     * @throws IOException
     * If the index cannot be opened or initialized
     */
    public LuceneMap(String fPath) throws IOException {
        this(fPath, "key", "value");
    }

    /**
     * Construction of a map using an index.
     * @param fPath
     * Path of the index in the filesystem, null value to use the RAM.
     * @param kField
     * Name of the <em>Lucene</em> field used for storing the map keys
     * @param vField
     * Name of the <em>Lucene</em> field used for storing the map values
     * @throws IOException
     * If the index cannot be opened or initialized
     * @throws NullPointerException
     * If one of the field names is null
     */
    public LuceneMap(String fPath, String kField, String vField)
            throws IOException {
        if (kField == null) {
            throw new NullPointerException("kField");
        }
        if (vField == null) {
            throw new NullPointerException("vField");
        }
        if (fPath != null) {
            Path path = FileSystems.getDefault().getPath(fPath);
            luceneDirectory = FSDirectory.open(path);
        } else {
            luceneDirectory = new RAMDirectory();
        }
        analyzer = new StandardAnalyzer();
        keyField = kField;
        valueField = vField;
        // Commit once so that the index segments exist before any reader
        // is opened on the directory.
        try (IndexWriter writer = openWriter()) {
            writer.commit();
        }
    }

    /**
     * Opens a writer on the index. A fresh configuration is created for
     * every writer, as the original code did.
     */
    private IndexWriter openWriter() throws IOException {
        return new IndexWriter(luceneDirectory, new IndexWriterConfig(analyzer));
    }

    /**
     * Builds the document representing one map entry.
     * @throws IllegalArgumentException
     * If the key or the value is null
     */
    private Document newDocument(String key, String value) {
        if (key == null || value == null) {
            throw new IllegalArgumentException(
                    "key and value must not be null");
        }
        Document doc = new Document();
        // The key is a single, non-analyzed term: this is what makes the
        // term-based update and delete below hit exactly one entry.
        doc.add(new Field(keyField, key, StringField.TYPE_STORED));
        doc.add(new Field(valueField, value, TextField.TYPE_STORED));
        return doc;
    }

    /**
     * Finds the document whose stored key equals the given key.
     * @return The matching document, or null if there is none (or if the
     * key is null)
     */
    private Document getDocumentByKey(String key) {
        if (key == null) {
            return null;
        }
        try (IndexReader reader = DirectoryReader.open(luceneDirectory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = new TermQuery(new Term(keyField, key));
            ScoreDoc[] hits = searcher.search(query, MAX_KEY_HITS).scoreDocs;
            // Check every hit, not only the best one: a document can carry
            // the term without its stored key being equal to the key.
            for (ScoreDoc hit : hits) {
                Document candidate = searcher.doc(hit.doc);
                if (key.equals(candidate.get(keyField))) {
                    return candidate;
                }
            }
            return null;
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Loads every live document of the index, in the order returned by a
     * match-all search.
     */
    private List<Document> readAllDocuments() {
        try (IndexReader reader = DirectoryReader.open(luceneDirectory)) {
            int total = reader.numDocs();
            List<Document> docs = new ArrayList<>(total);
            if (total == 0) {
                // Nothing to fetch; also avoids asking the searcher for
                // zero hits.
                return docs;
            }
            IndexSearcher searcher = new IndexSearcher(reader);
            ScoreDoc[] hits =
                    searcher.search(new MatchAllDocsQuery(), total).scoreDocs;
            for (ScoreDoc hit : hits) {
                docs.add(searcher.doc(hit.doc));
            }
            return docs;
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * @return The number of live documents in the index
     * @throws IllegalStateException
     * If the index cannot be read
     */
    @Override
    public int size() {
        try (IndexReader reader = DirectoryReader.open(luceneDirectory)) {
            return reader.numDocs();
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }

    @Override
    public boolean isEmpty() {
        return size() == 0;
    }

    /**
     * @param key
     * Key to look up; its {@code toString()} form is used
     * @return The stored value, or null if the key is null or absent
     */
    @Override
    public String get(Object key) {
        if (key == null) {
            return null;
        }
        Document doc = getDocumentByKey(key.toString());
        return (doc != null) ? doc.get(valueField) : null;
    }

    @Override
    public boolean containsKey(Object key) {
        return get(key) != null;
    }

    /**
     * Tells whether at least one entry has exactly the given value.
     * <p>
     * The value field is analyzed, so the index cannot answer an exact
     * equality question; the stored values are compared one by one instead.
     * @param value
     * Value to look for; its {@code toString()} form is used
     * @return false if the value is null or no stored value equals it
     */
    @Override
    public boolean containsValue(Object value) {
        if (value == null) {
            return false;
        }
        String wanted = value.toString();
        for (Document doc : readAllDocuments()) {
            if (wanted.equals(doc.get(valueField))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Associates the value with the key, replacing any previous entry.
     * @return The previous value, or null if there was none
     * @throws IllegalArgumentException
     * If the key or the value is null
     * @throws IllegalStateException
     * If the index cannot be read or written
     */
    @Override
    public String put(String key, String value) {
        Document doc = newDocument(key, value);
        Document previous = getDocumentByKey(key);
        try (IndexWriter writer = openWriter()) {
            // Deletes whatever carries this key term, then adds the new
            // document; also correct when there was no previous entry.
            writer.updateDocument(new Term(keyField, key), doc);
            writer.commit();
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
        return (previous != null) ? previous.get(valueField) : null;
    }

    /**
     * Removes the entry for the key, if any.
     * @param key
     * Key to remove; its {@code toString()} form is used
     * @return The removed value, or null if the key is null or absent
     * @throws IllegalStateException
     * If the index cannot be read or written
     */
    @Override
    public String remove(Object key) {
        if (key == null) {
            return null;
        }
        String keyText = key.toString();
        Document previous = getDocumentByKey(keyText);
        if (previous == null) {
            // Nothing to delete: no need to open a writer.
            return null;
        }
        try (IndexWriter writer = openWriter()) {
            writer.deleteDocuments(new Term(keyField, keyText));
            writer.commit();
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
        return previous.get(valueField);
    }

    /**
     * Copies every entry of the given map, using one writer and one commit
     * for the whole batch.
     * @throws IllegalArgumentException
     * If an entry has a null key or a null value
     * @throws IllegalStateException
     * If the index cannot be written
     */
    @Override
    public void putAll(Map<? extends String, ? extends String> m) {
        if (m.isEmpty()) {
            return;
        }
        try (IndexWriter writer = openWriter()) {
            for (Map.Entry<? extends String, ? extends String> entry
                    : m.entrySet()) {
                // Build the document first so that a null key is rejected
                // before a Term is created from it.
                Document doc = newDocument(entry.getKey(), entry.getValue());
                writer.updateDocument(
                        new Term(keyField, entry.getKey()), doc);
            }
            writer.commit();
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Removes every entry.
     * @throws IllegalStateException
     * If the index cannot be written
     */
    @Override
    public void clear() {
        try (IndexWriter writer = openWriter()) {
            writer.deleteAll();
            writer.commit();
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }

    /** @return A snapshot of the keys; changes to it do not affect the map */
    @Override
    public Set<String> keySet() {
        Set<String> result = new HashSet<>();
        for (Document doc : readAllDocuments()) {
            result.add(doc.get(keyField));
        }
        return result;
    }

    /** @return A snapshot of the values; changes to it do not affect the map */
    @Override
    public Collection<String> values() {
        List<Document> docs = readAllDocuments();
        Collection<String> result = new ArrayList<>(docs.size());
        for (Document doc : docs) {
            result.add(doc.get(valueField));
        }
        return result;
    }

    /** @return A snapshot of the entries; changes to it do not affect the map */
    @Override
    public Set<java.util.Map.Entry<String, String>> entrySet() {
        Set<java.util.Map.Entry<String, String>> result = new HashSet<>();
        for (Document doc : readAllDocuments()) {
            result.add(new AbstractMap.SimpleEntry<String, String>(
                    doc.get(keyField), doc.get(valueField)));
        }
        return result;
    }

    /**
     * Closes the underlying Lucene directory. The map must not be used
     * afterwards.
     * @throws IOException
     * If the directory cannot be closed
     */
    @Override
    public void close() throws IOException {
        luceneDirectory.close();
    }
}
@bdulac
Copy link
Author

bdulac commented Apr 14, 2015

An idea: digging into DHT, but first...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment