Skip to content

Instantly share code, notes, and snippets.

@vishramachandran
Created August 27, 2018 05:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vishramachandran/4a8443fb67ad74b2f597a58ec0913411 to your computer and use it in GitHub Desktop.
Save vishramachandran/4a8443fb67ad74b2f597a58ec0913411 to your computer and use it in GitHub Desktop.
Lucene Issue with Index Sorting
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.Bits;
/**
 * Stand-alone reproduction of a Lucene issue that shows up when index sorting is enabled.
 *
 * <p>The index is sorted on {@link #Y_VAL}. Documents are "upserted" by issuing a
 * delete-by-query on the id followed by an add, and a near-real-time searcher is then used to
 * count the surviving documents. {@link #printDocs(Query, int)} throws an {@link AssertionError}
 * when the number of hits differs from the expected number.
 *
 * <p>Instances own an index writer, a searcher manager and a temporary on-disk directory; call
 * {@link #close()} (or use try-with-resources) to release them.
 */
public class LuceneSnag implements AutoCloseable {

  public static final String ID = "id";
  public static final String X_VAL = "xValue";
  public static final String Y_VAL = "yValue";

  private final Path indexDiskLocation;
  private final MMapDirectory mMapDirectory;
  private final Analyzer analyzer = new StandardAnalyzer();
  private final IndexWriterConfig config;
  /** Index-time sort order: ascending on the {@link #Y_VAL} doc-values field. */
  private final Sort sort = new Sort(new SortField(Y_VAL, SortField.Type.LONG));
  private final IndexWriter indexWriter;
  private final SearcherManager searcherManager;

  /**
   * Creates a fresh index in a new temporary directory, with index sorting on {@link #Y_VAL},
   * and a searcher manager layered on top of the writer.
   *
   * @throws IOException if the temporary directory or the index cannot be created
   */
  public LuceneSnag() throws IOException {
    indexDiskLocation = Files.createTempDirectory("tmp");
    mMapDirectory = new MMapDirectory(indexDiskLocation);
    config = new IndexWriterConfig(analyzer);
    config.setIndexSort(sort);
    indexWriter = new IndexWriter(mMapDirectory, config);
    searcherManager = new SearcherManager(indexWriter, null);
  }

  /**
   * Runs the reproduction once and releases all index resources afterwards.
   *
   * @param args ignored
   * @throws IOException on any index I/O failure
   */
  public static void main(String[] args) throws IOException {
    try (LuceneSnag l = new LuceneSnag()) {
      l.reproduceBug();
    }
  }

  /**
   * Indexes the scenario and verifies the hit counts.
   *
   * <p>Ids 0 and 2 are written twice (the second write replaces yVal with a small value), ids 1
   * and 3 are written once with {@code Long.MAX_VALUE}. After the refresh, exactly two documents
   * are expected to match {@code yVal == Long.MAX_VALUE} and exactly four documents overall.
   *
   * @throws IOException on any index I/O failure
   * @throws AssertionError if a query returns an unexpected number of hits
   */
  public void reproduceBug() throws IOException {
    upsertDoc(0, 10, Long.MAX_VALUE);
    upsertDoc(0, 10, 20);
    upsertDoc(1, 10, Long.MAX_VALUE);
    upsertDoc(2, 10, Long.MAX_VALUE);
    upsertDoc(2, 10, 40);
    upsertDoc(3, 10, Long.MAX_VALUE);
    commitBlocking();
    System.out.println("numDocs: " + indexWriter.numDocs());
    System.out.println("maxDocs: " + indexWriter.maxDoc());
    printDocs(LongPoint.newExactQuery(Y_VAL, Long.MAX_VALUE), 2);
    printDocs(new MatchAllDocsQuery(), 4);
  }

  /**
   * Runs {@code q}, prints every collected document and checks the hit count.
   *
   * @param q the query to execute
   * @param expectedCount the number of hits the query must produce
   * @return the number of hits collected
   * @throws IOException on any index I/O failure
   * @throws AssertionError if the number of hits differs from {@code expectedCount}
   */
  public int printDocs(Query q, int expectedCount) throws IOException {
    MyCollector coll = new MyCollector();
    // Every acquire() must be paired with a release(), otherwise the underlying reader is
    // never dereferenced and its resources leak.
    IndexSearcher searcher = searcherManager.acquire();
    try {
      searcher.search(q, coll);
    } finally {
      searcherManager.release(searcher);
    }
    System.out.println("Num Hits: " + coll.counter);
    if (expectedCount != coll.counter) {
      throw new AssertionError("Expecting " + expectedCount + " docs but got " + coll.counter);
    }
    return coll.counter;
  }

  /**
   * Blocks until the searcher manager has been refreshed so that subsequent searches see all
   * prior writes. Note that, despite its name, this does not call {@code IndexWriter.commit()}.
   *
   * @throws IOException on any index I/O failure
   */
  public void commitBlocking() throws IOException {
    searcherManager.maybeRefreshBlocking();
  }

  /**
   * Replaces any document carrying {@code id} with a new one: delete-by-query on the id term,
   * then add.
   *
   * @param id document id
   * @param xVal value stored in the {@link #X_VAL} fields
   * @param yVal value stored in the {@link #Y_VAL} fields (the index sort key)
   * @throws IOException on any index I/O failure
   */
  public void upsertDoc(int id,
                        long xVal,
                        long yVal) throws IOException {
    // Variations that were tried while narrowing the issue down are kept as notes:
    // searcherManager.maybeRefreshBlocking();
    indexWriter.deleteDocuments(new TermQuery(new Term(ID, String.valueOf(id))));
    // indexWriter.deleteDocuments(IntPoint.newExactQuery(ID, id));
    // indexWriter.deleteDocuments(new DocValuesNumbersQuery(ID, (long)id));
    Document document = makeDocument(id, xVal, yVal);
    indexWriter.addDocument(document);
    // indexWriter.updateDocument(new Term(ID, String.valueOf(id)), document);
  }

  /**
   * Releases the searcher manager, the index writer, the directory and the analyzer.
   *
   * @throws IOException if closing any of the resources fails
   */
  @Override
  public void close() throws IOException {
    // try-with-resources closes in reverse declaration order (writer, directory, analyzer)
    // after the searcher manager, and still closes the rest if one close() throws.
    try (Analyzer a = analyzer;
         MMapDirectory dir = mMapDirectory;
         IndexWriter writer = indexWriter) {
      searcherManager.close();
    }
  }

  /**
   * Builds a document in which each of id, xVal and yVal is indexed both as a point and as
   * numeric doc values; the id is additionally indexed as an unstored string term so it can be
   * deleted by term query.
   */
  private Document makeDocument(int id, long xVal, long yVal) {
    Document document = new Document();
    // id
    document.add(new StringField(ID, String.valueOf(id), Store.NO));
    document.add(new IntPoint(ID, id));
    document.add(new NumericDocValuesField(ID, id));
    // xVal
    document.add(new LongPoint(X_VAL, xVal));
    document.add(new NumericDocValuesField(X_VAL, xVal));
    // yVal
    document.add(new LongPoint(Y_VAL, yVal));
    document.add(new NumericDocValuesField(Y_VAL, yVal));
    return document;
  }

  /**
   * Collector that prints each hit (doc id, liveness, id and yVal doc values) and counts the
   * hits in {@link #counter}.
   */
  private static class MyCollector extends SimpleCollector {
    private NumericDocValues idDv;
    private NumericDocValues yValDv;
    /** Live-docs bitset of the current segment; {@code null} when the reader returns none. */
    private Bits liveNodes;
    public int counter = 0;

    @Override
    public boolean needsScores() {
      return false;
    }

    /** Re-binds the per-segment doc-values iterators and live-docs bits. */
    @Override
    public void doSetNextReader(LeafReaderContext context) throws IOException {
      idDv = context.reader().getNumericDocValues(ID);
      yValDv = context.reader().getNumericDocValues(Y_VAL);
      liveNodes = context.reader().getLiveDocs();
    }

    /**
     * Prints one hit and increments {@link #counter}.
     *
     * @throws IllegalStateException if the document lacks an id or yVal doc value
     */
    @Override
    public void collect(int doc) throws IOException {
      System.out.println("\t" + doc + ". Live: " + (liveNodes == null ? "true" : liveNodes.get(doc)));
      if (idDv.advanceExact(doc)) {
        int id = (int) idDv.longValue();
        System.out.println("\t\tID: " + id);
        counter += 1;
      } else {
        throw new IllegalStateException("This shouldn't happen since every document should have a idDv");
      }
      if (yValDv.advanceExact(doc)) {
        long yVal = yValDv.longValue();
        System.out.println("\t\tyVal: " + yVal);
      } else {
        throw new IllegalStateException("This shouldn't happen since every document should have a yValDv");
      }
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment