Created
August 27, 2018 05:37
-
-
Save vishramachandran/4a8443fb67ad74b2f597a58ec0913411 to your computer and use it in GitHub Desktop.
Lucene Issue with Index Sorting
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.IOException; | |
import java.nio.file.Files; | |
import java.nio.file.Path; | |
import org.apache.lucene.analysis.Analyzer; | |
import org.apache.lucene.analysis.standard.StandardAnalyzer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.document.Field.Store; | |
import org.apache.lucene.document.IntPoint; | |
import org.apache.lucene.document.LongPoint; | |
import org.apache.lucene.document.NumericDocValuesField; | |
import org.apache.lucene.document.StringField; | |
import org.apache.lucene.index.IndexWriter; | |
import org.apache.lucene.index.IndexWriterConfig; | |
import org.apache.lucene.index.LeafReaderContext; | |
import org.apache.lucene.index.NumericDocValues; | |
import org.apache.lucene.index.Term; | |
import org.apache.lucene.search.MatchAllDocsQuery; | |
import org.apache.lucene.search.Query; | |
import org.apache.lucene.search.SearcherManager; | |
import org.apache.lucene.search.SimpleCollector; | |
import org.apache.lucene.search.Sort; | |
import org.apache.lucene.search.SortField; | |
import org.apache.lucene.search.TermQuery; | |
import org.apache.lucene.store.MMapDirectory; | |
import org.apache.lucene.util.Bits; | |
public class LuceneSnag { | |
public static final String ID = "id"; | |
public static final String X_VAL = "xValue"; | |
public static final String Y_VAL = "yValue"; | |
private Path indexDiskLocation; | |
private MMapDirectory mMapDirectory; | |
private Analyzer analyzer = new StandardAnalyzer(); | |
private IndexWriterConfig config; | |
private Sort sort = new Sort(new SortField(Y_VAL, SortField.Type.LONG)); | |
private IndexWriter indexWriter; | |
private SearcherManager searcherManager; | |
public LuceneSnag() throws IOException { | |
indexDiskLocation = Files.createTempDirectory("tmp"); | |
mMapDirectory = new MMapDirectory(indexDiskLocation); | |
config = new IndexWriterConfig(analyzer); | |
config.setIndexSort(sort); | |
indexWriter = new IndexWriter(mMapDirectory, config); | |
searcherManager = new SearcherManager(indexWriter, null); | |
} | |
public static void main(String[] args) throws IOException { | |
LuceneSnag l = new LuceneSnag(); | |
l.reproduceBug(); | |
} | |
public void reproduceBug() throws IOException { | |
upsertDoc(0, 10, Long.MAX_VALUE); | |
upsertDoc(0, 10, 20); | |
upsertDoc(1, 10, Long.MAX_VALUE); | |
upsertDoc(2, 10, Long.MAX_VALUE); | |
upsertDoc(2, 10, 40); | |
upsertDoc(3, 10, Long.MAX_VALUE); | |
commitBlocking(); | |
System.out.println("numDocs: " + indexWriter.numDocs()); | |
System.out.println("maxDocs: " + indexWriter.maxDoc()); | |
printDocs(LongPoint.newExactQuery(Y_VAL, Long.MAX_VALUE), 2); | |
printDocs(new MatchAllDocsQuery(), 4); | |
} | |
public int printDocs(Query q, int expectedCount) throws IOException { | |
MyCollector coll = new MyCollector(); | |
searcherManager.acquire().search(q, coll); | |
System.out.println("Num Hits: " + coll.counter); | |
if (expectedCount != coll.counter) { | |
throw new AssertionError("Expecting " + expectedCount + " docs but got " + coll.counter); | |
} | |
return coll.counter; | |
} | |
public void commitBlocking() throws IOException { | |
searcherManager.maybeRefreshBlocking(); | |
} | |
public void upsertDoc(int id, | |
long xVal, | |
long yVal) throws IOException { | |
// searcherManager.maybeRefreshBlocking(); | |
indexWriter.deleteDocuments(new TermQuery(new Term(ID, String.valueOf(id)))); | |
// indexWriter.deleteDocuments(IntPoint.newExactQuery(ID, id)); | |
// indexWriter.deleteDocuments(new DocValuesNumbersQuery(ID, (long)id)); | |
Document document = makeDocument(id, xVal, yVal); | |
indexWriter.addDocument(document); | |
// indexWriter.updateDocument(new Term(ID, String.valueOf(id)), document); | |
} | |
private Document makeDocument(int id, long xVal, long yVal) { | |
Document document = new Document(); | |
// id | |
document.add(new StringField(ID, String.valueOf(id), Store.NO)); | |
document.add(new IntPoint(ID, id)); | |
document.add(new NumericDocValuesField(ID, id)); | |
// xVal | |
document.add(new LongPoint(X_VAL, xVal)); | |
document.add(new NumericDocValuesField(X_VAL, xVal)); | |
// yVal | |
document.add(new LongPoint(Y_VAL, yVal)); | |
document.add(new NumericDocValuesField(Y_VAL, yVal)); | |
return document; | |
} | |
private static class MyCollector extends SimpleCollector { | |
private NumericDocValues idDv; | |
private NumericDocValues yValDv; | |
private Bits liveNodes; | |
public int counter = 0; | |
@Override | |
public boolean needsScores() { | |
return false; | |
} | |
@Override | |
public void doSetNextReader(LeafReaderContext context) throws IOException { | |
idDv = context.reader().getNumericDocValues(ID); | |
yValDv = context.reader().getNumericDocValues(Y_VAL); | |
liveNodes = context.reader().getLiveDocs(); | |
} | |
@Override | |
public void collect(int doc) throws IOException { | |
System.out.println("\t" + doc + ". Live: " + (liveNodes == null?"true":liveNodes.get(doc))); | |
if (idDv.advanceExact(doc)) { | |
int id = (int) idDv.longValue(); | |
System.out.println("\t\tID: " + id); | |
counter += 1; | |
} else { | |
throw new IllegalStateException("This shouldn't happen since every document should have a idDv"); | |
} | |
if (yValDv.advanceExact(doc)) { | |
long yVal = yValDv.longValue(); | |
System.out.println("\t\tyVal: " + yVal); | |
} else { | |
throw new IllegalStateException("This shouldn't happen since every document should have a yValDv"); | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment