Skip to content

Instantly share code, notes, and snippets.

@mika76
Last active January 23, 2020 14:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mika76/712d93ca5938b6a165cf32ad41b5e81a to your computer and use it in GitHub Desktop.
Save mika76/712d93ca5938b6a165cf32ad41b5e81a to your computer and use it in GitHub Desktop.
QueryVisitor for rewriting a Lucene.Net query (for example, turning terms into prefix/wildcard queries) - see https://stackoverflow.com/a/5748786/11421 - originally from the Random Ramblings blog (https://web.archive.org/web/20130207075825/http://devhost.se/blog/post/2011/04/21/A-QueryVisitor-for-Lucene.aspx)
// Usage example: parse a query against field "f" (two bare terms, one of them
// boosted, plus a quoted phrase) and run it through PrefixRewriter.
var analyzer = new StandardAnalyzer();
var parser = new QueryParser("f", analyzer);
var original = parser.Parse("awesome rewrite^0.5 \"including one phrase\"");
var prefixed = new PrefixRewriter().Visit(original);
Console.WriteLine(original);
Console.WriteLine(prefixed);
// Output recorded by the author (only the term queries gain a trailing '*'):
// f:awesome f:rewrite^0.5 f:"including one phrase"
// f:awesome* f:rewrite*^0.5 f:"including one phrase"
using Lucene.Net.Index;
using Lucene.Net.Search;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;
namespace Lucene.Helpers
{
// https://stackoverflow.com/a/5748786/11421
// https://web.archive.org/web/20130207075825/http://devhost.se/blog/post/2011/04/21/A-QueryVisitor-for-Lucene.aspx
/// <summary>
/// Walks a Lucene.Net <see cref="Query"/> tree and rebuilds it node by node.
/// The default implementation produces an equivalent copy; subclasses override
/// the individual <c>Visit*</c> methods to rewrite fields, term text, occurrence
/// flags or whole sub-queries. Any <c>Visit*</c> method may return <c>null</c>
/// to drop the corresponding node from the rebuilt query.
/// </summary>
public class QueryVisitor
{
    // The tie-breaker is read from a private field via reflection, as the
    // original implementation did (presumably because the targeted Lucene.Net
    // version offers no public accessor - TODO confirm). The lookup is done
    // once instead of on every visit.
    private static readonly FieldInfo TieBreakerMultiplierField =
        typeof(DisjunctionMaxQuery).GetField("tieBreakerMultiplier", BindingFlags.Instance | BindingFlags.NonPublic);

    /// <summary>
    /// Dispatches <paramref name="query"/> to the matching <c>Visit*</c> method
    /// based on its runtime type.
    /// </summary>
    /// <param name="query">The query to visit; may be <c>null</c>.</param>
    /// <returns>
    /// The rebuilt query, <c>null</c> if the visitor removed it, or
    /// <paramref name="query"/> itself when its type is not handled here.
    /// </returns>
    public virtual Query Visit(Query query)
    {
        if (query is BooleanQuery)
            return VisitBooleanQuery((BooleanQuery)query);
        if (query is DisjunctionMaxQuery)
            return VisitDisjunctionMaxQuery((DisjunctionMaxQuery)query);
        if (query is MultiPhraseQuery)
            return VisitMultiPhraseQuery((MultiPhraseQuery)query);
        if (query is PhraseQuery)
            return VisitPhraseQuery((PhraseQuery)query);
        if (query is PrefixQuery)
            return VisitPrefixQuery((PrefixQuery)query);
        if (query is TermQuery)
            return VisitTermQuery((TermQuery)query);
        if (query is TermRangeQuery)
            return VisitTermRangeQuery((TermRangeQuery)query);

        // Unknown query types pass through untouched.
        return query;
    }

    /// <summary>Returns the multi-phrase query unchanged; its terms are not visited.</summary>
    protected virtual Query VisitMultiPhraseQuery(MultiPhraseQuery query)
    {
        return query;
    }

    /// <summary>
    /// Rebuilds a term query from its visited term, keeping the boost.
    /// </summary>
    /// <returns>The new query, or <c>null</c> if the term is missing or was removed.</returns>
    protected virtual Query VisitTermQuery(TermQuery query)
    {
        var oldTerm = query.Term;
        if (oldTerm == null)
            return null;

        var newTerm = VisitTerm(oldTerm);
        if (newTerm == null)
            return null;

        return CopyBoost(query, new TermQuery(newTerm));
    }

    /// <summary>
    /// Rebuilds a boolean query from its visited clauses, keeping the boost,
    /// the coord-disabled flag and the minimum-should-match setting.
    /// </summary>
    /// <returns>The new query, or <c>null</c> if no clause survived.</returns>
    protected virtual Query VisitBooleanQuery(BooleanQuery query)
    {
        var newClauses = query.GetClauses()
            .Select(clause => VisitBooleanClause(clause))
            .Where(clause => clause != null)
            .ToList();
        if (newClauses.Count == 0)
            return null;

        // Carry over the scoring/matching settings that a plain
        // "new BooleanQuery()" would silently reset to their defaults.
        var newQuery = new BooleanQuery(query.IsCoordDisabled());
        foreach (var clause in newClauses)
        {
            newQuery.Add(clause);
        }
        newQuery.MinimumNumberShouldMatch = query.MinimumNumberShouldMatch;

        return CopyBoost(query, newQuery);
    }

    /// <summary>
    /// Rebuilds a phrase query from its visited terms, keeping each surviving
    /// term's original position as well as the slop and the boost.
    /// </summary>
    /// <returns>The new query, or <c>null</c> if no term survived.</returns>
    protected virtual Query VisitPhraseQuery(PhraseQuery query)
    {
        var oldTerms = query.GetTerms();
        var oldPositions = query.GetPositions();

        var newQuery = new PhraseQuery();
        var added = 0;
        for (var i = 0; i < oldTerms.Length; i++)
        {
            var newTerm = VisitTerm(oldTerms[i]);
            if (newTerm == null)
                continue;

            // Re-use the recorded position so that gaps in the original
            // phrase are not collapsed when the phrase is rebuilt.
            newQuery.Add(newTerm, oldPositions[i]);
            added++;
        }
        if (added == 0)
            return null;

        newQuery.Slop = query.Slop;
        return CopyBoost(query, newQuery);
    }

    /// <summary>
    /// Rebuilds a prefix query from its visited prefix term, keeping the boost.
    /// </summary>
    /// <returns>The new query, or <c>null</c> if the prefix is missing or was removed.</returns>
    protected virtual Query VisitPrefixQuery(PrefixQuery query)
    {
        var oldTerm = query.Prefix;
        if (oldTerm == null)
            return null;

        var newTerm = VisitTerm(oldTerm);
        if (newTerm == null)
            return null;

        return CopyBoost(query, new PrefixQuery(newTerm));
    }

    /// <summary>
    /// Rebuilds a disjunction-max query from its visited sub-queries, keeping
    /// the tie-breaker multiplier and the boost.
    /// </summary>
    /// <returns>The new query, or <c>null</c> if no sub-query survived.</returns>
    /// <exception cref="InvalidOperationException">
    /// The private tie-breaker field could not be found on <see cref="DisjunctionMaxQuery"/>.
    /// </exception>
    protected virtual Query VisitDisjunctionMaxQuery(DisjunctionMaxQuery query)
    {
        var queries = new List<Query>();
        // foreach (rather than a hand-driven enumerator) also disposes the enumerator.
        foreach (Query subQuery in query)
        {
            if (subQuery == null)
                continue;

            var visited = Visit(subQuery);
            if (visited != null)
                queries.Add(visited);
        }
        if (queries.Count == 0)
            return null;

        if (TieBreakerMultiplierField == null)
            throw new InvalidOperationException("DisjunctionMaxQuery has no private 'tieBreakerMultiplier' field; cannot copy the tie-breaker.");

        var tieBreakerMultiplier = (Single)TieBreakerMultiplierField.GetValue(query);
        return CopyBoost(query, new DisjunctionMaxQuery(queries, tieBreakerMultiplier));
    }

    /// <summary>
    /// Rebuilds a term range query from its visited field and bounds, keeping the boost.
    /// A bound that is absent (or removed by <see cref="VisitTermText"/>) is treated as inclusive.
    /// </summary>
    /// <returns>The new query, or <c>null</c> if the field was removed.</returns>
    protected virtual TermRangeQuery VisitTermRangeQuery(TermRangeQuery query)
    {
        var newField = VisitField(query.Field);
        if (newField == null)
            return null;

        var oldLowerTerm = query.LowerTerm;
        var newLowerTerm = (oldLowerTerm != null) ? VisitTermText(oldLowerTerm) : null;
        var newIncludesLower = query.IncludesLower || (newLowerTerm == null);

        var oldUpperTerm = query.UpperTerm;
        var newUpperTerm = (oldUpperTerm != null) ? VisitTermText(oldUpperTerm) : null;
        var newIncludesUpper = query.IncludesUpper || (newUpperTerm == null);

        var newQuery = new TermRangeQuery(newField, newLowerTerm, newUpperTerm, newIncludesLower, newIncludesUpper);
        return CopyBoost(query, newQuery);
    }

    /// <summary>
    /// Rebuilds a boolean clause from its visited query and occurrence flag.
    /// </summary>
    /// <returns>The new clause, or <c>null</c> if the clause has no query or the query was removed.</returns>
    protected virtual BooleanClause VisitBooleanClause(BooleanClause clause)
    {
        var oldQuery = clause.Query;
        if (oldQuery == null)
            return null;

        var newQuery = Visit(oldQuery);
        if (newQuery == null)
            return null;

        return new BooleanClause(newQuery, VisitOccur(clause.Occur));
    }

    /// <summary>Returns the occurrence flag unchanged; override to remap it.</summary>
    protected virtual Occur VisitOccur(Occur occur)
    {
        return occur;
    }

    /// <summary>
    /// Rebuilds a term from its visited field name and visited text.
    /// </summary>
    /// <returns>
    /// The new term, or <c>null</c> if <paramref name="term"/> is <c>null</c>
    /// or either its field or its text was removed.
    /// </returns>
    protected virtual Term VisitTerm(Term term)
    {
        if (term == null)
            return null;

        var newField = VisitField(term.Field);
        if (newField == null)
            return null;

        var newTermText = VisitTermText(term.Text);
        if (newTermText == null)
            return null;

        return new Term(newField, newTermText);
    }

    /// <summary>Returns the field name unchanged; override to rename or (via <c>null</c>) remove fields.</summary>
    protected virtual String VisitField(String field)
    {
        return field;
    }

    /// <summary>Returns the term text unchanged; override to rewrite or (via <c>null</c>) remove term text.</summary>
    protected virtual String VisitTermText(String termText)
    {
        return termText;
    }

    /// <summary>
    /// Copies the boost of <paramref name="source"/> onto <paramref name="target"/>.
    /// </summary>
    /// <param name="source">The query whose boost is read.</param>
    /// <param name="target">The query that receives the boost; may be <c>null</c>.</param>
    /// <returns><paramref name="target"/>, to allow use in a return expression.</returns>
    protected virtual T CopyBoost<T>(Query source, T target) where T : Query
    {
        if (target != null)
        {
            target.Boost = source.Boost;
        }
        return target;
    }
}
/// <summary>
/// A <see cref="QueryVisitor"/> that turns every <see cref="TermQuery"/> into a
/// <see cref="PrefixQuery"/> on the same term, keeping the boost. All other
/// query types are handled by the base visitor.
/// </summary>
public class PrefixRewriter : QueryVisitor
{
    /// <summary>
    /// Replaces the term query with a prefix query built from its visited term.
    /// </summary>
    /// <returns>The prefix query, or <c>null</c> if the term is missing or was removed.</returns>
    protected override Query VisitTermQuery(TermQuery query)
    {
        var oldTerm = query.Term;
        if (oldTerm == null)
            return null;

        // Route the term through VisitTerm, as the base visitor does, so that
        // field/text rewrites in a derived class apply to these queries too.
        var newTerm = VisitTerm(oldTerm);
        if (newTerm == null)
            return null;

        return CopyBoost(query, new PrefixQuery(newTerm));
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment