Skip to content

Instantly share code, notes, and snippets.

@SebastianStehle
Created November 9, 2021 19:44
Show Gist options
  • Save SebastianStehle/f8f3d27d99006903235f0c46eb2c349d to your computer and use it in GitHub Desktop.
Save SebastianStehle/f8f3d27d99006903235f0c46eb2c349d to your computer and use it in GitHub Desktop.
Parses Lucene query syntax and converts it to the MongoDB Atlas Search query format.
// See https://aka.ms/new-console-template for more information
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers.Classic;
using Lucene.Net.Search;
using Lucene.Net.Util;
using MongoDB.Bson;
using MongoDB.Bson.IO;
using System.Globalization;
using System.Text;
// Sample query strings: plain terms, phrases, fielded clauses, wildcards,
// fuzzy and proximity searches, boosts, grouping and a range.
string[] samples =
{
    "hello",
    "\"hello dolly\"",
    "title:\"The Right Way\" AND text:go",
    "title:\"Do it right\" AND right",
    "te?t",
    "test*",
    "roam~",
    "roam~2",
    "\"jakarta apache\"~10",
    "\"jakarta apache\"^4 \"Apache Lucene\"",
    "(jakarta OR apache) AND website",
    "title:(+return +\"pink panther\")",
    "mod_date:[20020101 TO 20030101]"
};

// Terms without an explicit field are parsed against the placeholder field "*".
var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
var queryParser = new QueryParser(LuceneVersion.LUCENE_48, "*", analyzer);

var converter = new Visitor();
var jsonSettings = new JsonWriterSettings { Indent = true };

// For every sample print the raw input, the parsed Lucene query and the converted document.
foreach (var sample in samples)
{
    Console.WriteLine(sample);

    var parsed = queryParser.Parse(sample);
    Console.WriteLine(parsed.ToString());

    var converted = converter.Visit(parsed);
    Console.WriteLine(converted.ToJson(jsonSettings));

    Console.WriteLine();
}
/// <summary>
/// Converts a parsed Lucene <see cref="Query"/> tree into a BSON document that uses
/// MongoDB Atlas Search operators (<c>compound</c>, <c>text</c>, <c>phrase</c>,
/// <c>wildcard</c> and <c>range</c>).
/// </summary>
class Visitor
{
    // Optional mapping from Lucene field names to document paths.
    private readonly Func<string, string>? fieldConverter;

    /// <summary>
    /// Creates a new visitor.
    /// </summary>
    /// <param name="fieldConverter">
    /// Optional function that maps a Lucene field name to the path used in the output.
    /// It is never invoked for the placeholder field <c>*</c>.
    /// </param>
    public Visitor(Func<string, string>? fieldConverter = null)
    {
        this.fieldConverter = fieldConverter;
    }

    /// <summary>
    /// Converts the given query (recursively for boolean queries).
    /// </summary>
    /// <param name="query">The Lucene query to convert.</param>
    /// <returns>A document with a single operator element describing the query.</returns>
    /// <exception cref="NotSupportedException">
    /// The query type is not handled, or the query cannot be expressed (for example
    /// a term range with non-numeric bounds or a range without any bound).
    /// </exception>
    public BsonDocument Visit(Query query)
    {
        switch (query)
        {
            case BooleanQuery booleanQuery:
                return VisitBoolean(booleanQuery);
            case TermQuery termQuery:
                return VisitTerm(termQuery);
            case PhraseQuery phraseQuery:
                return VisitPhrase(phraseQuery);
            case WildcardQuery wildcardQuery:
                return VisitWildcard(wildcardQuery);
            case PrefixQuery prefixQuery:
                return VisitPrefix(prefixQuery);
            case FuzzyQuery fuzzyQuery:
                return VisitFuzzy(fuzzyQuery);
            case NumericRangeQuery<float> rangeQuery:
                return VisitNumericRange(rangeQuery);
            case NumericRangeQuery<double> rangeQuery:
                return VisitNumericRange(rangeQuery);
            case NumericRangeQuery<int> rangeQuery:
                return VisitNumericRange(rangeQuery);
            case NumericRangeQuery<long> rangeQuery:
                return VisitNumericRange(rangeQuery);
            case TermRangeQuery termRangeQuery:
                return VisitTermRange(termRangeQuery);
            default:
                throw new NotSupportedException(
                    $"Queries of type '{query?.GetType().Name ?? "null"}' cannot be converted.");
        }
    }

    /// <summary>
    /// Converts a term range into a <c>range</c> operator. Only numeric bounds are supported;
    /// a missing (open) bound is omitted from the output.
    /// </summary>
    private BsonDocument VisitTermRange(TermRangeQuery termRangeQuery)
    {
        var doc = new BsonDocument
        {
            ["path"] = GetPath(termRangeQuery.Field)
        };

        var hasLower = AddTermBound(doc, termRangeQuery.IncludesLower ? "gte" : "gt", termRangeQuery.LowerTerm);
        var hasUpper = AddTermBound(doc, termRangeQuery.IncludesUpper ? "lte" : "lt", termRangeQuery.UpperTerm);

        if (!hasLower && !hasUpper)
        {
            throw new NotSupportedException("Range queries need at least one bound.");
        }

        ApplyBoost(termRangeQuery, doc);

        return new BsonDocument
        {
            ["range"] = doc
        };
    }

    /// <summary>
    /// Converts a numeric range into a <c>range</c> operator. A missing (open) bound is omitted.
    /// </summary>
    private BsonDocument VisitNumericRange<T>(NumericRangeQuery<T> rangeQuery) where T : struct, IComparable<T>
    {
        var minField = rangeQuery.IncludesMin ? "gte" : "gt";
        // The inclusivity of the upper bound is controlled by IncludesMax, not IncludesMin.
        var maxField = rangeQuery.IncludesMax ? "lte" : "lt";

        var doc = new BsonDocument
        {
            ["path"] = GetPath(rangeQuery.Field)
        };

        var hasMin = AddBound(doc, minField, rangeQuery.Min);
        var hasMax = AddBound(doc, maxField, rangeQuery.Max);

        if (!hasMin && !hasMax)
        {
            throw new NotSupportedException("Range queries need at least one bound.");
        }

        ApplyBoost(rangeQuery, doc);

        return new BsonDocument
        {
            ["range"] = doc
        };
    }

    /// <summary>
    /// Converts a fuzzy query into a <c>text</c> operator with <c>fuzzy</c> options.
    /// </summary>
    private BsonDocument VisitFuzzy(FuzzyQuery fuzzyQuery)
    {
        var doc = CreateDefaultDoc(fuzzyQuery, fuzzyQuery.Term);

        // Zero edits means an exact match, so no fuzzy options are emitted.
        if (fuzzyQuery.MaxEdits > 0)
        {
            var fuzzy = new BsonDocument
            {
                ["maxEdits"] = fuzzyQuery.MaxEdits,
            };

            if (fuzzyQuery.PrefixLength > 0)
            {
                fuzzy["prefixLength"] = fuzzyQuery.PrefixLength;
            }

            doc["fuzzy"] = fuzzy;
        }

        return new BsonDocument
        {
            ["text"] = doc
        };
    }

    /// <summary>
    /// Converts a prefix query into a <c>wildcard</c> operator.
    /// </summary>
    private BsonDocument VisitPrefix(PrefixQuery prefixQuery)
    {
        var prefix = prefixQuery.Prefix;

        // The Lucene prefix term holds only the literal prefix, so the trailing '*' has
        // to be added back. Wildcard metacharacters inside the literal are escaped.
        var doc = CreateDefaultDoc(prefixQuery, prefix.Field, EscapeWildcard(prefix.Text) + "*");

        return new BsonDocument
        {
            ["wildcard"] = doc
        };
    }

    /// <summary>
    /// Converts a wildcard query into a <c>wildcard</c> operator; the pattern is passed through unchanged.
    /// </summary>
    private BsonDocument VisitWildcard(WildcardQuery wildcardQuery)
    {
        var doc = CreateDefaultDoc(wildcardQuery, wildcardQuery.Term);

        return new BsonDocument
        {
            ["wildcard"] = doc
        };
    }

    /// <summary>
    /// Converts a phrase query into a <c>phrase</c> operator.
    /// </summary>
    private BsonDocument VisitPhrase(PhraseQuery phraseQuery)
    {
        var terms = phraseQuery.GetTerms();

        if (terms.Length == 0)
        {
            throw new NotSupportedException("Phrase queries without terms cannot be converted.");
        }

        var doc = new BsonDocument
        {
            ["path"] = GetPath(terms[0].Field),
            // An array value would be treated as a list of alternative phrases,
            // therefore the terms are joined into one phrase string.
            ["query"] = string.Join(" ", terms.Select(x => x.Text))
        };

        if (phraseQuery.Slop != 0)
        {
            doc["slop"] = phraseQuery.Slop;
        }

        ApplyBoost(phraseQuery, doc);

        return new BsonDocument
        {
            ["phrase"] = doc
        };
    }

    /// <summary>
    /// Converts a single term into a <c>text</c> operator.
    /// </summary>
    private BsonDocument VisitTerm(TermQuery termQuery)
    {
        var doc = CreateDefaultDoc(termQuery, termQuery.Term);

        return new BsonDocument
        {
            ["text"] = doc
        };
    }

    /// <summary>
    /// Converts a boolean query into a <c>compound</c> operator, grouping the converted
    /// clauses into <c>must</c>, <c>mustNot</c> and <c>should</c>.
    /// </summary>
    private BsonDocument VisitBoolean(BooleanQuery query)
    {
        // The arrays are created lazily so that unused clause kinds are not emitted.
        BsonArray? musts = null;
        BsonArray? mustNots = null;
        BsonArray? shoulds = null;

        foreach (var clause in query.Clauses)
        {
            var converted = Visit(clause.Query);

            switch (clause.Occur)
            {
                case Occur.MUST:
                    musts ??= new BsonArray();
                    musts.Add(converted);
                    break;
                case Occur.SHOULD:
                    shoulds ??= new BsonArray();
                    shoulds.Add(converted);
                    break;
                case Occur.MUST_NOT:
                    mustNots ??= new BsonArray();
                    mustNots.Add(converted);
                    break;
            }
        }

        var doc = new BsonDocument();

        if (musts != null)
        {
            doc.Add("must", musts);
        }

        if (mustNots != null)
        {
            doc.Add("mustNot", mustNots);
        }

        if (shoulds != null)
        {
            doc.Add("should", shoulds);
        }

        ApplyBoost(query, doc);

        return new BsonDocument
        {
            ["compound"] = doc
        };
    }

    /// <summary>
    /// Builds the operator body shared by term based queries: query text, optional boost and path.
    /// </summary>
    private BsonDocument CreateDefaultDoc(Query query, Term term)
    {
        return CreateDefaultDoc(query, term.Field, term.Text);
    }

    private BsonDocument CreateDefaultDoc(Query query, string field, string text)
    {
        var doc = new BsonDocument
        {
            ["query"] = text
        };

        ApplyBoost(query, doc);

        doc["path"] = GetPath(field);

        return doc;
    }

    /// <summary>
    /// Resolves the path for a Lucene field. Names containing <c>*</c> are emitted as a
    /// wildcard path document instead of a plain string.
    /// </summary>
    private BsonValue GetPath(string field)
    {
        if (field != "*" && fieldConverter != null)
        {
            field = fieldConverter(field);
        }

        if (field.Contains('*'))
        {
            return new BsonDocument
            {
                ["wildcard"] = field
            };
        }

        return field;
    }

    /// <summary>
    /// Adds the score boost to the operator body when the query has a non-default boost.
    /// </summary>
    private static void ApplyBoost(Query query, BsonDocument doc)
    {
        // 1 is the Lucene default and means "no boost".
        if (query.Boost != 1)
        {
            doc["score"] = new BsonDocument
            {
                ["boost"] = new BsonDocument
                {
                    ["value"] = (double)query.Boost
                }
            };
        }
    }

    /// <summary>
    /// Adds a range bound to the document unless the value is null (open bound).
    /// </summary>
    /// <returns>True when the bound has been added.</returns>
    private static bool AddBound(BsonDocument doc, string name, object? value)
    {
        if (value is null)
        {
            return false;
        }

        doc[name] = BsonValue.Create(value);

        return true;
    }

    /// <summary>
    /// Adds a numeric bound parsed from the raw term bytes; a null term is an open bound.
    /// </summary>
    /// <exception cref="NotSupportedException">The term is not a number.</exception>
    private static bool AddTermBound(BsonDocument doc, string name, BytesRef? term)
    {
        if (term is null)
        {
            return false;
        }

        if (!TryParseValue(term, out var value))
        {
            throw new NotSupportedException("Only numeric range bounds are supported.");
        }

        return AddBound(doc, name, value);
    }

    /// <summary>
    /// Escapes the wildcard metacharacters (<c>\</c>, <c>*</c>, <c>?</c>) in a literal text.
    /// </summary>
    private static string EscapeWildcard(string text)
    {
        var builder = new StringBuilder(text.Length + 1);

        foreach (var c in text)
        {
            if (c is '\\' or '*' or '?')
            {
                builder.Append('\\');
            }

            builder.Append(c);
        }

        return builder.ToString();
    }

    /// <summary>
    /// Parses the term bytes as a finite number using the invariant culture.
    /// </summary>
    /// <param name="bytes">The raw term bytes, may be null.</param>
    /// <param name="result">
    /// A boxed <see cref="int"/> or <see cref="long"/> for whole numbers that fit,
    /// otherwise a boxed <see cref="double"/>.
    /// </param>
    /// <returns>False when the bytes are null or do not represent a finite number.</returns>
    private static bool TryParseValue(BytesRef bytes, out object result)
    {
        result = null!;

        if (bytes is null)
        {
            return false;
        }

        var text = Encoding.UTF8.GetString(bytes.Bytes, bytes.Offset, bytes.Length);

        if (!double.TryParse(text, NumberStyles.Any, CultureInfo.InvariantCulture, out var number) ||
            !double.IsFinite(number))
        {
            return false;
        }

        // 2^63 is the first double above long.MaxValue; casting such a value to long is not
        // well defined, so whole numbers outside the long range stay doubles.
        var fitsLong = number >= long.MinValue && number < 9223372036854775808d;

        if (number != Math.Round(number) || !fitsLong)
        {
            result = number;
            return true;
        }

        var integer = (long)number;

        // Box as int or long explicitly; a conditional expression would widen both branches to long.
        if (integer <= int.MaxValue && integer >= int.MinValue)
        {
            result = (int)integer;
        }
        else
        {
            result = integer;
        }

        return true;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment