Created
November 9, 2021 19:44
-
-
Save SebastianStehle/f8f3d27d99006903235f0c46eb2c349d to your computer and use it in GitHub Desktop.
Parses Lucene query syntax and converts it to the MongoDB Atlas Search query format.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// See https://aka.ms/new-console-template for more information
using Lucene.Net.Analysis.Standard; | |
using Lucene.Net.Index; | |
using Lucene.Net.QueryParsers.Classic; | |
using Lucene.Net.Search; | |
using Lucene.Net.Util; | |
using MongoDB.Bson; | |
using MongoDB.Bson.IO; | |
using System.Globalization; | |
using System.Text; | |
// Demo driver: parses a handful of Lucene query strings and prints, for each one,
// the raw input, Lucene's own rendering of the parsed query and the converted BSON document.
string[] samples =
{
    "hello",
    "\"hello dolly\"",
    "title:\"The Right Way\" AND text:go",
    "title:\"Do it right\" AND right",
    "te?t",
    "test*",
    "roam~",
    "roam~2",
    "\"jakarta apache\"~10",
    "\"jakarta apache\"^4 \"Apache Lucene\"",
    "(jakarta OR apache) AND website",
    "title:(+return +\"pink panther\")",
    "mod_date:[20020101 TO 20030101]"
};

// "*" is the default field used for terms that carry no explicit field prefix.
var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
var luceneParser = new QueryParser(LuceneVersion.LUCENE_48, "*", analyzer);

var converter = new Visitor();
var jsonSettings = new JsonWriterSettings { Indent = true };

foreach (var sample in samples)
{
    Console.WriteLine(sample);

    var parsed = luceneParser.Parse(sample);
    Console.WriteLine(parsed.ToString());

    var converted = converter.Visit(parsed);
    Console.WriteLine(converted.ToJson(jsonSettings));

    Console.WriteLine();
}
/// <summary>
/// Converts a parsed Lucene <see cref="Query"/> tree into a <see cref="BsonDocument"/>
/// shaped like a MongoDB Atlas Search operator.
/// </summary>
class Visitor
{
    // The field name the caller uses to mean "search every field".
    private const string AllFields = "*";

    private readonly Func<string, string>? fieldConverter;

    /// <summary>
    /// Creates a new visitor.
    /// </summary>
    /// <param name="fieldConverter">
    /// Optional mapping from a Lucene field name to the document path to search.
    /// It is not invoked for the all-fields name <c>*</c>.
    /// </param>
    public Visitor(Func<string, string>? fieldConverter = null)
    {
        this.fieldConverter = fieldConverter;
    }

    /// <summary>
    /// Converts <paramref name="query"/> (recursively, for boolean queries) to its search operator document.
    /// </summary>
    /// <param name="query">The parsed Lucene query.</param>
    /// <returns>A document with a single element named after the operator.</returns>
    /// <exception cref="NotSupportedException">
    /// The query type has no conversion, or a range/phrase query cannot be expressed.
    /// </exception>
    public BsonDocument Visit(Query query)
    {
        return query switch
        {
            BooleanQuery booleanQuery => VisitBoolean(booleanQuery),
            TermQuery termQuery => VisitTerm(termQuery),
            PhraseQuery phraseQuery => VisitPhrase(phraseQuery),
            WildcardQuery wildcardQuery => VisitWilcard(wildcardQuery),
            PrefixQuery prefixQuery => VisitPrefix(prefixQuery),
            FuzzyQuery fuzzyQuery => VisitFuzzy(fuzzyQuery),
            NumericRangeQuery<float> rangeQuery => VisitNumericRange(rangeQuery),
            NumericRangeQuery<double> rangeQuery => VisitNumericRange(rangeQuery),
            NumericRangeQuery<int> rangeQuery => VisitNumericRange(rangeQuery),
            NumericRangeQuery<long> rangeQuery => VisitNumericRange(rangeQuery),
            TermRangeQuery termRangeQuery => VisitTermRange(termRangeQuery),
            _ => throw new NotSupportedException(
                $"Queries of type '{(query is null ? "null" : query.GetType().Name)}' cannot be converted.")
        };
    }

    /// <summary>
    /// Converts a term range to a <c>range</c> operator. Only numeric bounds are supported;
    /// a null bound is treated as open-ended and omitted from the output.
    /// </summary>
    private BsonDocument VisitTermRange(TermRangeQuery termRangeQuery)
    {
        var doc = new BsonDocument
        {
            ["path"] = GetPath(termRangeQuery.Field)
        };

        var hasLower = TryAddTermBound(doc, termRangeQuery.IncludesLower ? "gte" : "gt", termRangeQuery.LowerTerm);
        var hasUpper = TryAddTermBound(doc, termRangeQuery.IncludesUpper ? "lte" : "lt", termRangeQuery.UpperTerm);

        if (!hasLower && !hasUpper)
        {
            throw new NotSupportedException("A range query without any bound cannot be converted.");
        }

        ApplyBoost(termRangeQuery, doc);

        return new BsonDocument
        {
            ["range"] = doc
        };
    }

    /// <summary>
    /// Writes one parsed bound into <paramref name="doc"/>. Returns false when the bound is absent.
    /// </summary>
    private static bool TryAddTermBound(BsonDocument doc, string name, BytesRef? bound)
    {
        if (bound == null)
        {
            return false;
        }

        if (!TryParseValue(bound, out var value))
        {
            throw new NotSupportedException("Only numeric range bounds can be converted.");
        }

        doc[name] = BsonValue.Create(value);
        return true;
    }

    /// <summary>
    /// Converts a numeric range to a <c>range</c> operator, omitting bounds that are not set.
    /// </summary>
    private BsonDocument VisitNumericRange<T>(NumericRangeQuery<T> rangeQuery) where T : struct, IComparable<T>
    {
        // Boxing turns an unset nullable bound into a plain null reference.
        object? min = rangeQuery.Min;
        object? max = rangeQuery.Max;

        if (min == null && max == null)
        {
            throw new NotSupportedException("A range query without any bound cannot be converted.");
        }

        var doc = new BsonDocument
        {
            ["path"] = GetPath(rangeQuery.Field)
        };

        if (min != null)
        {
            doc[rangeQuery.IncludesMin ? "gte" : "gt"] = BsonValue.Create(min);
        }

        if (max != null)
        {
            // The inclusiveness of the upper bound comes from IncludesMax, not IncludesMin.
            doc[rangeQuery.IncludesMax ? "lte" : "lt"] = BsonValue.Create(max);
        }

        ApplyBoost(rangeQuery, doc);

        return new BsonDocument
        {
            ["range"] = doc
        };
    }

    /// <summary>
    /// Converts a fuzzy query to a <c>text</c> operator with a <c>fuzzy</c> option.
    /// </summary>
    private BsonDocument VisitFuzzy(FuzzyQuery fuzzyQuery)
    {
        var doc = CreateDefaultDoc(fuzzyQuery, fuzzyQuery.Term);

        if (fuzzyQuery.MaxEdits > 0)
        {
            var fuzzy = new BsonDocument
            {
                ["maxEdits"] = fuzzyQuery.MaxEdits,
            };

            if (fuzzyQuery.PrefixLength > 0)
            {
                fuzzy["prefixLength"] = fuzzyQuery.PrefixLength;
            }

            doc["fuzzy"] = fuzzy;
        }

        return new BsonDocument
        {
            ["text"] = doc
        };
    }

    /// <summary>
    /// Converts a prefix query to a <c>wildcard</c> operator matching everything that starts with the prefix.
    /// </summary>
    private BsonDocument VisitPrefix(PrefixQuery prefixQuery)
    {
        var doc = CreateDefaultDoc(prefixQuery, prefixQuery.Prefix);

        // The prefix term holds only the literal prefix; without the trailing '*' the
        // wildcard operator would match the exact term instead of everything starting with it.
        doc["query"] = EscapeWildcard(prefixQuery.Prefix.Text) + "*";

        return new BsonDocument
        {
            ["wildcard"] = doc
        };
    }

    /// <summary>
    /// Converts a wildcard query to a <c>wildcard</c> operator, passing the pattern through unchanged.
    /// </summary>
    private BsonDocument VisitWilcard(WildcardQuery wildcardQuery)
    {
        var doc = CreateDefaultDoc(wildcardQuery, wildcardQuery.Term);

        return new BsonDocument
        {
            ["wildcard"] = doc
        };
    }

    /// <summary>
    /// Converts a phrase query to a <c>phrase</c> operator.
    /// </summary>
    private BsonDocument VisitPhrase(PhraseQuery phraseQuery)
    {
        var terms = phraseQuery.GetTerms();

        if (terms.Length == 0)
        {
            throw new NotSupportedException("A phrase query without terms cannot be converted.");
        }

        var doc = new BsonDocument
        {
            ["path"] = GetPath(terms[0].Field),
            // An array would be read as several alternative phrases, so the terms are
            // joined into one string to keep them a single ordered phrase.
            ["query"] = string.Join(" ", terms.Select(x => x.Text)),
        };

        if (phraseQuery.Slop != 0)
        {
            doc["slop"] = phraseQuery.Slop;
        }

        ApplyBoost(phraseQuery, doc);

        return new BsonDocument
        {
            ["phrase"] = doc
        };
    }

    /// <summary>
    /// Converts a single-term query to a <c>text</c> operator.
    /// </summary>
    private BsonDocument VisitTerm(TermQuery termQuery)
    {
        var doc = CreateDefaultDoc(termQuery, termQuery.Term);

        return new BsonDocument
        {
            ["text"] = doc
        };
    }

    /// <summary>
    /// Converts a boolean query to a <c>compound</c> operator, grouping the converted
    /// clauses into <c>must</c>, <c>mustNot</c> and <c>should</c> arrays.
    /// </summary>
    private BsonDocument VisitBoolean(BooleanQuery query)
    {
        var doc = new BsonDocument();

        BsonArray? musts = null;
        BsonArray? mustNots = null;
        BsonArray? shoulds = null;

        foreach (var clause in query.Clauses)
        {
            var converted = Visit(clause.Query);

            switch (clause.Occur)
            {
                case Occur.MUST:
                    musts ??= new BsonArray();
                    musts.Add(converted);
                    break;
                case Occur.SHOULD:
                    shoulds ??= new BsonArray();
                    shoulds.Add(converted);
                    break;
                case Occur.MUST_NOT:
                    mustNots ??= new BsonArray();
                    mustNots.Add(converted);
                    break;
            }
        }

        // Only clause kinds that actually occurred are emitted.
        if (musts != null)
        {
            doc.Add("must", musts);
        }

        if (mustNots != null)
        {
            doc.Add("mustNot", mustNots);
        }

        if (shoulds != null)
        {
            doc.Add("should", shoulds);
        }

        ApplyBoost(query, doc);

        return new BsonDocument
        {
            ["compound"] = doc
        };
    }

    /// <summary>
    /// Builds the body shared by the term based operators: <c>query</c>, optional score boost and <c>path</c>.
    /// </summary>
    private BsonDocument CreateDefaultDoc(Query query, Term term)
    {
        var doc = new BsonDocument
        {
            ["query"] = term.Text
        };

        ApplyBoost(query, doc);

        doc["path"] = GetPath(term.Field);

        return doc;
    }

    /// <summary>
    /// Resolves the search path for a Lucene field name. The all-fields name becomes a
    /// wildcard path document; any other name goes through the optional field converter.
    /// </summary>
    private BsonValue GetPath(string field)
    {
        if (field == AllFields)
        {
            // A plain "*" string would address a field literally named "*".
            return new BsonDocument
            {
                ["wildcard"] = AllFields
            };
        }

        return fieldConverter != null ? fieldConverter(field) : field;
    }

    /// <summary>
    /// Adds a <c>score.boost.value</c> option to <paramref name="doc"/> when the query boost differs from 1.
    /// </summary>
    private static void ApplyBoost(Query query, BsonDocument doc)
    {
        if (query.Boost != 1)
        {
            doc["score"] = new BsonDocument
            {
                ["boost"] = new BsonDocument
                {
                    ["value"] = query.Boost
                }
            };
        }
    }

    /// <summary>
    /// Escapes the characters that have a special meaning in a wildcard pattern.
    /// </summary>
    private static string EscapeWildcard(string text)
    {
        // The escape character itself must be handled first so it is not escaped twice.
        return text.Replace("\\", "\\\\").Replace("*", "\\*").Replace("?", "\\?");
    }

    /// <summary>
    /// Parses a range bound as a number using the invariant culture.
    /// </summary>
    /// <param name="bytes">The raw bound; null yields false.</param>
    /// <param name="result">
    /// An <see cref="int"/> or <see cref="long"/> for integral values that fit,
    /// otherwise a <see cref="double"/>. Only meaningful when the method returns true.
    /// </param>
    /// <returns>True when the bound is a finite number.</returns>
    private static bool TryParseValue(BytesRef bytes, out object result)
    {
        result = null!;

        if (bytes == null)
        {
            return false;
        }

        var text = Encoding.ASCII.GetString(bytes.Bytes, bytes.Offset, bytes.Length);

        if (!double.TryParse(text, NumberStyles.Any, CultureInfo.InvariantCulture, out var number))
        {
            return false;
        }

        if (double.IsNaN(number) || double.IsInfinity(number))
        {
            return false;
        }

        // Casting a double outside the long range is unspecified, so such values stay doubles.
        // (double)long.MaxValue rounds up to 2^63, hence the strict upper comparison.
        var fitsInLong = number >= long.MinValue && number < long.MaxValue;

        if (fitsInLong && number == Math.Round(number))
        {
            var integer = (long)number;

            // Separate branches keep the boxed type int or long; a conditional
            // expression would promote both results to long.
            if (integer <= int.MaxValue && integer >= int.MinValue)
            {
                result = (int)integer;
            }
            else
            {
                result = integer;
            }
        }
        else
        {
            result = number;
        }

        return true;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment