Created
February 16, 2018 06:25
-
-
Save lynxz/03c811bfc9c6d656c76c629785b77f35 to your computer and use it in GitHub Desktop.
Extends the RavenDB 4.0 IDocumentQuery to parse longer Lucene query strings into RQL.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using Raven.Client.Documents.Session; | |
namespace Finisar.Phoenix.Raven { | |
/// <summary>
/// Extension methods for applying a raw Lucene query string to an
/// <see cref="IDocumentQuery{T}"/>.
/// </summary>
public static class LuceneExstensions {
    /// <summary>
    /// Hands <paramref name="luceneQuery"/> to a new <c>LuceneParser</c> and lets it
    /// process <paramref name="query"/>.
    /// </summary>
    /// <typeparam name="T">Document type of the query.</typeparam>
    /// <param name="query">The document query the parser operates on.</param>
    /// <param name="luceneQuery">The Lucene query string to parse.</param>
    /// <returns>Whatever the parser's <c>Process</c> call returns for <paramref name="query"/>.</returns>
    public static IDocumentQuery<T> WhereLucene<T>( this IDocumentQuery<T> query, string luceneQuery ) =>
        new LuceneParser( luceneQuery ).Process( query );
}
/// <summary>
/// Small hand-written scanner that walks a Lucene query string and replays it onto an
/// <see cref="IDocumentQuery{T}"/>: boolean keywords become <c>AndAlso</c>/<c>OrElse</c>/
/// <c>NegateNext</c>, parentheses become sub-clauses, and every <c>field:value</c> pair is
/// forwarded to the query's own <c>WhereLucene( field, value )</c>.
/// </summary>
/// <remarks>
/// The parser keeps a single cursor (<c>_pos</c>) into the trimmed input, so an instance
/// is meant to be used for one <see cref="Process{T}"/> call.
/// </remarks>
class LuceneParser {
    const char Colon = ':';
    const string ColonString = ":";
    const char LeftParentheses = '(';
    const string LeftParentheseString = "(";
    const char RightParentheses = ')';
    const string RightParenthesesString = ")";
    const char LeftSquareBracket = '[';
    const char RightSquareBracket = ']';
    const char LeftCurlyBrace = '{';
    const char RightCurlyBrace = '}';
    const char Quote = '"';
    const char Backslash = '\\';
    const string And = "AND";
    const string Or = "OR";
    const string Not = "NOT";

    // Trimmed query text; never null (a null input is stored as the empty string).
    readonly string _lucene;

    // Index of the next unread character in _lucene.
    int _pos;

    /// <summary>Creates a parser over <paramref name="luceneQuery"/>.</summary>
    /// <param name="luceneQuery">Lucene query text; <c>null</c> is treated as empty.</param>
    public LuceneParser( string luceneQuery ) {
        _lucene = luceneQuery?.Trim( ) ?? string.Empty;
        _pos = 0;
    }

    /// <summary>
    /// Consumes tokens from the current position and applies them to <paramref name="query"/>
    /// until the input is exhausted or a closing parenthesis is read.
    /// </summary>
    /// <remarks>
    /// An opening parenthesis opens a sub-clause and recurses; the recursive call returns
    /// when it reads the matching closing parenthesis (or runs out of input), after which
    /// the sub-clause is closed. Parenthesis balance itself is not validated.
    /// </remarks>
    /// <typeparam name="T">Document type of the query.</typeparam>
    /// <param name="query">The query to apply the parsed clauses to.</param>
    /// <returns>The same <paramref name="query"/> instance that was passed in.</returns>
    /// <exception cref="FormatException">
    /// A field name is not followed by a colon, a field has no value, or a quoted string /
    /// bracketed range is not terminated.
    /// </exception>
    public IDocumentQuery<T> Process<T>( IDocumentQuery<T> query ) {
        while( true ) {
            var token = GetToken( );

            // Empty token == end of input; ")" == end of the sub-clause our caller opened.
            if( string.IsNullOrEmpty( token ) || token.Equals( RightParenthesesString ) ) {
                break;
            }

            // Keywords are plain ASCII identifiers, so compare ordinally rather than by culture.
            if( token.Equals( And, StringComparison.OrdinalIgnoreCase ) ) {
                query.AndAlso( );
            }
            else if( token.Equals( Or, StringComparison.OrdinalIgnoreCase ) ) {
                query.OrElse( );
            }
            else if( token.Equals( Not, StringComparison.OrdinalIgnoreCase ) ) {
                query.NegateNext( );
            }
            else if( token.Equals( LeftParentheseString ) ) {
                query.OpenSubclause( );
                Process( query );
                query.CloseSubclause( );
            }
            else {
                // Anything else must be a field name immediately followed by ':' and a value.
                if( GetToken( ) != ColonString ) {
                    throw new FormatException( $"Unexpected token at position {_pos}!" );
                }
                var fieldData = GetFieldData( );
                query.WhereLucene( token, fieldData );
            }
        }
        return query;
    }

    /// <summary>
    /// Reads the next token: a single <c>:</c>, <c>(</c> or <c>)</c>, or a run of characters
    /// up to (not including) the next whitespace, colon or parenthesis.
    /// </summary>
    /// <returns>The token text, or the empty string when no input is left.</returns>
    string GetToken( ) {
        var start = ReadWhiteSpaces( );
        if( start >= _lucene.Length ) {
            return string.Empty;
        }

        switch( _lucene[ _pos ] ) {
            case Colon:
                _pos++;
                return ColonString;
            case LeftParentheses:
                _pos++;
                return LeftParentheseString;
            case RightParentheses:
                _pos++;
                return RightParenthesesString;
        }

        // A token that runs to the end of the input is still a token: stop on the bound
        // and return what was read instead of discarding it.
        while( _pos < _lucene.Length && !IsTokenTerminator( _lucene[ _pos ] ) ) {
            _pos++;
        }
        return _lucene.Substring( start, _pos - start );
    }

    /// <summary>
    /// Reads the value part of a <c>field:value</c> pair. The value ends at the first
    /// whitespace outside any bracket pair, at an unmatched closing bracket (left unread
    /// for the caller), or at the end of the input. Quoted strings are skipped as a unit.
    /// </summary>
    /// <returns>The raw value text, including any quotes and brackets.</returns>
    /// <exception cref="FormatException">
    /// The input ends before a value starts or while a bracket is still open.
    /// </exception>
    string GetFieldData( ) {
        var bracketDepth = 0;
        var start = ReadWhiteSpaces( );
        if( start >= _lucene.Length ) {
            // "field:" with nothing after the colon.
            throw new FormatException( "Unexpected end of line while parsing field data." );
        }

        while( _pos < _lucene.Length && ( bracketDepth > 0 || !char.IsWhiteSpace( _lucene[ _pos ] ) ) ) {
            if( _lucene[ _pos ] == Quote ) {
                // Leaves _pos on the closing quote; the increment below steps past it.
                ReadString( );
            }
            else if( IsLeftBracket( ) ) {
                bracketDepth++;
            }
            else if( IsRightBracket( ) ) {
                if( bracketDepth == 0 ) {
                    // Closing bracket that belongs to an enclosing sub-clause, not to this value.
                    break;
                }
                bracketDepth--;
            }
            _pos++;
        }

        // The loop can only end with an open bracket by running off the end of the input.
        if( bracketDepth > 0 ) {
            throw new FormatException( "Unexpected end of line while parsing field data." );
        }
        return _lucene.Substring( start, _pos - start );
    }

    /// <summary>True when <paramref name="c"/> ends a bare token.</summary>
    static bool IsTokenTerminator( char c ) =>
        char.IsWhiteSpace( c ) || c == Colon || c == LeftParentheses || c == RightParentheses;

    /// <summary>True when the current character is <c>(</c>, <c>[</c> or <c>{</c>. Requires <c>_pos</c> to be in range.</summary>
    bool IsLeftBracket( ) => _lucene[ _pos ] == LeftParentheses || _lucene[ _pos ] == LeftSquareBracket || _lucene[ _pos ] == LeftCurlyBrace;

    /// <summary>True when the current character is <c>)</c>, <c>]</c> or <c>}</c>. Requires <c>_pos</c> to be in range.</summary>
    bool IsRightBracket( ) => _lucene[ _pos ] == RightParentheses || _lucene[ _pos ] == RightSquareBracket || _lucene[ _pos ] == RightCurlyBrace;

    /// <summary>
    /// Skips a quoted string. On entry <c>_pos</c> is on the opening quote; on return it is
    /// on the matching closing quote. A backslash escapes the character that follows it.
    /// </summary>
    /// <exception cref="FormatException">The closing quote is missing.</exception>
    void ReadString( ) {
        var openingQuote = _pos;
        _pos++;
        while( _pos < _lucene.Length ) {
            var current = _lucene[ _pos ];
            if( current == Quote ) {
                return;
            }
            // Skip the escaped character together with its backslash so that \" and \\
            // are never mistaken for the end of the string.
            _pos += current == Backslash ? 2 : 1;
        }
        throw new FormatException( $"Unterminated quoted string starting at position {openingQuote}." );
    }

    /// <summary>Advances <c>_pos</c> past any whitespace.</summary>
    /// <returns>The new position (may equal the input length).</returns>
    int ReadWhiteSpaces( ) {
        // Bounds check first: indexing before checking would throw at end of input.
        while( _pos < _lucene.Length && char.IsWhiteSpace( _lucene[ _pos ] ) ) {
            _pos++;
        }
        return _pos;
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment