Created
December 20, 2014 11:39
-
-
Save abriggs-eduserv/01673d2f847b30395c0d to your computer and use it in GitHub Desktop.
Sanitise user input before parsing by Lucene
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Text.RegularExpressions; | |
using System.Web; | |
using System.Web.UI.WebControls; | |
namespace ICO.Web.Analysis
{
    /// <summary>
    /// Helpers for preparing free-text user input before it is handed to the Lucene query parser.
    /// </summary>
    public static class LuceneHelper
    {
        // Characters escaped individually with a backslash. The double quote is deliberately
        // absent so phrase queries keep working; an unbalanced quote is handled separately
        // in SanitiseUserQuery.
        private const string SpecialChars = @"-+&|!(){}[]^~*?:\/";

        // Character class matching any one special character. Every member is prefixed with a
        // backslash by hand because Regex.Escape() leaves ']' (and '-') unescaped, which would
        // break the character class.
        private static readonly Regex SpecialCharPattern = new Regex(
            "([" + string.Join("", SpecialChars.Select(c => @"\" + c)) + "])");

        // AND / OR / NOT as whole words only. The word boundaries stop ordinary words that
        // merely contain those letters ("world", "sandwich", "notice") from being rewritten.
        private static readonly Regex OperatorWordPattern = new Regex(
            @"\b(?:AND|OR|NOT)\b", RegexOptions.IgnoreCase);

        /// <summary>
        /// Escapes query-syntax special characters, the boolean operator words and an
        /// unbalanced trailing double quote in a user-supplied search string.
        /// </summary>
        /// <param name="query">Raw user input; may be <c>null</c>.</param>
        /// <returns>
        /// The escaped query, or <c>null</c> when <paramref name="query"/> is <c>null</c>.
        /// </returns>
        public static string SanitiseUserQuery(string query)
        {
            if (query == null)
            {
                return null;
            }

            // Prefix every special character with a backslash.
            query = SpecialCharPattern.Replace(query, @"\$1");

            // Escape AND / OR / NOT (any casing) when they appear as standalone words.
            // The matched text is escaped as typed, so the user's casing is preserved.
            query = OperatorWordPattern.Replace(query, match => EscapeEachChar(match.Value));

            // With an odd number of quotes, escape only the final one so the remaining
            // quotes pair up. Insert() is used rather than a regex because '.' does not cross
            // line breaks, which would escape one quote per line on multi-line input.
            var quoteCount = query.Count(c => c == '"');
            if (quoteCount % 2 == 1)
            {
                query = query.Insert(query.LastIndexOf('"'), @"\");
            }

            return query;
        }

        // Returns the input with a backslash placed in front of every character.
        private static string EscapeEachChar(string value)
        {
            return string.Join("", value.Select(c => @"\" + c));
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment