Skip to content

Instantly share code, notes, and snippets.

@abriggs-eduserv
Created December 20, 2014 11:39
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save abriggs-eduserv/01673d2f847b30395c0d to your computer and use it in GitHub Desktop.
Save abriggs-eduserv/01673d2f847b30395c0d to your computer and use it in GitHub Desktop.
Sanitise user input before parsing by Lucene
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.UI.WebControls;
namespace ICO.Web.Analysis
{
/// <summary>
/// Helpers for turning free-text user input into a string that can be handed to the
/// Lucene query parser without being interpreted as query syntax.
/// </summary>
public static class LuceneHelper
{
    // Characters that get a backslash prefix. The double quote is deliberately absent:
    // balanced quotes are kept so phrase queries still work (see SanitiseUserQuery).
    private const string SpecialChars = @"-+&|!(){}[]^~*?:\/";

    // Character class matching any one special character. Each character is prefixed with a
    // backslash by hand because Regex.Escape() does not escape ']' for use inside a class.
    private static readonly Regex SpecialCharRegex = new Regex(
        "([" + string.Join("", SpecialChars.Select(c => @"\" + c)) + "])");

    // Boolean operator keywords, matched as whole words only so that ordinary words which
    // merely contain them ("android", "for", "nothing") are left untouched.
    private static readonly Regex OperatorWordRegex = new Regex(
        @"\b(?:AND|OR|NOT)\b",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    /// <summary>
    /// Escapes Lucene query syntax in user-supplied text.
    /// </summary>
    /// <param name="query">Raw user input; may be <c>null</c>.</param>
    /// <returns>
    /// <c>null</c> when <paramref name="query"/> is <c>null</c>; otherwise the input with
    /// every special character backslash-escaped, every stand-alone AND / OR / NOT
    /// (in any casing) escaped letter by letter, and, when the number of double quotes
    /// is odd, the last double quote escaped so the remaining quotes are balanced.
    /// </returns>
    public static string SanitiseUserQuery(string query)
    {
        if (query == null)
        {
            return null;
        }

        // Escape special characters. Backslashes typed by the user are escaped here too,
        // so every backslash-quote pair seen later was produced by this method.
        query = SpecialCharRegex.Replace(query, @"\$1");

        // Escape the operator keywords. The matched text itself is escaped (rather than a
        // fixed upper-case literal) so the user's original casing survives.
        query = OperatorWordRegex.Replace(
            query,
            match => string.Join("", match.Value.Select(c => @"\" + c)));

        // Escape the last double quote if there is an odd number of them. Working on the
        // index of the final quote keeps this correct for input that contains line breaks,
        // where a '.'-based regex would escape one quote per line.
        var quoteCount = query.Count(c => c == '"');
        if (quoteCount % 2 == 1)
        {
            query = query.Insert(query.LastIndexOf('"'), @"\");
        }

        return query;
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment