Skip to content

Instantly share code, notes, and snippets.

@briandonahue
Created August 4, 2012 03:59
Show Gist options
  • Save briandonahue/3254252 to your computer and use it in GitHub Desktop.
Save briandonahue/3254252 to your computer and use it in GitHub Desktop.
using System;
using System.Collections.Generic;
using System.Configuration;
using System.Globalization;
using System.IO;
using Hercules.Infrastructure.Search.Mapping;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Version = Lucene.Net.Util.Version;
using Hercules.Infrastructure;
using System.Linq;
namespace Hercules.Infrastructure.Search.QueryMaps
{
    public abstract class QueryMapBase<T> : IQueryMapper<T> where T : ISearchCriteria
    {
        readonly string baseIndexPath = ConfigurationManager.AppSettings["LuceneIndexDirectory"];
        readonly FSDirectory indexDir;
        IndexReader reader;
        protected QueryMapBase()
        {
            var indexPath = Path.Combine(baseIndexPath, typeof (T).Name.Replace("SearchCriteria", ""));
            indexDir = FSDirectory.Open(new DirectoryInfo(indexPath));
            reader = IndexReader.Open(indexDir, true);
        }
        public abstract Lucene.Net.Search.Query Map(T criteria);
    
        protected void AddPrefixQuery(string fieldName, string text, BooleanQuery query)
        {
            if (query == null) return;
            if (string.IsNullOrEmpty(text)) return;
                query.Add(new PrefixQuery(new Term(fieldName, text.ToLower())),
                          BooleanClause.Occur.MUST);
        }
        protected void AddPrefixQueries(string fieldName, IEnumerable<string> keywords, BooleanQuery query)
        {
            if (query == null || keywords.IsNullOrEmpty()) return;
            
            var statusQuery = new BooleanQuery();
            keywords.Each(keyword =>
            {
                foreach (string term in keyword.ToLower(CultureInfo.InvariantCulture).Split(' '))
                {
                    statusQuery.Add(new PrefixQuery(new Term(fieldName, term)),
                              BooleanClause.Occur.SHOULD);
                }
            });
            query.Add(statusQuery, BooleanClause.Occur.MUST);
        }
        protected void AddPhraseQuery(string fieldName, string phrase, BooleanQuery query)
        {
            if (query == null || string.IsNullOrEmpty(phrase)) return;
            // if only one term, a prefix query will suffice
            var terms = phrase.ToLower().Split(' ');
            if (terms.Count() == 1)
            {
                AddPrefixQuery(fieldName, phrase, query);
                return;
            }
            // if multiple terms, we want to match phrase with wildcard, such as "wall street jou*"
            // The process for doing this in Lucene is horrid:
            // Taken mostly from http://stackoverflow.com/questions/5075304/how-to-use-a-multiphrasequery
            var phraseQuery = new MultiPhraseQuery();
            // Add all but last term as regular terms
            terms.Take(terms.Count() - 1).Each(t => phraseQuery.Add(new Term(fieldName, t)));
            // get last term, and retrieve ALL matching terms, and add as an OR block
            var lastTermText = terms.Last();
            var lastTerm = new Term(fieldName, lastTermText);
            var termEnum = reader.Terms(lastTerm);
            var fuzzyTerms = new List<Term>();
            do {
                var currTerm = termEnum.Term();
                if (currTerm.Field() != fieldName || !currTerm.Text().StartsWith(lastTermText)) break;
                fuzzyTerms.Add(termEnum.Term());
            } while (termEnum.Next());
            // add the original term, mostly in case no terms matched, Lucene throws an error
            fuzzyTerms.Add(lastTerm);
            phraseQuery.Add(fuzzyTerms.ToArray());
            query.Add(phraseQuery, BooleanClause.Occur.MUST);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment