Skip to content

Instantly share code, notes, and snippets.

@mattwarren
Created June 30, 2011 13:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mattwarren/1056231 to your computer and use it in GitHub Desktop.
Save mattwarren/1056231 to your computer and use it in GitHub Desktop.
Lucene string compare bug
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Threading;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Version = Lucene.Net.Util.Version;
using System.Linq;
namespace SimpleLuceneTest
{
/// <summary>
/// Console repro: indexes a fixed set of four-letter keywords into an in-memory
/// Lucene index, lists the indexed terms, then runs the prefix query "Name:da*"
/// twice - once under the starting thread culture and once after switching the
/// thread culture to "da" - so the two outputs can be compared.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point. Builds the index, dumps its terms and runs the same query
    /// list before and after the culture change.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        var dir = new RAMDirectory();
        try
        {
            var analyzer = new KeywordAnalyzer();
            BuildIndex(dir, analyzer);

            IndexSearcher searcher = new IndexSearcher(dir, true);
            try
            {
                PrintIndexedTerms(searcher);
                Console.WriteLine();

                QueryParser queryParser = new QueryParser(Version.LUCENE_29, "", analyzer);
                var queries = new List<string> { "Name:da*" };

                // First pass: whatever culture the process started with.
                RunQueries(queries, queryParser, searcher);

                Console.WriteLine("-------------------------");
                Console.WriteLine("Changing locale to \"da\"");
                Console.WriteLine("-------------------------\n");

                var culture = CultureInfo.CreateSpecificCulture("da");
                Thread.CurrentThread.CurrentCulture = culture;
                Thread.CurrentThread.CurrentUICulture = culture;

                // Second pass: identical queries, same parser and searcher, new culture.
                RunQueries(queries, queryParser, searcher);
            }
            finally
            {
                searcher.Close();
            }
        }
        finally
        {
            dir.Close();
        }
    }

    /// <summary>
    /// Writes one document per sample value into <paramref name="dir"/>, each with a
    /// single non-stored "Name" field, then closes the writer.
    /// </summary>
    /// <param name="dir">Directory that receives the (re)created index.</param>
    /// <param name="analyzer">Analyzer handed to the index writer.</param>
    private static void BuildIndex(RAMDirectory dir, KeywordAnalyzer analyzer)
    {
        // Note: "dacb" appears twice on purpose in the original data set.
        var fields = new[]
        {
            "daab", "bcda", "dacb", "dacb",
            "aacb", "aaac", "bcbb", "acba",
            "aaaa", "dada"
        };

        var writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            foreach (var field in fields)
            {
                var doc = new Document();
                doc.Add(new Field("Name", field, Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
                writer.AddDocument(doc);
            }
        }
        finally
        {
            // Close even when AddDocument throws so the writer is not leaked.
            writer.Close(true);
        }
    }

    /// <summary>
    /// Prints every term in the searcher's index reader together with its document frequency.
    /// </summary>
    /// <param name="searcher">Searcher whose underlying reader is enumerated.</param>
    private static void PrintIndexedTerms(IndexSearcher searcher)
    {
        Console.WriteLine("Indexed Terms:");
        var termEnum = searcher.GetIndexReader().Terms();
        try
        {
            while (termEnum.Next())
            {
                var term = termEnum.Term();
                var freq = termEnum.DocFreq();
                Console.WriteLine("\t {0}:{1,-20} (Count = {2})", term.Field(), term.Text(), freq);
            }
        }
        finally
        {
            termEnum.Close();
        }
    }

    /// <summary>
    /// Parses and executes each query string, printing the parsed query, the terms a
    /// prefix query expands to, the elapsed search time, the total hit count and the
    /// ids of the top 10 hits.
    /// </summary>
    /// <param name="queries">Query strings in query-parser syntax.</param>
    /// <param name="queryParser">Parser used to turn each string into a query.</param>
    /// <param name="searcher">Searcher the queries are run against.</param>
    private static void RunQueries(IEnumerable<string> queries, QueryParser queryParser, IndexSearcher searcher)
    {
        foreach (var queryText in queries)
        {
            var query = queryParser.Parse(queryText);
            Console.WriteLine("Query Used \'{0}\'", query);

            // Single type test instead of "is" followed by "as".
            var prefixQuery = query as PrefixQuery;
            if (prefixQuery != null)
            {
                PrintMatchingTerms(prefixQuery, searcher);
            }

            var timer = Stopwatch.StartNew();
            var result = searcher.Search(query, 10);
            timer.Stop();

            // Elapsed.TotalMilliseconds is fractional; ElapsedMilliseconds is a whole
            // number, which made the "0.00" format always print ".00".
            Console.WriteLine("Query took {0:0.00} ms, there were {1} total hits",
                timer.Elapsed.TotalMilliseconds, result.totalHits);
            Console.WriteLine(String.Join(", ", result.scoreDocs.Select(x => new { DocId = x.doc })));
            Console.WriteLine();
        }
    }

    /// <summary>
    /// Prints each term (and its document frequency) produced by the prefix query's
    /// term enumeration over the searcher's index reader.
    /// </summary>
    /// <param name="prefixQuery">Prefix query whose term enumeration is listed.</param>
    /// <param name="searcher">Searcher whose underlying reader is enumerated.</param>
    private static void PrintMatchingTerms(PrefixQuery prefixQuery, IndexSearcher searcher)
    {
        var termEnum = prefixQuery.GetEnum(searcher.GetIndexReader());
        try
        {
            // The enumeration is read before the first Next() call, as the original
            // do/while did. NOTE(review): assumes Term() returns null when there is no
            // current match - confirm against the Lucene.Net FilteredTermEnum in use.
            while (termEnum.Term() != null)
            {
                Console.WriteLine("\t <" + termEnum.Term() + "> freq=" + termEnum.DocFreq());
                if (!termEnum.Next())
                {
                    break;
                }
            }
        }
        finally
        {
            termEnum.Close();
        }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment