Skip to content

Instantly share code, notes, and snippets.

@thoward
Created December 17, 2010 03:33
Show Gist options
  • Save thoward/744444 to your computer and use it in GitHub Desktop.
Save thoward/744444 to your computer and use it in GitHub Desktop.
An example of how to do something like Solr's copy fields in a Lucene Index...
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
namespace MultiTokenStreamExample
{
/// <summary>
/// Console demo: indexes one document into an in-memory index with two ordinary
/// fields plus a "combined" <see cref="MultiField"/> built from both of them
/// (similar in spirit to Solr's copy fields), then runs a few queries against
/// the combined field and prints the results.
/// </summary>
class Program
{
    /// <summary>
    /// Builds the index, runs the sample queries and waits for a key press.
    /// </summary>
    /// <param name="args">Command line arguments (unused).</param>
    static void Main(string[] args)
    {
        const string text_content = "This is some content. 123 is a number. 456 is also a number.";
        const string numeric_content = "789 this is normal text";

        var standard = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
        var numeric = new NumericAnalyzer();

        // The wrapper is what the combined field uses to analyze each source
        // field: "numeric_content" goes through NumericAnalyzer, everything
        // else through StandardAnalyzer.
        var perField = new PerFieldAnalyzerWrapper(standard);
        perField.AddAnalyzer("numeric_content", numeric);

        // typical index creation
        // NOTE(review): the writer itself is given the plain standard analyzer,
        // so the stand-alone "numeric_content" field is analyzed differently
        // from its copy inside "combined". Only "combined" is searched below.
        var directory = new RAMDirectory();
        var writer = new IndexWriter(directory, standard, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            var document = new Document();
            var text_content_field = new Field("text_content", text_content, Field.Store.YES, Field.Index.ANALYZED);
            var id_field = new Field("numeric_content", numeric_content, Field.Store.YES, Field.Index.ANALYZED);
            document.Add(text_content_field);
            document.Add(id_field);

            // special sauce
            var combined_field =
                new MultiField("combined",
                               new List<Fieldable> { text_content_field, id_field },
                               perField);
            document.Add(combined_field);

            writer.AddDocument(document);
            writer.Optimize();
        }
        finally
        {
            // Close the writer even when indexing fails.
            writer.Close();
        }

        // test searching against our combined field...
        Searcher searcher = new IndexSearcher(directory, true);
        try
        {
            // hits on first field
            SearchCombined(searcher, "number");
            // hits on first field
            SearchCombined(searcher, "123");
            // hits on second field
            SearchCombined(searcher, "789");
            // does not hit on second field because numeric analyzer skips that content.
            SearchCombined(searcher, "normal");
        }
        finally
        {
            // The searcher holds an open reader on the directory; release it.
            searcher.Close();
        }

        Console.ReadKey();
    }

    /// <summary>
    /// Parses <paramref name="queryString"/> against the "combined" field, runs
    /// it on <paramref name="searcher"/> and writes the outcome to the console.
    /// </summary>
    /// <param name="searcher">Searcher opened on the demo index.</param>
    /// <param name="queryString">Query text in Lucene query-parser syntax.</param>
    private static void SearchCombined(Searcher searcher, string queryString)
    {
        // Build a Query object. The analyzer is pinned to the same
        // compatibility version that Main uses for indexing.
        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
        QueryParser parser = new QueryParser("combined", analyzer);
        Query query = parser.Parse(queryString);

        // Search for the query
        Hits hits = searcher.Search(query);

        // Examine the Hits object to see if there were any matches
        int hitCount = hits.Length();
        if (hitCount == 0)
        {
            Console.WriteLine("No matches were found for \"" + queryString + "\"");
        }
        else
        {
            Console.WriteLine("Hits for \"" + queryString + "\" were found:");

            // Iterate over the Documents in the Hits object
            for (int i = 0; i < hitCount; i++)
            {
                Document doc = hits.Doc(i);

                // Print the two fields that Main adds with Field.Store.YES.
                // The id is kept in the "numeric_content" field; no field
                // named "id" is ever added to the document.
                Console.WriteLine(" " + (i + 1) + "] [id: " + doc.Get("numeric_content") + "] [text_content: " + doc.Get("text_content") + "]");
            }
        }

        Console.WriteLine();
    }
}
/// <summary>
/// An index-only field whose token stream is the concatenation of the token
/// streams of several other fields. Each source field is analyzed under its own
/// name through the supplied <see cref="PerFieldAnalyzerWrapper"/>, so every
/// source keeps its own analysis rules inside the combined field.
/// </summary>
/// <remarks>
/// The only value representation this field offers is
/// <see cref="TokenStreamValue"/>; the string, reader and binary accessors
/// return null. The field reports itself as indexed, tokenized and not stored.
/// </remarks>
public class MultiField : Fieldable
{
    private readonly string _name;
    private readonly List<Fieldable> _fields;
    private readonly PerFieldAnalyzerWrapper _analyzerWrapper;
    private float _boost = 1.0f;

    /// <summary>
    /// Creates a combined field.
    /// </summary>
    /// <param name="name">Name of the combined field in the index.</param>
    /// <param name="fields">Source fields whose tokens are concatenated, in list order.
    /// The list is kept by reference, not copied.</param>
    /// <param name="analyzerWrapper">Analyzer used for source fields that do not
    /// already carry a token stream.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="name"/> or <paramref name="fields"/> is null.
    /// </exception>
    public MultiField(string name, List<Fieldable> fields, PerFieldAnalyzerWrapper analyzerWrapper)
    {
        if (name == null)
        {
            throw new ArgumentNullException("name");
        }
        if (fields == null)
        {
            throw new ArgumentNullException("fields");
        }

        _name = name;
        _fields = fields;
        _analyzerWrapper = analyzerWrapper;
    }

    #region Fieldable Members

    /// <summary>Sets the boost reported by <see cref="GetBoost"/>.</summary>
    public void SetBoost(float boost)
    {
        _boost = boost;
    }

    /// <summary>Returns the boost of this field; 1 unless <see cref="SetBoost"/> was called.</summary>
    public float GetBoost()
    {
        return _boost;
    }

    /// <summary>Returns the name of the combined field.</summary>
    public string Name()
    {
        return _name;
    }

    /// <summary>Always null: this field has no string value, only a token stream.</summary>
    public string StringValue()
    {
        return null;
    }

    /// <summary>Always null: this field has no reader value, only a token stream.</summary>
    public System.IO.TextReader ReaderValue()
    {
        return null;
    }

    /// <summary>Always null: this field has no binary value.</summary>
    public byte[] BinaryValue()
    {
        return null;
    }

    /// <summary>
    /// Builds a new token stream that yields the tokens of every source field
    /// in list order.
    /// </summary>
    public TokenStream TokenStreamValue()
    {
        return new MultiTokenStream(_fields.Select(f => GetTokenStream(f)));
    }

    /// <summary>
    /// Returns the token stream for one source field: its own pre-built stream
    /// when it has one, otherwise its reader or string value analyzed under the
    /// source field's name.
    /// </summary>
    private TokenStream GetTokenStream(Fieldable f)
    {
        TokenStream prebuilt = f.TokenStreamValue();
        if (prebuilt != null)
        {
            return prebuilt;
        }

        if (_analyzerWrapper == null)
        {
            throw new InvalidOperationException(
                "An analyzer is required to tokenize field '" + f.Name() + "'.");
        }

        TextReader reader = f.ReaderValue();
        if (reader == null)
        {
            // A field with neither reader nor string value (e.g. a binary
            // field) contributes no tokens; StringReader rejects null.
            reader = new StringReader(f.StringValue() ?? string.Empty);
        }

        return _analyzerWrapper.TokenStream(f.Name(), reader);
    }

    /// <summary>Always false: the combined field is never stored.</summary>
    public bool IsStored()
    {
        return false;
    }

    /// <summary>Always true.</summary>
    public bool IsIndexed()
    {
        return true;
    }

    /// <summary>Always true.</summary>
    public bool IsTokenized()
    {
        return true;
    }

    /// <summary>Always false.</summary>
    public bool IsCompressed()
    {
        return false;
    }

    /// <summary>Always true.</summary>
    public bool IsTermVectorStored()
    {
        return true;
    }

    /// <summary>Always true.</summary>
    public bool IsStoreOffsetWithTermVector()
    {
        return true;
    }

    /// <summary>Always true.</summary>
    public bool IsStorePositionWithTermVector()
    {
        return true;
    }

    /// <summary>
    /// Always false: this field never exposes binary data
    /// (<see cref="BinaryValue"/> is null), whatever its sources contain.
    /// </summary>
    public bool IsBinary()
    {
        return false;
    }

    /// <summary>
    /// True only when every source field omits norms (also true for an empty source list).
    /// </summary>
    public bool GetOmitNorms()
    {
        return _fields.All(f => f.GetOmitNorms());
    }

    /// <summary>Not supported; the value is derived from the source fields.</summary>
    /// <exception cref="NotSupportedException">Always.</exception>
    public void SetOmitNorms(bool omitNorms)
    {
        throw new NotSupportedException("OmitNorms is derived from the source fields and cannot be set.");
    }

    /// <summary>Not supported; the value is derived from the source fields.</summary>
    /// <exception cref="NotSupportedException">Always.</exception>
    public void SetOmitTf(bool omitTf)
    {
        throw new NotSupportedException("OmitTf is derived from the source fields and cannot be set.");
    }

    /// <summary>
    /// True only when every source field omits term frequencies (also true for an empty source list).
    /// </summary>
    public bool GetOmitTf()
    {
        return _fields.All(f => f.GetOmitTf());
    }

    /// <summary>Always false: the field is built in memory, not lazily loaded.</summary>
    public bool IsLazy()
    {
        return false;
    }

    /// <summary>Always 0: there is no binary value.</summary>
    public int GetBinaryOffset()
    {
        return 0;
    }

    /// <summary>Always 0: there is no binary value.</summary>
    public int GetBinaryLength()
    {
        return 0;
    }

    /// <summary>Always null: there is no binary value.</summary>
    public byte[] GetBinaryValue()
    {
        return BinaryValue();
    }

    /// <summary>Always null: there is no binary value; <paramref name="result"/> is not used.</summary>
    public byte[] GetBinaryValue(byte[] result)
    {
        return null;
    }

    #endregion
}
// this is just an example to show a different kind of token stream..
/// <summary>
/// Analyzer that hands every field, whatever its name, to a
/// <see cref="NumericTokenizer"/> with no further filtering.
/// </summary>
public class NumericAnalyzer : Analyzer
{
    /// <summary>
    /// Creates the token stream for a field.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed (not used).</param>
    /// <param name="reader">Source of the text to tokenize.</param>
    /// <returns>A new <see cref="NumericTokenizer"/> reading from <paramref name="reader"/>.</returns>
    public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
    {
        TokenStream numbersOnly = new NumericTokenizer(reader);
        return numbersOnly;
    }
}
/// <summary>
/// A <see cref="CharTokenizer"/> that treats only numeric characters as
/// token characters.
/// </summary>
public class NumericTokenizer : CharTokenizer
{
    /// <summary>
    /// Creates a tokenizer over the given text source.
    /// </summary>
    /// <param name="input">Reader supplying the text to tokenize.</param>
    public NumericTokenizer(TextReader input)
        : base(input)
    {
    }

    /// <summary>
    /// Decides whether a character may be part of a token.
    /// </summary>
    /// <param name="c">Character to classify.</param>
    /// <returns>True when <see cref="Char.IsNumber(char)"/> accepts the character.</returns>
    protected override bool IsTokenChar(char c)
    {
        // Anything that is not a number is not tokenizable.
        bool tokenizable = Char.IsNumber(c);
        return tokenizable;
    }
}
/// <summary>
/// A token stream that plays several token streams back to back: all tokens of
/// the first stream, then all tokens of the second, and so on.
/// </summary>
/// <remarks>
/// Each token's attribute state is copied from the sub-stream onto this stream
/// with CaptureState/RestoreState, so consumers read attributes from this
/// instance. Offsets and position increments are passed through as the
/// sub-streams report them; no gap is inserted between streams.
/// </remarks>
public class MultiTokenStream : TokenStream
{
    private readonly List<TokenStream> _tokenStreams;

    // Index of the sub-stream currently being consumed.
    private int _currentIndex;

    /// <summary>
    /// Creates a stream over the given sub-streams, consumed in enumeration order.
    /// </summary>
    /// <param name="tokenStreams">Sub-streams to concatenate; enumerated once, here.</param>
    /// <exception cref="ArgumentNullException"><paramref name="tokenStreams"/> is null.</exception>
    public MultiTokenStream(IEnumerable<TokenStream> tokenStreams)
    {
        if (tokenStreams == null)
        {
            throw new ArgumentNullException("tokenStreams");
        }

        _tokenStreams = new List<TokenStream>(tokenStreams);
    }

    /// <summary>Clears the attributes of this stream and of every sub-stream.</summary>
    public override void ClearAttributes()
    {
        base.ClearAttributes();
        foreach (TokenStream tokenStream in _tokenStreams)
        {
            tokenStream.ClearAttributes();
        }
    }

    /// <summary>
    /// Advances to the next token of the concatenated sequence.
    /// </summary>
    /// <returns>
    /// True when a token was produced and its state copied onto this stream;
    /// false once every sub-stream is exhausted.
    /// </returns>
    public override bool IncrementToken()
    {
        while (_currentIndex < _tokenStreams.Count)
        {
            TokenStream current = _tokenStreams[_currentIndex];
            if (current.IncrementToken())
            {
                // Copy state only for a real token. Reporting success right
                // after merely switching streams would emit a bogus token
                // carrying the exhausted stream's leftover state.
                RestoreState(current.CaptureState());
                return true;
            }

            // Current sub-stream is exhausted (possibly empty): try the next one.
            _currentIndex++;
        }

        return false;
    }

    /// <summary>
    /// Rewinds to the first sub-stream and resets every sub-stream.
    /// </summary>
    public override void Reset()
    {
        foreach (TokenStream tokenStream in _tokenStreams)
        {
            tokenStream.Reset();
        }

        _currentIndex = 0;
    }

    /// <summary>
    /// Closes every sub-stream, then this stream.
    /// </summary>
    public override void Close()
    {
        foreach (TokenStream tokenStream in _tokenStreams)
        {
            tokenStream.Close();
        }

        base.Close();
    }
}
//// FROM: http://stackoverflow.com/questions/2925652/how-to-string-multiple-textreaders-together
//public static class Extensions
//{
// public static TextReader Union(this TextReader first, TextReader second)
// {
// return new ChainedTextReader(first, second);
// }
// private class ChainedTextReader : TextReader
// {
// private TextReader first;
// private TextReader second;
// private bool readFirst = true;
// public ChainedTextReader(TextReader first, TextReader second)
// {
// this.first = first;
// this.second = second;
// }
// public override int Peek()
// {
// if (readFirst)
// {
// return first.Peek();
// }
// else
// {
// return second.Peek();
// }
// }
// public override int Read()
// {
// if (readFirst)
// {
// int value = first.Read();
// if (value == -1)
// {
// readFirst = false;
// }
// else
// {
// return value;
// }
// }
// return second.Read();
// }
// public override void Close()
// {
// first.Close();
// second.Close();
// }
// protected override void Dispose(bool disposing)
// {
// base.Dispose(disposing);
// if (disposing)
// {
// first.Dispose();
// second.Dispose();
// }
// }
// }
//}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment