Skip to content

Instantly share code, notes, and snippets.

@chrisrichards
Created October 13, 2010 13:57
Show Gist options
  • Save chrisrichards/624076 to your computer and use it in GitHub Desktop.
Save chrisrichards/624076 to your computer and use it in GitHub Desktop.
using System.IO;
using Lucene.Net.Analysis;
namespace LuceneTest
{
/// <summary>
/// Analyzer for part-number style fields: the input is run through a
/// <see cref="SkipCharTokenizer"/> that drops separator characters and the
/// resulting stream is wrapped in a <see cref="LowerCaseFilter"/>.
/// </summary>
public class PartNumberAnalyzer : Analyzer
{
    // Characters removed from the input: space, hyphen, forward slash, backslash.
    private const string SkipCharacters = " -/\\";

    #region Overrides of Analyzer

    /// <summary>
    /// Builds the token stream for a field.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed; not used by this analyzer.</param>
    /// <param name="reader">Source of the text to tokenize.</param>
    /// <returns>A lower-casing filter over a skip-character tokenizer reading from <paramref name="reader"/>.</returns>
    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        return new LowerCaseFilter(new SkipCharTokenizer(reader, SkipCharacters));
    }

    #endregion
}
}
using Lucene.Net.Analysis;
namespace LuceneTest
{
/// <summary>
/// Tokenizer that copies its input into tokens while dropping every character
/// found in a configurable skip set. No character acts as a token delimiter:
/// a token ends only when the input is exhausted or when it reaches
/// <c>MaxWordLen</c> characters, so short inputs produce a single token.
/// </summary>
public class SkipCharTokenizer : Tokenizer
{
    // Maximum number of characters placed in one token before it is emitted.
    private const int MaxWordLen = 255;
    // Number of characters requested from the reader per read.
    private const int IoBufferSize = 1024;

    private readonly char[] _ioBuffer = new char[IoBufferSize];
    private readonly string _skipCharacters;

    // _offset:      absolute position in the input of _ioBuffer[0]
    // _bufferIndex: index of the next unread character in _ioBuffer
    // _dataLen:     number of valid characters currently in _ioBuffer
    private int _offset, _bufferIndex, _dataLen;

    /// <summary>
    /// Creates a tokenizer over <paramref name="input"/>.
    /// </summary>
    /// <param name="input">Reader supplying the text to tokenize.</param>
    /// <param name="skipCharacters">
    /// Characters to remove from the input. A null value is treated as an
    /// empty set (nothing is skipped).
    /// </param>
    public SkipCharTokenizer(System.IO.TextReader input, string skipCharacters)
        : base(input)
    {
        // Guard against null so Next() does not fail on the first character.
        _skipCharacters = skipCharacters ?? string.Empty;
    }

    /// <summary>
    /// Fills <paramref name="token"/> with the next run of non-skipped characters.
    /// </summary>
    /// <param name="token">Reusable token to populate; it is cleared first.</param>
    /// <returns>
    /// The populated <paramref name="token"/>, or null when the input is
    /// exhausted and no characters were collected.
    /// </returns>
    public override Token Next(Token token)
    {
        token.Clear();
        var length = 0;
        var start = _bufferIndex;
        // Absolute position just past the last accepted character. Tracked
        // separately from length because skipped characters inside the token
        // make the source span longer than the term text.
        var end = start;
        var buffer = token.TermBuffer();
        while (true) {
            if (_bufferIndex >= _dataLen) {
                // Current buffer consumed: advance the absolute offset and refill.
                _offset += _dataLen;
                _dataLen = input.Read(_ioBuffer, 0, _ioBuffer.Length);
                if (_dataLen <= 0) {
                    if (length > 0) {
                        // End of input with a pending token: emit it.
                        break;
                    }
                    return null;
                }
                _bufferIndex = 0;
            }
            var c = _ioBuffer[_bufferIndex++];
            if (_skipCharacters.IndexOf(c) != -1) {
                continue;
            }
            if (length == 0) {
                // First accepted character marks the token's start offset.
                start = _offset + _bufferIndex - 1;
            }
            else if (length == buffer.Length) {
                buffer = token.ResizeTermBuffer(1 + length);
            }
            buffer[length++] = c;
            end = _offset + _bufferIndex;
            if (length == MaxWordLen) {
                break;
            }
        }
        token.SetTermLength(length);
        token.SetStartOffset(start);
        token.SetEndOffset(end);
        return token;
    }

    /// <summary>
    /// Points the tokenizer at a new reader and clears all buffer positions so
    /// offsets start again from zero.
    /// </summary>
    /// <param name="reader">The new input source.</param>
    public override void Reset(System.IO.TextReader reader)
    {
        base.Reset(reader);
        _bufferIndex = 0;
        _offset = 0;
        _dataLen = 0;
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment