Skip to content

Instantly share code, notes, and snippets.

@kijun
Created February 3, 2010 07:14
Show Gist options
  • Save kijun/293436 to your computer and use it in GitHub Desktop.
Save kijun/293436 to your computer and use it in GitHub Desktop.
package org.apache.lucene.analysis;
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
 * Token filter that lets through only those tokens whose term length falls
 * inside the inclusive range {@code [min, max]}. Tokens that are shorter than
 * {@code min} or longer than {@code max} are silently skipped.
 */
public final class LengthFilter extends TokenFilter {

  /** Smallest term length (inclusive) that is allowed through. */
  final int min;

  /** Largest term length (inclusive) that is allowed through. */
  final int max;

  // Term attribute obtained via addAttribute(); each call to
  // input.incrementToken() changes what this attribute reports.
  private final TermAttribute termAtt;

  /**
   * Creates a filter that drops terms which are too short or too long.
   *
   * @param in  the token stream to read tokens from
   * @param min minimum term length to keep, inclusive
   * @param max maximum term length to keep, inclusive
   */
  public LengthFilter(TokenStream in, int min, int max) {
    super(in);
    this.termAtt = addAttribute(TermAttribute.class);
    this.min = min;
    this.max = max;
  }

  /**
   * Advances the wrapped stream until it yields a token whose term length is
   * within {@code [min, max]}.
   *
   * @return {@code true} once such a token has been found, {@code false} when
   *         the wrapped stream has no more tokens
   * @throws IOException if the wrapped stream fails while advancing
   */
  @Override
  public final boolean incrementToken() throws IOException {
    for (;;) {
      if (!input.incrementToken()) {
        // The wrapped stream is exhausted: end of stream.
        return false;
      }

      final int termLength = termAtt.termLength();
      final boolean outOfRange = termLength < min || termLength > max;
      if (!outOfRange) {
        return true;
      }
      // Otherwise keep pulling tokens until one has an acceptable length.
      // note: the rejected token is ignored, but should we index each part of it?
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment