Created
February 3, 2010 06:02
-
-
Save kijun/293372 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.apache.lucene.analysis; | |
import java.io.IOException; | |
import java.io.Reader; | |
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; | |
import org.apache.lucene.analysis.tokenattributes.TermAttribute; | |
import org.apache.lucene.util.AttributeSource; | |
/** | |
* Emits the entire input as a single token. | |
*/ | |
public final class KeywordTokenizer extends Tokenizer { | |
private static final int DEFAULT_BUFFER_SIZE = 256; | |
private boolean done; | |
private int finalOffset; | |
private TermAttribute termAtt; | |
private OffsetAttribute offsetAtt; | |
public KeywordTokenizer(Reader input) { | |
this(input, DEFAULT_BUFFER_SIZE); | |
} | |
public KeywordTokenizer(Reader input, int bufferSize) { | |
super(input); | |
init(bufferSize); | |
} | |
private void init(int bufferSize) { | |
this.done = false; | |
termAtt = addAttribute(TermAttribute.class); | |
offsetAtt = addAttribute(OffsetAttribute.class); | |
termAtt.resizeTermBuffer(bufferSize); | |
} | |
@Override | |
public final boolean incrementToken() throws IOException { | |
if (!done) { | |
clearAttributes(); | |
done = true; | |
int upto = 0; | |
// 미리 정의된 버퍼를 받아온다 | |
char[] buffer = termAtt.termBuffer(); | |
// 문서가 끝날 때 까지 읽는다 | |
while (true) { | |
final int length = input.read(buffer, upto, buffer.length-upto); | |
if (length == -1) break; | |
upto += length; | |
if (upto == buffer.length) | |
buffer = termAtt.resizeTermBuffer(1+buffer.length); | |
} | |
// 이 전체가 하나의 term이다 | |
termAtt.setTermLength(upto); | |
// offset 정보 설정. | |
finalOffset = correctOffset(upto); | |
offsetAtt.setOffset(correctOffset(0), finalOffset); | |
return true; | |
} | |
return false; | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment