Skip to content

Instantly share code, notes, and snippets.

Created November 9, 2013 08:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/7383104 to your computer and use it in GitHub Desktop.
Save anonymous/7383104 to your computer and use it in GitHub Desktop.
Dummy token filter based on the SynonymFilter from "Lucene in Action".
/**
 * Token filter that emits one extra term for every incoming term that starts
 * with {@code '#'} or {@code '@'} and is longer than one character.
 *
 * <p>The original term is passed through unchanged; on the following call the
 * same term with its leading marker character removed is emitted. Only the
 * term text is rewritten for the extra token; no other attribute is touched
 * by this filter.
 */
private static final class TwitterFilter extends TokenFilter {

    /** Marker-stripped term waiting to be emitted, or {@code null} if none is pending. */
    private String pendingTerm;

    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

    /**
     * @param in the upstream token stream whose terms are inspected
     */
    public TwitterFilter(TokenStream in) {
        super(in);
    }

    /**
     * Advances to the next token: first any pending stripped term, otherwise
     * the next token from the wrapped stream.
     *
     * @return {@code true} if a token was produced, {@code false} once the
     *         wrapped stream is exhausted and nothing is pending
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public final boolean incrementToken() throws IOException {
        if (pendingTerm != null) {
            termAtt.setEmpty();
            termAtt.append(pendingTerm);
            pendingTerm = null;
            return true;
        }
        if (!input.incrementToken()) {
            return false;
        }

        // Check the length before touching the buffer: the backing array is
        // reused between tokens, so buffer[0] is stale when the term is empty.
        final int length = termAtt.length();
        if (length > 1) {
            final char[] buffer = termAtt.buffer();
            final char firstChar = buffer[0];
            if (firstChar == '#' || firstChar == '@') {
                // Copy exactly the characters after the marker (length - 1 of
                // them); copying `length` chars would run one past the term.
                // trim() is kept so emitted terms match the previous output
                // for inputs that already worked.
                pendingTerm = new String(buffer, 1, length - 1).trim();
            }
        }
        return true;
    }

    /**
     * Resets the wrapped stream and drops any pending term so that state from
     * a previous pass cannot leak into the next one.
     *
     * @throws IOException if resetting the wrapped stream fails
     */
    @Override
    public void reset() throws IOException {
        super.reset();
        pendingTerm = null;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment