Skip to content

Instantly share code, notes, and snippets.

@nolanlawson
Last active October 5, 2015 21:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nolanlawson/2883484 to your computer and use it in GitHub Desktop.
Save nolanlawson/2883484 to your computer and use it in GitHub Desktop.
Slight tweak of the Solr SpellCheckComponent to make its output quieter when you only care about the collations
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build descriptor for the quiet-spellcheck-component artifact. -->
<modelVersion>4.0.0</modelVersion>
<!-- Project coordinates. -->
<groupId>org.healthonnet.solr</groupId>
<artifactId>quiet-spellcheck-component</artifactId>
<version>0.0.1-SNAPSHOT</version>
<dependencies>
<!-- Solr and Lucene libraries, all pinned to the same 3.6.2 release. -->
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-solrj</artifactId>
<version>3.6.2</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>3.6.2</version>
</dependency>
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-core</artifactId>
<version>3.6.2</version>
</dependency>
<!-- JUnit is restricted to the test classpath via the test scope. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compile with Java 1.5 source and target levels. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.0</version>
<configuration>
<source>1.5</source>
<target>1.5</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
package org.healthonnet.solr;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SpellCheckComponent;
import org.apache.solr.spelling.SolrSpellChecker;
import org.apache.solr.spelling.SpellingOptions;
import org.apache.solr.spelling.SpellingResult;
/**
 * Variant of the {@link SpellCheckComponent} that gives a quieter output: the
 * per-token suggestions are used to build the collations and are then stripped
 * from the response, so only the collations are written out.
 * <p>
 * This is useful when {@code spellcheck.count} is set to a large number but
 * only the collations are of interest; it avoids writing a large amount of
 * output and therefore cuts down on I/O.
 *
 * @author nolan
 *
 */
public class QuietSpellCheckComponent extends SpellCheckComponent {

    /** Name under which a collation is stored in the suggestions list. */
    private static final String COLLATION_KEY = "collation";

    /** First key of an extended collation entry. */
    private static final String COLLATION_QUERY_KEY = "collationQuery";

    /**
     * Runs the spell check for the current request and adds a
     * {@code spellcheck} section to the response that contains only
     * collation entries.
     * <p>
     * Does nothing when the component is not enabled for the request, when no
     * spell checkers are configured, or when the query yields no tokens.
     *
     * @param rb the response builder carrying the request and the response
     * @throws IOException if tokenizing the query fails
     * @throws SolrException with {@code NOT_FOUND} if the requested spell
     *         check dictionary does not exist
     */
    @Override
    @SuppressWarnings("unchecked")
    public void process(ResponseBuilder rb) throws IOException {
        SolrParams params = rb.req.getParams();
        if (!params.getBool(COMPONENT_NAME, false) || spellCheckers.isEmpty()) {
            return;
        }

        boolean shardRequest = "true".equals(params.get(ShardParams.IS_SHARD));
        String q = params.get(SPELLCHECK_Q);
        SolrSpellChecker spellChecker = getSpellChecker(params);

        Collection<Token> tokens;
        if (q != null) {
            // An explicit spellcheck query is tokenized with the analyzer of
            // the selected spell checker, so the checker must exist here;
            // fail with a proper error instead of a NullPointerException.
            if (spellChecker == null) {
                throw unknownDictionary();
            }
            tokens = getTokens(q, spellChecker.getQueryAnalyzer());
        } else {
            q = rb.getQueryString();
            if (q == null) {
                q = params.get(CommonParams.Q);
            }
            tokens = queryConverter.convert(q);
        }

        if (tokens == null || tokens.isEmpty()) {
            return;
        }
        if (spellChecker == null) {
            throw unknownDictionary();
        }

        int count = params.getInt(SPELLCHECK_COUNT, 1);
        boolean onlyMorePopular = params.getBool(SPELLCHECK_ONLY_MORE_POPULAR, DEFAULT_ONLY_MORE_POPULAR);
        boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false);
        boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
        float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
        IndexReader reader = rb.req.getSearcher().getReader();
        SolrParams customParams = getCustomParams(getDictionaryName(params), params, shardRequest);

        SpellingOptions options = new SpellingOptions(tokens, reader,
                count, onlyMorePopular, extendedResults, accuracy,
                customParams);
        SpellingResult spellingResult = spellChecker.getSuggestions(options);
        if (spellingResult == null) {
            return;
        }

        NamedList suggestions = toNamedList(shardRequest,
                spellingResult, q, extendedResults, collate);
        if (collate) {
            addCollationsToResponse(params, spellingResult, rb, q, suggestions);
        }

        // The collations have been built from the suggestions by now, so the
        // suggestions themselves can be dropped. This is the only intended
        // difference from SpellCheckComponent.
        retainOnlyCollations(suggestions);

        NamedList response = new SimpleOrderedMap();
        response.add("suggestions", suggestions);
        rb.rsp.add("spellcheck", response);
    }

    /**
     * Builds the error reported when the requested dictionary is unknown.
     */
    private static SolrException unknownDictionary() {
        return new SolrException(SolrException.ErrorCode.NOT_FOUND,
                "Specified dictionary does not exist.");
    }

    /**
     * Removes every entry from {@code suggestions} that is not a collation.
     */
    private static void retainOnlyCollations(NamedList suggestions) {
        // Walk backwards so that removing an entry does not shift the
        // indexes of the entries still to be visited.
        for (int i = suggestions.size() - 1; i >= 0; i--) {
            if (!isCollation(suggestions.getName(i), suggestions.getVal(i))) {
                suggestions.remove(i);
            }
        }
    }

    /**
     * Tells whether a suggestions entry is a collation rather than the
     * suggestion list of a query token that happens to be spelled
     * "collation".
     * <p>
     * Two shapes are accepted: a list whose first key is
     * {@code collationQuery} (extended collation), and a plain string, which
     * is how Solr reports a collation when
     * {@code spellcheck.collateExtendedResults} is not enabled.
     */
    private static boolean isCollation(String name, Object val) {
        if (!COLLATION_KEY.equals(name)) {
            return false;
        }
        if (val instanceof String) {
            return true;
        }
        if (val instanceof NamedList) {
            NamedList extended = (NamedList) val;
            return extended.size() > 0
                    && COLLATION_QUERY_KEY.equals(extended.getName(0));
        }
        return false;
    }

    /**
     * Tokenizes {@code q} with the given analyzer and copies each token's
     * term, offsets, type, flags, payload and position increment into a
     * stand-alone {@link Token}.
     *
     * @param q the text to tokenize
     * @param analyzer the analyzer providing the token stream
     * @return the tokens in stream order; empty if the analyzer emits none
     * @throws IOException if the token stream fails
     */
    private Collection<Token> getTokens(String q, Analyzer analyzer)
            throws IOException {
        Collection<Token> result = new ArrayList<Token>();
        TokenStream ts = analyzer.reusableTokenStream("", new StringReader(q));
        try {
            ts.reset();
            // TODO: support custom attributes
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
            TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
            FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
            PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
            PositionIncrementAttribute posIncAtt = ts
                    .addAttribute(PositionIncrementAttribute.class);
            while (ts.incrementToken()) {
                Token token = new Token();
                token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
                token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
                token.setType(typeAtt.type());
                token.setFlags(flagsAtt.getFlags());
                token.setPayload(payloadAtt.getPayload());
                token.setPositionIncrement(posIncAtt.getPositionIncrement());
                result.add(token);
            }
            ts.end();
        } finally {
            // Close the stream even when tokenizing fails part-way.
            ts.close();
        }
        return result;
    }

    /**
     * Returns the dictionary name given by the {@code SPELLCHECK_DICT}
     * request parameter, or the default dictionary name when the parameter
     * is absent.
     */
    private String getDictionaryName(SolrParams params) {
        String dictName = params.get(SPELLCHECK_DICT);
        return dictName != null ? dictName : SolrSpellChecker.DEFAULT_DICTIONARY_NAME;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment