Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Solr search component for language detection
* Solr search component for language detection. It has the following configuration parameters (are configured in solrconfig.xml):
* <ul>
* <li>list of parameters language: supported languages</li>
* <li>languageDetectionProbabilityThreshold: the minimum detection probability for which the language detection is considered reliable (default is 0.09)</li>
* </ul>
public class LanguageDetectionComponent extends SearchComponent {
public static final String LANGUAGE_REQUEST_PARAM_NAME = "language";
private static final String LANGUAGE_PARAM_NAME_IN_SOLR_CONFIG = "language";
private static final String LANGUAGE_DETECTION_PROBABILITY_THRESHOLD_PARAM_NAME_IN_SOLR_CONFIG = "languageDetectionProbabilityThreshold";
private static final String LANGUAGE_PROFILES_ZXCONF_SUBDIR_NAME = "languagesProfiles";
private static final String PRODUCTSEARCH_SOLR_ZXCONF_SUBDIR_NAME = "productsearch-solr";
private static final Logger LOGGER = Logger.getLogger(LanguageDetectionComponent.class.getName());
public static final String MIN_LENGTH_PARAM_IN_SOLR_CONFIG = "minLength";
private Set<String> supportedLanguages = null;
private float languageDetectionProbabilityThreshold = DEFAULT_LANGUAGE_DETECTION_PROBABILITY_THRESHOLD;
private Integer minLength;
public void init(NamedList args) {
SolrParams initParams = SolrParams.toSolrParams(args);
supportedLanguages = new HashSet<String>(Arrays.asList(initParams.getParams(LANGUAGE_PARAM_NAME_IN_SOLR_CONFIG)));
languageDetectionProbabilityThreshold = initParams.getFloat(LANGUAGE_DETECTION_PROBABILITY_THRESHOLD_PARAM_NAME_IN_SOLR_CONFIG);
minLength = initParams.getInt(MIN_LENGTH_PARAM_IN_SOLR_CONFIG);
// initialize the language detector
// language profiles can't be load from the classpath due to issue
File languagesProfilesDir = getLanguagesProfilesDir();
try {
} catch (LangDetectException e) {
throw new IllegalStateException("Error during initialization of the language detector factory " +
"with the language detector profiles in " + languagesProfilesDir.getAbsolutePath() + ".", e);
// do further initialization according to the parameters given in solrconfig.xml
private File getLanguagesProfilesDir() {
// TODO: return the directory with the language profiles for
// This directory can't be put in the classpath due to
public void prepare(ResponseBuilder rb) throws IOException {
SolrParams origParams = rb.req.getParams();
if (origParams.get(LANGUAGE_REQUEST_PARAM_NAME) == null) {
final String query = origParams.get("q");
// don't do language detection for short queries as it is not reliable
if (minLength != null && query != null && query.length() <= minLength) {
// do language detection from the query string
try {
final Detector languageDetector = DetectorFactory.create();
final List<Language> probabilities = languageDetector.getProbabilities();
if (!probabilities.isEmpty()) {
final Language mostLikelyLanguageWithProbability = probabilities.get(0);
double probabilityOfMostLikelyLanguage = mostLikelyLanguageWithProbability.prob;
final String mostLikelyLanguage = mostLikelyLanguageWithProbability.lang;
if (probabilityOfMostLikelyLanguage > languageDetectionProbabilityThreshold
&& this.supportedLanguages.contains(mostLikelyLanguage)) {
LOGGER.finest("language " + mostLikelyLanguage + " detected for '"
+ query + "' with probability " + probabilityOfMostLikelyLanguage);
ModifiableSolrParams modifiableSolrParams = new ModifiableSolrParams(origParams);
modifiableSolrParams.set(LANGUAGE_REQUEST_PARAM_NAME, mostLikelyLanguage);
// set the language as request parameter
} catch (LangDetectException e) {
LOGGER.warning("Error during language detection. No language is set. || action=\"LANG_DETECTIONS_ERROR\" query='" + query + " error=" + e.getMessage());
public void process(ResponseBuilder rb) throws IOException {
// Does nothing because we only modify the request.
public String getDescription() {
return "Language detection search component.";
public String getSource() {
return LanguageDetectionComponent.class.getCanonicalName();
<!-- Registers the language detection component under the name "language-detector". -->
<searchComponent name="language-detector" class="com.zanox.service.productsearch.solr.LanguageDetectionComponent">
  <!-- supported languages: a detected language is only set on the request if it is listed here -->
  <str name="language">en</str>
  <str name="language">de</str>
  <str name="language">es</str>
  <str name="language">fr</str>
  <str name="language">it</str>
  <!-- queries of at most this many characters are not analysed (detection is unreliable on short text) -->
  <int name="minLength">10</int>
  <!-- not yet supported: <int name="maxLength">200</int> -->
  <!-- minimum probability for which a detection is considered reliable -->
  <float name="languageDetectionProbabilityThreshold">0.90</float>
</searchComponent>
<requestHandler name="/mySearchHandler" class="solr.SearchHandler">
  <!-- first-components run before the default components, so the language parameter
       is already set when the query is processed -->
  <arr name="first-components">
    <str>language-detector</str>
  </arr>
</requestHandler>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment