Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@jrochkind
Created June 16, 2011 18:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jrochkind/1029844 to your computer and use it in GitHub Desktop.
Save jrochkind/1029844 to your computer and use it in GitHub Desktop.
a Solr text field without stemming
<!-- Written for Solr 1.4. This would be different for Solr 3.1+, including using
the built-in ICU analyzers instead of the current custom Unicode filters. -->
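<!-- I don't recall why I used separate definitions for the index and query stages,
or whether that's needed for anything. (As written, the two differ only in the
catenateWords / catenateNumbers settings of WordDelimiterFilterFactory.) -->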
<!-- NO stopwords in use, to avoid dismax stopwords gotcha. -->
<!-- Analyzed Text, no Stemming or Synonyms -->
<fieldtype name="textNoStem" class="solr.TextField" positionIncrementGap="100">

  <!--
    Index-time analysis chain. No stemming or synonym filter is configured.
    Filters run in the order listed: whitespace tokenizing, the two custom
    "schema.*" filters, word-delimiter splitting, lowercasing, then removal
    of duplicate tokens.
  -->
  <analyzer type="index">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>

    <!-- Custom (non "solr.") filter class; must be available on Solr's classpath. -->
    <filter class="schema.UnicodeNormalizationFilterFactory"
            version="icu4j"
            composed="false"
            remove_diacritics="true"
            remove_modifiers="true"
            fold="true"/>

    <!-- Custom (non "solr.") filter class; bigram generation is switched off. -->
    <filter class="schema.CJKFilterFactory" bigrams="false"/>

    <!-- Stop-word filtering is deliberately left disabled; definition kept for reference. -->
    <!-- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> -->

    <!-- Index side: catenateWords and catenateNumbers are ON (1); catenateAll is off. -->
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            splitOnCaseChange="1"/>

    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>

  <!--
    Query-time analysis chain. Identical to the index-time chain except that
    WordDelimiterFilterFactory has catenateWords and catenateNumbers set to 0.
  -->
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>

    <!-- Custom (non "solr.") filter class; same settings as the index-time chain. -->
    <filter class="schema.UnicodeNormalizationFilterFactory"
            version="icu4j"
            composed="false"
            remove_diacritics="true"
            remove_modifiers="true"
            fold="true"/>

    <!-- Custom (non "solr.") filter class; bigram generation is switched off. -->
    <filter class="schema.CJKFilterFactory" bigrams="false"/>

    <!-- Stop-word filtering is deliberately left disabled; definition kept for reference. -->
    <!-- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> -->

    <!-- Query side: catenateWords and catenateNumbers are OFF (0); catenateAll is off. -->
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="1"/>

    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>

</fieldtype>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment