Created
June 16, 2011 18:08
-
-
Save jrochkind/1029844 to your computer and use it in GitHub Desktop.
a Solr text field without stemming
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!-- for Solr 1.4, would be different for Solr 3.1+, including using
built-in ICU analyzers instead of current custom unicode filters. -->
<!-- I don't recall why I used separate definitions of index and query stage,
or if that's needed for anything. -->
<!-- NO stopwords in use, to avoid dismax stopwords gotcha. -->
<!-- Analyzed Text, no Stemming or Synonyms -->
<!--
  textNoStem: analyzed text field type with no stemming and no synonyms.
  The index and query analyzers run the same chain; the only difference is
  that the index-time WordDelimiterFilter has catenateWords and
  catenateNumbers enabled, while the query-time one has them disabled.
  NOTE(review): the schema.* filter factories are custom (non-stock) classes
  and are assumed to be available on the Solr classpath; confirm on deploy.
-->
<fieldtype name="textNoStem" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="schema.UnicodeNormalizationFilterFactory"
            version="icu4j"
            composed="false"
            remove_diacritics="true"
            remove_modifiers="true"
            fold="true"/>
    <filter class="schema.CJKFilterFactory" bigrams="false"/>
    <!-- Stop filter intentionally left disabled: this field type uses no stopwords. -->
    <!-- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> -->
    <!-- Index side: catenateWords/catenateNumbers are on, so the joined form
         of split parts is indexed alongside the individual parts. -->
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="schema.UnicodeNormalizationFilterFactory"
            version="icu4j"
            composed="false"
            remove_diacritics="true"
            remove_modifiers="true"
            fold="true"/>
    <filter class="schema.CJKFilterFactory" bigrams="false"/>
    <!-- Stop filter intentionally left disabled: this field type uses no stopwords. -->
    <!-- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> -->
    <!-- Query side: catenateWords/catenateNumbers are off; only the split
         parts are generated from the query text. -->
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldtype>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment