Skip to content

Instantly share code, notes, and snippets.

@billdueber
Last active December 18, 2015 03:09
Show Gist options
  • Save billdueber/5716156 to your computer and use it in GitHub Desktop.
Save billdueber/5716156 to your computer and use it in GitHub Desktop.
Umich experimental text types for solr
<!--
#########################
TEXT FIELD TYPES
#########################
In all cases, we want to perform NFKC unicode normalization,
case folding, and ASCII-folding (i.e., removal of accents so
ü => u).
ICUFoldingFilterFactory will give us *all* of those things.
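Synonyms are applied at both index and query time, but they are
only expanded (expand="true") for queries; at index time the
synonym filter runs with expand="false".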
-->
<!-- text : the basic text type -->
<fieldtype name="text" class="solr.TextField" positionIncrementGap="1000">

  <!-- Index-time chain: synonyms are NOT expanded (expand="false"). -->
  <analyzer type="index">
    <!-- Spell out ampersands as the word "and" before tokenizing. -->
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="&amp;"
                replacement=" and "/>
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.ICUFoldingFilterFactory"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="false"/>
    <!-- CJK handling: width filter, then bigrams (unigrams also emitted). -->
    <filter class="solr.CJKWidthFilterFactory"/>
    <filter class="solr.CJKBigramFilterFactory"
            han="true"
            hiragana="true"
            katakana="true"
            hangul="true"
            outputUnigrams="true"/>
  </analyzer>

  <!-- Query-time chain: same steps as the index chain, except that
       synonyms ARE expanded (expand="true"). -->
  <analyzer type="query">
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="&amp;"
                replacement=" and "/>
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.ICUFoldingFilterFactory"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="true"/>
    <filter class="solr.CJKWidthFilterFactory"/>
    <filter class="solr.CJKBigramFilterFactory"
            han="true"
            hiragana="true"
            katakana="true"
            hangul="true"
            outputUnigrams="true"/>
  </analyzer>

</fieldtype>
<!-- text_l: text type anchored only on the left,
for "starts with" matches. This is the same as text_lr
but with only the leading anchor
-->
<fieldtype name="text_l" class="solr.TextField" positionIncrementGap="1000">

  <!-- Index-time chain: synonyms are NOT expanded (expand="false"). -->
  <analyzer type="index">
    <!-- Spell out ampersands as the word "and" before tokenizing. -->
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="&amp;"
                replacement=" and "/>
    <!-- Left anchor: prepend the literal marker AAAA to the whole value.
         The (?s) flag (DOTALL) lets "." match line terminators; without
         it a value containing an embedded newline would not match the
         pattern at all and would be indexed with no anchor. -->
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="(?s)^(.*)$"
                replacement="AAAA$1"/>
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.ICUFoldingFilterFactory"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="false"/>
    <!-- CJK handling: width filter, then bigrams (unigrams also emitted). -->
    <filter class="solr.CJKWidthFilterFactory"/>
    <filter class="solr.CJKBigramFilterFactory"
            han="true"
            hiragana="true"
            katakana="true"
            hangul="true"
            outputUnigrams="true"/>
  </analyzer>

  <!-- Query-time chain: same steps as the index chain (including the
       identical anchor pattern, so index and query stay in agreement),
       except that synonyms ARE expanded (expand="true"). -->
  <analyzer type="query">
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="&amp;"
                replacement=" and "/>
    <charFilter class="solr.PatternReplaceCharFilterFactory"
                pattern="(?s)^(.*)$"
                replacement="AAAA$1"/>
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.ICUFoldingFilterFactory"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="true"/>
    <filter class="solr.CJKWidthFilterFactory"/>
    <filter class="solr.CJKBigramFilterFactory"
            han="true"
            hiragana="true"
            katakana="true"
            hangul="true"
            outputUnigrams="true"/>
  </analyzer>

</fieldtype>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment