Skip to content

Instantly share code, notes, and snippets.

@nz
Created April 20, 2012 11:25
Show Gist options
  • Save nz/2427900 to your computer and use it in GitHub Desktop.
Save nz/2427900 to your computer and use it in GitHub Desktop.
Solr 3.6.0 schema.xml with Kuromoji analysis for text_jp
<?xml version="1.0" encoding="UTF-8"?>
<schema name="sunspot" version="1.0">
<types>
<!-- Scalar (non-analyzed) field types. Every type in this group sets omitNorms="true". -->
<!-- Basic value types -->
<fieldType name="string"  class="solr.StrField"  omitNorms="true" />
<fieldType name="boolean" class="solr.BoolField" omitNorms="true" />
<fieldType name="date"    class="solr.DateField" omitNorms="true" />
<!-- Random sort order -->
<fieldType name="rand" class="solr.RandomSortField" omitNorms="true" />
<!-- Sortable* numeric types -->
<fieldType name="sint"    class="solr.SortableIntField"    omitNorms="true" />
<fieldType name="slong"   class="solr.SortableLongField"   omitNorms="true" />
<fieldType name="sfloat"  class="solr.SortableFloatField"  omitNorms="true" />
<fieldType name="sdouble" class="solr.SortableDoubleField" omitNorms="true" />
<!-- Trie* numeric and date types -->
<fieldType name="tint"    class="solr.TrieIntField"    omitNorms="true" />
<fieldType name="tfloat"  class="solr.TrieFloatField"  omitNorms="true" />
<fieldType name="tdouble" class="solr.TrieDoubleField" omitNorms="true" />
<fieldType name="tdate"   class="solr.TrieDateField"   omitNorms="true" />
<!-- Plain analyzed text for Latin alphabet languages: standard tokenization
     followed by lower-casing. Norms are kept (omitNorms="false"). -->
<fieldType name="text" class="solr.TextField" omitNorms="false">
  <analyzer>
    <!-- Break the input into word tokens -->
    <tokenizer class="solr.StandardTokenizerFactory" />
    <filter class="solr.StandardFilterFactory" />
    <!-- Fold every token to lower case -->
    <filter class="solr.LowerCaseFilterFactory" />
  </analyzer>
</fieldType>
<!-- Simple text field for Japanese, analyzed with the Kuromoji tokenizer and
     Japanese-specific filters. Norms are kept (omitNorms="false"). -->
<fieldType name="text_jp" class="solr.TextField" omitNorms="false">
  <analyzer>
    <!-- Japanese morphological analyzer/tokenizer, configured with mode="search" -->
    <tokenizer class="solr.JapaneseTokenizerFactory" mode="search" />
    <!-- Reduces inflected verbs and adjectives to their base/dictionary forms -->
    <filter class="solr.JapaneseBaseFormFilterFactory" />
    <!-- Removes tokens with certain part-of-speech tags (currently disabled).
         NOTE: lang/* files are not supported on websolr quite yet -nz 25 Apr 2012
         The tags attribute takes the part-of-speech tag list (stoptags_ja.txt),
         which is a different file from the stop word list (stopwords_ja.txt)
         used by the StopFilterFactory further down. -->
    <!--<filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" enablePositionIncrements="true"/>-->
    <!-- Normalize full-width romaji to half-width, and half-width kana to full-width -->
    <filter class="solr.CJKWidthFilterFactory" />
    <!-- Removes common tokens typically not useful for search, but which have a
         negative effect on ranking (currently disabled).
         NOTE: lang/* files are not supported on websolr quite yet -nz 25 Apr 2012 -->
    <!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" enablePositionIncrements="true" />-->
    <!-- Normalizes common katakana spelling variations by removing any last long
         sound character; minimumLength is set to 4 -->
    <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4" />
    <!-- Lower-case any romaji characters, or english tokens -->
    <filter class="solr.LowerCaseFilterFactory" />
  </analyzer>
</fieldType>
</types>
<fields>
<!-- Document identifier: a single stored string per document. This is the
     field named by the uniqueKey element of this schema. -->
<field name="id" type="string" indexed="true" stored="true" multiValued="false" />
<!-- Catch-all text fields, one per analyzer: indexed, not stored, multi-valued -->
<field name="text"    type="text"    indexed="true" stored="false" multiValued="true" />
<field name="text_jp" type="text_jp" indexed="true" stored="false" multiValued="true" />
<!-- Analyzed text dynamic fields. All are indexed and multi-valued.
     Suffix letters after the base name: "s" = stored, "v" = term vectors. -->
<!-- English text, using the "text" type -->
<dynamicField name="*_text"   type="text" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_texts"  type="text" indexed="true" stored="true"  multiValued="true" />
<dynamicField name="*_textv"  type="text" indexed="true" stored="false" termVectors="true" multiValued="true" />
<dynamicField name="*_textsv" type="text" indexed="true" stored="true"  termVectors="true" multiValued="true" />
<!-- Japanese text, using the "text_jp" type -->
<dynamicField name="*_text_jp"   type="text_jp" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_text_jps"  type="text_jp" indexed="true" stored="true"  multiValued="true" />
<dynamicField name="*_text_jpv"  type="text_jp" indexed="true" stored="false" termVectors="true" multiValued="true" />
<dynamicField name="*_text_jpsv" type="text_jp" indexed="true" stored="true"  termVectors="true" multiValued="true" />
<!-- Boolean and date dynamic fields. All are indexed.
     Suffix letters after the type code: "m" = multi-valued, "s" = stored. -->
<!-- Boolean: *_b -->
<dynamicField name="*_b"   type="boolean" indexed="true" stored="false" multiValued="false" />
<dynamicField name="*_bs"  type="boolean" indexed="true" stored="true"  multiValued="false" />
<dynamicField name="*_bm"  type="boolean" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_bms" type="boolean" indexed="true" stored="true"  multiValued="true" />
<!-- Date (solr.DateField): *_d -->
<dynamicField name="*_d"   type="date" indexed="true" stored="false" multiValued="false" />
<dynamicField name="*_ds"  type="date" indexed="true" stored="true"  multiValued="false" />
<dynamicField name="*_dm"  type="date" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_dms" type="date" indexed="true" stored="true"  multiValued="true" />
<!-- Trie date (solr.TrieDateField): *_dt -->
<dynamicField name="*_dt"   type="tdate" indexed="true" stored="false" multiValued="false" />
<dynamicField name="*_dts"  type="tdate" indexed="true" stored="true"  multiValued="false" />
<dynamicField name="*_dtm"  type="tdate" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_dtms" type="tdate" indexed="true" stored="true"  multiValued="true" />
<!-- Double dynamic fields. All are indexed.
     Suffix letters after the type code: "m" = multi-valued, "s" = stored. -->
<!-- Sortable double (solr.SortableDoubleField): *_e -->
<dynamicField name="*_e" type="sdouble" indexed="true" stored="false" multiValued="false" />
<dynamicField name="*_em" type="sdouble" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_ems" type="sdouble" indexed="true" stored="true" multiValued="true" />
<dynamicField name="*_es" type="sdouble" indexed="true" stored="true" multiValued="false" />
<!-- Trie double (solr.TrieDoubleField): *_et
     termVectors is intentionally not set. No other numeric or trie field
     group in this schema enables it, and term vectors on numeric fields only
     add index size. -->
<dynamicField name="*_et" type="tdouble" indexed="true" stored="false" multiValued="false" />
<dynamicField name="*_etm" type="tdouble" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_etms" type="tdouble" indexed="true" stored="true" multiValued="true" />
<dynamicField name="*_ets" type="tdouble" indexed="true" stored="true" multiValued="false" />
<!-- Float dynamic fields. All are indexed.
     Suffix letters after the type code: "m" = multi-valued, "s" = stored. -->
<!-- Sortable float (solr.SortableFloatField): *_f -->
<dynamicField name="*_f"   type="sfloat" indexed="true" stored="false" multiValued="false" />
<dynamicField name="*_fs"  type="sfloat" indexed="true" stored="true"  multiValued="false" />
<dynamicField name="*_fm"  type="sfloat" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_fms" type="sfloat" indexed="true" stored="true"  multiValued="true" />
<!-- Trie float (solr.TrieFloatField): *_ft -->
<dynamicField name="*_ft"   type="tfloat" indexed="true" stored="false" multiValued="false" />
<dynamicField name="*_fts"  type="tfloat" indexed="true" stored="true"  multiValued="false" />
<dynamicField name="*_ftm"  type="tfloat" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_ftms" type="tfloat" indexed="true" stored="true"  multiValued="true" />
<!-- Integer dynamic fields. All are indexed.
     Suffix letters after the type code: "m" = multi-valued, "s" = stored. -->
<!-- Sortable int (solr.SortableIntField): *_i -->
<dynamicField name="*_i"   type="sint" indexed="true" stored="false" multiValued="false" />
<dynamicField name="*_is"  type="sint" indexed="true" stored="true"  multiValued="false" />
<dynamicField name="*_im"  type="sint" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_ims" type="sint" indexed="true" stored="true"  multiValued="true" />
<!-- Trie int (solr.TrieIntField): *_it -->
<dynamicField name="*_it"   type="tint" indexed="true" stored="false" multiValued="false" />
<dynamicField name="*_its"  type="tint" indexed="true" stored="true"  multiValued="false" />
<dynamicField name="*_itm"  type="tint" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_itms" type="tint" indexed="true" stored="true"  multiValued="true" />
<!-- Long and string dynamic fields. All are indexed.
     Suffix letters after the type code: "m" = multi-valued, "s" = stored. -->
<!-- Sortable long (solr.SortableLongField): *_l -->
<dynamicField name="*_l"   type="slong" indexed="true" stored="false" multiValued="false" />
<dynamicField name="*_ls"  type="slong" indexed="true" stored="true"  multiValued="false" />
<dynamicField name="*_lm"  type="slong" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_lms" type="slong" indexed="true" stored="true"  multiValued="true" />
<!-- String (solr.StrField): *_s -->
<dynamicField name="*_s"   type="string" indexed="true" stored="false" multiValued="false" />
<dynamicField name="*_ss"  type="string" indexed="true" stored="true"  multiValued="false" />
<dynamicField name="*_sm"  type="string" indexed="true" stored="false" multiValued="true" />
<dynamicField name="*_sms" type="string" indexed="true" stored="true"  multiValued="true" />
<!-- Random value fields backed by the "rand" type (solr.RandomSortField):
     one fixed field named "random" plus any field whose name starts with
     "random_". Both are indexed, not stored, and single-valued. -->
<field        name="random"   type="rand" indexed="true" stored="false" multiValued="false" />
<dynamicField name="random_*" type="rand" indexed="true" stored="false" multiValued="false" />
</fields>
<!-- The "id" field is the unique key of each document -->
<uniqueKey>id</uniqueKey>
<!-- Queries that name no field search the catch-all "text" field -->
<defaultSearchField>text</defaultSearchField>
<!-- Query terms are combined with AND when no operator is given -->
<solrQueryParser defaultOperator="AND" />
</schema>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment