Skip to content

Instantly share code, notes, and snippets.

@ctfliblime
Created May 31, 2012 21:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ctfliblime/2846252 to your computer and use it in GitHub Desktop.
Save ctfliblime/2846252 to your computer and use it in GitHub Desktop.
Solr index schema
<?xml version="1.0" ?>
<schema name="VuFind Bibliographic Index" version="1.2">
<types>
<!-- Plain string type (solr.StrField): norms are omitted and documents
     without a value sort last. -->
<fieldType name="string"
           class="solr.StrField"
           sortMissingLast="true"
           omitNorms="true"/>
<!-- Facet-oriented text: the whole value is kept as one token
     (KeywordTokenizer) and trailing punctuation is then trimmed. -->
<fieldType name="textFacet"
           class="solr.TextField"
           sortMissingLast="true"
           omitNorms="true">
  <analyzer>
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <!-- Remove trailing periods and whitespace. The negative lookbehind
         prevents a match from starting directly after a stand-alone capital
         letter, which appears intended to keep the period of an initial
         such as "J." (NOTE(review): confirm intent). -->
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="(?&lt;!\b[A-Z])[.\s]*$"
            replacement=""
            replace="first"/>
  </analyzer>
</fieldType>
<!-- General-purpose stemmed English text. positionIncrementGap="100" puts a
     position gap between successive values of a multi-valued field. -->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
  <!-- Index time: word and number parts are emitted both split and
       catenated (catenateWords / catenateNumbers are on). -->
  <analyzer type="index">
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.ICUFoldingFilterFactory"/>
    <!-- Terms listed in protwords.txt are marked as keywords ahead of the stemmer. -->
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.SnowballPorterFilterFactory" language="English"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
  <!-- Query time: same chain as above, except that synonyms.txt is applied
       right after tokenization and catenation is switched off. -->
  <analyzer type="query">
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="true"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.ICUFoldingFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.SnowballPorterFilterFactory" language="English"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Text type with no stemming and no synonym expansion: tokens are only
     split by the word-delimiter filter, folded, and stop-word filtered. -->
<fieldType name="textProper" class="solr.TextField" positionIncrementGap="100">
  <!-- Index time: catenated word/number parts are emitted as well. -->
  <analyzer type="index">
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"/>
    <filter class="solr.ICUFoldingFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
  <!-- Query time: identical chain with catenation switched off. -->
  <analyzer type="query">
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"/>
    <filter class="solr.ICUFoldingFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Basic text type for spell correction: one analyzer shared by indexing
     and querying, with no stemming and no catenation. -->
<fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"/>
    <filter class="solr.ICUFoldingFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- More advanced spell-checking type: after folding and stop-word removal,
     tokens are combined into shingles of at most two words; single-word
     tokens are not emitted (outputUnigrams="false"). The index and query
     analyzers are configured identically. -->
<fieldType name="textSpellShingle" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.ICUFoldingFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.ShingleFilterFactory"
            maxShingleSize="2"
            outputUnigrams="false"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.ICUFoldingFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.ShingleFilterFactory"
            maxShingleSize="2"
            outputUnigrams="false"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Normalized ISBN/ISSN type. Processing steps:
       1. keep only the first run of non-whitespace characters;
       2. lowercase it;
       3. delete every character that is not a digit or "x";
       4. drop the token if it is now empty (or longer than 100 characters). -->
<fieldType name="isn" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.PatternTokenizerFactory"
               pattern="^(\S*)\s*.*$"
               group="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="[^0-9x]"
            replacement=""
            replace="all"/>
    <filter class="solr.LengthFilterFactory" min="1" max="100"/>
  </analyzer>
</fieldType>
<!-- Date type (solr.DateField): norms are omitted and documents without a
     value sort last. -->
<fieldType name="date"
           class="solr.DateField"
           sortMissingLast="true"
           omitNorms="true"/>
</types>
<fields>
  <!-- Core fields: record id, stored-only raw record and error notes,
       catch-all search fields, and spelling targets. -->
  <field name="id" type="string" indexed="true" stored="true"/>
  <field name="fullrecord" type="string" indexed="false" stored="true"/>
  <field name="marc_error" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="allfields" type="text" indexed="true" stored="false"/>
  <field name="allfields_unstemmed" type="textProper" indexed="true" stored="false"/>
  <field name="fulltext" type="text" indexed="true" stored="false"/>
  <field name="fulltext_unstemmed" type="textProper" indexed="true" stored="false"/>
  <field name="spelling" type="textSpell" indexed="true" stored="true"/>
  <field name="spellingShingle" type="textSpellShingle" indexed="true" stored="true" multiValued="true"/>

  <!-- Institutional fields -->
  <field name="institution" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="collection" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="building" type="string" indexed="true" stored="true" multiValued="true"/>

  <!-- Generic bibliographic fields -->
  <field name="language" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="format" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="author" type="textProper" indexed="true" stored="true" termVectors="true"/>
  <field name="author-letter" type="string" indexed="true" stored="true"/>
  <field name="authorStr" type="textFacet" indexed="true" stored="false"/>
  <field name="title" type="text" indexed="true" stored="true"/>
  <field name="title_sort" type="string" indexed="true" stored="true"/>
  <field name="title_sub" type="text" indexed="true" stored="true"/>
  <field name="title_short" type="text" indexed="true" stored="true"/>
  <field name="title_full" type="text" indexed="true" stored="true"/>
  <field name="title_full_unstemmed" type="textProper" indexed="true" stored="true"/>
  <field name="title_fullStr" type="string" indexed="true" stored="true"/>
  <field name="title_auth" type="text" indexed="true" stored="true"/>
  <field name="physical" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="publisher" type="textProper" indexed="true" stored="true" multiValued="true"/>
  <field name="publisherStr" type="string" indexed="true" stored="false" multiValued="true"/>
  <field name="publishDate" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="publishDateSort" type="string" indexed="true" stored="false"/>
  <field name="edition" type="string" indexed="true" stored="true"/>
  <field name="description" type="text" indexed="true" stored="true"/>
  <field name="contents" type="text" indexed="true" stored="true" multiValued="true"/>
  <field name="url" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="thumbnail" type="string" indexed="false" stored="true"/>

  <!-- Catalog-specific fields: standard numbers and control numbers -->
  <field name="lccn" type="string" indexed="true" stored="true"/>
  <field name="ctrlnum" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="isbn" type="isn" indexed="true" stored="true" multiValued="true"/>
  <field name="issn" type="isn" indexed="true" stored="true" multiValued="true"/>
  <field name="oclc_num" type="string" indexed="true" stored="true" multiValued="true"/>

  <!-- Catalog-specific fields: call numbers and Dewey classification -->
  <field name="callnumber" type="string" indexed="true" stored="true"/>
  <field name="callnumber-a" type="string" indexed="true" stored="true"/>
  <field name="callnumber-first" type="string" indexed="true" stored="true"/>
  <field name="callnumber-first-code" type="string" indexed="true" stored="true"/>
  <field name="callnumber-subject" type="string" indexed="true" stored="true"/>
  <field name="callnumber-subject-code" type="string" indexed="true" stored="true"/>
  <field name="callnumber-label" type="string" indexed="true" stored="true"/>
  <field name="dewey-hundreds" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="dewey-tens" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="dewey-ones" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="dewey-full" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="dewey-sort" type="string" indexed="true" stored="true"/>
  <field name="dewey-raw" type="string" indexed="true" stored="true"/>

  <!-- Catalog-specific fields: additional authors and title variants -->
  <field name="author2" type="textProper" indexed="true" stored="true" multiValued="true"/>
  <field name="author2Str" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="author2-role" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="author_fuller" type="textProper" indexed="true" stored="true"/>
  <field name="author_additional" type="textProper" indexed="true" stored="true" multiValued="true"/>
  <field name="author_additionalStr" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="title_alt" type="text" indexed="true" stored="true" multiValued="true"/>
  <field name="title_old" type="text" indexed="true" stored="true" multiValued="true"/>
  <field name="title_new" type="text" indexed="true" stored="true" multiValued="true"/>
  <field name="dateSpan" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="series" type="text" indexed="true" stored="true" multiValued="true"/>
  <field name="series2" type="text" indexed="true" stored="true" multiValued="true"/>

  <!-- Catalog-specific fields: subjects, each with a searchable text form
       and, where declared, a textFacet or string companion -->
  <field name="topic" type="text" indexed="true" stored="true" multiValued="true"/>
  <field name="topic_unstemmed" type="textProper" indexed="true" stored="false" multiValued="true"/>
  <field name="topic_facet" type="textFacet" indexed="true" stored="true" multiValued="true"/>
  <field name="topic_browse" type="string" indexed="true" stored="false" multiValued="true"/>
  <field name="author_browse" type="string" indexed="true" stored="false" multiValued="true"/>
  <field name="genre" type="text" indexed="true" stored="true" multiValued="true"/>
  <field name="genre_facet" type="textFacet" indexed="true" stored="true" multiValued="true"/>
  <field name="geographic" type="text" indexed="true" stored="true" multiValued="true"/>
  <field name="geographic_facet" type="textFacet" indexed="true" stored="true" multiValued="true"/>
  <field name="era" type="text" indexed="true" stored="true" multiValued="true"/>
  <field name="era_facet" type="textFacet" indexed="true" stored="true" multiValued="true"/>
  <field name="illustrated" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="long_lat" type="textFacet" indexed="true" stored="true" multiValued="false"/>

  <!-- Stored-only marker used for loading the correct record driver -->
  <field name="recordtype" type="string" indexed="false" stored="true"/>

  <!-- Tracking fields recording the oldest and most recent index times -->
  <field name="first_indexed" type="date" indexed="true" stored="true"/>
  <field name="last_indexed" type="date" indexed="true" stored="true"/>

  <!-- Dynamic fields for customization without schema modification.
       The suffix selects the field type; the "_mv" variants are multi-valued. -->
  <dynamicField name="*_date" type="date" indexed="true" stored="true"/>
  <dynamicField name="*_date_mv" type="date" indexed="true" stored="true" multiValued="true"/>
  <dynamicField name="*_isn" type="isn" indexed="true" stored="true"/>
  <dynamicField name="*_isn_mv" type="isn" indexed="true" stored="true" multiValued="true"/>
  <dynamicField name="*_str" type="string" indexed="true" stored="true"/>
  <dynamicField name="*_str_mv" type="string" indexed="true" stored="true" multiValued="true"/>
  <dynamicField name="*_txt" type="text" indexed="true" stored="true"/>
  <dynamicField name="*_txt_mv" type="text" indexed="true" stored="true" multiValued="true"/>
  <dynamicField name="*_txtF" type="textFacet" indexed="true" stored="true"/>
  <dynamicField name="*_txtF_mv" type="textFacet" indexed="true" stored="true" multiValued="true"/>
  <dynamicField name="*_txtP" type="textProper" indexed="true" stored="true"/>
  <dynamicField name="*_txtP_mv" type="textProper" indexed="true" stored="true" multiValued="true"/>
</fields>
<!-- Field that uniquely identifies each document -->
<uniqueKey>id</uniqueKey>
<!-- Field searched when a query does not name one -->
<defaultSearchField>allfields</defaultSearchField>

<!-- copyField rules: spelling -->
<!-- Basic, single-word spelling is fed from the catch-all field -->
<copyField source="allfields" dest="spelling"/>
<!-- Shingle-based spelling is fed from selected descriptive fields -->
<copyField source="author" dest="spellingShingle"/>
<copyField source="title" dest="spellingShingle"/>
<copyField source="contents" dest="spellingShingle"/>
<copyField source="series" dest="spellingShingle"/>
<copyField source="topic" dest="spellingShingle"/>

<!-- copyField rules: string / textFacet copies of analyzed text, for faceting -->
<copyField source="title_full" dest="title_fullStr"/>
<copyField source="author" dest="authorStr"/>
<copyField source="author2" dest="author2Str"/>
<copyField source="author_additional" dest="author_additionalStr"/>
<copyField source="publisher" dest="publisherStr"/>

<!-- copyField rules: unstemmed (textProper) copies of stemmed text fields -->
<copyField source="title_full" dest="title_full_unstemmed"/>
<copyField source="topic" dest="topic_unstemmed"/>
<copyField source="allfields" dest="allfields_unstemmed"/>
<copyField source="fulltext" dest="fulltext_unstemmed"/>

<!-- copyField rules: alphabetic browse -->
<copyField source="topic" dest="topic_browse"/>
<copyField source="author" dest="author_browse"/>
<copyField source="author2" dest="author_browse"/>

<!-- Query terms are combined with AND unless the query says otherwise -->
<solrQueryParser defaultOperator="AND"/>
</schema>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment