Skip to content

Instantly share code, notes, and snippets.

@fontanka16
Created July 2, 2015 11:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fontanka16/42ce366b45a206559573 to your computer and use it in GitHub Desktop.
Save fontanka16/42ce366b45a206559573 to your computer and use it in GitHub Desktop.
Stockholm University OPAC Solr Schema.xml
<?xml version="1.0" encoding="UTF-8" ?>
<schema name="example" version="1.5">
<fields>
  <!-- Document fields. Conventions visible in the declarations below:
       *_display fields are stored but not indexed (returned, not searched);
       *_search fields are indexed but not stored (searched, not returned). -->

  <field name="_version_" type="long" indexed="true" stored="true"/>
  <field name="abstract_display" type="string" indexed="false" stored="true" multiValued="false"/>
  <field name="accessible_online" type="boolean" indexed="true" stored="true"/>
  <!-- Catch-all search field; filled by the copyField rules of this schema. -->
  <field name="allfields" type="text" indexed="true" stored="false" multiValued="true"/>
  <field name="availability" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="class_dewey_full" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="class_kssb_full" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="date_record_created" type="date" indexed="true" stored="true"/>
  <field name="date_record_last_update" type="date" indexed="true" stored="true"/>
  <field name="date_record_last_indexed" type="date" indexed="true" stored="true"/>
  <field name="doctype_sv" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="doctype_en" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="edition_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="error_strings" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="fullrecord" type="string" indexed="false" stored="true"/>
  <field name="fulltext" type="text" indexed="true" stored="false"/>
  <field name="genre_marc_en" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="genre_marc_sv" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="holding_information_string" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="hosting_publication_isbn_issn" type="isn" indexed="true" stored="true" multiValued="true"/>
  <!-- Declared as the uniqueKey of this schema. -->
  <field name="id" type="string" indexed="true" stored="true"/>
  <field name="isbn_issn" type="isn" indexed="true" stored="true" multiValued="true"/>
  <field name="language_code" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="language_string_sv" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="library_name" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="NoHoldingMessage" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="language_string_en" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="note_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="note_primary_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="note_search" type="textProper" indexed="true" stored="false" multiValued="true"/>
  <field name="physical_description_en" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="physical_description_sv" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="print_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="publisher_display" type="string" indexed="false" stored="true" multiValued="true"/>

  <!-- responsibility = "upphov" (Swedish term used for these fields) -->
  <field name="responsibility_facet" type="textFacet" indexed="true" stored="false" multiValued="true"/>
  <field name="responsibility_primary_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="responsibility_search" type="textProper" indexed="true" stored="false" multiValued="true"/>
  <field name="responsibility_secondary_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="responsibility_secondary_search" type="textProper" indexed="true" stored="false" multiValued="true"/>
  <!-- The next two fields are neither indexed nor stored; in this schema they
       only appear as sources of copyField rules. -->
  <field name="responsibility_secondary_facet" type="string" indexed="false" stored="false" multiValued="true"/>
  <field name="responsibility_primary_facet" type="string" indexed="false" stored="false" multiValued="false"/>

  <field name="sigel" type="string" indexed="true" stored="false" multiValued="true"/>
  <field name="source" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="spelling" type="textSpell" indexed="true" stored="true"/>
  <field name="spellingShingle" type="textSpellShingle" indexed="true" stored="true" multiValued="true"/>
  <field name="subject_area_su_sv" type="textFacet" indexed="true" stored="true" multiValued="true"/>
  <field name="subject_area_su_en" type="textFacet" indexed="true" stored="true" multiValued="true"/>

  <!-- Subject terms: a generic triple plus LCSH- and SAB-specific triples. -->
  <field name="subject_term_search" type="textProper" indexed="true" stored="false" multiValued="true"/>
  <field name="subject_term_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="subject_term_facet" type="textFacet" indexed="true" stored="false" multiValued="true"/>
  <field name="subject_term_lcsh_search" type="textProper" indexed="true" stored="false" multiValued="true"/>
  <field name="subject_term_lcsh_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="subject_term_lcsh_facet" type="textFacet" indexed="true" stored="false" multiValued="true"/>
  <field name="subject_term_sab_search" type="textProper" indexed="true" stored="false" multiValued="true"/>
  <field name="subject_term_sab_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="subject_term_sab_facet" type="textFacet" indexed="true" stored="false" multiValued="true"/>

  <field name="titles_alternative_search" type="textProper" indexed="true" stored="false" multiValued="true"/>
  <!-- NOTE(review): the only *_display field not typed "string"; it is not
       indexed, so confirm whether textProper is intentional here. -->
  <field name="titles_alternative_display" type="textProper" indexed="false" stored="true" multiValued="true"/>
  <field name="title_main_search" type="textProper" indexed="true" stored="false" multiValued="false"/>
  <field name="title_main_display" type="string" indexed="false" stored="true" multiValued="false"/>
  <!-- For alternatives, see http://lucene.472066.n3.nabble.com/Faceting-and-first-letter-of-fields-td1703254.html or http://www.packtpub.com/article/faceting-in-solr-1.4-enterprise-search-server -->
  <field name="title_main__facetLetter" type="bucketFirstLetter" stored="true"/>
  <field name="title_main_sort" type="textSort" indexed="true" stored="true" multiValued="false"/>
  <field name="title_hosting_publication_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="title_hosting_publication_search" type="textProper" indexed="true" stored="false" multiValued="true"/>
  <field name="title_hosting_publication_facet" type="textFacet" indexed="true" stored="false" multiValued="true"/>

  <field name="type_of_resource_en" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="type_of_resource_sv" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="url_primary" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="url_primary_title" type="string" indexed="false" stored="true" multiValued="false"/>
  <field name="url_openurl" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="verde_id" type="string" indexed="false" stored="true" multiValued="false"/>
  <field name="website_category" type="string" indexed="true" stored="true"/>
  <field name="url_libris" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="year_primary" type="string" indexed="true" stored="true"/>
  <field name="year_facet" type="sint" indexed="true" stored="true"/>
  <field name="pages" type="sint" indexed="true" stored="true"/>
</fields>
<uniqueKey>id</uniqueKey>

<!-- copyField rules. Rule order is kept exactly as originally declared.
     NOTE(review): Solr copyField rules are not chained (a value copied into a
     destination is not forwarded by rules that use that destination as their
     source). Several sources below are themselves copyField destinations
     (titles_alternative_search, responsibility_search, responsibility_facet,
     subject_term_display, titles_alternative_display); confirm those fields
     are also supplied directly by the indexer where forwarding is expected. -->

<!-- Spelling, title and responsibility derivations -->
<copyField source="title_main_display" dest="spelling"/>
<copyField source="genre_marc_sv" dest="spellingShingle"/>
<copyField source="titles_alternative_search" dest="spellingShingle"/>
<copyField source="title_main_search" dest="titles_alternative_search"/>
<copyField source="titles_alternative_search" dest="titles_alternative_display"/>
<copyField source="responsibility_secondary_search" dest="spellingShingle"/>
<copyField source="responsibility_secondary_search" dest="responsibility_search"/>
<copyField source="responsibility_search" dest="spellingShingle"/>
<copyField source="title_main_display" dest="title_main_sort"/>
<copyField source="title_main_display" dest="title_main__facetLetter"/>
<copyField source="responsibility_primary_facet" dest="responsibility_facet"/>
<copyField source="responsibility_secondary_facet" dest="responsibility_facet"/>
<copyField source="responsibility_secondary_facet" dest="responsibility_secondary_display"/>

<!-- Copy the specific subject terms into the generic field -->
<copyField source="subject_term_lcsh_display" dest="subject_term_display"/>
<copyField source="subject_term_sab_display" dest="subject_term_display"/>
<copyField source="subject_term_sab_display" dest="subject_term_sab_facet"/>
<copyField source="subject_term_sab_display" dest="spellingShingle"/>
<copyField source="subject_term_sab_display" dest="subject_term_sab_search"/>
<copyField source="subject_term_lcsh_display" dest="subject_term_lcsh_facet"/>
<copyField source="subject_term_lcsh_display" dest="spellingShingle"/>
<copyField source="subject_term_lcsh_display" dest="subject_term_lcsh_search"/>
<copyField source="subject_term_display" dest="subject_term_facet"/>
<copyField source="subject_term_display" dest="subject_term_search"/>
<copyField source="subject_term_display" dest="spellingShingle"/>
<copyField source="title_hosting_publication_display" dest="title_hosting_publication_search"/>
<copyField source="title_hosting_publication_display" dest="title_hosting_publication_facet"/>
<copyField source="responsibility_primary_facet" dest="responsibility_primary_display"/>

<!-- Everything that feeds the catch-all "allfields" search field -->
<copyField source="abstract_display" dest="allfields"/>
<copyField source="class_dewey_full" dest="allfields"/>
<copyField source="class_kssb_full" dest="allfields"/>
<copyField source="doctype_sv" dest="allfields"/>
<copyField source="doctype_en" dest="allfields"/>
<copyField source="edition_display" dest="allfields"/>
<copyField source="fulltext" dest="allfields"/>
<copyField source="genre_marc_en" dest="allfields"/>
<copyField source="genre_marc_sv" dest="allfields"/>
<copyField source="holding_information_string" dest="allfields"/>
<copyField source="hosting_publication_isbn_issn" dest="allfields"/>
<copyField source="isbn_issn" dest="allfields"/>
<copyField source="language_code" dest="allfields"/>
<copyField source="library_name" dest="allfields"/>
<copyField source="NoHoldingMessage" dest="allfields"/>
<copyField source="note_display" dest="allfields"/>
<copyField source="note_primary_display" dest="allfields"/>
<copyField source="physical_description_en" dest="allfields"/>
<copyField source="physical_description_sv" dest="allfields"/>
<copyField source="print_display" dest="allfields"/>
<copyField source="publisher_display" dest="allfields"/>
<copyField source="responsibility_facet" dest="allfields"/>
<copyField source="sigel" dest="allfields"/>
<copyField source="subject_area_su_sv" dest="allfields"/>
<copyField source="subject_area_su_en" dest="allfields"/>
<copyField source="subject_term_display" dest="allfields"/>
<copyField source="subject_term_lcsh_display" dest="allfields"/>
<copyField source="subject_term_sab_display" dest="allfields"/>
<copyField source="titles_alternative_display" dest="allfields"/>
<copyField source="title_main_display" dest="allfields"/>
<copyField source="title_hosting_publication_display" dest="allfields"/>
<copyField source="url_primary" dest="allfields"/>
<copyField source="url_primary_title" dest="allfields"/>
<copyField source="url_openurl" dest="allfields"/>
<copyField source="website_category" dest="allfields"/>
<copyField source="year_primary" dest="allfields"/>
<copyField source="pages" dest="allfields"/>
<types>
<!-- Primitive (non-analysed) field types: strings, booleans, numerics, dates,
     binary and random-sort. Of these, the fields of this schema reference
     "string", "boolean", "long" and "date". -->
<fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
<!-- boolean type: "true" or "false" -->
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
<!-- Trie-based numerics declared with precisionStep="0" -->
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
<!-- "t"-prefixed variants of the same classes with precisionStep="8" -->
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
<!-- Date types: "date" (precisionStep 0) and "tdate" (precisionStep 6) -->
<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
<!-- Element name normalised to "fieldType" to match every sibling declaration -->
<fieldType name="binary" class="solr.BinaryField"/>
<!-- "p"-prefixed types backed by the plain (non-Trie) numeric/date classes -->
<fieldType name="pint" class="solr.IntField"/>
<fieldType name="plong" class="solr.LongField"/>
<fieldType name="pfloat" class="solr.FloatField"/>
<fieldType name="pdouble" class="solr.DoubleField"/>
<fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
<fieldType name="random" class="solr.RandomSortField" indexed="true"/>
<!-- Text split on whitespace only; no token filters are applied. -->
<fieldType name="text_ws"
           class="solr.TextField"
           positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  </analyzer>
</fieldType>
<!-- General text: standard tokenizer, stop words from stopwords.txt and
     lowercasing. Synonyms (synonyms.txt) are applied on the query side only. -->
<fieldType name="text_general"
           class="solr.TextField"
           positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!-- English text: stop words, lowercasing, possessive stripping, protected
     keywords (protwords.txt) and Porter stemming. The query analyzer runs the
     same chain with a synonym filter inserted right after the tokenizer. -->
<fieldType name="text_en"
           class="solr.TextField"
           positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
</fieldType>
<!-- English text with word-delimiter splitting. The index analyzer also
     catenates word and number parts (catenateWords/catenateNumbers = 1); the
     query analyzer does not (both = 0) and additionally applies synonyms. -->
<fieldType name="text_en_splitting"
           class="solr.TextField"
           positionIncrementGap="100"
           autoGeneratePhraseQueries="true">
  <analyzer type="index">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1" generateNumberParts="1"
            catenateWords="1" catenateNumbers="1" catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1" generateNumberParts="1"
            catenateWords="0" catenateNumbers="0" catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Tighter variant of English splitting: less flexible matching but fewer
     false matches. Probably not ideal for product names, but may suit SKUs;
     dashes inserted in the wrong place can still match. A single analyzer is
     used for both indexing and querying. -->
<fieldType name="text_en_splitting_tight"
           class="solr.TextField"
           positionIncrementGap="100"
           autoGeneratePhraseQueries="true">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="0" generateNumberParts="0"
            catenateWords="1" catenateNumbers="1" catenateAll="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    <!-- Removes duplicate tokens at the same position, which can occur when
         WordDelimiterFilter is combined with stemming. -->
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Same as text_general, except that the index analyzer additionally emits
     each token with its characters reversed (withOriginal keeps the original
     too) so that leading-wildcard queries can be served more efficiently. -->
<fieldType name="text_general_rev"
           class="solr.TextField"
           positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ReversedWildcardFilterFactory"
            withOriginal="true"
            maxPosAsterisk="3"
            maxPosQuestion="2"
            maxFractionAsterisk="0.33"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Sort key made of the letters a-z only: the whole value is kept as one
     token, lowercased, trimmed, and every character outside a-z is removed.
     A stray comment terminator that was left as text inside <analyzer> has
     been removed. -->
<fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
  <analyzer>
    <!-- KeywordTokenizer: the entire input becomes a single token -->
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <!-- Lowercase first so the a-z pattern below also keeps former capitals -->
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.TrimFilterFactory"/>
    <!-- Strip everything that is not a lowercase ASCII letter -->
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="([^a-z])" replacement="" replace="all"/>
  </analyzer>
</fieldType>
<!-- Whole value kept as a single token and lowercased. -->
<fieldType name="lowercase"
           class="solr.TextField"
           positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Path field: indexed with the "/"-delimited path hierarchy tokenizer,
     queried as a single keyword token. -->
<fieldType name="descendent_path"
           class="solr.TextField">
  <analyzer type="index">
    <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.KeywordTokenizerFactory"/>
  </analyzer>
</fieldType>
<!-- Mirror image of descendent_path: indexed as a single keyword token,
     queried with the "/"-delimited path hierarchy tokenizer. -->
<fieldType name="ancestor_path"
           class="solr.TextField">
  <analyzer type="index">
    <tokenizer class="solr.KeywordTokenizerFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/>
  </analyzer>
</fieldType>
<!-- Type for data that should be neither indexed nor stored. Element name
     normalised to "fieldType" to match the sibling declarations. -->
<fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField"/>
<!-- Spatial types. None of the fields declared in this schema use them.
     NOTE(review): "point" and "location" name sub-field suffixes (_d,
     _coordinate) and this schema declares no dynamicField; confirm before
     using these types on a real field. -->
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
           geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees"/>
<!-- Currency type; reads its configuration from currency.xml -->
<fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml"/>
<!-- Swedish text: standard tokenizer, lowercasing, Snowball-format stop words
     (lang/stopwords_sv.txt) and the Swedish Snowball stemmer. -->
<fieldType name="text_sv"
           class="solr.TextField"
           positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
    <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
  </analyzer>
</fieldType>
<!-- First-letter bucket (used by title_main__facetLetter). At index time the
     tokenizer emits capture group 1 of the pattern, i.e. the first character
     when it is an ASCII letter, and the synonym filter then maps it through
     mb_letterBuckets.txt. Queries are matched as a single keyword token.
     NOTE(review): values that do not start with a-z/A-Z (digits, or letters
     such as Å/Ä/Ö) presumably produce no bucket at all; confirm this is the
     intended behaviour. -->
<fieldType name="bucketFirstLetter"
           class="solr.TextField"
           sortMissingLast="true"
           omitNorms="true">
  <analyzer type="index">
    <tokenizer class="solr.PatternTokenizerFactory" pattern="^([a-zA-Z]).*" group="1"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="mb_letterBuckets.txt" ignoreCase="true" expand="false"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.KeywordTokenizerFactory"/>
  </analyzer>
</fieldType>
<!-- Case-insensitive sort key (used by title_main_sort): the whole value is
     kept as one token and lowercased. -->
<fieldType name="textSort"
           class="solr.TextField"
           sortMissingLast="true"
           omitNorms="true">
  <analyzer>
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Facet value type: the whole value is one token, with trailing periods and
     whitespace removed. The negative look-behind in the pattern leaves the
     ending alone when it directly follows a single capital letter at a word
     boundary. -->
<fieldType name="textFacet"
           class="solr.TextField"
           sortMissingLast="true"
           omitNorms="true">
  <analyzer>
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <!-- strip trailing punctuation from facets -->
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="(?&lt;!\b[A-Z])[.\s]*$" replacement="" replace="first"/>
  </analyzer>
</fieldType>
<!-- Main full-text type (used by allfields and fulltext). Both analyzers fold
     characters via mapping-FoldToASCII.txt, split on whitespace, apply the
     word-delimiter filter (keeping the original token), remove stop words,
     lowercase and de-duplicate. The index side also catenates word/number
     parts; the query side instead adds a pattern replacement and synonyms. -->
<fieldType name="text"
           class="solr.TextField"
           positionIncrementGap="100">
  <analyzer type="index">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-FoldToASCII.txt"/>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.WordDelimiterFilterFactory"
            preserveOriginal="1"
            generateWordParts="1" generateNumberParts="1"
            catenateWords="1" catenateNumbers="1" catenateAll="0"
            splitOnCaseChange="0" stemEnglishPossessive="1"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!--<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>-->
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    <!--<filter class="solr.ISOLatin1AccentFilterFactory"/>-->
  </analyzer>
  <analyzer type="query">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-FoldToASCII.txt"/>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <!-- NOTE(review): this filter sees one whitespace-delimited token at a
         time, so the alternative that contains a space ("[elektronisk
         resurs]") presumably never matches; confirm whether a charFilter
         placed before the tokenizer was intended. -->
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="\[elektronisk resurs\]|/|:|-" replacement=""/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.WordDelimiterFilterFactory"
            preserveOriginal="1"
            generateWordParts="1" generateNumberParts="1"
            catenateWords="0" catenateNumbers="0" catenateAll="0"
            splitOnCaseChange="0" stemEnglishPossessive="1"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    <!--<filter class="solr.ISOLatin1AccentFilterFactory"/>-->
  </analyzer>
</fieldType>
<!-- Text type without stemming or synonyms (used by the *_search fields).
     Both analyzers fold characters via mapping-FoldToASCII.txt, split on
     whitespace, apply the word-delimiter filter (keeping the original token),
     remove stop words, lowercase and de-duplicate. Only the index side
     catenates word/number parts; only the query side has the pattern
     replacement. -->
<fieldType name="textProper"
           class="solr.TextField"
           positionIncrementGap="100">
  <analyzer type="index">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-FoldToASCII.txt"/>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <!-- <filter class="solr.ISOLatin1AccentFilterFactory"/>-->
    <filter class="solr.WordDelimiterFilterFactory"
            preserveOriginal="1"
            generateWordParts="1" generateNumberParts="1"
            catenateWords="1" catenateNumbers="1" catenateAll="0"
            stemEnglishPossessive="1"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-FoldToASCII.txt"/>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <!-- NOTE(review): applied per whitespace-delimited token, so the
         alternative containing a space ("[elektronisk resurs]") presumably
         never matches; confirm the intent. -->
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="\[elektronisk resurs\]|/|:|-" replacement=""/>
    <!-- <filter class="solr.ISOLatin1AccentFilterFactory"/>-->
    <filter class="solr.WordDelimiterFilterFactory"
            preserveOriginal="1"
            generateWordParts="1" generateNumberParts="1"
            catenateWords="0" catenateNumbers="0" catenateAll="0"
            stemEnglishPossessive="1"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Basic text type for spell correction (used by the "spelling" field):
     whitespace tokens, word-delimiter splitting without catenation, stop
     words from stopwordsSpell.txt, lowercasing and de-duplication. -->
<fieldType name="textSpell"
           class="solr.TextField"
           positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <!--<filter class="solr.ISOLatin1AccentFilterFactory"/>-->
    <filter class="solr.WordDelimiterFilterFactory"
            preserveOriginal="1"
            generateWordParts="1" generateNumberParts="1"
            catenateWords="0" catenateNumbers="0" catenateAll="0"
            stemEnglishPossessive="1"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwordsSpell.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Shingle-based spell-checking type (used by "spellingShingle"): standard
     tokens are pattern-cleaned, lowercased, stop-word filtered and then
     combined into two-word shingles; single words are not emitted
     (outputUnigrams="false").
     NOTE(review): the index analyzer reads stopwordsSpell.txt while the query
     analyzer reads stopwords.txt; confirm that the difference is deliberate. -->
<fieldType name="textSpellShingle"
           class="solr.TextField"
           positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="\[elektronisk resurs\]|/|:|-" replacement=""/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwordsSpell.txt"/>
    <filter class="solr.ShingleFilterFactory" maxShingleSize="2" outputUnigrams="false"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="\[elektronisk resurs\]|/|:|-" replacement=""/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.ShingleFilterFactory" maxShingleSize="2" outputUnigrams="false"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Sortable integer type; used by the year_facet and pages fields.
     NOTE(review): solr.SortableIntField is presumably a legacy class in newer
     Solr releases; verify availability before upgrading Solr. -->
<fieldType name="sint"
           class="solr.SortableIntField"
           sortMissingLast="true"
           omitNorms="true"/>
<!-- Normalised ISBN/ISSN numbers: keep only the first run of non-whitespace
     characters, lowercase it, remove everything except the digits 0-9 and
     "x", and discard values that end up empty (length filter, 1 to 100). -->
<fieldType name="isn"
           class="solr.TextField"
           positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.PatternTokenizerFactory" pattern="^(\S*)\s*.*$" group="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="[^0-9x]" replacement="" replace="all"/>
    <filter class="solr.LengthFilterFactory" min="1" max="100"/>
  </analyzer>
</fieldType>
</types>
</schema>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment