| /schema/types/comment() | |
| SourceOptions | |
| XPath1/ParseXSL 1.0 | |
| <!-- attribute "name" is the name of this schema and is only used for display purposes. | |
| Applications should change this to reflect the nature of the search collection. | |
| version="x.y" is Solr's version number for the schema syntax and semantics. It should | |
| not normally be changed by applications. | |
| 1.3: removed optional field compress feature | |
| 1.4: default auto-phrase (QueryParser feature) to off | |
| 1.5: omitNorms defaults to true for primitive field types (int, float, boolean, string...) | |
| --> | |
| <schema name="plos" version="1.5"> | |
| <types> | |
| <!-- Untokenized string type. omitNorms defaults to "true" for primitive field types (schema version 1.5, see the header comment). --> | |
| <fieldType name="string" class="solr.StrField" sortMissingLast="true"/> | |
| <!-- | |
| Default numeric and date field types, declared with precisionStep="0". | |
| The only faster-range-query variant declared in this schema is "tdate" below; no tint/tfloat/tlong/tdouble types are defined here. | |
| --> | |
| <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/> | |
| <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/> | |
| <!-- A Trie based date field (precisionStep="6") for faster date range queries and date faceting. --> | |
| <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/> | |
| <!-- General-purpose stemmed English text field. | |
| Tokens are split on whitespace, lowercased, ASCII-folded, dropped when shorter than 3 or longer than 100 characters, | |
| filtered against stopwords.txt, split on non-alphanumeric characters by WordDelimiterFilter | |
| (splitOnCaseChange and splitOnNumerics are off and nothing is catenated), trimmed and Porter-stemmed. | |
| Words listed in protwords.txt are marked as keywords so that the stemmer leaves them unchanged. | |
| Synonym expansion is currently disabled. | |
| Any change to the index analyzer only affects newly indexed documents; reindex to apply it to existing content. | |
| --> | |
| <fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> | |
| <analyzer type="index"> | |
| <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
| <filter class="solr.LowerCaseFilterFactory"/> | |
| <filter class="solr.ASCIIFoldingFilterFactory"/> | |
| <filter class="solr.LengthFilterFactory" min="3" max="100"/> | |
| <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/> | |
| <filter class="solr.WordDelimiterFilterFactory" stemEnglishPossessive="1" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnNumerics="0" splitOnCaseChange="0"/> | |
| <!-- Disabling position filter. Note JO: 16743 | |
| filter class="solr.PositionFilterFactory" /--> | |
| <filter class="solr.TrimFilterFactory"/> | |
| <!-- EnglishPorterFilterFactory is deprecated and replaced by PorterStemFilterFactory. | |
| PorterStemFilterFactory takes no "protected" argument, so protected words are flagged by KeywordMarkerFilterFactory, which must run before the stemmer. --> | |
| <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |
| <filter class="solr.PorterStemFilterFactory"/> | |
| </analyzer> | |
| <analyzer type="query"> | |
| <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
| <filter class="solr.LowerCaseFilterFactory"/> | |
| <filter class="solr.ASCIIFoldingFilterFactory"/> | |
| <filter class="solr.LengthFilterFactory" min="3" max="100"/> | |
| <!-- Disabling synonym filter, we may want to turn this back on eventually --> | |
| <!--filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/--> | |
| <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/> | |
| <filter class="solr.WordDelimiterFilterFactory" stemEnglishPossessive="1" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnNumerics="0" splitOnCaseChange="0"/> | |
| <!-- Disabling position filter. Note JO: 16743 | |
| filter class="solr.PositionFilterFactory" /--> | |
| <filter class="solr.TrimFilterFactory"/> | |
| <!-- Same protected-word handling as the index analyzer: mark keywords first, then stem. --> | |
| <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | |
| <filter class="solr.PorterStemFilterFactory"/> | |
| </analyzer> | |
| </fieldType> | |
| <!-- Lightly processed text (no ASCII folding, stop words or stemming) used for term and spelling suggestions. | |
| One analyzer with no type attribute is applied at both index time and query time. --> | |
| <fieldType name="text_noprocess" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> | |
| <analyzer> | |
| <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
| <filter class="solr.LowerCaseFilterFactory"/> | |
| <filter class="solr.LengthFilterFactory" min="3" max="100"/> | |
| <filter class="solr.WordDelimiterFilterFactory" stemEnglishPossessive="1" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnNumerics="1" splitOnCaseChange="0"/> | |
| <filter class="solr.TrimFilterFactory"/> | |
| </analyzer> | |
| </fieldType> | |
| <!-- Whole-value, case-insensitive matching: the entire input is kept as a single token, lowercased and trimmed. | |
| One analyzer with no type attribute is applied at both index time and query time. --> | |
| <fieldType name="keyword_lowercase" class="solr.TextField" positionIncrementGap="100"> | |
| <analyzer> | |
| <tokenizer class="solr.KeywordTokenizerFactory"/> | |
| <filter class="solr.LowerCaseFilterFactory"/> | |
| <filter class="solr.TrimFilterFactory"/> | |
| </analyzer> | |
| </fieldType> | |
| <!-- Unstemmed text type for proper-name search. | |
| Whitespace tokens are lowercased and ASCII-folded, split by WordDelimiterFilter while keeping the original token (preserveOriginal="1"), | |
| then filtered against author_stopwords.txt. | |
| One analyzer with no type attribute is applied at both index time and query time. --> | |
| <fieldType name="text_name" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> | |
| <analyzer> | |
| <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
| <filter class="solr.LowerCaseFilterFactory"/> | |
| <filter class="solr.ASCIIFoldingFilterFactory"/> | |
| <filter class="solr.WordDelimiterFilterFactory" preserveOriginal="1" splitOnCaseChange="0"/> | |
| <filter class="solr.StopFilterFactory" ignoreCase="true" words="author_stopwords.txt" enablePositionIncrements="true"/> | |
| </analyzer> | |
| </fieldType> | |
| <!-- A general unstemmed text field that indexes tokens normally and also | |
| reversed (via ReversedWildcardFilterFactory), to enable more efficient | |
| leading wildcard queries. --> | |
| <!-- "autoGeneratePhraseQueries" defaults to "false" as of Solr 3.x; | |
| in earlier versions it defaulted to "true". | |
| It is therefore set explicitly here, which is required for this WhitespaceTokenizerFactory-based type to work properly. --> | |
| <fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> | |
| <analyzer type="index"> | |
| <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
| <filter class="solr.LowerCaseFilterFactory"/> | |
| <filter class="solr.ASCIIFoldingFilterFactory"/> | |
| <filter class="solr.LengthFilterFactory" min="3" max="25"/> | |
| <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/> | |
| <!-- Index side also catenates split parts (catenateWords/catenateNumbers="1"); the query side below does not. --> | |
| <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/> | |
| <!-- Reversed tokens are added at index time only; withOriginal="true" keeps the unreversed token as well. --> | |
| <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/> | |
| </analyzer> | |
| <analyzer type="query"> | |
| <tokenizer class="solr.WhitespaceTokenizerFactory"/> | |
| <filter class="solr.LowerCaseFilterFactory"/> | |
| <filter class="solr.ASCIIFoldingFilterFactory"/> | |
| <filter class="solr.LengthFilterFactory" min="3" max="25"/> | |
| <!-- Disabling synonym filter, we'll want to turn this back on --> | |
| <!--filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/--> | |
| <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/> | |
| <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/> | |
| </analyzer> | |
| </fieldType> | |
| <!-- | |
| Splits text on the '/' character and lowercases each piece. Subjects come in with a defined hierarchy | |
| (slash-separated); tokenizing on the separator removes that hierarchy so each level is matched on its own. | |
| The pattern "//*" is a regular expression matching one or more consecutive slashes. | |
| Pieces shorter than 2 or longer than 5000 characters are dropped. | |
| --> | |
| <fieldType name="text_splitOnSlashLowercase" class="solr.TextField" positionIncrementGap="100"> | |
| <analyzer type="index"> | |
| <tokenizer class="solr.PatternTokenizerFactory" pattern="//*"/> | |
| <filter class="solr.TrimFilterFactory"/> | |
| <filter class="solr.LowerCaseFilterFactory"/> | |
| <filter class="solr.LengthFilterFactory" min="2" max="5000"/> | |
| </analyzer> | |
| <analyzer type="query"> | |
| <tokenizer class="solr.PatternTokenizerFactory" pattern="//*"/> | |
| <filter class="solr.TrimFilterFactory"/> | |
| <filter class="solr.LowerCaseFilterFactory"/> | |
| <filter class="solr.LengthFilterFactory" min="2" max="5000"/> | |
| <!-- Query side only: synonyms from synonyms.txt with expand="false"; the synonym file is parsed with KeywordTokenizerFactory. --> | |
| <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false" tokenizerFactory="solr.KeywordTokenizerFactory"/> | |
| </analyzer> | |
| </fieldType> | |
| <!-- Case-preserving variant of the slash splitter: subject paths are tokenized on '/' so the hierarchy is removed, | |
| each piece is trimmed, and pieces shorter than 2 or longer than 5000 characters are dropped. | |
| One analyzer with no type attribute is applied at both index time and query time. --> | |
| <fieldType name="text_splitOnSlash" class="solr.TextField" positionIncrementGap="100"> | |
| <analyzer> | |
| <tokenizer class="solr.PatternTokenizerFactory" pattern="//*"/> | |
| <filter class="solr.TrimFilterFactory"/> | |
| <filter class="solr.LengthFilterFactory" min="2" max="5000"/> | |
| </analyzer> | |
| </fieldType> | |
| <!-- Fields of this type are by default neither stored nor indexed, | |
| so any data added to them is ignored outright. | |
| The element is spelled "fieldType" to match the other type declarations in this file. --> | |
| <fieldType name="ignored" class="solr.StrField" stored="false" indexed="false" multiValued="true"/> | |
| <!-- N-gram type for substring matching: standard tokenization, lowercasing and ASCII folding, | |
| then each token is expanded into its n-grams of 3 to 20 characters. | |
| One analyzer with no type attribute is applied at both index time and query time. | |
| Type-level defaults are stored="false", indexed="true" and multiValued="true"; individual fields may override them. --> | |
| <fieldType name="trigram" class="solr.TextField" stored="false" indexed="true" multiValued="true" positionIncrementGap="100"> | |
| <analyzer> | |
| <tokenizer class="solr.StandardTokenizerFactory"/> | |
| <filter class="solr.LowerCaseFilterFactory"/> | |
| <filter class="solr.ASCIIFoldingFilterFactory"/> | |
| <filter class="solr.NGramFilterFactory" minGramSize="3" maxGramSize="20"/> | |
| </analyzer> | |
| </fieldType> | |
| <!-- Prefix-matching type for subject lookups: the whole value is kept as one token, lowercased, | |
| and expanded into leading-edge n-grams of up to 30 characters. | |
| minGramSize is not set, so the factory default applies. | |
| The single analyzer is applied at both index time and query time. --> | |
| <fieldType name="text_subject_search" class="solr.TextField"> | |
| <analyzer> | |
| <tokenizer class="solr.KeywordTokenizerFactory"/> | |
| <filter class="solr.LowerCaseFilterFactory"/> | |
| <filter class="solr.EdgeNGramFilterFactory" maxGramSize="30"/> | |
| </analyzer> | |
| </fieldType> | |
| </types> | |
| <fields> | |
| <!-- Identifiers. "id" is the required unique key. "doi" uses the n-gram type and overrides its stored="false" default. --> | |
| <field name="id" type="string" indexed="true" stored="true" required="true"/> | |
| <field name="doi" type="trigram" indexed="true" stored="true"/> | |
| <field name="pmid" type="string" indexed="true" stored="true"/> | |
| <field name="pmcid" type="string" indexed="true" stored="true"/> | |
| <field name="eissn" type="string" indexed="true" stored="true" multiValued="false"/> | |
| <field name="pissn" type="string" indexed="true" stored="true" multiValued="false"/> | |
| <!-- Titles: searchable text, a stored-only display copy, and an n-gram copy filled by copyField from "title". --> | |
| <field name="title" type="text" indexed="true" stored="true" multiValued="false"/> | |
| <field name="title_display" type="string" indexed="false" stored="true" multiValued="false"/> | |
| <field name="title_ngram" type="trigram" indexed="true" stored="false" multiValued="false"/> | |
| <field name="alternate_title" type="text" indexed="true" stored="true" multiValued="true"/> | |
| <!-- Subjects: "subject" is split on '/' and lowercased; "subject_facet" is split on '/' with case preserved. --> | |
| <field name="subject" type="text_splitOnSlashLowercase" indexed="true" stored="true" multiValued="true"/> | |
| <field name="subject_facet" type="text_splitOnSlash" indexed="true" stored="true" multiValued="true"/> | |
| <!-- Also store the subject to preserve the hierarchy if needed later --> | |
| <field name="subject_hierarchy" type="string" indexed="true" stored="true" multiValued="true"/> | |
| <field name="subject_level_1" type="string" indexed="true" stored="true" multiValued="true"/> | |
| <!-- Issue and pagination details --> | |
| <field name="pagecount" type="int" indexed="true" stored="true"/> | |
| <field name="volume" type="int" indexed="true" stored="true"/> | |
| <field name="issue" type="int" indexed="true" stored="true"/> | |
| <field name="elocation_id" type="keyword_lowercase" indexed="true" stored="true"/> | |
| <!-- Publisher and journal --> | |
| <field name="publisher" type="text_name" indexed="true" stored="true"/> | |
| <field name="journal" type="keyword_lowercase" indexed="true" stored="true"/> | |
| <field name="journal_name" type="string" indexed="true" stored="true"/> | |
| <field name="journal_key" type="string" indexed="true" stored="true"/> | |
| <field name="journal_eissn" type="string" indexed="true" stored="true"/> | |
| <field name="journal_id_pmc" type="string" indexed="true" stored="true"/> | |
| <field name="journal_id_publisher" type="string" indexed="true" stored="true"/> | |
| <field name="journal_id_nlm_ta" type="string" indexed="true" stored="true"/> | |
| <!-- Dates use the "tdate" type --> | |
| <field name="publication_date" type="tdate" indexed="true" stored="true"/> | |
| <field name="received_date" type="tdate" indexed="true" stored="true"/> | |
| <field name="accepted_date" type="tdate" indexed="true" stored="true"/> | |
| <!-- Abstract: searchable text, an n-gram copy filled by copyField from "abstract", and a stored-only display field. --> | |
| <field name="abstract" type="text" indexed="true" stored="true" multiValued="true"/> | |
| <field name="abstract_ngram" type="trigram" indexed="true" stored="false" multiValued="true"/> | |
| <field name="abstract_primary_display" type="string" indexed="false" stored="true" multiValued="true"/> | |
| <!-- Authors and editors: name-search field, untokenized *_facet copy (filled by copyField), and stored-only display fields. --> | |
| <field name="author" type="text_name" indexed="true" stored="true" multiValued="true"/> | |
| <field name="author_facet" type="string" indexed="true" stored="true" multiValued="true"/> | |
| <field name="author_display" type="string" indexed="false" stored="true" multiValued="true"/> | |
| <field name="editor" type="text_name" indexed="true" stored="true" multiValued="true"/> | |
| <field name="editor_facet" type="string" indexed="true" stored="true" multiValued="true"/> | |
| <field name="editor_display" type="string" indexed="false" stored="true" multiValued="true"/> | |
| <field name="author_without_collab_display" type="string" indexed="false" stored="true" multiValued="true"/> | |
| <field name="author_collab_only_display" type="string" indexed="false" stored="true" multiValued="true"/> | |
| <!-- The following two fields are copied into the "affiliate" and "affiliate_facet" fields by copyField. --> | |
| <field name="author_affiliate" type="text" indexed="true" stored="true" multiValued="true"/> | |
| <field name="editor_affiliate" type="text" indexed="true" stored="true" multiValued="true"/> | |
| <field name="affiliate" type="text" indexed="true" stored="true" multiValued="true"/> | |
| <field name="affiliate_facet" type="string" indexed="true" stored="true" multiValued="true"/> | |
| <!-- Single-valued free-text statements --> | |
| <field name="author_notes" type="text" indexed="true" stored="true"/> | |
| <field name="competing_interest" type="text" indexed="true" stored="true"/> | |
| <field name="data_availability" type="text" indexed="true" stored="true"/> | |
| <field name="financial_disclosure" type="text" indexed="true" stored="true"/> | |
| <!-- Article type: case-insensitive whole-value match plus an untokenized facet copy filled by copyField. --> | |
| <field name="article_type" type="keyword_lowercase" indexed="true" stored="true" multiValued="false"/> | |
| <field name="article_type_facet" type="string" indexed="true" stored="true" multiValued="false"/> | |
| <field name="reference" type="text" indexed="true" stored="true" multiValued="true"/> | |
| <field name="copyright" type="text" indexed="true" stored="true"/> | |
| <!-- Figures, tables and the striking image; "striking_image" is stored only and not searchable. --> | |
| <field name="figure_table_doi" type="string" indexed="true" stored="true" multiValued="true"/> | |
| <field name="figure_table_caption" type="text" indexed="true" stored="true" multiValued="true"/> | |
| <field name="striking_image" type="string" indexed="false" stored="true" multiValued="false"/> | |
| <!-- Open question: should ngram and rev fields be added for all of these types as well? | |
| Currently only "body" and "everything" have such copies. --> | |
| <!-- Document parts --> | |
| <field name="introduction" type="text" indexed="true" stored="true" multiValued="true"/> | |
| <field name="results_and_discussion" type="text" indexed="true" stored="true" multiValued="true"/> | |
| <field name="materials_and_methods" type="text" indexed="true" stored="true" multiValued="true"/> | |
| <field name="supporting_information" type="text" indexed="true" stored="true" multiValued="true"/> | |
| <field name="conclusions" type="text" indexed="true" stored="true" multiValued="true"/> | |
| <!-- Single-valued body text with term vectors enabled --> | |
| <field name="body" type="text" indexed="true" stored="true" multiValued="false" termVectors="true"/> | |
| <!-- catchall field, containing all other searchable text fields. | |
| NOTE(review): no copyField in this schema targets "everything", so it is presumably populated by the indexing client; confirm. --> | |
| <field name="everything" type="text" indexed="true" stored="true" multiValued="false" termVectors="true"/> | |
| <!-- Variants of "everything", each filled by copyField: reversed-wildcard, n-gram and lightly processed. --> | |
| <field name="everything_rev" type="text_rev" indexed="true" stored="false" multiValued="false"/> | |
| <field name="everything_ngram" type="trigram" indexed="true" stored="false" multiValued="false"/> | |
| <field name="everything_noprocess" type="text_noprocess" indexed="true" stored="true" multiValued="false"/> | |
| <!-- Variants of "body", each filled by copyField. "body_rev" indexes tokens both normally and in reverse for efficient leading wildcard queries. --> | |
| <field name="body_rev" type="text_rev" indexed="true" stored="false" multiValued="false"/> | |
| <field name="body_ngram" type="trigram" indexed="true" stored="false" multiValued="false"/> | |
| <!-- For tracking when the document was indexed; defaults to NOW when no value is supplied. --> | |
| <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/> | |
| <!-- For tracking ALM data. Every counter is a single-valued int that defaults to 0. --> | |
| <field name="counter_total_all" type="int" indexed="true" stored="true" default="0" multiValued="false"/> | |
| <field name="counter_total_month" type="int" indexed="true" stored="true" default="0" multiValued="false"/> | |
| <field name="alm_scopusCiteCount" type="int" indexed="true" stored="true" default="0" multiValued="false"/> | |
| <field name="alm_citeulikeCount" type="int" indexed="true" stored="true" default="0" multiValued="false"/> | |
| <field name="alm_connoteaCount" type="int" indexed="true" stored="true" default="0" multiValued="false"/> | |
| <field name="alm_mendeleyCount" type="int" indexed="true" stored="true" default="0" multiValued="false"/> | |
| <field name="alm_twitterCount" type="int" indexed="true" stored="true" default="0" multiValued="false"/> | |
| <field name="alm_facebookCount" type="int" indexed="true" stored="true" default="0" multiValued="false"/> | |
| <field name="alm_pmc_usage_total_all" type="int" indexed="true" stored="true" default="0" multiValued="false"/> | |
| <!-- NOTE(review): this is the only ALM counter with stored="false"; presumably intentional, confirm. --> | |
| <field name="alm_webOfScienceCount" type="int" indexed="true" stored="false" default="0" multiValued="false"/> | |
| <field name="alm_crossRefCount" type="int" indexed="true" stored="true" default="0" multiValued="false"/> | |
| <!-- Fields for partial documents. "doc_partial_type" is indexed but not stored; "doc_partial_body" keeps term vectors. --> | |
| <field name="doc_partial_parent_id" type="string" indexed="true" stored="true" multiValued="false"/> | |
| <field name="doc_type" type="string" indexed="true" stored="true" multiValued="false"/> | |
| <field name="doc_partial_type" type="string" indexed="true" stored="false" multiValued="false"/> | |
| <field name="doc_partial_body" type="text" indexed="true" stored="true" multiValued="true" termVectors="true"/> | |
| <!-- Fields for academic editor document --> | |
| <!-- NOTE(review): unlike author/author_facet and editor/editor_facet, here the base field is the untokenized "string" type | |
| and the *_facet field is the tokenized "text_name" type; confirm this is intentional. --> | |
| <field name="ae_name" type="string" indexed="true" stored="true" multiValued="false"/> | |
| <field name="ae_name_facet" type="text_name" indexed="true" stored="true" multiValued="false"/> | |
| <field name="ae_last_name" type="keyword_lowercase" indexed="true" stored="true" multiValued="false"/> | |
| <!-- Stored only, not searchable --> | |
| <field name="ae_institute" type="string" indexed="false" stored="true" multiValued="true"/> | |
| <field name="ae_country" type="string" indexed="false" stored="true" multiValued="true"/> | |
| <!-- Prefix-searchable subject plus a lowercased whole-value facet copy filled by copyField. --> | |
| <field name="ae_subject" type="text_subject_search" indexed="true" stored="true" multiValued="true"/> | |
| <field name="ae_subject_facet" type="keyword_lowercase" indexed="true" stored="true" multiValued="true"/> | |
| <field name="trial_registration" type="text" indexed="true" stored="true" multiValued="true"/> | |
| <!-- Amendment fields. "correction" and "expression_of_concern" are multi-valued; "retraction" is single-valued. --> | |
| <field name="correction" type="string" indexed="true" stored="true" multiValued="true"/> | |
| <field name="expression_of_concern" type="string" indexed="true" stored="true" multiValued="true"/> | |
| <field name="retraction" type="string" indexed="true" stored="true" multiValued="false"/> | |
| <!-- Publication stage and revision date fields --> | |
| <field name="publication_stage" type="string" indexed="true" stored="true" multiValued="false"/> | |
| <field name="revision_date" type="tdate" indexed="true" stored="true"/> | |
| </fields> | |
| <!-- Field used to determine and enforce document uniqueness. | |
| Unless this field is marked with required="false", it will be a required field. | |
| Here "id" is also declared with required="true" in the fields section. | |
| --> | |
| <uniqueKey>id</uniqueKey> | |
| <!-- copyField commands copy one field to another at the time a document | |
| is added to the index. They are used either to index the same field differently, | |
| or to add multiple fields to the same field for easier/faster searching. --> | |
| <copyField source="abstract" dest="abstract_ngram"/> | |
| <!-- Author and editor affiliations are merged into the shared "affiliate" search field and "affiliate_facet". --> | |
| <copyField source="author_affiliate" dest="affiliate"/> | |
| <copyField source="author_affiliate" dest="affiliate_facet"/> | |
| <copyField source="editor_affiliate" dest="affiliate"/> | |
| <copyField source="editor_affiliate" dest="affiliate_facet"/> | |
| <!-- "subject" feeds both the untokenized hierarchy field and the slash-split facet field. --> | |
| <copyField source="subject" dest="subject_hierarchy"/> | |
| <copyField source="subject" dest="subject_facet"/> | |
| <!-- Facet copies --> | |
| <copyField source="author" dest="author_facet"/> | |
| <copyField source="editor" dest="editor_facet"/> | |
| <copyField source="article_type" dest="article_type_facet"/> | |
| <!-- Reversed-wildcard, n-gram and lightly processed variants of the large text fields --> | |
| <copyField source="body" dest="body_rev"/> | |
| <copyField source="body" dest="body_ngram"/> | |
| <copyField source="title" dest="title_ngram"/> | |
| <copyField source="everything" dest="everything_rev"/> | |
| <copyField source="everything" dest="everything_ngram"/> | |
| <copyField source="everything" dest="everything_noprocess"/> | |
| <!-- Academic editor facet copies --> | |
| <copyField source="ae_subject" dest="ae_subject_facet"/> | |
| <copyField source="ae_name" dest="ae_name_facet"/> | |
| </schema> |