Created
January 25, 2023 19:36
-
-
Save scherztc/d9d5cf56ed213b5d80c0b61890c68e8e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8" ?> | |
<!-- | |
Licensed to the Apache Software Foundation (ASF) under one or more | |
contributor license agreements. See the NOTICE file distributed with | |
this work for additional information regarding copyright ownership. | |
The ASF licenses this file to You under the Apache License, Version 2.0 | |
(the "License"); you may not use this file except in compliance with | |
the License. You may obtain a copy of the License at | |
http://www.apache.org/licenses/LICENSE-2.0 | |
Unless required by applicable law or agreed to in writing, software | |
distributed under the License is distributed on an "AS IS" BASIS, | |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
See the License for the specific language governing permissions and | |
limitations under the License. | |
--> | |
<!-- | |
This is the Solr schema file. This file should be named "schema.xml" and | |
should be in the conf directory under the solr home | |
(i.e. ./solr/conf/schema.xml by default) | |
or located where the classloader for the Solr webapp can find it. | |
This example schema is the recommended starting point for users. | |
It should be kept correct and concise, usable out-of-the-box. | |
For more information, on how to customize this file, please see | |
http://wiki.apache.org/solr/SchemaXml | |
PERFORMANCE NOTE: this schema includes many optional features and should not | |
be used for benchmarking. To improve performance one could | |
- set stored="false" for all fields possible (esp large fields) when you | |
only need to search on the field but don't need to return the original | |
value. | |
- set indexed="false" if you don't need to search on the field, but only | |
return the field as a result of searching on other indexed fields. | |
- remove all unneeded copyField statements | |
- for best index size and searching performance, set "indexed" to false
for all general text fields, use copyField to copy them to the | |
catchall "text" field, and use that for searching. | |
- For maximum indexing performance, use the StreamingUpdateSolrServer | |
java client. | |
- Remember to run the JVM in server mode, and use a higher logging level | |
that avoids logging every request | |
--> | |
<schema name="Hydra Demo Index" version="1.5"> | |
<!-- attribute "name" is the name of this schema and is only used for display purposes. | |
Applications should change this to reflect the nature of the search collection. | |
version="1.5" is Solr's version number for the schema syntax and semantics. It should
not normally be changed by applications.
1.0: multiValued attribute did not exist, all fields are multiValued by nature
1.1: multiValued attribute introduced, false by default
1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
1.3: removed optional field compress feature
1.4: default auto-phrase (QueryParser feature) to off
1.5: omitNorms defaults to true for primitive field types (int, float, boolean, string...)
--> | |
<types> | |
<!-- Basic types: string (solr.StrField) and boolean (solr.BoolField), both with sortMissingLast enabled. -->
<fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
<!-- "rand" is backed by solr.RandomSortField and omits norms. -->
<fieldType name="rand" class="solr.RandomSortField" omitNorms="true"/>
<!-- Default numeric field types (precisionStep="0"). -->
<fieldType name="int"    class="solr.TrieIntField"    precisionStep="0" positionIncrementGap="0"/>
<fieldType name="float"  class="solr.TrieFloatField"  precisionStep="0" positionIncrementGap="0"/>
<fieldType name="long"   class="solr.TrieLongField"   precisionStep="0" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
<!-- The same Trie classes with precisionStep="8", provided for faster range queries. -->
<fieldType name="tint"    class="solr.TrieIntField"    precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tfloat"  class="solr.TrieFloatField"  precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tlong"   class="solr.TrieLongField"   precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
<!-- Date values take the form 1995-12-31T23:59:59Z; optional fractional
     seconds are allowed, e.g. 1995-12-31T23:59:59.999Z. -->
<fieldType name="date"  class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
<!-- Trie-based date type (precisionStep="6") for faster date range queries and date faceting. -->
<fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
<!-- Point type whose coordinates are indexed as separate sub-fields.
     * If subFieldType is defined, it references a type and a dynamic field
       matching *___<typename> is created; e.g. subFieldType="double" indexes
       the coordinates in myloc_0___double,myloc_1___double.
     * Alternately, if subFieldSuffix is defined, it names the sub-fields;
       e.g. subFieldSuffix="_d" indexes the coordinates in myloc_0_d,myloc_1_d.
     The sub-fields are an implementation detail of the fieldType; end users
     normally should not need to know about them. -->
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
<!-- Specialized type for geospatial search. If indexed, a field of this type must not be multiValued. -->
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
<!-- Alternative geospatial type new to Solr 4; it supports multiValued fields and polygon shapes.
     More about this and the other spatial fields new to Solr 4:
     http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4 -->
<fieldType name="location_rpt"
           class="solr.SpatialRecursivePrefixTreeFieldType"
           geo="true"
           distErrPct="0.025"
           maxDistErr="0.000009"
           distanceUnits="degrees"/>
<!-- General text: ICU tokenizer, then ICU folding, then trim. Norms are kept (omitNorms="false").
     NOTE(review): unlike text_ws and text_en, no positionIncrementGap is set here even though
     this type backs multiValued dynamic fields; confirm that is intended. -->
<fieldType name="text" class="solr.TextField" omitNorms="false">
  <analyzer>
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.ICUFoldingFilterFactory"/>
    <filter class="solr.TrimFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Text split on whitespace only, for exact matching of words. -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.TrimFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Single-token analyzed text, for sorting. Punctuation is significant.
     The whole value is kept as one token (KeywordTokenizer), ICU-folded, then trimmed.
     The element name is spelled "fieldType" to match every other type in this schema;
     XML names are case-sensitive. -->
<fieldType name="alphaSort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
  <analyzer>
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <filter class="solr.ICUFoldingFilterFactory"/>
    <filter class="solr.TrimFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Text with defaults appropriate for English: ICU tokenizer and folding,
     possessive removal, minimal stemming, then trim. -->
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.ICUFoldingFilterFactory"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory,
         which is left disabled below. -->
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    <!--
    <filter class="solr.PorterStemFilterFactory"/>
    -->
    <filter class="solr.TrimFilterFactory"/>
  </analyzer>
</fieldType>
<!-- A query for a path matches documents at that path or in descendent paths:
     the indexed value is split hierarchically on "/", the query is kept as one token. -->
<fieldType name="descendent_path" class="solr.TextField">
  <analyzer type="index">
    <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.KeywordTokenizerFactory"/>
  </analyzer>
</fieldType>
<!-- A query for a path matches documents at that path or in ancestor paths:
     the indexed value is kept as one token, the query is split hierarchically on "/". -->
<fieldType name="ancestor_path" class="solr.TextField">
  <analyzer type="index">
    <tokenizer class="solr.KeywordTokenizerFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/>
  </analyzer>
</fieldType>
<!-- Type behind the "*suggest" dynamic field: the whole value is one token,
     lower-cased, with duplicate tokens removed. -->
<fieldType name="textSuggest" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
</types> | |
<fields> | |
<!-- _version_ and the update log are required for SolrCloud. If you remove this field
     you must _also_ disable the update log in solrconfig.xml, or Solr won't start. -->
<field name="_version_" type="long" indexed="true" stored="true"/>
<!-- Required single-valued document identifier. -->
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
<!-- Defaults to NOW when no value is supplied. -->
<field name="timestamp" type="date" indexed="true" stored="true" multiValued="false" default="NOW"/>
<field name="lat" type="tdouble" indexed="true" stored="true" multiValued="false"/>
<field name="lng" type="tdouble" indexed="true" stored="true" multiValued="false"/>
<!-- NOTE: not all possible Solr field types are represented in the dynamic fields. -->
<!-- text (_t...)
     Suffix letters after the type prefix: s = stored, i = indexed, m = multiValued,
     v = term vectors with positions and offsets. -->
<dynamicField name="*_ti" type="text" indexed="true" stored="false" multiValued="false"/>
<dynamicField name="*_tim" type="text" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_ts" type="text" indexed="false" stored="true" multiValued="false"/>
<dynamicField name="*_tsm" type="text" indexed="false" stored="true" multiValued="true"/>
<dynamicField name="*_tsi" type="text" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_tsim" type="text" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_tiv" type="text" indexed="true" stored="false" multiValued="false"
              termVectors="true" termPositions="true" termOffsets="true"/>
<dynamicField name="*_timv" type="text" indexed="true" stored="false" multiValued="true"
              termVectors="true" termPositions="true" termOffsets="true"/>
<dynamicField name="*_tsiv" type="text" indexed="true" stored="true" multiValued="false"
              termVectors="true" termPositions="true" termOffsets="true"/>
<dynamicField name="*_tsimv" type="text" indexed="true" stored="true" multiValued="true"
              termVectors="true" termPositions="true" termOffsets="true"/>
<!-- English text (_te...)
     Suffix letters after the type prefix: s = stored, i = indexed, m = multiValued,
     v = term vectors with positions and offsets. -->
<dynamicField name="*_tei" type="text_en" indexed="true" stored="false" multiValued="false"/>
<dynamicField name="*_teim" type="text_en" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_tes" type="text_en" indexed="false" stored="true" multiValued="false"/>
<dynamicField name="*_tesm" type="text_en" indexed="false" stored="true" multiValued="true"/>
<dynamicField name="*_tesi" type="text_en" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_tesim" type="text_en" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_teiv" type="text_en" indexed="true" stored="false" multiValued="false"
              termVectors="true" termPositions="true" termOffsets="true"/>
<dynamicField name="*_teimv" type="text_en" indexed="true" stored="false" multiValued="true"
              termVectors="true" termPositions="true" termOffsets="true"/>
<dynamicField name="*_tesiv" type="text_en" indexed="true" stored="true" multiValued="false"
              termVectors="true" termPositions="true" termOffsets="true"/>
<dynamicField name="*_tesimv" type="text_en" indexed="true" stored="true" multiValued="true"
              termVectors="true" termPositions="true" termOffsets="true"/>
<!-- string (_s...): s = stored, i = indexed, m = multiValued -->
<dynamicField name="*_si" type="string" indexed="true" stored="false" multiValued="false"/>
<dynamicField name="*_sim" type="string" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_ss" type="string" indexed="false" stored="true" multiValued="false"/>
<dynamicField name="*_ssm" type="string" indexed="false" stored="true" multiValued="true"/>
<dynamicField name="*_ssi" type="string" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_ssim" type="string" indexed="true" stored="true" multiValued="true"/>
<!-- Sort-only variant using the alphaSort type: indexed, not stored, single-valued. -->
<dynamicField name="*_ssort" type="alphaSort" indexed="true" stored="false" multiValued="false"/>
<!-- integer (_i...): s = stored, i = indexed, m = multiValued -->
<dynamicField name="*_ii" type="int" indexed="true" stored="false" multiValued="false"/>
<dynamicField name="*_iim" type="int" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_is" type="int" indexed="false" stored="true" multiValued="false"/>
<dynamicField name="*_ism" type="int" indexed="false" stored="true" multiValued="true"/>
<dynamicField name="*_isi" type="int" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_isim" type="int" indexed="true" stored="true" multiValued="true"/>
<!-- trie integer (_it...), for faster range queries -->
<dynamicField name="*_iti" type="tint" indexed="true" stored="false" multiValued="false"/>
<dynamicField name="*_itim" type="tint" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_its" type="tint" indexed="false" stored="true" multiValued="false"/>
<dynamicField name="*_itsm" type="tint" indexed="false" stored="true" multiValued="true"/>
<dynamicField name="*_itsi" type="tint" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_itsim" type="tint" indexed="true" stored="true" multiValued="true"/>
<!-- date (_dt...): s = stored, i = indexed, m = multiValued
     Values take the form 1995-12-31T23:59:59Z; optional fractional seconds
     are allowed, e.g. 1995-12-31T23:59:59.999Z. -->
<dynamicField name="*_dti" type="date" indexed="true" stored="false" multiValued="false"/>
<dynamicField name="*_dtim" type="date" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_dts" type="date" indexed="false" stored="true" multiValued="false"/>
<dynamicField name="*_dtsm" type="date" indexed="false" stored="true" multiValued="true"/>
<dynamicField name="*_dtsi" type="date" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_dtsim" type="date" indexed="true" stored="true" multiValued="true"/>
<!-- trie date (_dtt...), for faster range queries -->
<dynamicField name="*_dtti" type="tdate" indexed="true" stored="false" multiValued="false"/>
<dynamicField name="*_dttim" type="tdate" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_dtts" type="tdate" indexed="false" stored="true" multiValued="false"/>
<dynamicField name="*_dttsm" type="tdate" indexed="false" stored="true" multiValued="true"/>
<dynamicField name="*_dttsi" type="tdate" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_dttsim" type="tdate" indexed="true" stored="true" multiValued="true"/>
<!-- long (_l...): s = stored, i = indexed, m = multiValued -->
<dynamicField name="*_li" type="long" indexed="true" stored="false" multiValued="false"/>
<dynamicField name="*_lim" type="long" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_ls" type="long" indexed="false" stored="true" multiValued="false"/>
<dynamicField name="*_lsm" type="long" indexed="false" stored="true" multiValued="true"/>
<dynamicField name="*_lsi" type="long" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_lsim" type="long" indexed="true" stored="true" multiValued="true"/>
<!-- trie long (_lt...), for faster range queries -->
<dynamicField name="*_lti" type="tlong" indexed="true" stored="false" multiValued="false"/>
<dynamicField name="*_ltim" type="tlong" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_lts" type="tlong" indexed="false" stored="true" multiValued="false"/>
<dynamicField name="*_ltsm" type="tlong" indexed="false" stored="true" multiValued="true"/>
<dynamicField name="*_ltsi" type="tlong" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_ltsim" type="tlong" indexed="true" stored="true" multiValued="true"/>
<!-- double (_db...): s = stored, i = indexed, m = multiValued -->
<dynamicField name="*_dbi" type="double" indexed="true" stored="false" multiValued="false"/>
<dynamicField name="*_dbim" type="double" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_dbs" type="double" indexed="false" stored="true" multiValued="false"/>
<dynamicField name="*_dbsm" type="double" indexed="false" stored="true" multiValued="true"/>
<dynamicField name="*_dbsi" type="double" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_dbsim" type="double" indexed="true" stored="true" multiValued="true"/>
<!-- trie double (_dbt...), for faster range queries -->
<dynamicField name="*_dbti" type="tdouble" indexed="true" stored="false" multiValued="false"/>
<dynamicField name="*_dbtim" type="tdouble" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_dbts" type="tdouble" indexed="false" stored="true" multiValued="false"/>
<dynamicField name="*_dbtsm" type="tdouble" indexed="false" stored="true" multiValued="true"/>
<dynamicField name="*_dbtsi" type="tdouble" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_dbtsim" type="tdouble" indexed="true" stored="true" multiValued="true"/>
<!-- float (_f...): s = stored, i = indexed, m = multiValued -->
<dynamicField name="*_fi" type="float" indexed="true" stored="false" multiValued="false"/>
<dynamicField name="*_fim" type="float" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_fs" type="float" indexed="false" stored="true" multiValued="false"/>
<dynamicField name="*_fsm" type="float" indexed="false" stored="true" multiValued="true"/>
<dynamicField name="*_fsi" type="float" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_fsim" type="float" indexed="true" stored="true" multiValued="true"/>
<!-- trie float (_ft...), for faster range queries -->
<dynamicField name="*_fti" type="tfloat" indexed="true" stored="false" multiValued="false"/>
<dynamicField name="*_ftim" type="tfloat" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_fts" type="tfloat" indexed="false" stored="true" multiValued="false"/>
<dynamicField name="*_ftsm" type="tfloat" indexed="false" stored="true" multiValued="true"/>
<dynamicField name="*_ftsi" type="tfloat" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_ftsim" type="tfloat" indexed="true" stored="true" multiValued="true"/>
<!-- boolean (_b...): s = stored, i = indexed; only single-valued variants are defined -->
<dynamicField name="*_bi" type="boolean" indexed="true" stored="false" multiValued="false"/>
<dynamicField name="*_bs" type="boolean" indexed="false" stored="true" multiValued="false"/>
<dynamicField name="*_bsi" type="boolean" indexed="true" stored="true" multiValued="false"/>
<!-- Sub-field type used to index the lat and lon components of the "location"
     fieldType (solr.LatLonType, subFieldSuffix="_coordinate"). -->
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
<!-- location (_ll...): s = stored, i = indexed, m = multiValued
     The "location" type (solr.LatLonType) must not be multiValued when indexed, so the
     indexed multiValued variants (*_llim, *_llsim) use "location_rpt", the spatial type
     in this schema that supports multiValued fields. Existing data in those two fields
     must be reindexed after this change. The stored-only multiValued variant (*_llsm)
     is not indexed and keeps the "location" type. -->
<dynamicField name="*_lli" type="location" indexed="true" stored="false" multiValued="false"/>
<dynamicField name="*_llim" type="location_rpt" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_lls" type="location" indexed="false" stored="true" multiValued="false"/>
<dynamicField name="*_llsm" type="location" indexed="false" stored="true" multiValued="true"/>
<dynamicField name="*_llsi" type="location" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_llsim" type="location_rpt" indexed="true" stored="true" multiValued="true"/>
<!-- Any field whose name ends in "suggest": indexed with the textSuggest type, not stored, multiValued. -->
<dynamicField name="*suggest" type="textSuggest" indexed="true" stored="false" multiValued="true"/>
<!-- copyField source and dest fields must be defined explicitly or schemaBrowser doesn't work. -->
<field name="all_text_timv" type="text" indexed="true" stored="false" multiValued="true"
       termVectors="true" termPositions="true" termOffsets="true"/>
</fields> | |
<!-- The field that determines and enforces document uniqueness.
     It is a required field unless it is marked with required="false". -->
<uniqueKey>id</uniqueKey>
<!-- copyField commands copy one field to another at the time a document | |
is added to the index. It's used either to index the same field differently, | |
or to add multiple fields to the same field for easier/faster searching. --> | |
<!-- Copy Fields -->
<!-- Several source fields can be mapped to one destination field by using the
     dynamic field (wildcard) syntax in "source". copyField also supports a
     maxChars setting that limits how much is copied. -->
<!-- Disabled: copy English text into the catch-all all_text_timv field.
<copyField source="*_tesim" dest="all_text_timv" maxChars="3000"/>
-->
<!-- For suggestions: the destination "suggest" is matched by the "*suggest" dynamic field. -->
<copyField source="*_tesim" dest="suggest"/>
<copyField source="*_ssim" dest="suggest"/>
<!-- Similarity is the scoring routine for each document vs. a query. | |
A custom similarity may be specified here, but the default is fine | |
for most applications. --> | |
<!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> --> | |
<!-- ... OR ... | |
Specify a SimilarityFactory class name implementation | |
allowing parameters to be used. | |
--> | |
<!-- | |
<similarity class="com.example.solr.CustomSimilarityFactory"> | |
<str name="paramkey">param value</str> | |
</similarity> | |
--> | |
</schema> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment