Skip to content

Instantly share code, notes, and snippets.

@kenprice
Created November 12, 2015 03:50
Show Gist options
  • Save kenprice/82ce5ae16e9a09cb81f0 to your computer and use it in GitHub Desktop.
Save kenprice/82ce5ae16e9a09cb81f0 to your computer and use it in GitHub Desktop.
Ken's schema.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Solr schema for the "gios-stuff" core: bibliographic metadata (title, author,
  organization, publish date) plus the document body, and a catch-all
  "collector" field that simplifies single-field queries.

  The "version" attribute is the Solr schema syntax version, not a revision
  number for this file. Values below 1.1 make every field implicitly
  multiValued, so it is set to 1.5 to get single-valued fields by default.
-->
<schema name="gios-stuff" version="1.5">
  <types>
    <!-- Untokenized, exact-match string; the usual type for a unique key. -->
    <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>

    <!--
      Date type for publish_date; values use the ISO 8601 form
      1995-12-31T23:59:59Z.
      NOTE(review): on Solr 7 and later, solr.DatePointField replaces the
      deprecated Trie field classes.
    -->
    <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>

    <!--
      General text: tokenizes at whitespace, splits on intra-word delimiters and
      on case change, and catenates the word parts, both when indexed and when
      queried. So a query for "Augustus DeMorgan" and "Augustus De-Morgan" will
      both be a match for "Augustus De Morgan".
    -->
    <fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!--
      Custom type for the document body. It is a solr.TextField so analyzers
      can be used. Each analyzer must declare a tokenizer, otherwise Solr
      refuses to load the schema.
    -->
    <fieldType name="document_body" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <!-- Optional index-time filters; enable only once the referenced files
             exist in the core's conf directory:
        <filter class="solr.KeepWordFilterFactory" words="keepwords.txt"/>
        <filter class="solr.SynonymFilterFactory" synonyms="syns.txt"/>
        -->
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>
  </types>

  <fields>
    <!-- Unique document identifier; an untokenized string so it matches exactly. -->
    <field name="id" type="string" indexed="true" stored="true" required="true"/>
    <field name="title" type="textgen" indexed="true" stored="true"/>
    <field name="author" type="textgen" indexed="true" stored="true"/>
    <field name="author_last_name" type="textgen" indexed="true" stored="true"/>
    <field name="author_first_name" type="textgen" indexed="true" stored="true"/>
    <field name="organization" type="textgen" indexed="true" stored="true"/>
    <!-- The type attribute names a fieldType declared above, not a Java class. -->
    <field name="publish_date" type="date" indexed="true" stored="true"/>
    <field name="body" type="document_body" indexed="true" stored="true"/>
    <!-- Collection of all searchable text fields; indexed but not stored.
         multiValued because several copyField sources feed it. -->
    <field name="collector" type="textgen" indexed="true" stored="false" multiValued="true"/>
  </fields>

  <!-- Documents re-added with the same id replace the previous version. -->
  <uniqueKey>id</uniqueKey>

  <!-- May simplify some queries to combine fields into one. -->
  <copyField source="title" dest="collector"/>
  <copyField source="author" dest="collector"/>
  <copyField source="body" dest="collector"/>
  <copyField source="organization" dest="collector"/>
</schema>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment