Skip to content

Instantly share code, notes, and snippets.

@ChristopherBiscardi
Created February 8, 2014 04:41
Show Gist options
  • Save ChristopherBiscardi/8876815 to your computer and use it in GitHub Desktop.
Save ChristopherBiscardi/8876815 to your computer and use it in GitHub Desktop.
Solr/Yokozuna/Riak Search 2 Schema for GeoSpatial Indexing.
<?xml version="1.0" encoding="UTF-8"?>
<schema name="geotest" version="1.5">
<uniqueKey>_yz_id</uniqueKey>
<fields>
  <!-- Application fields: the data this schema exists to index. -->
  <field name="name" type="string" indexed="true" stored="true"/>
  <!-- Geospatial value, indexed through the `location_rpt` field type. -->
  <field name="loc" type="location_rpt" indexed="true" stored="true"/>

  <!-- Begin Yokozuna Fields -->
  <!-- Unique document id; referenced by <uniqueKey> at the top of the schema. -->
  <field name="_yz_id" type="_yz_str" indexed="true" stored="true" required="true"/>
  <!-- Whitespace-tokenized, multi-valued text field. No copyField in this
       schema feeds it, so it only receives values a document supplies
       under the key `text`. -->
  <field name="text" type="text_ws" indexed="true" stored="false" multiValued="true"/>
  <!-- Solr's internal per-document version field. -->
  <field name="_version_" type="long" indexed="true" stored="true"/>
  <!-- Entropy Data: Data related to anti-entropy -->
  <field name="_yz_ed" type="_yz_str" indexed="true" stored="false"/>
  <!-- Partition Number: Used as a filter query param -->
  <field name="_yz_pn" type="_yz_str" indexed="true" stored="false"/>
  <!-- First Partition Number: The first partition in this doc's
       preflist, used for further filtering on overlapping partitions. -->
  <field name="_yz_fpn" type="_yz_str" indexed="true" stored="false"/>
  <!-- If there is a sibling, use vtag to differentiate them -->
  <field name="_yz_vtag" type="_yz_str" indexed="true" stored="false"/>
  <!-- Node: The name of the node that this doc was created on. -->
  <field name="_yz_node" type="_yz_str" indexed="true" stored="false"/>
  <!-- Riak Bucket Type: The bucket type of the Riak object this doc corresponds to. -->
  <field name="_yz_rt" type="_yz_str" indexed="true" stored="true"/>
  <!-- Riak Bucket: The bucket of the Riak object this doc corresponds to. -->
  <field name="_yz_rb" type="_yz_str" indexed="true" stored="true"/>
  <!-- Riak Key: The key of the Riak object this doc corresponds to. -->
  <field name="_yz_rk" type="_yz_str" indexed="true" stored="true"/>
  <!-- Error Flag: Stores a flag if this doc is the product of a failed object extraction -->
  <field name="_yz_err" type="_yz_str" indexed="true" stored="false"/>
  <!-- End Yokozuna Fields -->

  <!-- Catch-all: any field not declared above is accepted and discarded
       (the `ignored` type is neither indexed nor stored) instead of making
       Solr reject the whole document with an "unknown field" error.
       Explicitly declared fields always take precedence over this pattern. -->
  <dynamicField name="*" type="ignored"/>
</fields>
<types>
  <!-- Spatial type backed by a recursive prefix tree.
       distErrPct: default precision for non-point shapes, expressed as a
                   fraction of the shape's size (0.0 is fully precise).
       maxDistErr: finest level of detail to index, in the declared units;
                   0.000009 degrees is roughly one metre.
       units:      unit used for the distance parameters above. -->
  <fieldType name="location_rpt"
             class="solr.SpatialRecursivePrefixTreeFieldType"
             distErrPct="0.025"
             maxDistErr="0.000009"
             units="degrees"/>

  <!-- Since fields of this type are by default not stored or indexed,
       any data added to them will be ignored outright. -->
  <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField"/>

  <!-- YZ String: Used for non-analyzed fields -->
  <fieldType name="_yz_str" class="solr.StrField" sortMissingLast="true"/>

  <!-- Plain, non-analyzed primitive types. -->
  <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
  <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>

  <!-- Default numeric types: precisionStep="0" indexes a single term per
       value (no extra precision levels). -->
  <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
  <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
  <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
  <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>

  <!--
    Numeric field types that index each value at various levels of precision
    to accelerate range queries when the number of values between the range
    endpoints is large. See the javadoc for NumericRangeQuery for internal
    implementation details.

    Smaller precisionStep values (specified in bits) will lead to more tokens
    indexed per value, slightly larger index size, and faster range queries.
    A precisionStep of 0 disables indexing at different precision levels.
  -->
  <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
  <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
  <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
  <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>

  <!-- Date type without extra precision levels. -->
  <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
  <!-- A Trie based date field for faster date range queries and date faceting. -->
  <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>

  <!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings. -->
  <fieldType name="binary" class="solr.BinaryField"/>

  <!-- Type backed by solr.RandomSortField. -->
  <fieldType name="random" class="solr.RandomSortField" indexed="true"/>

  <!-- A text field that only splits on whitespace for exact matching of words -->
  <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    </analyzer>
  </fieldType>

  <!-- A general text field that has reasonable, generic
       cross-language defaults: it tokenizes with StandardTokenizer,
       removes stop words from case-insensitive "stopwords.txt"
       (empty by default), and down cases. At query time only, it
       also applies synonyms. -->
  <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
      <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
  </fieldType>
</types>
</schema>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment