Created
May 16, 2011 11:13
-
-
Save tschellenbach/974251 to your computer and use it in GitHub Desktop.
solr schema tag search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8" ?>
<config> | |
<!-- Abort core start-up on a configuration error unless the
     solr.abortOnConfigurationError property overrides the default (true). -->
<abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
<luceneMatchVersion>LUCENE_31</luceneMatchVersion>

<!-- Plugin jars: extraction (Solr Cell), clustering and the
     DataImportHandler used by the /dataimport handlers in this file. -->
<lib dir="../../contrib/extraction/lib" />
<lib dir="../../dist/" regex="apache-solr-cell-\d.*\.jar" />
<lib dir="../../dist/" regex="apache-solr-clustering-\d.*\.jar" />
<lib dir="../../dist/" regex="apache-solr-dataimporthandler-\d.*\.jar" />
<lib dir="../../contrib/clustering/lib/" />

<!-- Index location; the empty default after the colon applies when
     solr.data.dir is not set. -->
<dataDir>${solr.data.dir:}</dataDir>

<!-- Directory implementation, overridable through solr.directoryFactory. -->
<directoryFactory name="DirectoryFactory"
                  class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>
<!-- Index-writer defaults. <mainIndex> repeats useCompoundFile,
     ramBufferSizeMB and mergeFactor with the same values. -->
<indexDefaults>
  <useCompoundFile>false</useCompoundFile>
  <mergeFactor>2</mergeFactor>
  <ramBufferSizeMB>128</ramBufferSizeMB>
  <maxFieldLength>10000</maxFieldLength>
  <writeLockTimeout>1000</writeLockTimeout>
  <commitLockTimeout>10000</commitLockTimeout>
  <lockType>native</lockType>
</indexDefaults>
<!-- Main index settings. The deletion policy retains a single commit
     point and no extra optimized commits; infoStream output is off. -->
<mainIndex>
  <useCompoundFile>false</useCompoundFile>
  <ramBufferSizeMB>128</ramBufferSizeMB>
  <mergeFactor>2</mergeFactor>
  <unlockOnStartup>false</unlockOnStartup>
  <reopenReaders>true</reopenReaders>
  <deletionPolicy class="solr.SolrDeletionPolicy">
    <str name="maxCommitsToKeep">1</str>
    <str name="maxOptimizedCommitsToKeep">0</str>
  </deletionPolicy>
  <infoStream file="INFOSTREAM.txt">false</infoStream>
</mainIndex>
<!-- JMX

     This example enables JMX if and only if an existing MBeanServer
     is found, use this if you want to configure JMX through JVM
     parameters. Remove this to disable exposing Solr configuration
     and statistics to JMX.

     For more details see http://wiki.apache.org/solr/SolrJmx
  -->
<jmx />
<!-- If you want to connect to a particular server, specify the
     agentId
  -->
<!-- <jmx agentId="myAgent" /> -->
<!-- If you want to start a new MBeanServer, specify the serviceUrl -->
<!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
  -->
<!-- The default high-performance update handler -->
<updateHandler class="solr.DirectUpdateHandler2">
  <!-- NOTE(review): Solr reads autoCommit maxTime in milliseconds, so
       this commits 300 ms after the first pending update. Confirm that
       is intended rather than 300 seconds (300000), given the warming
       queries and autowarm counts configured in the query section. -->
  <autoCommit>
    <maxTime>300</maxTime>
  </autoCommit>
  <!-- Update Related Event Listeners

       Various IndexWriter related events can trigger Listeners to
       take actions.

       postCommit - fired after every commit or optimize command
       postOptimize - fired after every optimize command
    -->
  <!-- The RunExecutableListener executes an external command from a
       hook such as postCommit or postOptimize.

       exe - the name of the executable to run
       dir - dir to use as the current working directory. (default=".")
       wait - the calling thread waits until the executable returns.
              (default="true")
       args - the arguments to pass to the program. (default is none)
       env - environment variables to set. (default is none)
    -->
  <!-- This example shows how RunExecutableListener could be used
       with the script based replication...
       http://wiki.apache.org/solr/CollectionDistribution
    -->
  <!--
     <listener event="postCommit" class="solr.RunExecutableListener">
       <str name="exe">solr/bin/snapshooter</str>
       <str name="dir">.</str>
       <bool name="wait">true</bool>
       <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
       <arr name="env"> <str>MYVAR=val1</str> </arr>
     </listener>
    -->
</updateHandler>
<query> | |
<!-- Maximum number of clauses permitted in a single boolean query. -->
<maxBooleanClauses>1024</maxBooleanClauses>
<!-- Solr Internal Query Caches | |
There are two implementations of cache available for Solr, | |
LRUCache, based on a synchronized LinkedHashMap, and | |
FastLRUCache, based on a ConcurrentHashMap. | |
FastLRUCache has faster gets and slower puts in single | |
threaded operation and thus is generally faster than LRUCache | |
when the hit ratio of the cache is high (> 75%), and may be | |
faster under other scenarios on multi-cpu systems. | |
--> | |
<!-- Filter Cache

     Cache used by SolrIndexSearcher for filters (DocSets),
     unordered sets of *all* documents that match a query. When a
     new searcher is opened, its caches may be prepopulated or
     "autowarmed" using data from caches in the old searcher.
     autowarmCount is the number of items to prepopulate. For
     LRUCache, the autowarmed items will be the most recently
     accessed items.

     Parameters:
       class - the SolrCache implementation
           (LRUCache or FastLRUCache)
       size - the maximum number of entries in the cache
       initialSize - the initial capacity (number of entries) of
           the cache. (see java.util.HashMap)
       autowarmCount - the number of entries to prepopulate from
           an old cache.
  -->
<filterCache class="solr.FastLRUCache"
             size="262144"
             initialSize="65536"
             autowarmCount="16384"/>
<!-- Query Result Cache

     Caches results of searches - ordered lists of document ids
     (DocList) based on a query, a sort, and the range of documents requested.
  -->
<queryResultCache class="solr.LRUCache"
                  size="1048576"
                  initialSize="262144"
                  autowarmCount="65536"/>
<!-- Document Cache

     Caches Lucene Document objects (the stored fields for each
     document). Since Lucene internal document ids are transient,
     this cache will not be autowarmed, so autowarmCount is set to 0
     to make that explicit.
  -->
<documentCache class="solr.LRUCache"
               size="65536"
               initialSize="32768"
               autowarmCount="0"/>
<!-- Field Value Cache | |
Cache used to hold field values that are quickly accessible | |
by document id. The fieldValueCache is created by default | |
even if not configured here. | |
--> | |
<!-- | |
<fieldValueCache class="solr.FastLRUCache" | |
size="512" | |
autowarmCount="128" | |
showItems="32" /> | |
--> | |
<!-- Custom Cache | |
Example of a generic cache. These caches may be accessed by | |
name through SolrIndexSearcher.getCache(),cacheLookup(), and | |
cacheInsert(). The purpose is to enable easy caching of | |
user/application level data. The regenerator argument should | |
be specified as an implementation of solr.CacheRegenerator | |
if autowarming is desired. | |
--> | |
<!-- | |
<cache name="myUserCache" | |
class="solr.LRUCache" | |
size="4096" | |
initialSize="1024" | |
autowarmCount="1024" | |
regenerator="com.mycompany.MyRegenerator" | |
/> | |
--> | |
<!-- Lazy Field Loading

     If true, stored fields that are not requested will be loaded
     lazily. This can result in a significant speed improvement
     if the usual case is to not load all stored fields,
     especially if the skipped fields are large compressed text
     fields.
  -->
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<!-- Use Filter For Sorted Query
A possible optimization that attempts to use a filter to
satisfy a search. If the requested sort does not include
score, then the filterCache will be checked for a filter
matching the query. If found, the filter will be used as the
source of document ids, and then the sort will be applied to
that.
For most situations, this will not be useful unless you
frequently get the same search repeatedly with different sort
options, and none of them ever use "score"
-->
<!-- | |
<useFilterForSortedQuery>true</useFilterForSortedQuery> | |
--> | |
<!-- Result Window Size

     An optimization for use with the queryResultCache. When a search
     is requested, a superset of the requested number of document ids
     are collected. For example, if a search for a particular query
     requests matching documents 10 through 19, and queryResultWindowSize
     is 50, then documents 0 through 49 will be collected and cached. Any
     further requests in that range can be satisfied via the cache.
  -->
<queryResultWindowSize>20</queryResultWindowSize>
<!-- Maximum number of documents to cache for any entry in the
     queryResultCache.
  -->
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
<!-- Queries sent to every new searcher: match-all, sorted in turn on
     recent_popularity, popularity and id (all descending). -->
<listener event="newSearcher" class="solr.QuerySenderListener">
  <arr name="queries">
    <lst>
      <str name="q">*:*</str>
      <str name="sort">recent_popularity desc</str>
    </lst>
    <lst>
      <str name="q">*:*</str>
      <str name="sort">popularity desc</str>
    </lst>
    <lst>
      <str name="q">*:*</str>
      <str name="sort">id desc</str>
    </lst>
  </arr>
</listener>
<!-- Queries sent to the first searcher: match-all, sorted in turn on
     recent_popularity, popularity and id (all descending). -->
<listener event="firstSearcher" class="solr.QuerySenderListener">
  <arr name="queries">
    <lst>
      <str name="q">*:*</str>
      <str name="sort">recent_popularity desc</str>
    </lst>
    <lst>
      <str name="q">*:*</str>
      <str name="sort">popularity desc</str>
    </lst>
    <lst>
      <str name="q">*:*</str>
      <str name="sort">id desc</str>
    </lst>
  </arr>
</listener>
<!-- useColdSearcher: whether a request may use a searcher that is still
     warming when no searcher is registered yet.
     maxWarmingSearchers: cap on searchers warming at the same time. -->
<useColdSearcher>true</useColdSearcher>
<maxWarmingSearchers>5</maxWarmingSearchers>
</query> | |
<!-- Request dispatcher settings.
     NOTE(review): enableRemoteStreaming="true" lets a request make Solr
     read remote URLs or local files (stream.url / stream.file). Confirm
     this instance is not reachable by untrusted clients. -->
<requestDispatcher handleSelect="true" >
  <requestParsers enableRemoteStreaming="true"
                  multipartUploadLimitInKB="2048000" />
  <httpCaching never304="true" />
</requestDispatcher>
<!-- DataImportHandler configured from data_config.xml. -->
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
  <lst name="defaults">
    <str name="config">data_config.xml</str>
  </lst>
</requestHandler>
<!-- DataImportHandler configured from data_config_local.xml. -->
<requestHandler name="/dataimport_local" class="org.apache.solr.handler.dataimport.DataImportHandler">
  <lst name="defaults">
    <str name="config">data_config_local.xml</str>
  </lst>
</requestHandler>
<!-- Default search handler. The explicit component list runs only
     query, facet, stats and debug; mlt and highlight are left out. -->
<requestHandler name="search" class="solr.SearchHandler" default="true">
  <lst name="defaults">
    <str name="echoParams">explicit</str>
    <int name="rows">10</int>
  </lst>
  <arr name="components">
    <str>query</str>
    <str>facet</str>
    <str>stats</str>
    <str>debug</str>
  </arr>
</requestHandler>
<!-- XML update handler; no update processor chain is selected by default. -->
<requestHandler name="/update"
                class="solr.XmlUpdateRequestHandler">
  <!-- See below for information on defining
       updateRequestProcessorChains that can be used by name
       on each Update Request
    -->
  <!--
     <lst name="defaults">
       <str name="update.processor">dedupe</str>
     </lst>
    -->
</requestHandler>
<!-- Field Analysis Request Handler

     RequestHandler that provides much the same functionality as
     analysis.jsp. Provides the ability to specify multiple field
     types and field names in the same request and outputs
     index-time and query-time analysis for each of them.

     Request parameters are:
     analysis.fieldname - field name whose analyzers are to be used
     analysis.fieldtype - field type whose analyzers are to be used
     analysis.fieldvalue - text for index-time analysis
     q (or analysis.q) - text for query time analysis
     analysis.showmatch (true|false) - When set to true and when
         query analysis is performed, the produced tokens of the
         field value analysis will be marked as "matched" for every
         token that is produced by the query analysis
  -->
<requestHandler name="/analysis/field"
                startup="lazy"
                class="solr.FieldAnalysisRequestHandler" />
<!-- Document Analysis Handler

     http://wiki.apache.org/solr/AnalysisRequestHandler

     An analysis handler that provides a breakdown of the analysis
     process of provided documents. This handler expects a (single)
     content stream with the following format:

     <docs>
       <doc>
         <field name="id">1</field>
         <field name="name">The Name</field>
         <field name="text">The Text Value</field>
       </doc>
       <doc>...</doc>
       <doc>...</doc>
       ...
     </docs>

     Note: Each document must contain a field which serves as the
     unique key. This key is used in the returned response to associate
     an analysis breakdown to the analyzed document.

     Like the FieldAnalysisRequestHandler, this handler also supports
     query analysis by sending either an "analysis.query" or "q"
     request parameter that holds the query text to be analyzed. It
     also supports the "analysis.showmatch" parameter which when set to
     true, all field tokens that match the query tokens will be marked
     as a "match".
  -->
<requestHandler name="/analysis/document"
                class="solr.DocumentAnalysisRequestHandler"
                startup="lazy" />
<!-- Admin Handlers

     Admin Handlers - This will register all the standard admin
     RequestHandlers.
  -->
<requestHandler name="/admin/"
                class="solr.admin.AdminHandlers" />
<!-- This single handler is equivalent to the following... -->
<!--
<requestHandler name="/admin/luke" class="solr.admin.LukeRequestHandler" />
<requestHandler name="/admin/system" class="solr.admin.SystemInfoHandler" />
<requestHandler name="/admin/plugins" class="solr.admin.PluginInfoHandler" />
<requestHandler name="/admin/threads" class="solr.admin.ThreadDumpHandler" />
<requestHandler name="/admin/properties" class="solr.admin.PropertiesRequestHandler" />
<requestHandler name="/admin/file" class="solr.admin.ShowFileRequestHandler" />
-->
<!-- If you wish to hide files under ${solr.home}/conf, explicitly | |
register the ShowFileRequestHandler using: | |
--> | |
<!-- | |
<requestHandler name="/admin/file" | |
class="solr.admin.ShowFileRequestHandler" > | |
<lst name="invariants"> | |
<str name="hidden">synonyms.txt</str> | |
<str name="hidden">anotherfile.txt</str> | |
</lst> | |
</requestHandler> | |
--> | |
<!-- ping/healthcheck: runs the query "solrpingquery" through the handler
     named "search" and echoes all parameters in the response. -->
<requestHandler name="/admin/ping" class="solr.PingRequestHandler">
  <lst name="defaults">
    <str name="qt">search</str>
    <str name="q">solrpingquery</str>
    <str name="echoParams">all</str>
  </lst>
</requestHandler>
<!-- Echo the request contents back to the client -->
<requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
  <lst name="defaults">
    <str name="echoParams">explicit</str>
    <str name="echoHandler">true</str>
  </lst>
</requestHandler>
<!-- Solr Replication
The SolrReplicationHandler supports replicating indexes from a
"master" used for indexing and "slaves" used for queries.
http://wiki.apache.org/solr/SolrReplication
In the example below, remove the <lst name="master"> section if
this is just a slave and remove the <lst name="slave"> section
if this is just a master.
-->
<!-- | |
<requestHandler name="/replication" class="solr.ReplicationHandler" > | |
<lst name="master"> | |
<str name="replicateAfter">commit</str> | |
<str name="replicateAfter">startup</str> | |
<str name="confFiles">schema.xml,stopwords.txt</str> | |
</lst> | |
<lst name="slave"> | |
<str name="masterUrl">http://localhost:8983/solr/replication</str> | |
<str name="pollInterval">00:00:60</str> | |
</lst> | |
</requestHandler> | |
--> | |
<!-- Search Components | |
Search components are registered to SolrCore and used by | |
instances of SearchHandler (which can access them by name) | |
By default, the following components are available:
<searchComponent name="query" class="solr.QueryComponent" /> | |
<searchComponent name="facet" class="solr.FacetComponent" /> | |
<searchComponent name="mlt" class="solr.MoreLikeThisComponent" /> | |
<searchComponent name="highlight" class="solr.HighlightComponent" /> | |
<searchComponent name="stats" class="solr.StatsComponent" /> | |
<searchComponent name="debug" class="solr.DebugComponent" /> | |
Default configuration in a requestHandler would look like: | |
<arr name="components"> | |
<str>query</str> | |
<str>facet</str> | |
<str>mlt</str> | |
<str>highlight</str> | |
<str>stats</str> | |
<str>debug</str> | |
</arr> | |
If you register a searchComponent to one of the standard names, | |
that will be used instead of the default. | |
To insert components before or after the 'standard' components, use: | |
<arr name="first-components"> | |
<str>myFirstComponentName</str> | |
</arr> | |
<arr name="last-components"> | |
<str>myLastComponentName</str> | |
</arr> | |
NOTE: The component registered with the name "debug" will | |
always be executed after the "last-components" | |
--> | |
<!-- Terms Component

     http://wiki.apache.org/solr/TermsComponent

     A component to return terms and document frequency of those
     terms
  -->
<searchComponent name="terms" class="solr.TermsComponent"/>
<!-- A request handler for demonstrating the terms component; it runs
     only the "terms" component and enables it by default. -->
<requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
  <lst name="defaults">
    <bool name="terms">true</bool>
  </lst>
  <arr name="components">
    <str>terms</str>
  </arr>
</requestHandler>
<!-- Update Processors | |
Chains of Update Processor Factories for dealing with Update | |
Requests can be declared, and then used by name in Update | |
Request Processors | |
http://wiki.apache.org/solr/UpdateRequestProcessor | |
--> | |
<!-- Deduplication | |
An example dedup update processor that creates the "id" field | |
on the fly based on the hash code of some other fields. This | |
example has overwriteDupes set to false since we are using the | |
id field as the signatureField and Solr will maintain | |
uniqueness based on that anyway. | |
--> | |
<!-- | |
<updateRequestProcessorChain name="dedupe"> | |
<processor class="solr.processor.SignatureUpdateProcessorFactory"> | |
<bool name="enabled">true</bool> | |
<str name="signatureField">id</str> | |
<bool name="overwriteDupes">false</bool> | |
<str name="fields">name,features,cat</str> | |
<str name="signatureClass">solr.processor.Lookup3Signature</str> | |
</processor> | |
<processor class="solr.LogUpdateProcessorFactory" /> | |
<processor class="solr.RunUpdateProcessorFactory" /> | |
</updateRequestProcessorChain> | |
--> | |
<!-- Response Writers | |
http://wiki.apache.org/solr/QueryResponseWriter | |
Request responses will be written using the writer specified by | |
the 'wt' request parameter matching the name of a registered | |
writer. | |
The "default" writer is the default and will be used if 'wt' is | |
not specified in the request. | |
--> | |
<!-- The following response writers are implicitly configured unless | |
overridden... | |
--> | |
<!-- | |
<queryResponseWriter name="xml" | |
default="true" | |
class="solr.XMLResponseWriter" /> | |
<queryResponseWriter name="json" class="solr.JSONResponseWriter"/> | |
<queryResponseWriter name="python" class="solr.PythonResponseWriter"/> | |
<queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/> | |
<queryResponseWriter name="php" class="solr.PHPResponseWriter"/> | |
<queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/> | |
<queryResponseWriter name="velocity" class="solr.VelocityResponseWriter"/> | |
<queryResponseWriter name="csv" class="solr.CSVResponseWriter"/> | |
--> | |
<!-- | |
Custom response writers can be declared as needed... | |
--> | |
<!-- | |
<queryResponseWriter name="custom" class="com.example.MyResponseWriter"/> | |
--> | |
<!-- XSLT response writer transforms the XML output by any xslt file found
     in Solr's conf/xslt directory. Changes to xslt files are checked for
     every xsltCacheLifetimeSeconds.
  -->
<queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
  <int name="xsltCacheLifetimeSeconds">5</int>
</queryResponseWriter>
<!-- Query Parsers | |
http://wiki.apache.org/solr/SolrQuerySyntax | |
Multiple QParserPlugins can be registered by name, and then | |
used in either the "defType" param for the QueryComponent (used | |
by SearchHandler) or in LocalParams | |
--> | |
<!-- example of registering a query parser --> | |
<!-- | |
<queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/> | |
--> | |
<!-- Function Parsers | |
http://wiki.apache.org/solr/FunctionQuery | |
Multiple ValueSourceParsers can be registered by name, and then | |
used as function names when using the "func" QParser. | |
--> | |
<!-- example of registering a custom function parser --> | |
<!-- | |
<valueSourceParser name="myfunc" | |
class="com.mycompany.MyValueSourceParser" /> | |
--> | |
<!-- Admin UI settings: the query form is pre-filled with match-all. -->
<admin>
  <defaultQuery>*:*</defaultQuery>
</admin>
</config>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8" ?>
<!-- Schema for the tag index: an integer key, a multi-valued tag string
     and two integer popularity scores.
     NOTE(review): Solr's documented schema versions start at 1.0 and the
     version changes attribute defaults; confirm that 0.1 is intended. -->
<schema name="fashiolista_tags" version="0.1">
  <types>
    <!-- Both types are single-valued, omit norms and sort missing values last. -->
    <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true" multiValued="false" />
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true" multiValued="false"/>
  </types>
  <fields>
    <!-- id, popularity and recent_popularity are required on every document. -->
    <field name="id" type="sint" indexed="true" stored="true" required="true" />
    <!-- tag overrides the single-valued default of its field type. -->
    <field name="tag" type="string" indexed="true" stored="true" multiValued="true"/>
    <field name="popularity" type="sint" indexed="true" stored="true" required="true" />
    <field name="recent_popularity" type="sint" indexed="true" stored="true" required="true" />
  </fields>
  <uniqueKey>id</uniqueKey>
  <!-- Unqualified query terms search the tag field and are ANDed together. -->
  <defaultSearchField>tag</defaultSearchField>
  <solrQueryParser defaultOperator="AND"/>
</schema>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment