Skip to content

Instantly share code, notes, and snippets.

@johnwilliams
Created March 8, 2010 23:09
Show Gist options
  • Save johnwilliams/325914 to your computer and use it in GitHub Desktop.
Save johnwilliams/325914 to your computer and use it in GitHub Desktop.
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<config>
<!-- Used to specify an alternate directory to hold all index data
other than the default ./data under the Solr home.
If replication is in use, this should match the replication configuration. -->
<!--
<dataDir>./solr/data</dataDir>
-->
<indexDefaults>
<!-- Values here affect all index writers and act as a default unless overridden. -->
<useCompoundFile>false</useCompoundFile>
<mergeFactor>20</mergeFactor>
<maxBufferedDocs>1000</maxBufferedDocs>
<maxMergeDocs>2147483647</maxMergeDocs>
<maxFieldLength>10000</maxFieldLength>
<writeLockTimeout>1000</writeLockTimeout>
<commitLockTimeout>10000</commitLockTimeout>
<lockType>single</lockType>
</indexDefaults>
<mainIndex>
<!-- options specific to the main on-disk lucene index -->
<useCompoundFile>false</useCompoundFile>
<mergeFactor>20</mergeFactor>
<maxBufferedDocs>1000</maxBufferedDocs>
<maxMergeDocs>2147483647</maxMergeDocs>
<maxFieldLength>10000</maxFieldLength>
<!-- If true, unlock any held write or commit locks on startup.
This defeats the locking mechanism that allows multiple
processes to safely access a lucene index, and should be
used with care. -->
<unlockOnStartup>true</unlockOnStartup>
</mainIndex>
<!-- the default high-performance update handler -->
<updateHandler class="solr.DirectUpdateHandler2">
<!-- A prefix of "solr." for class names is an alias that
causes solr to search appropriate packages, including
org.apache.solr.(search|update|request|core|analysis)
-->
<!-- Limit number of buffered deletions to help bound memory during indexing -->
<maxPendingDeletes>10000</maxPendingDeletes>
<!-- autocommit pending docs if certain criteria are met
maxDocs - number of updates since last commit
maxTime - number of milliseconds since oldest uncommitted update
<autoCommit>
<maxDocs>10000</maxDocs>
<maxTime>600000</maxTime>
</autoCommit>
-->
</updateHandler>
<query>
<!-- Maximum number of clauses in a boolean query... can affect
range or prefix queries that expand to big boolean
queries. An exception is thrown if exceeded. -->
<maxBooleanClauses>16384</maxBooleanClauses>
<!-- Cache used by SolrIndexSearcher for filters (DocSets),
unordered sets of *all* documents that match a query.
When a new searcher is opened, its caches may be prepopulated
or "autowarmed" using data from caches in the old searcher.
autowarmCount is the number of items to prepopulate. For LRUCache,
the autowarmed items will be the most recently accessed items.
Parameters:
class - the SolrCache implementation (currently only LRUCache)
size - the maximum number of entries in the cache
initialSize - the initial capacity (number of entries) of
the cache. (seel java.util.HashMap)
autowarmCount - the number of entries to prepopulate from
and old cache.
-->
<filterCache
class="solr.LRUCache"
size="256"
initialSize="256"
autowarmCount="0"/>
<!-- queryResultCache caches results of searches - ordered lists of
document ids (DocList) based on a query, a sort, and the range
of documents requested. -->
<queryResultCache
class="solr.LRUCache"
size="256"
initialSize="256"
autowarmCount="0"/>
<!-- documentCache caches Lucene Document objects (the stored fields for each document).
Since Lucene internal document ids are transient, this cache will not be autowarmed. -->
<documentCache
class="solr.LRUCache"
size="256"
initialSize="256"
autowarmCount="0"/>
<!-- If true, stored fields that are not requested will be loaded lazily.
-->
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<!-- Example of a generic cache. These caches may be accessed by name
through SolrIndexSearcher.getCache(),cacheLookup(), and cacheInsert().
The purpose is to enable easy caching of user/application level data.
The regenerator argument should be specified as an implementation
of solr.search.CacheRegenerator if autowarming is desired. -->
<!--
<cache name="myUserCache"
class="solr.LRUCache"
size="4096"
initialSize="1024"
autowarmCount="1024"
regenerator="org.mycompany.mypackage.MyRegenerator"
/>
-->
<!-- An optimization that attempts to use a filter to satisfy a search.
If the requested sort does not include score, then the filterCache
will be checked for a filter matching the query. If found, the filter
will be used as the source of document ids, and then the sort will be
applied to that.
<useFilterForSortedQuery>true</useFilterForSortedQuery>
-->
<!-- An optimization for use with the queryResultCache. When a search
is requested, a superset of the requested number of document ids
are collected. For example, if a search for a particular query
requests matching documents 10 through 19, and queryWindowSize is 50,
then documents 0 through 50 will be collected and cached. Any further
requests in that range can be satisfied via the cache. -->
<queryResultWindowSize>10</queryResultWindowSize>
<!-- This entry enables an int hash representation for filters (DocSets)
when the number of items in the set is less than maxSize. For smaller
sets, this representation is more memory efficient, more efficient to
iterate over, and faster to take intersections. -->
<HashDocSet maxSize="3000" loadFactor="0.75"/>
<!-- boolToFilterOptimizer converts boolean clauses with zero boost
into cached filters if the number of docs selected by the clause exceeds
the threshold (represented as a fraction of the total index) -->
<boolTofilterOptimizer enabled="true" cacheSize="32" threshold=".05"/>
<!-- a newSearcher event is fired whenever a new searcher is being prepared
and there is a current searcher handling requests (aka registered). -->
<!-- QuerySenderListener takes an array of NamedList and executes a
local query request for each NamedList in sequence. -->
<!--
<listener event="newSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
</arr>
</listener>
-->
<!-- a firstSearcher event is fired whenever a new searcher is being
prepared but there is no current registered searcher to handle
requests or to gain autowarming data from. -->
<!--
<listener event="firstSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str> </lst>
</arr>
</listener>
-->
<!-- If a search request comes in and there is no current registered searcher,
then immediately register the still warming searcher and use it. If
"false" then all requests will block until the first searcher is done
warming. -->
<useColdSearcher>true</useColdSearcher>
</query>
<!--
Let the dispatch filter handler /select?qt=XXX
handleSelect=true will use consistent error handling for /select and /update
handleSelect=false will use solr1.1 style error formatting
-->
<requestDispatcher handleSelect="false" >
<!--Make sure your system has some authentication before enabling remote streaming! -->
<requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="131072" />
</requestDispatcher>
<!-- requestHandler plugins... incoming queries will be dispatched to the
correct handler based on the qt (query type) param matching the
name of registered handlers.
The "standard" request handler is the default and will be used if qt
is not specified in the request.
-->
<requestHandler name="standard" class="solr.StandardRequestHandler">
<!-- default values for query parameters -->
<lst name="defaults">
<str name="echoParams">explicit</str>
<int name="rows">50</int>
<str name="fl">model_id model_class model_base_class score</str>
<str name="version">2.1</str>
</lst>
</requestHandler>
<!-- Update request handler.
Note: Since solr1.1 requestHandlers requires a valid content type header
if posted in the body. For example, curl now requires:
-H 'Content-type:text/xml; charset=utf-8'
The response format differs from solr1.1 formatting and returns a standard
error code. To enable solr1.1 behavior, remove the /update handler or
change its path
-->
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
<!-- CSV update handler, loaded on demand -->
<requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" />
<!-- Admin Handlers. TODO There could be a single handler that loads them all... -->
<requestHandler name="/admin/luke" class="org.apache.solr.handler.admin.LukeRequestHandler" />
<requestHandler name="/admin/system" class="org.apache.solr.handler.admin.SystemInfoHandler" />
<requestHandler name="/admin/plugins" class="org.apache.solr.handler.admin.PluginInfoHandler" />
<requestHandler name="/admin/threads" class="org.apache.solr.handler.admin.ThreadDumpHandler" />
<requestHandler name="/admin/properties" class="org.apache.solr.handler.admin.PropertiesRequestHandler" />
<!-- Echo the request contents back to the client -->
<requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
<lst name="defaults">
<str name="echoParams">explicit</str>
<!-- for all params (including the default etc) use: 'all' -->
<str name="echoHandler">true</str>
</lst>
</requestHandler>
<highlighting>
<!-- Configure the standard fragmenter -->
<!-- This could most likely be commented out in the "default" case -->
<fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true">
<lst name="defaults">
<int name="hl.fragsize">100</int>
</lst>
</fragmenter>
<!-- A regular-expression-based fragmenter (f.i., for sentence extraction) -->
<fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter">
<lst name="defaults">
<!-- slightly smaller fragsizes work better because of slop -->
<int name="hl.fragsize">70</int>
<!-- allow 50% slop on fragment sizes -->
<float name="hl.regex.slop">0.5</float>
<!-- a basic sentence pattern -->
<str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
</lst>
</fragmenter>
<!-- Configure the standard formatter -->
<formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true">
<lst name="defaults">
<str name="hl.simple.pre"><![CDATA[<strong class="searchterm">]]></str>
<str name="hl.simple.post"><![CDATA[</strong>]]></str>
</lst>
</formatter>
</highlighting>
<!-- queryResponseWriter plugins... query responses will be written using the
writer specified by the 'wt' request parameter matching the name of a registered
writer.
The "standard" writer is the default and will be used if 'wt' is not specified
in the request. XMLResponseWriter will be used if nothing is specified here.
The json, python, and ruby writers are also available by default.
<queryResponseWriter name="standard" class="org.apache.solr.request.XMLResponseWriter"/>
<queryResponseWriter name="json" class="org.apache.solr.request.JSONResponseWriter"/>
<queryResponseWriter name="python" class="org.apache.solr.request.PythonResponseWriter"/>
<queryResponseWriter name="ruby" class="org.apache.solr.request.RubyResponseWriter"/>
<queryResponseWriter name="custom" class="com.example.MyResponseWriter"/>
-->
<!-- XSLT response writer transforms the XML output by any xslt file found
in Solr's conf/xslt directory. Changes to xslt files are checked for
every xsltCacheLifetimeSeconds.
-->
<queryResponseWriter name="xslt" class="org.apache.solr.request.XSLTResponseWriter">
<int name="xsltCacheLifetimeSeconds">5</int>
</queryResponseWriter>
<!-- config for the admin interface -->
<admin>
<defaultQuery>solr</defaultQuery>
<gettableFiles>solrconfig.xml schema.xml admin-extra.html</gettableFiles>
<!-- pingQuery should be "URLish" ...
&amp; separated key=val pairs ... but there shouldn't be any
URL escaping of the values -->
<pingQuery>
q=solr
</pingQuery>
<!-- configure a healthcheck file for servers behind a loadbalancer
<healthcheck type="file">server-enabled</healthcheck>
-->
</admin>
</config>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment