Skip to content

Instantly share code, notes, and snippets.

@dougmorato
Created June 20, 2011 16:30
Show Gist options
  • Save dougmorato/1035944 to your computer and use it in GitHub Desktop.
Save dougmorato/1035944 to your computer and use it in GitHub Desktop.
Sample sphinx.conf file to index Tryton product name and description
#
# Sphinx configuration file sample
#
# Please refer to doc/sphinx.html for details.
#
#############################################################################
## data source definition
#############################################################################
source product_product
{
# Base data source: fetches product id, template name and product
# description from the Tryton PostgreSQL database in ranged steps.
#
# data source type. mandatory, no default value
# known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
type = pgsql
#####################################################################
## SQL settings (for 'mysql' and 'pgsql' types)
#####################################################################
# some straightforward parameters for SQL source types
sql_host = PUT_THE_IP_ADDRESS_OR_FQDN_OF_YOUR_DATABASE_SERVER
sql_user = PUT_YOUR_DATABASE_USERNAME
sql_pass = PUT_THE_PASSWORD_FOR_THE_DATABASE_USER
sql_db = PUT_THE_DATABASE_NAME
# database server port
# 5432 is the usual PostgreSQL port (3306 is the mysql default)
sql_port = 5432
# main document fetch query
# mandatory, integer document ID field MUST be the first selected column
#
# NOTE: every continued line ends with " \" (space + backslash) so that
# SQL tokens stay separated once the lines are joined; the LAST line of
# the query must NOT end with a backslash, and the last selected column
# must NOT be followed by a comma.
sql_query = \
SELECT \
product.id AS id, \
template.name AS name, \
product.description AS description \
FROM product_product AS product \
JOIN product_template AS template ON template.id = product.template \
WHERE \
product.id >= $start and product.id <= $end
# range query setup, query that must return min and max ID values
# optional, default is empty
#
# sql_query will need to reference $start and $end boundaries
# if using ranged query:
#
# sql_query = \
# SELECT doc.id, doc.id AS group, doc.title, doc.data \
# FROM documents doc \
# WHERE id>=$start AND id<=$end
#
sql_query_range = SELECT MIN(id),MAX(id) FROM product_product
# range query step
# optional, default is 1024
#
sql_range_step = 1000
# combined field plus attribute declaration (from a single column)
# stores column as an attribute, but also indexes it as a full-text field
#
sql_field_string = name
sql_field_string = description
# ranged query throttling, in milliseconds
# optional, default is 0 which means no delay
# enforces given delay before each query step
sql_ranged_throttle = 0
# document info query, ONLY for CLI search (ie. testing and debugging)
# optional, default is empty
# must contain $id macro and must fetch the document by that id
# NOTE(review): the Sphinx docs describe this option as applying to the
# mysql source type only, so it is presumably ignored for pgsql - confirm.
sql_query_info = SELECT * FROM product_product WHERE id=$id
}
# inherited source
#
# all the parameters are copied from the parent source,
# and may then be overridden in this source definition
source product_product_pt : product_product
{
# Overrides only the main fetch query of the parent source so that name
# and description are returned translated to Brazilian Portuguese (pt_BR).
# get_template_translation() / get_product_translation() are database-side
# functions that are not defined in this file - presumably they must already
# exist in the target database; verify before indexing.
#
# NOTE: every continued line ends with " \" (space + backslash) so SQL
# tokens stay separated after joining, and the last line of the query has
# NO trailing backslash so the closing brace is not swallowed into the query.
sql_query = \
SELECT \
"product"."id" AS id, \
get_template_translation(template.id, 'name', template.name, 'pt_BR') AS name, \
get_product_translation(product.id, 'description', product.description, 'pt_BR') AS description \
FROM product_product AS product \
JOIN product_template AS template ON template.id = product.template \
WHERE \
product.id >= $start and product.id <= $end
}
#############################################################################
## index definition
#############################################################################
# local index example
#
# this is an index which is stored locally in the filesystem
#
# all indexing-time options (such as morphology and charsets)
# are configured per local index
index product_product
{
# Base local index; the other indexes in this file inherit from it and
# override only the settings that differ.
#
# document source(s) to index
# multi-value, mandatory
# document IDs must be globally unique across all sources
source = product_product
# index files path and file name, without extension
# mandatory, path must be writable, extensions will be auto-appended
path = /var/data/product_product
# document attribute values (docinfo) storage mode
# optional, default is 'extern'
# known values are 'none', 'extern' and 'inline'
docinfo = extern
# memory locking for cached data (.spa and .spi), to prevent swapping
# optional, default is 0 (do not mlock)
# requires searchd to be run from root
mlock = 0
# a list of morphology preprocessors to apply
# optional, default is empty
#
# builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
# 'soundex', and 'metaphone'; additional preprocessors available from
# libstemmer are 'libstemmer_XXX', where XXX is algorithm code
# (see libstemmer_c/libstemmer/modules.txt)
#
# morphology = stem_en, stem_ru, soundex
# morphology = libstemmer_german
# morphology = libstemmer_sv
#
# no stemming in the base index; the child indexes set their own
morphology = none
# minimum indexed word length
# default is 1 (index everything)
min_word_len = 1
# charset encoding type
# optional, default is 'sbcs'
# known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
charset_type = utf-8
# whether to strip HTML tags from incoming documents
# known values are 0 (do not strip) and 1 (do strip)
# optional, default is 0
html_strip = 1
}
# inherited index
#
# all the parameters are copied from the parent index,
# and may then be overridden in this index definition
index product_product_en : product_product
{
# English index: keeps the parent's source (product_product) and all other
# parent settings; only the storage path and the stemmer are overridden.
path = /var/data/product_product_en
# builtin English stemmer
morphology = stem_en
}
index product_product_pt : product_product
{
# Portuguese index: reads the translated pt_BR source and applies the
# libstemmer Portuguese stemmer; everything else comes from the parent.
source = product_product_pt
path = /var/data/product_product_pt
morphology = libstemmer_pt
# The default utf-8 charset_table only covers English and Russian letters,
# so accented Portuguese letters would be treated as word separators.
# Extend the table with the Latin-1 Supplement letters (U+C0..U+FF minus
# the multiplication and division signs), folding upper case to lower
# case and keeping accents distinct so the stemmer sees the real word forms.
charset_table = 0..9, A..Z->a..z, _, a..z, U+C0..U+D6->U+E0..U+F6, U+D8..U+DE->U+F8..U+FE, U+DF..U+F6, U+F8..U+FF
}
#############################################################################
## indexer settings
#############################################################################
indexer
{
# memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
# optional, default is 32M, max is 2047M, recommended is 256M to 1024M
# NOTE(review): the value below is the documented maximum, above the
# recommended range stated in the line above - confirm this is intended
# for the indexing host.
mem_limit = 2047M
}
#############################################################################
## searchd settings
#############################################################################
searchd
{
# Search daemon settings: listen points, logs, timeouts and limits.
#
# [hostname:]port[:protocol], or /unix/socket/path to listen on
# known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL)
#
# multi-value, multiple listen points are allowed
# optional, defaults are 9312:sphinx and 9306:mysql41, as below
#
# listen = 127.0.0.1
# listen = 192.168.0.1:9312
# listen = 9312
# listen = /var/run/searchd.sock
listen = PUT_PUBLIC_OR_PRIVATE_IP_ADDRESS_OF_THE_SEARCHD_SERVER:9312
listen = PUT_PUBLIC_OR_PRIVATE_IP_ADDRESS_OF_THE_SEARCHD_SERVER:9306:mysql41
# log file, searchd run info is logged here
# optional, default is 'searchd.log'
log = /var/log/sphinx/searchd.log
# query log file, all search queries are logged here
# optional, default is empty (do not log queries)
query_log = /var/log/sphinx/query.log
# client read timeout, seconds
# optional, default is 5
read_timeout = 5
# request timeout, seconds
# optional, default is 5 minutes
client_timeout = 300
# maximum amount of children to fork (concurrent searches to run)
# optional, default is 0 (unlimited)
max_children = 30
# PID file, searchd process ID file name
# mandatory
pid_file = /var/run/searchd.pid
# max amount of matches the daemon ever keeps in RAM, per-index
# WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
# default is 1000 (just like Google)
max_matches = 1000
# seamless rotate, prevents rotate stalls if precaching huge datasets
# optional, default is 1
seamless_rotate = 1
# whether to forcibly preopen all indexes on startup
# optional, default is 1 (preopen everything)
preopen_indexes = 1
# whether to unlink .old index copies on successful rotation.
# optional, default is 1 (do unlink)
unlink_old = 1
# MVA updates pool size
# shared between all instances of searchd, disables attr flushes!
# optional, default size is 1M
mva_updates_pool = 1M
# max allowed network packet size
# limits both query packets from clients, and responses from agents
# optional, default size is 8M
max_packet_size = 8M
# max allowed per-query filter count
# optional, default is 256
max_filters = 256
# max allowed per-filter values count
# optional, default is 4096
max_filter_values = 4096
# max allowed per-batch query count (aka multi-query count)
# optional, default is 32
max_batch_queries = 32
# multi-processing mode (MPM)
# known values are none, fork, prefork, and threads
# optional, default is fork
#
workers = threads # for RT to work
}
# --eof--
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment