Created
June 20, 2011 16:30
-
-
Save dougmorato/1035944 to your computer and use it in GitHub Desktop.
Sample sphinx.conf file to index Tryton product name and description
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Sphinx configuration file sample | |
# | |
# Please refer to doc/sphinx.html for details. | |
# | |
############################################################################# | |
## data source definition | |
############################################################################# | |
source product_product
{
    # Base data source: pulls every Tryton product (id, template name,
    # product description) out of PostgreSQL for full-text indexing.

    # data source type. mandatory, no default value
    # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
    type = pgsql

    #####################################################################
    ## SQL settings (for 'mysql' and 'pgsql' types)
    #####################################################################

    # some straightforward parameters for SQL source types
    sql_host = PUT_THE_IP_ADDRESS_OR_FQDN_OF_YOUR_DATABASE_SERVER
    sql_user = PUT_YOUR_DATABASE_USERNAME
    sql_pass = PUT_THE_PASSWORD_FOR_THE_DATABASE_USER
    sql_db = PUT_THE_DATABASE_NAME

    # PostgreSQL port (5432 is the pgsql default; 3306 is the mysql default)
    sql_port = 5432

    # main document fetch query
    # mandatory, integer document ID field MUST be the first selected column
    #
    # This is a ranged query: $start and $end are substituted by the indexer
    # from sql_query_range / sql_range_step below.
    #
    # NOTE: the last selected column must NOT be followed by a comma, and the
    # last line of the query must NOT end with a continuation backslash.
    sql_query = \
        SELECT \
            product.id AS id, \
            template.name AS name, \
            product.description AS description \
        FROM product_product AS product \
        JOIN product_template AS template ON template.id = product.template \
        WHERE \
            product.id >= $start and product.id <= $end

    # range query setup, query that must return min and max ID values
    # optional, default is empty
    #
    # sql_query will need to reference $start and $end boundaries
    # if using ranged query:
    #
    # sql_query = \
    #     SELECT doc.id, doc.id AS group, doc.title, doc.data \
    #     FROM documents doc \
    #     WHERE id>=$start AND id<=$end
    #
    sql_query_range = SELECT MIN(id),MAX(id) FROM product_product

    # range query step
    # optional, default is 1024
    #
    sql_range_step = 1000

    # combined field plus attribute declaration (from a single column)
    # stores column as an attribute, but also indexes it as a full-text field
    #
    sql_field_string = name
    sql_field_string = description

    # ranged query throttling, in milliseconds
    # optional, default is 0 which means no delay
    # enforces given delay before each query step
    sql_ranged_throttle = 0

    # document info query, ONLY for CLI search (ie. testing and debugging)
    # optional, default is empty
    # must contain $id macro and must fetch the document by that id
    #
    # Points at the table actually indexed by this source (the stock sample
    # referenced a non-existent "documents" table).
    sql_query_info = SELECT * FROM product_product WHERE id=$id
}
# inherited source | |
# | |
# all the parameters are copied from the parent source, | |
# and may then be overridden in this source definition | |
source product_product_pt : product_product
{
    # Brazilian Portuguese (pt_BR) variant of the product source.
    # Only sql_query is overridden; connection settings, range query,
    # field declarations, etc. are inherited from product_product.
    #
    # NOTE(review): get_template_translation() and get_product_translation()
    # are not defined in this file -- they are presumably SQL functions that
    # must already exist in the target database; confirm before indexing.
    #
    # Each continued line keeps whitespace before the trailing backslash so
    # tokens do not run together when the lines are joined, and the final
    # line has no backslash so the closing brace is not swallowed.
    sql_query = \
        SELECT \
            "product"."id" AS id, \
            get_template_translation(template.id, 'name', template.name, 'pt_BR') AS name, \
            get_product_translation(product.id, 'description', product.description, 'pt_BR') AS description \
        FROM product_product AS product \
        JOIN product_template AS template ON template.id = product.template \
        WHERE \
            product.id >= $start and product.id <= $end
}
############################################################################# | |
## index definition | |
############################################################################# | |
# local index example | |
# | |
# this is an index which is stored locally in the filesystem | |
# | |
# all indexing-time options (such as morphology and charsets) | |
# are configured per local index | |
index product_product
{
    # Base local index over the product_product source, with no morphology.
    # The language-specific indexes defined later in this file inherit from it.

    # document source(s) to index
    # multi-value, mandatory
    # document IDs must be globally unique across all sources
    source = product_product

    # index files path and file name, without extension
    # mandatory, path must be writable, extensions will be auto-appended
    path = /var/data/product_product

    # document attribute values (docinfo) storage mode
    # optional, default is 'extern'
    # known values are 'none', 'extern' and 'inline'
    docinfo = extern

    # memory locking for cached data (.spa and .spi), to prevent swapping
    # optional, default is 0 (do not mlock)
    # requires searchd to be run from root
    mlock = 0

    # a list of morphology preprocessors to apply
    # optional, default is empty
    #
    # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
    # 'soundex', and 'metaphone'; additional preprocessors available from
    # libstemmer are 'libstemmer_XXX', where XXX is algorithm code
    # (see libstemmer_c/libstemmer/modules.txt)
    #
    # morphology = stem_en, stem_ru, soundex
    # morphology = libstemmer_german
    # morphology = libstemmer_sv
    morphology = none

    # minimum indexed word length
    # default is 1 (index everything)
    min_word_len = 1

    # charset encoding type
    # optional, default is 'sbcs'
    # known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
    charset_type = utf-8

    # whether to strip HTML tags from incoming documents
    # known values are 0 (do not strip) and 1 (do strip)
    # optional, default is 0
    html_strip = 1
}
# inherited index | |
# | |
# all the parameters are copied from the parent index, | |
# and may then be overridden in this index definition | |
index product_product_en : product_product
{
    # English index: same source and settings as product_product,
    # but stored separately and with English stemming enabled.
    path = /var/data/product_product_en
    morphology = stem_en
}
index product_product_pt : product_product
{
    # Portuguese index: reads the translated (pt_BR) source and applies the
    # libstemmer Portuguese stemmer; everything else is inherited.
    #
    # NOTE(review): libstemmer_pt is one of the 'libstemmer_XXX'
    # preprocessors, so it presumably needs a Sphinx build with libstemmer
    # support -- confirm on the target host.
    # NOTE(review): no charset_table is set here or in the parent index;
    # verify that accented Portuguese characters are indexed as expected.
    source = product_product_pt
    path = /var/data/product_product_pt
    morphology = libstemmer_pt
}
############################################################################# | |
## indexer settings | |
############################################################################# | |
indexer
{
    # memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
    # optional, default is 32M, max is 2047M, recommended is 256M to 1024M
    #
    # Set to the documented maximum here; lower it if the indexing host
    # cannot spare that much RAM.
    mem_limit = 2047M
}
############################################################################# | |
## searchd settings | |
############################################################################# | |
searchd
{
    # Search daemon settings: listen addresses, log locations, timeouts,
    # resource limits and the multi-processing mode.

    # [hostname:]port[:protocol], or /unix/socket/path to listen on
    # known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL)
    #
    # multi-value, multiple listen points are allowed
    # optional, defaults are 9312:sphinx and 9306:mysql41, as below
    #
    # listen = 127.0.0.1
    # listen = 192.168.0.1:9312
    # listen = 9312
    # listen = /var/run/searchd.sock
    listen = PUT_PUBLIC_OR_PRIVATE_IP_ADDRESS_OF_THE_SEARCHD_SERVER:9312
    listen = PUT_PUBLIC_OR_PRIVATE_IP_ADDRESS_OF_THE_SEARCHD_SERVER:9306:mysql41

    # log file, searchd run info is logged here
    # optional, default is 'searchd.log'
    log = /var/log/sphinx/searchd.log

    # query log file, all search queries are logged here
    # optional, default is empty (do not log queries)
    query_log = /var/log/sphinx/query.log

    # client read timeout, seconds
    # optional, default is 5
    read_timeout = 5

    # request timeout, seconds
    # optional, default is 5 minutes
    client_timeout = 300

    # maximum amount of children to fork (concurrent searches to run)
    # optional, default is 0 (unlimited)
    max_children = 30

    # PID file, searchd process ID file name
    # mandatory
    pid_file = /var/run/searchd.pid

    # max amount of matches the daemon ever keeps in RAM, per-index
    # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
    # default is 1000 (just like Google)
    max_matches = 1000

    # seamless rotate, prevents rotate stalls if precaching huge datasets
    # optional, default is 1
    seamless_rotate = 1

    # whether to forcibly preopen all indexes on startup
    # optional, default is 1 (preopen everything)
    preopen_indexes = 1

    # whether to unlink .old index copies on successful rotation.
    # optional, default is 1 (do unlink)
    unlink_old = 1

    # MVA updates pool size
    # shared between all instances of searchd, disables attr flushes!
    # optional, default size is 1M
    mva_updates_pool = 1M

    # max allowed network packet size
    # limits both query packets from clients, and responses from agents
    # optional, default size is 8M
    max_packet_size = 8M

    # max allowed per-query filter count
    # optional, default is 256
    max_filters = 256

    # max allowed per-filter values count
    # optional, default is 4096
    max_filter_values = 4096

    # max allowed per-batch query count (aka multi-query count)
    # optional, default is 32
    max_batch_queries = 32

    # multi-processing mode (MPM)
    # known values are none, fork, prefork, and threads
    # optional, default is fork
    #
    # 'threads' is selected here for RT to work
    workers = threads
}
# --eof-- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment