# dougmorato/sphinx.conf

## sphinx.conf
#
# Sphinx configuration file sample
#
# Please refer to doc/sphinx.html for details.
#

#############################################################################
## data source definition
#############################################################################

source product_product
{
	# data source type. mandatory, no default value
	# known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
	type = pgsql

	#####################################################################
	## SQL settings (for 'mysql' and 'pgsql' types)
	#####################################################################

	# connection parameters for the database server
	# every PUT_... value is a placeholder and must be replaced before use
	sql_host = PUT_THE_IP_ADDRESS_OR_FQDN_OF_YOUR_DATABASE_SERVER
	sql_user = PUT_YOUR_DATABASE_USERNAME
	sql_pass = PUT_THE_PASSWORD_FOR_THE_DATABASE_USER
	sql_db = PUT_THE_DATABASE_NAME

	# 5432 is the standard PostgreSQL port (3306 would be the MySQL one)
	sql_port = 5432

	# main document fetch query
	# mandatory, integer document ID field MUST be the first selected column
	#
	# this is a ranged query: it references the $start and $end boundaries,
	# which come from sql_query_range / sql_range_step below
	#
	# NOTE: the last selected column must NOT be followed by a comma, and the
	# last line of the query must NOT end with a continuation backslash
	sql_query = \
		SELECT \
			product.id AS id, \
			template.name AS name, \
			product.description AS description \
		FROM product_product AS product \
		JOIN product_template AS template ON template.id = product.template \
		WHERE \
			product.id >= $start AND product.id <= $end

	# range query setup, query that must return min and max ID values
	# optional, default is empty
	sql_query_range = SELECT MIN(id),MAX(id) FROM product_product

	# range query step
	# optional, default is 1024
	sql_range_step = 1000

	# combined field plus attribute declaration (from a single column)
	# stores column as an attribute, but also indexes it as a full-text field
	sql_field_string = name
	sql_field_string = description

	# ranged query throttling, in milliseconds
	# optional, default is 0 which means no delay
	# enforces given delay before each query step
	sql_ranged_throttle = 0

	# document info query, ONLY for CLI search (ie. testing and debugging)
	# optional, default is empty
	# must contain $id macro and must fetch the document by that id
	#
	# points at product_product, the table the indexed document IDs come from
	# NOTE(review): the Sphinx reference describes this option as applying to
	# the mysql source type only, so it is presumably ignored for pgsql -- confirm
	sql_query_info = SELECT * FROM product_product WHERE id=$id

}

# inherited source
#
# all the parameters are copied from the parent source,
# and may then be overridden in this source definition

source product_product_pt : product_product
{
	# Portuguese (pt_BR) variant of the parent fetch query: name and
	# description are passed through the get_template_translation() /
	# get_product_translation() database functions with the 'pt_BR' locale.
	# Everything else (connection, range query, fields) is inherited.
	#
	# NOTE: the last line of the query must NOT end with a continuation
	# backslash, otherwise the closing brace below is joined into the SQL text
	sql_query = \
		SELECT \
			product.id AS id, \
			get_template_translation(template.id, 'name', template.name, 'pt_BR') AS name, \
			get_product_translation(product.id, 'description', product.description, 'pt_BR') AS description \
		FROM product_product AS product \
		JOIN product_template AS template ON template.id = product.template \
		WHERE \
			product.id >= $start AND product.id <= $end
}

#############################################################################
## index definition
#############################################################################

# local index example
#
# this is an index which is stored locally in the filesystem
#
# all indexing-time options (such as morphology and charsets)
# are configured per local index
index product_product
{
	# source(s) feeding this index
	# multi-value, mandatory; document IDs must be globally unique
	# across all sources
	source = product_product

	# base path and file name of the index files, without extension
	# mandatory; the location must be writable, extensions are auto-appended
	path = /var/data/product_product

	# storage mode for document attribute values (docinfo)
	# one of 'none', 'extern', 'inline'; optional, default is 'extern'
	docinfo = extern

	# lock cached data (.spa and .spi) in memory to prevent swapping
	# optional, default is 0 (do not mlock); requires searchd run from root
	mlock = 0

	# morphology preprocessors to apply; optional, default is empty
	#
	# builtin: 'none', 'stem_en', 'stem_ru', 'stem_enru', 'soundex',
	# 'metaphone'; libstemmer ones are named 'libstemmer_XXX', where XXX
	# is the algorithm code (see libstemmer_c/libstemmer/modules.txt)
	#
	# examples:
	# morphology = stem_en, stem_ru, soundex
	# morphology = libstemmer_german
	# morphology = libstemmer_sv
	morphology = none

	# shortest word length that gets indexed
	# default is 1 (index everything)
	min_word_len = 1

	# character set encoding
	# 'sbcs' (Single Byte CharSet) or 'utf-8'; optional, default is 'sbcs'
	charset_type = utf-8

	# strip HTML tags from incoming documents
	# 0 = do not strip, 1 = strip; optional, default is 0
	html_strip = 1

}


# inherited index
#
# all the parameters are copied from the parent index,
# and may then be overridden in this index definition
index product_product_en : product_product
{
	# same source as the parent index, stored in its own files
	path = /var/data/product_product_en

	# builtin English stemmer instead of the parent's 'none'
	morphology = stem_en
}

index product_product_pt : product_product
{
	# fed by the pt_BR source rather than the parent's source
	source = product_product_pt

	# stored in its own files
	path = /var/data/product_product_pt

	# libstemmer preprocessor with algorithm code 'pt'
	morphology = libstemmer_pt
}

#############################################################################
## indexer settings
#############################################################################

indexer
{
	# indexing memory limit; accepts bytes, kilobytes (16384K) or
	# megabytes (256M)
	# optional, default is 32M, max is 2047M, recommended is 256M to 1024M
	# (set to the maximum here)
	mem_limit = 2047M

}

#############################################################################
## searchd settings
#############################################################################

searchd
{
	# listen points, as [hostname:]port[:protocol] or /unix/socket/path
	# known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL)
	#
	# multi-value, multiple listen points are allowed
	# optional, defaults are 9312:sphinx and 9306:mysql41, as below
	#
	# examples:
	# listen = 127.0.0.1
	# listen = 192.168.0.1:9312
	# listen = 9312
	# listen = /var/run/searchd.sock
	#
	# replace the PUT_... placeholder with the address searchd should bind to
	listen = PUT_PUBLIC_OR_PRIVATE_IP_ADDRESS_OF_THE_SEARCHD_SERVER:9312
	listen = PUT_PUBLIC_OR_PRIVATE_IP_ADDRESS_OF_THE_SEARCHD_SERVER:9306:mysql41

	# daemon log file (searchd run info)
	# optional, default is 'searchd.log'
	log = /var/log/sphinx/searchd.log

	# query log file (every search query)
	# optional, default is empty (do not log queries)
	query_log = /var/log/sphinx/query.log

	# client read timeout, in seconds
	# optional, default is 5
	read_timeout = 5

	# request timeout, in seconds
	# optional, default is 5 minutes
	client_timeout = 300

	# maximum number of children to fork (concurrent searches to run)
	# optional, default is 0 (unlimited)
	max_children = 30

	# file that holds the searchd process ID
	# mandatory
	pid_file = /var/run/searchd.pid

	# maximum number of matches the daemon ever keeps in RAM, per-index
	# WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
	# default is 1000 (just like Google)
	max_matches = 1000

	# seamless rotate, prevents rotate stalls if precaching huge datasets
	# optional, default is 1
	seamless_rotate = 1

	# forcibly preopen all indexes on startup
	# optional, default is 1 (preopen everything)
	preopen_indexes = 1

	# unlink .old index copies on successful rotation
	# optional, default is 1 (do unlink)
	unlink_old = 1

	# MVA updates pool size
	# shared between all instances of searchd, disables attr flushes!
	# optional, default size is 1M
	mva_updates_pool = 1M

	# maximum allowed network packet size
	# limits both query packets from clients, and responses from agents
	# optional, default size is 8M
	max_packet_size = 8M

	# maximum allowed per-query filter count
	# optional, default is 256
	max_filters = 256

	# maximum allowed per-filter values count
	# optional, default is 4096
	max_filter_values = 4096

	# maximum allowed per-batch query count (aka multi-query count)
	# optional, default is 32
	max_batch_queries = 32

	# multi-processing mode (MPM)
	# known values are none, fork, prefork, and threads
	# optional, default is fork
	#
	# 'threads' is chosen here for RT to work
	workers = threads
}
# --eof--
	# End of configuration. Every source, index, indexer and searchd section
	# is declared exactly once above; Sphinx refuses to load a config file
	# that declares the same section name a second time.