#
# ElasticSearch config file
#

cluster:
  name: hoolock
  # http://groups.google.com/a/elasticsearch.com/group/users/browse_thread/thread/439afb06f3e85aa7/431a8543811d7848?lnk=gst&q=configuration#431a8543811d7848
  routing:
    allocation:
      concurrent_recoveries: 1

# File paths
path:
  home: /usr/local/share/elasticsearch
  conf: /etc/elasticsearch
  logs: /var/log/elasticsearch
  # data dirs are set in the elasticsearch.in.sh

# http://www.elasticsearch.com/docs/elasticsearch/modules/node/
node:
  # node.data: is this a data esnode (stores, indexes data)? default true
  data: true

# http://www.elasticsearch.com/docs/elasticsearch/modules/http/
http:
  # http.enabled: is this a query esnode (has http interface, dispatches/gathers queries)? default true
  enabled: true
  port: 9200-9300
  max_content_length: 100mb
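  # Quick sanity check against the HTTP interface once the node is up
  # (a sketch; the host is hypothetical, assumes http.enabled is true):
  #   curl -XGET 'http://localhost:9200/'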

gateway:
  # The gateway set on the node level will automatically control the index
  # gateway to use. For example, if the fs gateway is used, then automatically,
  # each index created on the node will also use its own respective index-level
  # fs gateway. In this case, if an index should not persist its state, it
  # should be explicitly set to none.
  #
  # Set gateway.type to one of: [none, local, fs, hadoop, s3]
  #
  type: local
  #
  # Recovery begins when recover_after_nodes are present and then either
  # recover_after_time has passed *or* expected_nodes have shown up.
  recover_after_nodes: 16
  recover_after_time: 10m # 5m
  expected_nodes: 16 # 2
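  # To watch recovery settle, poll cluster health until status goes green
  # (a sketch; the host is hypothetical):
  #   curl -XGET 'http://localhost:9200/_cluster/health?pretty=true'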
  #
  # use with type: s3
  s3:
    bucket: infochimps-search

# http://groups.google.com/a/elasticsearch.com/group/users/browse_thread/thread/1f3001f43266879a/06d62ea3ceb4db30?lnk=gst&q=translog#06d62ea3ceb4db30
indices:
  memory:
    # Increase if you are bulk loading.
    # A number ('512m') or percent ('10%'); 10% by default. You can bound a
    # percentage with min_index_buffer_size and max_index_buffer_size.
    # 512m for writing:
    index_buffer_size: 512m
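    # Percent-style sizing with bounds would look like this (commented-out
    # sketch; these values are illustrative, not this cluster's):
    # index_buffer_size: 10%
    # min_index_buffer_size: 256m
    # max_index_buffer_size: 1gb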
  cache:
    memory:
      # buffer_size: 100k
      # cache_size: 50m
      # direct: true
      # warm_cache: false

index:
  number_of_shards: 16
  number_of_replicas: 0
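  # These are only defaults for new indices; they can be overridden per index
  # at creation time (a sketch; the 'tweets' index name and host are
  # hypothetical):
  #   curl -XPUT 'http://localhost:9200/tweets/' -d '
  #     {"index": {"number_of_shards": 4, "number_of_replicas": 1}}'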
  #
  translog:
    # A shard is flushed to local disk (the lucene index is committed) once
    # this number of operations accumulates in the translog. Defaults to 5000.
    #
    # If you have
    flush_threshold: 5000
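    # A flush (commit plus translog clear) can also be forced by hand, e.g.
    # after a bulk load (a sketch; the host is hypothetical):
    #   curl -XPOST 'http://localhost:9200/_flush'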
  merge:
    policy:
      # Determines how often segment indices are merged by index operation.
      # With smaller values, less RAM is used while indexing, and searches on
      # unoptimized indices are faster, but indexing speed is slower. With
      # larger values, more RAM is used during indexing, and while searches
      # on unoptimized indices are slower, indexing is faster. Thus larger
      # values (greater than 10) are best for batch index creation, and
      # smaller values (lower than 10) for indices that are interactively
      # maintained. Defaults to 10.
      merge_factor: 30
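      # Back-of-envelope: with merge_factor 30 and the min_merge_size below,
      # thirty ~2.7mb level-0 segments merge into one ~80mb segment, thirty
      # of those into ~2.4gb, and so on; fewer, larger merges at the cost of
      # more segments open at once.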
      # Use the compound file format. If not set, this is controlled by the
      # actual store used; the compound format was created to reduce the
      # number of open file handles when using file-based storage. The file
      # system based stores default to true while others default to false.
      # Even with file system based stores, consider raising the open file
      # handle limit and setting this to false for better performance.
      use_compound_file: false
      # A size setting which sets the minimum size for the lowest-level
      # segments. Any segments below this size are considered to be on the
      # same level (even if they vary drastically in size) and will be merged
      # whenever there are merge_factor of them. This effectively truncates
      # the "long tail" of small segments that would otherwise be created
      # into a single level. If you set this too large, it could greatly
      # increase the merging cost during indexing (if you flush many small
      # segments). Defaults to 1.6mb.
      min_merge_size: 2.7mb
      # Largest segment (by total byte size) that may be merged with other
      # segments. Defaults to unbounded.
      # max_merge_size:
      # Largest segment (by document count) that may be merged with other
      # segments. Defaults to unbounded.
      # max_merge_docs:
    scheduler:
      max_thread_count: 64
  # deletionpolicy: keep_only_last
  engine:
    robin:
      # How often to schedule the refresh operation (the same operation the
      # Refresh API exposes; this is what enables near-real-time search).
      # Default '1s'; set to -1 to disable automatic refresh (you must then
      # initiate refresh via the API).
      refresh_interval: 1s
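      # If automatic refresh is disabled (-1), newly indexed docs only become
      # searchable when you refresh by hand (a sketch; the host is
      # hypothetical):
      #   curl -XPOST 'http://localhost:9200/_refresh'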
      # Set the interval between indexed terms. Large values cause less
      # memory to be used by a reader / searcher, but slow random access to
      # terms. Small values cause more memory to be used by a reader /
      # searcher, and speed random access to terms. Defaults to 128.
      term_index_interval: 1024
  gateway:
    # index.gateway.snapshot_interval is a time setting controlling the
    # interval at which snapshotting of the index shard to the gateway takes
    # place. Note, only primary shards start this scheduled snapshotting
    # process. It defaults to 10s, and can be disabled by setting it to -1.
    snapshot_interval: -1
    # When a primary shard is shut down explicitly (not relocated), the
    # index.gateway.snapshot_on_close flag controls whether a gateway
    # snapshot should be performed while shutting down. It defaults to true.
    snapshot_on_close: false
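    # With scheduled snapshotting disabled, you can still snapshot to the
    # gateway on demand (a sketch; assumes the 0.x gateway snapshot API and
    # a hypothetical host):
    #   curl -XPOST 'http://localhost:9200/_gateway/snapshot'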

# http://www.elasticsearch.com/docs/elasticsearch/modules/node/network/
network:
  bind_host: _local_
  publish_host: _local_
  #
  # tcp:
  #   no_delay: true
  #   keep_alive: ~
  #   reuse_address: true
  #   send_buffer_size: ~
  #   receive_buffer_size: ~

# http://www.elasticsearch.com/docs/elasticsearch/modules/transport/
transport:
  tcp:
    port: 9300-9400
    connect_timeout: 1m
    # enable lzf compression in esnode-esnode communication?
    compress: false

# http://www.elasticsearch.com/docs/elasticsearch/modules/jmx/
jmx:
  # Create an RMI connector?
  create_connector: true
  port: 9400-9500
  domain: elasticsearch
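  # With the RMI connector enabled, a JMX client can attach using the
  # standard JMX service URL form (a sketch; host and port are illustrative):
  #   jconsole service:jmx:rmi:///jndi/rmi://localhost:9400/jmxrmi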

# http://www.elasticsearch.com/docs/elasticsearch/modules/threadpool/
threadpool:
  #
  # threadpool.type should be one of [cached, scaling, blocking]:
  #
  # * Cached: An unbounded thread pool that reuses previously constructed threads.
  # * Scaling: A bounded thread pool that reuses previously created free threads.
  # * Blocking: A bounded thread pool that reuses previously created free
  #   threads. Pending requests block for an available thread (different from
  #   the scaling one, where the request is added to a queue and does not
  #   block).
  #
  # type: cached

# http://www.elasticsearch.com/docs/elasticsearch/modules/discovery/
discovery:
  # set to 'zen' or 'ec2'
  type: zen
  zen:
    ping:
      multicast:
        enabled: false
      unicast:
        hosts: 10.117.9.178:9300,10.117.23.150:9300,10.117.31.211:9300,10.117.15.112:9300
    # There are two fault detection processes running. The first is by the
    # master, to ping all the other nodes in the cluster and verify that they
    # are alive. And on the other end, each node pings the master to verify
    # that it's still alive or whether an election process needs to be
    # initiated.
    fd:
      # How often a node gets pinged. Defaults to "1s".
      ping_interval: 1s
      # How long to wait for a ping response. Defaults to "30s".
      ping_timeout: 30s
      # How many ping failures / timeouts cause a node to be considered failed. Defaults to 3.
      ping_retries: 3
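      # Back-of-envelope: with these values a silent node is declared failed
      # after roughly ping_retries * ping_timeout = 3 * 30s = 90s (detection
      # latency also rides on the 1s ping_interval).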
  #
  # ec2 discovery can cause big trouble with the hadoop loader:
  # discovery churn can hit API usage limits.
  # Be sure to set your cloud keys if you're using ec2.
  #
  # ec2:
  #   # security groups used for discovery
  #   groups: hoolock-data_esnode
  #   # require *all* (false) or *any* (true) of those groups?
  #   any_group: true
  #   # private_ip, public_ip, private_dns, public_dns
  #   host_type: private_ip
  #   availability_zones: us-east-1d

# Necessary if you will use either of
# * the ec2 discovery module, for finding peers
# * the s3 gateway module, for pushing indices to an s3 mirror.
# Read more: http://www.elasticsearch.com/docs/elasticsearch/cloud/
#
cloud:
  aws:
    access_key: XXX
    secret_key: XXXX

# thrift:
#   port:
# monitor.jvm: gc_threshold, interval, enabled

rootLogger: DEBUG, console, file
#
# Put the name of any module -- using its config path -- in the section below.
#
logger:
  # log action execution errors for easier debugging
  action: DEBUG
  index:
    shard:
      recovery: DEBUG
    store: INFO
    gateway: DEBUG
    engine: DEBUG
    merge: DEBUG
    translog: DEBUG
  cluster:
    service: INFO
    action:
      shard: INFO
  gateway: DEBUG
  discovery: DEBUG
  jmx: DEBUG
  httpclient: INFO
  node: DEBUG
  plugins: DEBUG

appender:
  console:
    type: console
    layout:
      type: consolePattern
      conversionPattern: "[%d{ABSOLUTE}][%-5p][%-25c] %m%n"
  file:
    type: dailyRollingFile
    file: ${path.logs}/${cluster.name}.log
    datePattern: "'.'yyyy-MM-dd"
    layout:
      type: pattern
      conversionPattern: "[%d{ABSOLUTE}][%-5p][%-25c] %m%n"
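
# A line formatted by this log4j conversion pattern looks roughly like this
# (illustrative sketch, not captured output; ABSOLUTE renders HH:mm:ss,SSS):
#   [11:50:03,214][DEBUG][cluster.service          ] ...message text...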