#
# ElasticSearch config file
#

cluster:
  name: hoolock
  # http://groups.google.com/a/elasticsearch.com/group/users/browse_thread/thread/439afb06f3e85aa7/431a8543811d7848?lnk=gst&q=configuration#431a8543811d7848
  routing:
    allocation:
      concurrent_recoveries: 1

# File paths
path:
  home: /usr/local/share/elasticsearch
  conf: /etc/elasticsearch
  logs: /var/log/elasticsearch
  # data dirs are set in the elasticsearch.in.sh

# http://www.elasticsearch.com/docs/elasticsearch/modules/node/
node:
  # node.data: is this a data esnode (stores, indexes data)? default true
  data: true

# http://www.elasticsearch.com/docs/elasticsearch/modules/http/
http:
  # http.enabled: is this a query esnode (has http interface, dispatches/gathers queries)? default true
  enabled: true
  port: 9200-9300
  max_content_length: 100mb
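  # Quick sanity check against the HTTP interface once the node is up
  # (a sketch; the host is hypothetical, assumes http.enabled is true):
  #   curl -XGET 'http://localhost:9200/'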

gateway:
  # The gateway set on the node level will automatically control the index
  # gateway to use. For example, if the fs gateway is used, then automatically,
  # each index created on the node will also use its own respective index-level
  # fs gateway. In this case, if an index should not persist its state, it
  # should be explicitly set to none.
  #
  # Set gateway.type to one of: [none, local, fs, hadoop, s3]
  #
  type: local
  #
  # Recovery begins when recover_after_nodes are present and then either
  # recover_after_time has passed *or* expected_nodes have shown up.
  recover_after_nodes: 16
  recover_after_time: 10m # 5m
  expected_nodes: 16 # 2
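  # To watch recovery settle, poll cluster health until status goes green
  # (a sketch; the host is hypothetical):
  #   curl -XGET 'http://localhost:9200/_cluster/health?pretty=true'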
  #
  # use with type: s3
  s3:
    bucket: infochimps-search

# http://groups.google.com/a/elasticsearch.com/group/users/browse_thread/thread/1f3001f43266879a/06d62ea3ceb4db30?lnk=gst&q=translog#06d62ea3ceb4db30
indices:
  memory:
    # Increase if you are bulk loading.
    # A number ('512m') or percent ('10%'); 10% by default. You can bound a
    # percentage with min_index_buffer_size and max_index_buffer_size.
    # 512m for writing:
    index_buffer_size: 512m
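    # Percent-style sizing with bounds would look like this (commented-out
    # sketch; these values are illustrative, not this cluster's):
    # index_buffer_size: 10%
    # min_index_buffer_size: 256m
    # max_index_buffer_size: 1gb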
  cache:
    memory:
      # buffer_size: 100k
      # cache_size: 50m
      # direct: true
      # warm_cache: false

index:
  number_of_shards: 16
  number_of_replicas: 0
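  # These are only defaults for new indices; they can be overridden per index
  # at creation time (a sketch; the 'tweets' index name and host are
  # hypothetical):
  #   curl -XPUT 'http://localhost:9200/tweets/' -d '
  #     {"index": {"number_of_shards": 4, "number_of_replicas": 1}}'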
  #
  translog:
    # A shard is flushed to local disk (the lucene index is committed) once
    # this number of operations accumulates in the translog. Defaults to 5000.
    #
    # If you have
    flush_threshold: 5000
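    # A flush (commit plus translog clear) can also be forced by hand, e.g.
    # after a bulk load (a sketch; the host is hypothetical):
    #   curl -XPOST 'http://localhost:9200/_flush'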
  merge:
    policy:
      # Determines how often segment indices are merged by index operation.
      # With smaller values, less RAM is used while indexing, and searches on
      # unoptimized indices are faster, but indexing speed is slower. With
      # larger values, more RAM is used during indexing, and while searches
      # on unoptimized indices are slower, indexing is faster. Thus larger
      # values (greater than 10) are best for batch index creation, and
      # smaller values (lower than 10) for indices that are interactively
      # maintained. Defaults to 10.
      merge_factor: 30
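      # Back-of-envelope: with merge_factor 30 and the min_merge_size below,
      # thirty ~2.7mb level-0 segments merge into one ~80mb segment, thirty
      # of those into ~2.4gb, and so on; fewer, larger merges at the cost of
      # more segments open at once.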
      # Use the compound file format. If not set, this is controlled by the
      # actual store used; the compound format was created to reduce the
      # number of open file handles when using file-based storage. The file
      # system based stores default to true while others default to false.
      # Even with file system based stores, consider raising the open file
      # handle limit and setting this to false for better performance.
      use_compound_file: false
      # A size setting which sets the minimum size for the lowest-level
      # segments. Any segments below this size are considered to be on the
      # same level (even if they vary drastically in size) and will be merged
      # whenever there are merge_factor of them. This effectively truncates
      # the "long tail" of small segments that would otherwise be created
      # into a single level. If you set this too large, it could greatly
      # increase the merging cost during indexing (if you flush many small
      # segments). Defaults to 1.6mb.
      min_merge_size: 2.7mb
      # Largest segment (by total byte size) that may be merged with other
      # segments. Defaults to unbounded.
      # max_merge_size:
      # Largest segment (by document count) that may be merged with other
      # segments. Defaults to unbounded.
      # max_merge_docs:
    scheduler:
      max_thread_count: 64
  # deletionpolicy: keep_only_last
  engine:
    robin:
      # How often to schedule the refresh operation (the same operation the
      # Refresh API exposes; this is what enables near-real-time search).
      # Default '1s'; set to -1 to disable automatic refresh (you must then
      # initiate refresh via the API).
      refresh_interval: 1s
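      # If automatic refresh is disabled (-1), newly indexed docs only become
      # searchable when you refresh by hand (a sketch; the host is
      # hypothetical):
      #   curl -XPOST 'http://localhost:9200/_refresh'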
      # Set the interval between indexed terms. Large values cause less
      # memory to be used by a reader / searcher, but slow random access to
      # terms. Small values cause more memory to be used by a reader /
      # searcher, and speed random access to terms. Defaults to 128.
      term_index_interval: 1024
  gateway:
    # index.gateway.snapshot_interval is a time setting controlling the
    # interval at which snapshotting of the index shard to the gateway takes
    # place. Note, only primary shards start this scheduled snapshotting
    # process. It defaults to 10s, and can be disabled by setting it to -1.
    snapshot_interval: -1
    # When a primary shard is shut down explicitly (not relocated), the
    # index.gateway.snapshot_on_close flag controls whether a gateway
    # snapshot should be performed while shutting down. It defaults to true.
    snapshot_on_close: false
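    # With scheduled snapshotting disabled, you can still snapshot to the
    # gateway on demand (a sketch; assumes the 0.x gateway snapshot API and
    # a hypothetical host):
    #   curl -XPOST 'http://localhost:9200/_gateway/snapshot'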

# http://www.elasticsearch.com/docs/elasticsearch/modules/node/network/
network:
  bind_host: _local_
  publish_host: _local_
  #
  # tcp:
  #   no_delay: true
  #   keep_alive: ~
  #   reuse_address: true
  #   send_buffer_size: ~
  #   receive_buffer_size: ~

# http://www.elasticsearch.com/docs/elasticsearch/modules/transport/
transport:
  tcp:
    port: 9300-9400
    connect_timeout: 1m
    # enable lzf compression in esnode-esnode communication?
    compress: false

# http://www.elasticsearch.com/docs/elasticsearch/modules/jmx/
jmx:
  # Create an RMI connector?
  create_connector: true
  port: 9400-9500
  domain: elasticsearch
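  # With the RMI connector enabled, a JMX client can attach using the
  # standard JMX service URL form (a sketch; host and port are illustrative):
  #   jconsole service:jmx:rmi:///jndi/rmi://localhost:9400/jmxrmi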

# http://www.elasticsearch.com/docs/elasticsearch/modules/threadpool/
threadpool:
  #
  # threadpool.type should be one of [cached, scaling, blocking]:
  #
  # * Cached: An unbounded thread pool that reuses previously constructed threads.
  # * Scaling: A bounded thread pool that reuses previously created free threads.
  # * Blocking: A bounded thread pool that reuses previously created free
  #   threads. Pending requests block for an available thread (different from
  #   the scaling one, where the request is added to a queue and does not
  #   block).
  #
  # type: cached

# http://www.elasticsearch.com/docs/elasticsearch/modules/discovery/
discovery:
  # set to 'zen' or 'ec2'
  type: zen
  zen:
    ping:
      multicast:
        enabled: false
      unicast:
        hosts: 10.117.9.178:9300,10.117.23.150:9300,10.117.31.211:9300,10.117.15.112:9300
    # There are two fault detection processes running. The first is by the
    # master, to ping all the other nodes in the cluster and verify that they
    # are alive. And on the other end, each node pings the master to verify
    # that it's still alive or whether an election process needs to be
    # initiated.
    fd:
      # How often a node gets pinged. Defaults to "1s".
      ping_interval: 1s
      # How long to wait for a ping response. Defaults to "30s".
      ping_timeout: 30s
      # How many ping failures / timeouts cause a node to be considered failed. Defaults to 3.
      ping_retries: 3
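      # Back-of-envelope: with these values a silent node is declared failed
      # after roughly ping_retries * ping_timeout = 3 * 30s = 90s (detection
      # latency also rides on the 1s ping_interval).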
  #
  # ec2 discovery can cause big trouble with the hadoop loader:
  # discovery churn can hit API usage limits.
  # Be sure to set your cloud keys if you're using ec2.
  #
  # ec2:
  #   # security groups used for discovery
  #   groups: hoolock-data_esnode
  #   # require *all* (false) or *any* (true) of those groups?
  #   any_group: true
  #   # private_ip, public_ip, private_dns, public_dns
  #   host_type: private_ip
  #   availability_zones: us-east-1d

# Necessary if you will use either of
# * the ec2 discovery module, for finding peers
# * the s3 gateway module, for pushing indices to an s3 mirror.
# Read more: http://www.elasticsearch.com/docs/elasticsearch/cloud/
#
cloud:
  aws:
    access_key: XXX
    secret_key: XXXX

# thrift:
#   port:
# monitor.jvm: gc_threshold, interval, enabled

rootLogger: DEBUG, console, file
#
# Put the name of any module -- using its config path -- in the section below.
#
logger:
  # log action execution errors for easier debugging
  action: DEBUG
  index:
    shard:
      recovery: DEBUG
    store: INFO
    gateway: DEBUG
    engine: DEBUG
    merge: DEBUG
    translog: DEBUG
  cluster:
    service: INFO
    action:
      shard: INFO
  gateway: DEBUG
  discovery: DEBUG
  jmx: DEBUG
  httpclient: INFO
  node: DEBUG
  plugins: DEBUG

appender:
  console:
    type: console
    layout:
      type: consolePattern
      conversionPattern: "[%d{ABSOLUTE}][%-5p][%-25c] %m%n"
  file:
    type: dailyRollingFile
    file: ${path.logs}/${cluster.name}.log
    datePattern: "'.'yyyy-MM-dd"
    layout:
      type: pattern
      conversionPattern: "[%d{ABSOLUTE}][%-5p][%-25c] %m%n"
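
# A line formatted by this log4j conversion pattern looks roughly like this
# (illustrative sketch, not captured output; ABSOLUTE renders HH:mm:ss,SSS):
#   [11:50:03,214][DEBUG][cluster.service          ] ...message text...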