Created
January 30, 2012 17:40
-
-
Save tjake/1705600 to your computer and use it in GitHub Desktop.
Cassandra lan party config
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cassandra storage config YAML

# NOTE:
#   See http://wiki.apache.org/cassandra/StorageConfiguration for
#   full explanations of configuration directives
# /NOTE

# The name of the cluster. This is mainly used to prevent machines in
# one logical cluster from joining another.
cluster_name: 'LanParty Cluster'
# Address to bind to and tell other Cassandra nodes to connect to. You
# _must_ change this if you want multiple nodes to be able to
# communicate!
#
# Leaving it blank leaves it up to InetAddress.getLocalHost(). This
# will always do the Right Thing *if* the node is properly configured
# (hostname, name resolution, etc), and the Right Thing is to use the
# address associated with the hostname (it might not be).
#
# Setting this to 0.0.0.0 is always wrong.
#
# NOTE: 10.X.X.X is a placeholder, not a usable address; replace it
# with this node's own IP address before starting Cassandra.
listen_address: 10.X.X.X
# You should always specify initial_token when setting up a production
# cluster for the first time, and often when adding capacity later.
# The principle is that each node should be given an equal slice of
# the token ring; see http://wiki.apache.org/cassandra/Operations
# for more details.
#
# If blank, Cassandra will request a token bisecting the range of
# the heaviest-loaded existing node. If there is no load information
# available, such as is the case with a new cluster, it will pick
# a random token, which will lead to hot spots.
initial_token:
# See http://wiki.apache.org/cassandra/HintedHandoff
hinted_handoff_enabled: true
# This defines the maximum amount of time a dead host will have hints
# generated. After it has been dead this long, hints will be dropped.
max_hint_window_in_ms: 3600000  # one hour
# Sleep this long after delivering each hint
hinted_handoff_throttle_delay_in_ms: 1
# authentication backend, implementing IAuthenticator; used to identify users
authenticator: org.apache.cassandra.auth.AllowAllAuthenticator

# authorization backend, implementing IAuthority; used to limit access/provide permissions
authority: org.apache.cassandra.auth.AllowAllAuthority
# The partitioner is responsible for distributing rows (by key) across
# nodes in the cluster. Any IPartitioner may be used, including your
# own as long as it is on the classpath. Out of the box, Cassandra
# provides org.apache.cassandra.dht.RandomPartitioner,
# org.apache.cassandra.dht.ByteOrderedPartitioner,
# org.apache.cassandra.dht.OrderPreservingPartitioner (deprecated),
# and org.apache.cassandra.dht.CollatingOrderPreservingPartitioner
# (deprecated).
#
# - RandomPartitioner distributes rows across the cluster evenly by md5.
#   When in doubt, this is the best option.
# - ByteOrderedPartitioner orders rows lexically by key bytes. BOP allows
#   scanning rows in key order, but the ordering can generate hot spots
#   for sequential insertion workloads.
# - OrderPreservingPartitioner is an obsolete form of BOP, that stores
#   keys in a less-efficient format and only works with keys that are
#   UTF8-encoded Strings.
# - CollatingOPP collates according to EN,US rules rather than lexical byte
#   ordering. Use this as an example if you need custom collation.
#
# See http://wiki.apache.org/cassandra/Operations for more on
# partitioners and token selection.
partitioner: org.apache.cassandra.dht.RandomPartitioner
# directories where Cassandra should store data on disk.
data_file_directories:
  - ./data

# commit log
commitlog_directory: ./commitlog

# saved caches
saved_caches_directory: ./saved_caches
# commitlog_sync may be either "periodic" or "batch."
# When in batch mode, Cassandra won't ack writes until the commit log
# has been fsynced to disk. It will wait up to
# commitlog_sync_batch_window_in_ms milliseconds for other writes, before
# performing the sync.
#
# commitlog_sync: batch
# commitlog_sync_batch_window_in_ms: 50
#
# the other option is "periodic" where writes may be acked immediately
# and the CommitLog is simply synced every commitlog_sync_period_in_ms
# milliseconds.
commitlog_sync: periodic
commitlog_sync_period_in_ms: 10000
# any class that implements the SeedProvider interface and has a
# constructor that takes a Map<String, String> of parameters will do.
seed_provider:
  # Addresses of hosts that are deemed contact points.
  # Cassandra nodes use this list of hosts to find each other and learn
  # the topology of the ring. You must change this if you are running
  # multiple nodes!
  - class_name: org.apache.cassandra.locator.SimpleSeedProvider
    # `parameters` belongs to the provider entry above, so it must be
    # nested under it rather than sit at the top level.
    parameters:
      # seeds is actually a comma-delimited list of addresses.
      # Ex: "<ip1>,<ip2>,<ip3>"
      - seeds: "10.1.0.1,10.2.0.1,10.3.0.1"
# emergency pressure valve: each time heap usage after a full (CMS)
# garbage collection is above this fraction of the max, Cassandra will
# flush the largest memtables.
#
# Set to 1.0 to disable. Setting this lower than
# CMSInitiatingOccupancyFraction is not likely to be useful.
#
# RELYING ON THIS AS YOUR PRIMARY TUNING MECHANISM WILL WORK POORLY:
# it is most effective under light to moderate load, or read-heavy
# workloads; under truly massive write load, it will often be too
# little, too late.
flush_largest_memtables_at: 0.75

# emergency pressure valve #2: the first time heap usage after a full
# (CMS) garbage collection is above this fraction of the max,
# Cassandra will reduce cache maximum _capacity_ to the given fraction
# of the current _size_. Should usually be set substantially above
# flush_largest_memtables_at, since that will have less long-term
# impact on the system.
#
# Set to 1.0 to disable. Setting this lower than
# CMSInitiatingOccupancyFraction is not likely to be useful.
reduce_cache_sizes_at: 0.85
reduce_cache_capacity_to: 0.6
# For workloads with more data than can fit in memory, Cassandra's
# bottleneck will be reads that need to fetch data from
# disk. "concurrent_reads" should be set to (16 * number_of_drives) in
# order to allow the operations to enqueue low enough in the stack
# that the OS and drives can reorder them.
#
# On the other hand, since writes are almost never IO bound, the ideal
# number of "concurrent_writes" is dependent on the number of cores in
# your system; (8 * number_of_cores) is a good rule of thumb.
concurrent_reads: 32
concurrent_writes: 32
# Total memory to use for memtables. Cassandra will flush the largest
# memtable when this much memory is used.
# If omitted, Cassandra will set it to 1/3 of the heap.
# memtable_total_space_in_mb: 2048

# Total space to use for commitlogs.
# If space gets above this value (it will round up to the next nearest
# segment multiple), Cassandra will flush every dirty CF in the oldest
# segment and remove it.
# commitlog_total_space_in_mb: 4096

# This sets the number of memtable flush writer threads. These will
# be blocked by disk io, and each one will hold a memtable in memory
# while blocked. If you have a large heap and many data directories,
# you can increase this value for better flush performance.
# By default this will be set to the number of data directories defined.
# memtable_flush_writers: 1

# the number of full memtables to allow pending flush, that is,
# waiting for a writer thread. At a minimum, this should be set to
# the maximum number of secondary indexes created on a single CF.
memtable_flush_queue_size: 4
# Buffer size to use when performing contiguous column slices.
# Increase this to the size of the column slices you typically perform
sliced_buffer_size_in_kb: 64

# TCP port, for commands and data
storage_port: 7000

# SSL port, for encrypted communication. Unused unless enabled in
# encryption_options
ssl_storage_port: 7001

# Address to broadcast to other Cassandra nodes
# Leaving this blank will set it to the same value as listen_address
# broadcast_address: 1.2.3.4
# The address to bind the Thrift RPC service to -- clients connect
# here. Unlike listen_address above, you *can* specify 0.0.0.0 here if
# you want Thrift to listen on all interfaces.
#
# Leaving this blank has the same effect it does for listen_address,
# (i.e. it will be based on the configured hostname of the node).
rpc_address: 0.0.0.0
# port for Thrift to listen for clients on
rpc_port: 9160

# enable or disable keepalive on rpc connections
rpc_keepalive: true
# Cassandra provides three options for the RPC Server:
#
# sync  -> One connection per thread in the rpc pool (see below).
#          For a very large number of clients, memory will be your limiting
#          factor; on a 64 bit JVM, 128KB is the minimum stack size per thread.
#          Connection pooling is very, very strongly recommended.
#
# async -> Nonblocking server implementation with one thread to serve
#          rpc connections. This is not recommended for high throughput use
#          cases. Async has been tested to be about 50% slower than sync
#          or hsha and is deprecated: it will be removed in the next major release.
#
# hsha  -> Stands for "half synchronous, half asynchronous." The rpc thread pool
#          (see below) is used to manage requests, but the threads are multiplexed
#          across the different clients.
#
# The default is sync because on Windows hsha is about 30% slower. On Linux,
# sync/hsha performance is about the same, with hsha of course using less memory.
rpc_server_type: sync
# Uncomment rpc_min_threads and/or rpc_max_threads to set request pool size.
# You would primarily set max for the sync server to safeguard against
# misbehaved clients; if you do hit the max, Cassandra will block until one
# disconnects before accepting more. The defaults for sync are min of 16 and max
# unlimited.
#
# For the hsha server, the min and max both default to quadruple the number of
# CPU cores.
#
# This configuration is ignored by the async server.
#
# rpc_min_threads: 16
# rpc_max_threads: 2048

# uncomment to set socket buffer sizes on rpc connections
# rpc_send_buff_size_in_bytes:
# rpc_recv_buff_size_in_bytes:
# Frame size for thrift (maximum field length).
# 0 disables TFramedTransport in favor of TSocket. This option
# is deprecated; we strongly recommend using Framed mode.
thrift_framed_transport_size_in_mb: 15

# The max length of a thrift message, including all fields and
# internal thrift overhead.
thrift_max_message_length_in_mb: 16
# Set to true to have Cassandra create a hard link to each sstable
# flushed or streamed locally in a backups/ subdirectory of the
# Keyspace data. Removing these links is the operator's
# responsibility.
incremental_backups: false

# Whether or not to take a snapshot before each compaction. Be
# careful using this option, since Cassandra won't clean up the
# snapshots for you. Mostly useful if you're paranoid when there
# is a data format change.
snapshot_before_compaction: false
# Add column indexes to a row after its contents reach this size.
# Increase if your column values are large, or if you have a very large
# number of columns. There are two competing concerns: Cassandra has to
# deserialize this much of the row to read a single column, so you want
# it to be small - at least if you do many partial-row reads - but all
# the index data is read for each access, so you don't want to generate
# that wastefully either.
column_index_size_in_kb: 64

# Size limit for rows being compacted in memory. Larger rows will spill
# over to disk and use a slower two-pass compaction process. A message
# will be logged specifying the row key.
in_memory_compaction_limit_in_mb: 64
# Number of simultaneous compactions to allow, NOT including
# validation "compactions" for anti-entropy repair. Simultaneous
# compactions can help preserve read performance in a mixed read/write
# workload, by mitigating the tendency of small sstables to accumulate
# during a single long-running compaction. The default is usually
# fine and if you experience problems with compaction running too
# slowly or too fast, you should look at
# compaction_throughput_mb_per_sec first.
#
# This setting has no effect on LeveledCompactionStrategy.
#
# concurrent_compactors defaults to the number of cores.
# Uncomment to make compaction mono-threaded, the pre-0.8 default.
# concurrent_compactors: 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment