Skip to content

Instantly share code, notes, and snippets.

@tobert
Created October 11, 2012 01:06
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tobert/3869536 to your computer and use it in GitHub Desktop.
Save tobert/3869536 to your computer and use it in GitHub Desktop.
sysctl.conf
# save some memory on indexes
index_interval: 512
# 0 disables compaction throttling; this can hurt your read latency, but in my experience it's better to stay caught up on compactions
compaction_throughput_mb_per_sec: 0
# if you have lots & lots of connections, e.g. from Hadoop, saves memory
rpc_server_type: hsha
# /etc/fstab
# Use the "nobootwait" option in /etc/fstab on Ubuntu, or upstart will stall the boot waiting on this mount!
# relatime is the default in Linux since around 2.6.31, so skip the relatime/noatime unless
# you've actually measured a difference
/dev/md7 /data_raid xfs nobootwait,defaults 0 0
# Option 1: pass the NUMA flag to the JVM itself
java -XX:+UseNUMA
# or
# Option 2: run the JVM under numactl. --interleave requires a node list;
# without one, numactl would try to parse "java" as the nodes argument.
# "all" interleaves memory across every node.
numactl --interleave=all java -jar foo.jar
# /etc/security/limits.conf
# this is not a multi-user system, ulimits are useless
* - memlock unlimited
* - nofile 1048576
* - fsize unlimited
* - nproc 999999
# put in /etc/rc.local
# Linux RAID tuning
# Al Tobey 2011-09-19 <al@ooyala.com>
# Haven't tested this in ages, YMMV.
# Tunes the I/O queue and readahead of every /dev/sd[a-z] drive, then scales
# readahead (and the stripe cache, when present) on the md array that backs
# /data_raid.
#
# Kept to POSIX sh on purpose: Ubuntu runs /etc/rc.local under /bin/sh (dash),
# which understands neither the $((2**14)) exponent nor the $(< file) shortcut.
drive_ra=16384 # 16k readahead per drive, in bytes
# blockdev --setra counts in 512-byte units no matter what logical sector
# size the device reports via --getss, so always convert bytes with 512
ra_unit=512
for sysent in /sys/block/sd[a-z]
do
  # an unmatched glob stays as the literal pattern; skip it
  [ -e "$sysent" ] || continue
  drive=${sysent##*/}
  sched="$sysent/queue/scheduler"
  # it should already be CFQ, make sure -- but only when the kernel offers
  # it, since the scheduler file lists the available choices
  if grep -q cfq "$sched" 2>/dev/null ; then
    echo cfq > "$sched"
  else
    echo "rc.local: cfq not available for $drive, leaving scheduler unchanged" >&2
  fi
  # allow 256 in-flight BIO's per drive for better IO merging
  echo 256 > "$sysent/queue/nr_requests"
  # adjust readahead
  blockdev --setra $(($drive_ra / $ra_unit)) "/dev/$drive"
done
# we always mount the critical data volume on /data_raid
# match the mount-point field exactly so that comment lines and other paths
# that merely contain "data_raid" are ignored; stop at the first hit
dr_vol=$(awk '$1 !~ /^#/ && $2 ~ /^\/data_raid\/?$/ { print $1; exit }' /etc/fstab)
if [ -n "$dr_vol" ] ; then
  # short name, e.g. 'md7'
  dr_vol_name=${dr_vol##*/}
  md_dir="/sys/block/$dr_vol_name/md"
  # only md arrays expose raid_disks; anything else gets no RAID tuning
  if [ -r "$md_dir/raid_disks" ] ; then
    # count the number of devices in the raid
    dr_vol_devcnt=$(cat "$md_dir/raid_disks")
    # number of drives in the raid * readahead set on the drives earlier
    dr_vol_rabytes=$(($drive_ra * $dr_vol_devcnt))
    # significantly increase the number of entries in the stripe cache
    if [ -e "$md_dir/stripe_cache_size" ] ; then
      echo 16384 > "$md_dir/stripe_cache_size"
    fi
    # set readahead on the raid device (again in 512-byte units)
    blockdev --setra $(($dr_vol_rabytes / $ra_unit)) "$dr_vol"
  else
    echo "rc.local: $dr_vol does not look like an md array, skipping RAID tuning" >&2
  fi
fi
# Enable compression! surprisingly good
compression_options = {'sstable_compression': 'org.apache.cassandra.io.compress.SnappyCompressor'};
# Examine bloom filter false-positives
# nodetool -h localhost cfstats |grep Bloom
bloom_filter_fp_chance = 0.1 # diminishing returns
# Reduce SSTable count
# memory pressure caused frequent memtable flushes; compaction throttling made it worse
compaction_strategy_options = {'sstable_size_in_mb': 256}
# Give yourself time to repair
gc_grace = 5184000 # 60 days
net.ipv4.ip_forward=0
net.ipv6.conf.all.forwarding=0
kernel.sysrq = 1
kernel.panic = 300
fs.file-max = 1048576
kernel.pid_max = 999999
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.ipv4.tcp_rmem = 4096 65536 16777216
net.ipv4.tcp_wmem = 4096 65536 16777216
vm.max_map_count = 1048576
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment