Skip to content

Instantly share code, notes, and snippets.

@nesquena
Last active December 5, 2019 17:41
Show Gist options
  • Save nesquena/482555 to your computer and use it in GitHub Desktop.
Save nesquena/482555 to your computer and use it in GitHub Desktop.
god configuration
# Delayed Job workers: one god watch per priority band.
#
# Every entry in PRIORITY_RANGES produces a worker named "dj-<index>" whose
# rake task is started with MIN_PRIORITY / MAX_PRIORITY set to the bounds of
# that range. Higher values represent lower priority.
PRIORITY_RANGES = [(0..9), (10..19)].freeze

# Iterate the ranges themselves (rather than a hard-coded worker count) so the
# number of watches can never drift out of sync with the list of ranges.
PRIORITY_RANGES.each_with_index do |priorities, num|
  God.watch do |w|
    w.name     = "dj-#{num}"
    w.group    = 'dj'
    w.interval = 30.seconds
    # RAILS_ROOT is not defined in this block; it must already be set when
    # this file is loaded.
    w.start    = "rake -f #{RAILS_ROOT}/Rakefile RAILS_ENV=production MIN_PRIORITY=#{priorities.first} MAX_PRIORITY=#{priorities.last} jobs:work"
    w.log      = '/var/log/god/god.log'
    w.uid      = 'deploy'
    w.gid      = 'deploy'

    # restart if memory gets too high
    w.transition(:up, :restart) do |on|
      on.condition(:memory_usage) do |c|
        c.above  = 600.megabytes
        c.times  = 3
        c.notify = %w[nathan tim]
      end
    end

    # determine the state on startup
    w.transition(:init, { true => :up, false => :start }) do |on|
      on.condition(:process_running) do |c|
        c.running = true
      end
    end

    # determine when process has finished starting
    w.transition([:start, :restart], :up) do |on|
      on.condition(:process_running) do |c|
        c.running  = true
        c.interval = 5.seconds
      end

      # failsafe: after 5 tries, go back to :start
      on.condition(:tries) do |c|
        c.times      = 5
        c.transition = :start
        c.interval   = 5.seconds
      end
    end

    # start if process is not running
    w.transition(:up, :start) do |on|
      on.condition(:process_running) do |c|
        c.notify  = %w[nathan tim]
        c.running = false
      end
    end
  end
end
#!/usr/bin/env bash
### BEGIN INIT INFO
# Provides: god
# Required-Start: $local_fs $syslog
# Required-Stop: $local_fs $syslog
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: Runs the ruby god meta server.
# Description: Runs the ruby god meta server.
### END INIT INFO
#
# God init.d (startup) script
#
# Save this file as /etc/init.d/god
# Make it executable:
#   chmod +x /etc/init.d/god
# Tell the OS to run the script at startup:
#   sudo /usr/sbin/update-rc.d -f god defaults
#
# Also consider adding this line (kills god weekly) to your crontab (sudo crontab -e):
#
#   # deicide is painless
#   0 1 * * 0 god quit; sleep 1; killall god; sleep 1; killall -9 god; sleep 1; /etc/init.d/god start
#
# Usage: /etc/init.d/god {start|stop|restart|status}
#
# Exit status:
#   start/stop/restart - exit status of the underlying god/kill command
#   status             - 0 running, 1 pid file present but process dead,
#                        3 not running (no pid file)
#
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
RETVAL=0
god_conf="/etc/god.conf"
god_pid_file="/var/run/god/god.pid"
god_log_file="/var/log/god/god.log"

# Make sure the pid and log directories exist before god writes to them.
mkdir -p "$(dirname "$god_pid_file")" "$(dirname "$god_log_file")"

# Print the pid stored in the pid file; fails if the file is missing or empty.
god_pid() {
  [ -s "$god_pid_file" ] && cat "$god_pid_file"
}

# Succeeds only when the pid file names a process that is still alive.
god_running() {
  local pid
  pid=$(god_pid) && kill -0 "$pid" 2>/dev/null
}

# Launch the god daemon with the configured config, pid and log files.
start_god() {
  god -c "$god_conf" -P "$god_pid_file" -l "$god_log_file"
}

# Send TERM to the pid recorded in the pid file.
stop_god() {
  local pid
  if ! pid=$(god_pid); then
    echo "God is not running (no pid in $god_pid_file)" >&2
    return 1
  fi
  kill "$pid"
}

# Wait up to ~10 seconds for the given pid to exit, so a restart does not
# race the old daemon.
wait_for_exit() {
  local pid=$1 tries=0
  while kill -0 "$pid" 2>/dev/null && [ "$tries" -lt 10 ]; do
    sleep 1
    tries=$((tries + 1))
  done
}

case "$1" in
  start)
    start_god
    RETVAL=$?
    if [ "$RETVAL" -eq 0 ]; then
      echo "God started"
    else
      echo "God failed to start" >&2
    fi
    ;;
  stop)
    stop_god
    RETVAL=$?
    if [ "$RETVAL" -eq 0 ]; then
      echo "God stopped"
    else
      echo "God could not be stopped" >&2
    fi
    ;;
  restart)
    # A missing or stale pid file is not fatal here: we still try to start.
    if old_pid=$(god_pid); then
      kill "$old_pid" 2>/dev/null && wait_for_exit "$old_pid"
    fi
    start_god
    RETVAL=$?
    if [ "$RETVAL" -eq 0 ]; then
      echo "God restarted"
    else
      echo "God failed to restart" >&2
    fi
    ;;
  status)
    if god_running; then
      echo "God is running (pid $(god_pid))"
      RETVAL=0
    elif [ -e "$god_pid_file" ]; then
      echo "God is not running but $god_pid_file exists"
      RETVAL=1
    else
      echo "God is not running"
      RETVAL=3
    fi
    ;;
  *)
    echo "Usage: god {start|stop|restart|status}"
    exit 1
    ;;
esac
exit $RETVAL
# /etc/god.conf
#
# Top-level god configuration. It loads optional shared settings from
# /etc/god/god_config.yaml, fixes RAILS_ROOT, loads every watch definition
# matching /etc/god/*.god, and registers the email contacts that the watches
# refer to by name ('nathan', 'tim') or by group ('developers').
require 'rubygems'
require 'yaml' # YAML is used directly below; do not rely on another gem loading it
require 'active_support'
require 'extlib'
require 'godhead'

# Shared settings. Loading is best-effort: a missing or unreadable file yields
# an empty config, but the reason is reported instead of silently discarded.
# YAML.load_file closes the file itself, unlike a bare File.open.
unless defined?(GOD_CONFIG)
  GOD_CONFIG =
    begin
      YAML.load_file('/etc/god/god_config.yaml') || {}
    rescue StandardError => e
      warn "god.conf: could not load /etc/god/god_config.yaml (#{e.class}: #{e.message}); using empty config"
      {}
    end
end

# Honour RAILS_ROOT from the environment, otherwise fall back to the default
# and write that default back into ENV.
RAILS_ROOT = (ENV['RAILS_ROOT'] ||= "/var/apps/gomiso/current") unless defined?(RAILS_ROOT)

# load in all god configs
God.load "/etc/god/*.god"

# set up email
God::Contacts::Email.defaults do |d|
  d.from_email      = 'god@codepath.com'
  d.from_name       = 'God'
  d.delivery_method = :sendmail
end

# One email contact per developer, all in the 'developers' group.
[
  ['nathan', 'nathan@codepath.com'],
  ['tim',    'tim@codepath.com']
].each do |contact_name, address|
  God.contact(:email) do |c|
    c.name     = contact_name
    c.group    = 'developers'
    c.to_email = address
  end
end
# memcached: keep the service up and restart it under sustained memory or CPU load.
God.watch do |watch|
  watch.name          = 'memcached'
  watch.interval      = 30.seconds
  watch.start         = 'service memcached start'
  watch.stop          = 'service memcached stop'
  watch.restart       = 'service memcached restart'
  watch.start_grace   = 15.seconds
  watch.restart_grace = 15.seconds
  watch.pid_file      = '/var/run/memcached.pid'
  watch.log           = '/var/log/god/god.log'
  watch.behavior(:clean_pid_file)

  # Start the service whenever it is found not running (polled every 5 seconds).
  watch.start_if do |starter|
    starter.condition(:process_running) do |cond|
      cond.interval = 5.seconds
      cond.running  = false
    end
  end

  # Restart on high memory or CPU usage and notify both contacts.
  watch.restart_if do |restarter|
    restarter.condition(:memory_usage) do |cond|
      cond.above  = 1500.megabytes
      cond.times  = [3, 5] # 3 out of 5 intervals
      cond.notify = %w[nathan tim]
    end

    restarter.condition(:cpu_usage) do |cond|
      cond.above  = 50.percent
      cond.times  = 5
      cond.notify = %w[nathan tim]
    end
  end

  # If this watch is started or restarted five times within 5 minutes, unmonitor it;
  # after ten minutes, monitor it again in case the problem was temporary;
  # if it is seen flapping five times within two hours, give up completely.
  watch.lifecycle do |lifecycle|
    lifecycle.condition(:flapping) do |cond|
      cond.to_state     = [:start, :restart]
      cond.times        = 5
      cond.within       = 5.minutes
      cond.transition   = :unmonitored
      cond.retry_in     = 10.minutes
      cond.retry_times  = 5
      cond.retry_within = 2.hours
      cond.notify       = %w[nathan tim]
    end
  end
end
# mongodb: state-machine style watch that starts the service when it exits
# and alerts the 'developers' group on start failures and flapping.
God.watch do |watch|
  watch.name          = 'mongodb'
  watch.interval      = 30.seconds # default
  watch.start         = 'service mongodb start'
  watch.stop          = 'service mongodb stop'
  watch.restart       = 'service mongodb restart'
  watch.start_grace   = 20.seconds
  watch.restart_grace = 20.seconds
  watch.pid_file      = '/var/run/mongodb.pid'
  watch.log           = '/var/log/mongodb/mongodb.log'
  watch.behavior(:clean_pid_file)

  # On startup: already running => :up, otherwise => :start.
  watch.transition(:init, { true => :up, false => :start }) do |state|
    state.condition(:process_running) do |cond|
      cond.running = true
    end
  end

  # A start or restart is complete once the process is seen running.
  watch.transition([:start, :restart], :up) do |state|
    state.condition(:process_running) do |cond|
      cond.running = true
    end

    # Failsafe: after 8 tries within 2 minutes, go back to :start and notify.
    state.condition(:tries) do |cond|
      cond.times      = 8
      cond.within     = 2.minutes
      cond.transition = :start
      cond.notify     = %w[developers]
    end
  end

  # Start again as soon as the process exits.
  watch.transition(:up, :start) do |state|
    state.condition(:process_exits)
  end

  # Flapping guard: five starts/restarts within a minute unmonitors the watch;
  # retry after ten minutes, giving up after five retries within two hours.
  watch.lifecycle do |lifecycle|
    lifecycle.condition(:flapping) do |cond|
      cond.to_state     = [:start, :restart]
      cond.times        = 5
      cond.within       = 1.minute
      cond.transition   = :unmonitored
      cond.retry_in     = 10.minutes
      cond.retry_times  = 5
      cond.retry_within = 2.hours
      cond.notify       = %w[developers]
    end
  end
end
# mysql: watches mysqld; starting or restarting it also restarts httpd.
God.watch do |watch|
  watch.name = 'mysql'

  # How often god polls this watch.
  watch.interval = 30.seconds

  # Service commands. Start and restart also bounce httpd afterwards.
  watch.start   = '/etc/init.d/mysqld start && /etc/init.d/httpd restart'
  watch.stop    = '/etc/init.d/mysqld stop'
  watch.restart = '/etc/init.d/mysqld restart && /etc/init.d/httpd restart'

  # Time to wait after a start / restart before monitoring resumes.
  watch.start_grace   = 20.seconds
  watch.restart_grace = 20.seconds

  # Where mysqld records its pid.
  watch.pid_file = '/var/run/mysqld/mysqld.pid'

  # Have god delete the pid file when mysqld crashes.
  watch.behavior(:clean_pid_file)

  # On startup: already running => :up, otherwise => :start.
  watch.transition(:init, { true => :up, false => :start }) do |state|
    state.condition(:process_running) do |cond|
      cond.running = true
    end
  end

  # A start or restart is complete once the process is seen running.
  watch.transition([:start, :restart], :up) do |state|
    state.condition(:process_running) do |cond|
      cond.running = true
    end

    # Failsafe: after 8 tries within 2 minutes, go back to :start.
    state.condition(:tries) do |cond|
      cond.times      = 8
      cond.within     = 2.minutes
      cond.transition = :start
    end
  end

  # Start again when the process exits.
  watch.transition(:up, :start) do |state|
    state.condition(:process_exits) do |cond|
      # Email both contacts that the service has crashed.
      cond.notify = %w[nathan tim]
    end
  end

  watch.lifecycle do |lifecycle|
    # A service that keeps triggering start/restart over and over is "flapping".
    lifecycle.condition(:flapping) do |cond|
      cond.to_state   = [:start, :restart]
      cond.times      = 5
      cond.within     = 1.minute
      cond.transition = :unmonitored
      # When flapping, wait 10 minutes and then try to start/restart again.
      cond.retry_in     = 10.minutes
      cond.retry_times  = 5
      cond.retry_within = 2.hours
    end
  end
end
# nginx: starts the service when it exits and restarts it when the local
# health-check page stops answering with HTTP 200.
God.watch do |watch|
  watch.name          = 'nginx'
  watch.interval      = 30.seconds # default
  watch.start         = 'service nginx start'
  watch.stop          = 'service nginx stop'
  watch.restart       = 'service nginx restart'
  watch.start_grace   = 20.seconds
  watch.restart_grace = 20.seconds
  watch.pid_file      = '/opt/nginx/logs/nginx.pid'
  watch.log           = '/var/log/god/nginx.log'
  watch.behavior(:clean_pid_file)

  # On startup: already running => :up, otherwise => :start.
  watch.transition(:init, { true => :up, false => :start }) do |state|
    state.condition(:process_running) do |cond|
      cond.running = true
    end
  end

  # A start or restart is complete once the process is seen running.
  watch.transition([:start, :restart], :up) do |state|
    state.condition(:process_running) do |cond|
      cond.running = true
    end

    # Failsafe: after 8 tries within 2 minutes, go back to :start and notify.
    state.condition(:tries) do |cond|
      cond.times      = 8
      cond.within     = 2.minutes
      cond.transition = :start
      cond.notify     = %w[nathan tim]
    end
  end

  # Start again as soon as the process exits.
  watch.transition(:up, :start) do |state|
    state.condition(:process_exits)
  end

  # Restart when localhost:80/monitor.html returns anything other than 200
  # in 3 out of 5 checks (10 second timeout per request).
  watch.transition(:up, :restart) do |state|
    state.condition(:http_response_code) do |cond|
      cond.host        = 'localhost'
      cond.port        = 80
      cond.path        = '/monitor.html'
      cond.code_is_not = 200
      cond.timeout     = 10.seconds
      cond.times       = [3, 5]
      cond.notify      = %w[nathan tim]
    end
  end

  # Flapping guard: five starts/restarts within a minute unmonitors the watch;
  # retry after ten minutes, giving up after five retries within two hours.
  watch.lifecycle do |lifecycle|
    lifecycle.condition(:flapping) do |cond|
      cond.to_state     = [:start, :restart]
      cond.times        = 5
      cond.within       = 1.minute
      cond.transition   = :unmonitored
      cond.retry_in     = 10.minutes
      cond.retry_times  = 5
      cond.retry_within = 2.hours
      cond.notify       = %w[nathan tim]
    end
  end
end
# postgresql: starts the service when it exits and guards against flapping.
God.watch do |watch|
  watch.name = 'postgresql'

  # How often god polls this watch.
  watch.interval = 30.seconds

  # Service commands.
  watch.start   = '/etc/init.d/postgresql start'
  watch.stop    = '/etc/init.d/postgresql stop'
  watch.restart = '/etc/init.d/postgresql restart'

  # Time to wait after a start / restart before monitoring resumes.
  watch.start_grace   = 20.seconds
  watch.restart_grace = 20.seconds

  # Where postgresql records its pid.
  watch.pid_file = '/var/run/postgresql/9.3-main.pid'

  # Have god delete the pid file when postgresql crashes.
  watch.behavior(:clean_pid_file)

  # On startup: already running => :up, otherwise => :start.
  watch.transition(:init, { true => :up, false => :start }) do |state|
    state.condition(:process_running) do |cond|
      cond.running = true
    end
  end

  # A start or restart is complete once the process is seen running.
  watch.transition([:start, :restart], :up) do |state|
    state.condition(:process_running) do |cond|
      cond.running = true
    end

    # Failsafe: after 8 tries within 2 minutes, go back to :start.
    state.condition(:tries) do |cond|
      cond.times      = 8
      cond.within     = 2.minutes
      cond.transition = :start
    end
  end

  # Start again when the process exits.
  watch.transition(:up, :start) do |state|
    state.condition(:process_exits) do |cond|
      # Email both contacts that the service has crashed.
      cond.notify = %w[nathan tim]
    end
  end

  watch.lifecycle do |lifecycle|
    # A service that keeps triggering start/restart over and over is "flapping".
    lifecycle.condition(:flapping) do |cond|
      cond.to_state   = [:start, :restart]
      cond.times      = 5
      cond.within     = 1.minute
      cond.transition = :unmonitored
      # When flapping, wait 10 minutes and then try to start/restart again.
      cond.retry_in     = 10.minutes
      cond.retry_times  = 5
      cond.retry_within = 2.hours
    end
  end
end
# Redis: keep redis-server up and restart it under sustained memory or CPU load.
God.watch do |watch|
  watch.name          = 'redis'
  watch.interval      = 30.seconds
  watch.start         = '/etc/init.d/redis-server start'
  watch.stop          = '/etc/init.d/redis-server stop'
  watch.restart       = '/etc/init.d/redis-server restart'
  watch.start_grace   = 10.seconds
  watch.restart_grace = 10.seconds
  watch.log           = '/var/log/god/god.log'
  # Relies on the /etc/init.d/redis script creating a pid file at this path.
  watch.pid_file      = '/var/run/redis.pid'
  watch.behavior(:clean_pid_file)

  # Start the service whenever it is found not running (polled every 5 seconds).
  watch.start_if do |starter|
    starter.condition(:process_running) do |cond|
      cond.interval = 5.seconds
      cond.running  = false
    end
  end

  # Restart on high memory or CPU usage and notify both contacts.
  watch.restart_if do |restarter|
    restarter.condition(:memory_usage) do |cond|
      cond.above  = 5000.megabytes
      cond.times  = [3, 5] # 3 out of 5 intervals
      cond.notify = %w[nathan tim]
    end

    restarter.condition(:cpu_usage) do |cond|
      cond.above  = 92.percent
      cond.times  = 5
      cond.notify = %w[nathan tim]
    end
  end

  # If this watch is started or restarted five times within 5 minutes, unmonitor it;
  # after ten minutes, monitor it again in case the problem was temporary;
  # if it is seen flapping five times within two hours, give up completely.
  watch.lifecycle do |lifecycle|
    lifecycle.condition(:flapping) do |cond|
      cond.to_state     = [:start, :restart]
      cond.times        = 5
      cond.within       = 5.minutes
      cond.transition   = :unmonitored
      cond.retry_in     = 10.minutes
      cond.retry_times  = 5
      cond.retry_within = 2.hours
      cond.notify       = %w[nathan tim]
    end

    # No transition is set on this condition, so it appears to be alert-only.
    lifecycle.condition(:memory_usage) do |cond|
      cond.above  = 5000.megabytes
      cond.times  = 2
      cond.notify = %w[nathan tim]
    end
  end
end
# Resque: a single worker for the high, medium and low queues.
God.watch do |watch|
  watch.name        = 'resque-1.8.0'
  watch.interval    = 30.seconds
  watch.start       = 'cd /var/www/apps/firefly/current && rake environment RAILS_ENV=production resque:work QUEUE=high,medium,low'
  watch.start_grace = 10.seconds

  # Restart if memory gets too high.
  watch.transition(:up, :restart) do |state|
    state.condition(:memory_usage) do |cond|
      cond.above = 350.megabytes
      cond.times = 2
    end
  end

  # On startup: already running => :up, otherwise => :start.
  watch.transition(:init, { true => :up, false => :start }) do |state|
    state.condition(:process_running) do |cond|
      cond.running = true
    end
  end

  # A start or restart is complete once the process is seen running.
  watch.transition([:start, :restart], :up) do |state|
    state.condition(:process_running) do |cond|
      cond.running  = true
      cond.interval = 5.seconds
    end

    # Failsafe: after 5 tries, go back to :start.
    state.condition(:tries) do |cond|
      cond.times      = 5
      cond.transition = :start
      cond.interval   = 5.seconds
    end
  end

  # Start again if the process is not running.
  watch.transition(:up, :start) do |state|
    state.condition(:process_running) do |cond|
      cond.running = false
    end
  end
end
# sphinx: runs searchd as the 'deploy' user against the production config
# and restarts it when memory use stays high.
God.watch do |watch|
  # RAILS_ROOT is not defined in this block; it must already be set when this
  # file is loaded.
  searchd = "searchd --config #{RAILS_ROOT}/config/production.sphinx.conf"

  watch.name          = 'sphinx'
  watch.interval      = 30.seconds
  watch.start         = searchd
  watch.stop          = "#{searchd} --stop"
  # Restart is simply a stop followed by a start.
  watch.restart       = "#{watch.stop} && #{watch.start}"
  watch.start_grace   = 15.seconds
  watch.stop_grace    = 15.seconds
  watch.restart_grace = 15.seconds
  watch.log           = '/var/log/god/god.log'
  watch.uid           = 'deploy'
  watch.gid           = 'deploy'
  watch.pid_file      = "#{RAILS_ROOT}/var/run/searchd.pid"
  watch.behavior(:clean_pid_file)

  # Start the daemon whenever it is found not running (polled every 5 seconds).
  watch.start_if do |starter|
    starter.condition(:process_running) do |cond|
      cond.interval = 5.seconds
      cond.running  = false
    end
  end

  # Restart on high memory usage.
  # NOTE(review): 'joshua' is notified here, but the visible god.conf only
  # defines the 'nathan' and 'tim' contacts - confirm it is defined elsewhere.
  watch.restart_if do |restarter|
    restarter.condition(:memory_usage) do |cond|
      cond.above  = 100.megabytes
      cond.times  = [3, 5] # 3 out of 5 intervals
      cond.notify = %w[nathan tim joshua]
    end
  end

  # Flapping guard: five starts/restarts within 5 minutes unmonitors the watch;
  # retry after ten minutes, giving up after five retries within two hours.
  watch.lifecycle do |lifecycle|
    lifecycle.condition(:flapping) do |cond|
      cond.to_state     = [:start, :restart]
      cond.times        = 5
      cond.within       = 5.minutes
      cond.transition   = :unmonitored
      cond.retry_in     = 10.minutes
      cond.retry_times  = 5
      cond.retry_within = 2.hours
      cond.notify       = %w[nathan tim joshua]
    end
  end
end
# Varnish: keep varnish up and restart it under sustained memory or CPU load.
God.watch do |watch|
  watch.name          = 'varnish'
  watch.interval      = 30.seconds
  watch.start         = '/etc/init.d/varnish start'
  watch.stop          = '/etc/init.d/varnish stop'
  watch.restart       = '/etc/init.d/varnish restart'
  watch.start_grace   = 10.seconds
  watch.restart_grace = 10.seconds
  watch.log           = '/var/log/god/god.log'
  # Relies on the /etc/init.d/varnish script creating a pid file at this path.
  watch.pid_file      = '/var/run/varnishd.pid'
  watch.behavior(:clean_pid_file)

  # Start the service whenever it is found not running (polled every 5 seconds).
  watch.start_if do |starter|
    starter.condition(:process_running) do |cond|
      cond.interval = 5.seconds
      cond.running  = false
    end
  end

  # Restart on high memory or CPU usage and notify both contacts.
  watch.restart_if do |restarter|
    restarter.condition(:memory_usage) do |cond|
      cond.above  = 3500.megabytes
      cond.times  = [3, 5] # 3 out of 5 intervals
      cond.notify = %w[nathan tim]
    end

    restarter.condition(:cpu_usage) do |cond|
      cond.above  = 60.percent
      cond.times  = 5
      cond.notify = %w[nathan tim]
    end
  end

  # If this watch is started or restarted five times within 5 minutes, unmonitor it;
  # after ten minutes, monitor it again in case the problem was temporary;
  # if it is seen flapping five times within two hours, give up completely.
  watch.lifecycle do |lifecycle|
    lifecycle.condition(:flapping) do |cond|
      cond.to_state     = [:start, :restart]
      cond.times        = 5
      cond.within       = 5.minutes
      cond.transition   = :unmonitored
      cond.retry_in     = 10.minutes
      cond.retry_times  = 5
      cond.retry_within = 2.hours
      cond.notify       = %w[nathan tim]
    end

    # No transition is set on this condition, so it appears to be alert-only;
    # its threshold (3000 MB) is below the restart threshold above (3500 MB).
    lifecycle.condition(:memory_usage) do |cond|
      cond.above  = 3000.megabytes
      cond.times  = 2
      cond.notify = %w[nathan tim]
    end
  end
end
@nicotaing
Copy link

eggdrop god configurations are here

@nesquena
Copy link
Author

beanstalk and rabbitmq here

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment