Skip to content

Instantly share code, notes, and snippets.

@darkside
Created December 28, 2010 18:27
Show Gist options
  • Save darkside/757512 to your computer and use it in GitHub Desktop.
Save darkside/757512 to your computer and use it in GitHub Desktop.
Resque God Recipe
# ------------------------------------------------------------------------------
# Resque
# Manages background jobs and does all the hard work so we don't have to.
#
# Notes:
# This configuration assumes we are only using one background worker for our
# tasks. This makes it easier to let god take care of it, since there's only
# one pid to control.
#
# There's also a hackish, ugly thread that runs with it to kill anyone stuck,
# their sacrifice is for the greater good.
#
# Anyway, the start command does give the correct pid of the worker, feel free
# to refactor this nicely to allow multiple workers to live.
# ------------------------------------------------------------------------------
# Settings for the Resque watch. The <%= ... %> tags are ERB placeholders;
# presumably this file is rendered by a deploy step before god loads it
# (TODO confirm which tool renders it).
# Name given to the watch (w.name); also used to build the pid path below.
resque_service = "<%= resque_service %>"
# NOTE(review): not referenced anywhere else in the visible config -- the stop
# command reads w.pid_file instead. Confirm whether this was meant to be
# assigned to w.pid_file or can be dropped.
resque_pid_file = "/var/run/god/#{resque_service}.pid"
# uid and gid for the worker; both are rendered from the same template value.
resque_user = "<%= user %>"
resque_group = "<%= user %>"
God.watch do |w|
  # Identity, log destination and polling interval of the watch.
  w.name = resque_service
  w.group = '<%= application %>'
  w.log = "#{rails_root}/log/resque.log"
  w.interval = 60.seconds

  # How the single worker is launched and stopped.
  w.start = "/usr/bin/rake -f #{rails_root}/Rakefile environment resque:work"
  # NOTE(review): w.pid_file is never assigned in this block, so the path
  # interpolated here is whatever god supplies by default -- confirm it is the
  # file god writes for this watch.
  w.stop = "kill -QUIT `cat #{w.pid_file}`"
  w.dir = rails_root
  w.env = god_environment
  w.uid = resque_user
  w.gid = resque_group
  w.start_grace = 20.seconds
  w.restart_grace = 20.seconds
  w.behavior(:clean_pid_file)

  # TODO: refactor everything below to use helper method "generic_monitor"

  # up -> restart: memory usage above 350 MB (c.times = 2).
  w.transition(:up, :restart) do |trans|
    trans.condition(:memory_usage) do |cond|
      cond.above = 350.megabytes
      cond.times = 2
    end
  end

  # init -> up when the process is already running, init -> start otherwise.
  w.transition(:init, { true => :up, false => :start }) do |trans|
    trans.condition(:process_running) { |cond| cond.running = true }
  end

  # start/restart -> up as soon as the process is seen running (5s checks).
  w.transition([:start, :restart], :up) do |trans|
    trans.condition(:process_running) do |cond|
      cond.running = true
      cond.interval = 5.seconds
    end

    # Failsafe: after 5 tries, go back to :start.
    trans.condition(:tries) do |cond|
      cond.times = 5
      cond.transition = :start
      cond.interval = 5.seconds
    end
  end

  # up -> start when the process is no longer running.
  w.transition(:up, :start) do |trans|
    trans.condition(:process_running) { |cond| cond.running = false }
  end

  # Flapping guard: 5 moves into start/restart within 5 minutes unmonitors the
  # watch; retry settings are 10 minutes, 5 times, within 2 hours.
  w.lifecycle do |life|
    life.condition(:flapping) do |cond|
      cond.to_state = [:start, :restart]
      cond.times = 5
      cond.within = 5.minute
      cond.transition = :unmonitored
      cond.retry_in = 10.minutes
      cond.retry_times = 5
      cond.retry_within = 2.hours
    end
  end
end
# ------------------------------------------------------------------------------
# Resque Workers Suicide
# ------------------------------------------------------------------------------
# This will ride alongside god and kill any rogue stale worker
# processes. Their sacrifice is for the greater good.
# ------------------------------------------------------------------------------
# Age in seconds at which a "... at <epoch>" process line is treated as stale.
WORKER_TIMEOUT = 60 * 10 # 10 minutes

# Returns the pids (first column) of the `ps -o pid,command` lines that mention
# "resque", end in "at <epoch-seconds>", and whose timestamp is at least
# +timeout+ seconds older than +now+.
#
# NOTE(review): the "... at <epoch>" suffix is presumably the process title a
# resque worker sets while its forked child runs a job -- confirm against the
# resque version in use.
#
# ps_output - String, raw output of `ps -e -o pid,command`.
# now       - Time the ages are measured against (defaults to Time.now).
# timeout   - Numeric age in seconds at which a line counts as stale.
#
# Lines whose pid or timestamp is not a plain integer are skipped: String#to_i
# would turn them into 0, i.e. "started in 1970" (always stale) or pid 0
# (which makes Process.kill signal the caller's whole process group).
def stale_resque_worker_pids(ps_output, now = Time.now, timeout = WORKER_TIMEOUT)
  ps_output.split("\n").map do |line|
    next unless line.include?('resque')

    fields = line.split(' ')
    next unless fields[-2] == 'at'
    next unless fields[0] =~ /\A[1-9]\d*\z/ && fields[-1] =~ /\A\d+\z/

    fields[0].to_i if now - Time.at(fields[-1].to_i) >= timeout
  end.compact
end

# Background sweep: every 30 seconds, send USR1 to each stale pid.
# NOTE(review): USR1 is presumably resque's "kill the child, keep the worker"
# signal -- confirm against the resque documentation.
Thread.new do
  loop do
    begin
      # The "resque" filter runs in Ruby, so no shell pipeline (and no unquoted
      # glob pattern) is involved.
      stale_resque_worker_pids(`ps -e -o pid,command`).each do |pid|
        begin
          ::Process.kill('USR1', pid)
        rescue Errno::ESRCH, Errno::EPERM
          # Process already gone or not ours to signal; keep sweeping the rest.
          nil
        end
      end
    rescue
      # Best effort: the sweep must never take this thread down.
      nil
    end
    sleep 30
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment