Skip to content

Instantly share code, notes, and snippets.

@hector
Created October 2, 2012 21:23
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hector/3823395 to your computer and use it in GitHub Desktop.
Save hector/3823395 to your computer and use it in GitHub Desktop.
Daemon to kill stale Resque workers
# god configuration for the stale-worker killer daemon.
#
# Registers a watch named "stale_workers" that launches stale_workers.rb as a
# self-daemonizing process and starts it again whenever it is not running.
rails_path = ENV['RAILS_ROOT'] || File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))

God.watch do |w|
  w.dir = rails_path
  w.name = "stale_workers"
  w.group = "vocsyn"
  w.interval = 30.seconds
  w.uid = 'deployer'
  w.gid = 'staff'

  script = File.join(File.dirname(__FILE__), 'stale_workers.rb')
  pid_file = File.join rails_path, 'tmp', 'pids', 'stale_workers.pid'
  log = File.join rails_path, 'log', 'stale_workers.log'

  # The script is started with -D (it forks into the background) and -p (it
  # writes its own PID file). god has to be pointed at that file; without it
  # god follows the PID of the launcher process, which exits right away.
  w.pid_file = pid_file

  w.start = "ruby #{script} -p #{pid_file} -l #{log} -D"
  w.stop = "kill -s QUIT `cat #{pid_file}`"
  # Stop first, then start: the script refuses to start while its PID file
  # exists, so starting before stopping can never bring up a new instance.
  # If the new instance still finds the old PID file, the start_if condition
  # below brings the daemon back up on a later check.
  w.restart = "#{w.stop} && #{w.start}"

  w.start_grace = 10.seconds
  w.restart_grace = 10.seconds

  # Start the daemon whenever it is found not running (checked every 5 seconds).
  w.start_if do |start|
    start.condition(:process_running) do |c|
      c.interval = 5.seconds
      c.running = false
    end
  end
end
#!/usr/bin/env ruby-local-exec
#
# This will kill any rogue stale worker processes. Their sacrifice is for the greater good.
require 'fileutils'
require 'optparse'
# Command-line options -------------------------------------------------------------------
# Parses ARGV (destructively) into a hash and publishes it as $options:
#   :pid_file - path given with -p/--pid (optional)
#   :daemon   - true when -D/--daemon was passed
#   :log      - path given with -l/--log (optional)
#   :timeout  - minutes a worker may live before being killed (default 20)
#   :sleep    - seconds between two checks (default 30)
options = {}

parser = OptionParser.new
parser.banner = "Usage: stale_workers.rb [options]"

parser.on("-p", "--pid [PID_FILE]", "File where the PID of this process will be saved") do |path|
  options[:pid_file] = path
end

parser.on("-D", "--daemon", "Daemonize process") do |flag|
  options[:daemon] = flag
end

parser.on("-l", "--log [LOG_FILE]", "File to save LOG_FILE") do |path|
  options[:log] = path
end

parser.on("--timeout N", Float, "Kill a worker after it has been N minutes alive (default 20)") do |minutes|
  # Reject zero and negative values before storing anything.
  if minutes <= 0
    puts "Timeout must be greater than 0"
    exit
  end
  options[:timeout] = minutes
end

parser.on("--sleep N", Float, "Check for stale workers every N seconds (default 30)") do |seconds|
  if seconds <= 0
    puts "Check must be greater than 0"
    exit
  end
  options[:sleep] = seconds
end

# Takes no argument and is listed last in the summary; prints the usage text.
parser.on_tail("-h", "--help", "Show this message") do
  puts parser
  exit
end

parser.parse!

# Fall back to the documented defaults for anything not given on the command line.
options[:sleep] ||= 30 # seconds between checks
options[:timeout] ||= 20 # minutes before a worker counts as stale

# The methods below read their configuration from this global.
$options = options
# ---------------------------------------------------------------------------------------
# Writes +str+ to standard output, prefixed with the current time formatted
# as [MM/DD/YYYY-HH:MM:SS].
def daemon_log(str)
  timestamp = Time.now.strftime("%m/%d/%Y-%H:%M:%S")
  puts "[#{timestamp}] #{str}"
end
# Writes the PID of the current process to the file named by
# $options[:pid_file]. Does nothing when no PID file was configured.
#
# When the file cannot be written because of permissions (Errno::EACCES) the
# problem is logged and the process exits.
def create_pid_file
  pid_path = $options[:pid_file]
  return unless pid_path

  File.open(pid_path, 'w') { |io| io.write(::Process.pid.to_s) }
rescue Errno::EACCES
  daemon_log("Cannot create PID file. Check the permissions and try again!")
  exit
end
# Deletes the file named by $options[:pid_file], if one was configured.
#
# A PID file that is already gone is not an error: this runs from the signal
# handlers during shutdown, and an Errno::ENOENT raised there would keep the
# process from exiting cleanly.
#
# Returns the result of File.delete (1) when a file was removed, nil otherwise.
def erase_pid_file
  pid_path = $options[:pid_file]
  return unless pid_path

  File.delete(pid_path)
rescue Errno::ENOENT
  nil
end
# Re-points the three standard streams at the file named by $options[:log]:
# STDIN is reopened on it with the default mode, STDOUT and STDERR in append
# mode. Does nothing when no log file was configured.
def set_log_file
  log_path = $options[:log]
  return unless log_path

  STDIN.reopen(log_path)
  STDOUT.reopen(log_path, "a")
  STDERR.reopen(log_path, "a")
end
# Start-up sequence of the daemon.
#
# 1. Exits if the configured PID file already exists.
# 2. Makes sure the configured log file exists; exits if that fails with a
#    permission error.
# 3. Detaches from the terminal when $options[:daemon] is set. A failure to
#    daemonize is logged and the process carries on in the foreground.
# 4. Writes the PID file, redirects the standard streams, installs the signal
#    handlers and enters the work loop.
def daemon_start
  if $options[:pid_file] && File.exist?($options[:pid_file])
    daemon_log("Process already running. If it`s not - remove the pid file")
    exit
  end

  begin
    FileUtils.touch($options[:log]) if $options[:log]
  rescue Errno::EACCES
    daemon_log("Cannot create LOG file. Check the permissions and try again!")
    exit
  end

  daemon_log("Starting process...")

  begin
    ::Process.daemon if $options[:daemon]
  rescue NotImplementedError, SystemCallError
    # Errno is only a namespace module, so `rescue Errno` matches nothing.
    # Process.daemon reports failure through NotImplementedError (platform
    # without fork) or an Errno::* error, all of which descend from
    # SystemCallError.
    daemon_log "Failed to daemonize! Probably your OS is incompatible with fork()"
  end

  # The PID file must be written after daemonizing so it holds the PID of the
  # detached process rather than that of the launcher.
  create_pid_file
  set_log_file
  daemon_handle_signals
  daemon_work
end
# Shuts the daemon down: logs which signal triggered the shutdown (when one
# is given), removes the PID file and exits the process.
#
# signal - name of the received signal, or nil when there is none to report.
def daemon_terminate(signal = nil)
  if signal
    daemon_log("#{signal} signal received.")
  end

  erase_pid_file
  ::Process.exit
end
# Installs handlers that shut the daemon down through daemon_terminate on
# TERM (termination request), INT (keyboard interrupt) and QUIT.
#
# KILL is deliberately not trapped: SIGKILL cannot be caught by a process, so
# a handler for it can never run (NOTE(review): newer Ruby versions appear to
# raise on the attempt, which would abort start-up - confirm).
#
# A signal the platform does not know (trap raises ArgumentError) is reported
# on standard output and skipped.
def daemon_handle_signals
  %w[TERM INT QUIT].each do |signal_name|
    begin
      trap(signal_name) { daemon_terminate signal_name }
    rescue ArgumentError
      puts "Signal #{signal_name} not supported."
    end
  end
end
# Looks through +resque_processes+ (lines of `ps` output, PID in the first
# column) for the first line whose last two whitespace-separated fields are
# "since" and +job_id+.
#
# resque_processes - Array of String lines.
# job_id           - String compared against the last field of each line.
#
# Returns the first field of the matching line as an Integer, or nil when no
# line matches.
def worker_pid(resque_processes, job_id)
  matching_line = resque_processes.find do |line|
    fields = line.split(' ')
    fields[-2] == 'since' && fields[-1] == job_id
  end
  return nil unless matching_line

  matching_line.split(' ').first.to_i
end
# Main loop of the daemon.
#
# On every pass it lists the processes whose `ps` line mentions "resque" and
# looks at those whose second-to-last field is "at"; the last field is read
# as the start time in epoch seconds (presumably Resque's
# "Forked <pid> at <timestamp>" process title - confirm against the Resque
# version in use). A process older than $options[:timeout] minutes is sent
# USR1 and the kill is logged. The loop then sleeps $options[:sleep] seconds.
#
# Any StandardError raised during a pass is logged and the loop carries on.
def daemon_work
  daemon_log "Working!"

  loop do
    begin
      # The [r] in the pattern keeps grep from matching its own command line.
      resque_processes = `ps -e -o pid,command | grep [r]esque`.split("\n")

      resque_processes.each do |line|
        parts = line.split(' ')
        next if parts[-2] != "at"

        started = parts[-1].to_i
        elapsed = Time.now - Time.at(started)
        next if elapsed < $options[:timeout] * 60

        ::Process.kill('USR1', parts[0].to_i)

        msg = "\e[31mJob killed!\e[0m\n"
        msg << " job pid: #{parts[0]}\n"
        w_pid = worker_pid resque_processes, parts[0]
        msg << " worker pid: #{w_pid}\n\n" if w_pid
        msg << " time elapsed: #{(elapsed / 60).truncate} minutes #{(elapsed % 60).truncate} seconds"
        daemon_log msg
      end
    rescue => e
      # The exception has to be bound to a name here; with a bare `rescue`
      # the reference to `e` raises NameError and takes the daemon down.
      daemon_log("Error: #{e.class} - #{e.message}")
    end

    sleep $options[:sleep]
  end

  daemon_terminate
end
# Script entry point: run the start-up sequence defined in daemon_start above.
daemon_start
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment