Created
October 2, 2012 21:23
-
-
Save hector/3823395 to your computer and use it in GitHub Desktop.
Daemon to kill stale resque workers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# God configuration for the stale-worker watchdog: keeps stale_workers.rb
# running as a self-daemonizing process under the Rails application root.
rails_path = ENV['RAILS_ROOT'] || File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))

God.watch do |w|
  w.dir      = rails_path
  w.name     = "stale_workers"
  w.group    = "vocsyn"
  w.interval = 30.seconds
  w.uid      = 'deployer'
  w.gid      = 'staff'

  script   = File.join(File.dirname(__FILE__), 'stale_workers.rb')
  pid_file = File.join rails_path, 'tmp', 'pids', 'stale_workers.pid'
  log      = File.join rails_path, 'log', 'stale_workers.log'

  # The script daemonizes itself (-D) and writes its own PID file, so God has
  # to be told where to find the PID of the process it is supervising.
  w.pid_file = pid_file

  w.start = "ruby #{script} -p #{pid_file} -l #{log} -D"
  w.stop  = "kill -s QUIT `cat #{pid_file}`"
  # Stop first, then start: the script refuses to start while its PID file
  # exists, so the previous "start && stop" order only ever killed the daemon.
  # NOTE(review): QUIT is handled asynchronously by the daemon; if restarts
  # turn out to be racy, drop w.restart and let God run stop/start with its
  # own grace periods - confirm against the God documentation.
  w.restart = "#{w.stop} && #{w.start}"

  w.start_grace   = 10.seconds
  w.restart_grace = 10.seconds

  # Start the daemon whenever God sees that it is not running.
  w.start_if do |start|
    start.condition(:process_running) do |c|
      c.interval = 5.seconds
      c.running  = false
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby-local-exec
#
# This will kill any rogue stale worker processes. Their sacrifice is for the greater good.
require 'fileutils'
require 'optparse'

# Input options--------------------------------------------------------------------------
# Parsed command-line settings end up in the global $options hash, which the
# daemon helpers below read (:pid_file, :daemon, :log, :timeout, :sleep).
options = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: stale_workers.rb [options]"

  opts.on("-p", "--pid [PID_FILE]", "File where the PID of this process will be saved") do |o|
    options[:pid_file] = o
  end

  opts.on("-D", "--daemon", "Daemonize process") do |o|
    options[:daemon] = o
  end

  opts.on("-l", "--log [LOG_FILE]", "File to save LOG_FILE") do |o|
    options[:log] = o
  end

  opts.on("--timeout N", Float, "Kill a worker after it has been N minutes alive (default 20)") do |n|
    # abort writes to stderr and exits with a non-zero status, so callers can
    # tell a rejected value from a successful run.
    abort "Timeout must be greater than 0" if n <= 0
    options[:timeout] = n
  end

  opts.on("--sleep N", Float, "Check for stale workers every N seconds (default 30)") do |n|
    abort "Check must be greater than 0" if n <= 0
    options[:sleep] = n
  end

  # No argument, shows at tail. This will print an options summary.
  # Try it and see!
  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Unknown or malformed options: show the problem and the usage summary
  # instead of an uncaught-exception backtrace.
  abort "#{e.message}\n\n#{parser}"
end

options[:sleep]   ||= 30 # check every N seconds
options[:timeout] ||= 20 # default timeout is 20 minutes
$options = options
# ---------------------------------------------------------------------------------------
# Writes +str+ to standard output, prefixed with a [MM/DD/YYYY-HH:MM:SS]
# timestamp of the current time.
#
# The stream is flushed after every entry so that lines show up immediately
# even when standard output is a (buffered) file rather than a terminal.
#
# Returns nil.
def daemon_log(str)
  $stdout.puts "[#{Time.now.strftime("%m/%d/%Y-%H:%M:%S")}] #{str}"
  $stdout.flush
  nil
end
# Writes the current process id to the file named by $options[:pid_file].
# Does nothing when no PID file was requested.
#
# Any OS-level failure (permission denied, missing directory, ...) is logged
# through daemon_log and ends the process, instead of only handling
# Errno::EACCES and crashing with a backtrace on everything else.
def create_pid_file
  path = $options[:pid_file]
  return unless path

  File.open(path, 'w') { |f| f.write(::Process.pid.to_s) }
rescue SystemCallError => e
  daemon_log("Cannot create PID file. Check the permissions and try again! (#{e.message})")
  exit
end
# Removes the file named by $options[:pid_file], if one was configured.
#
# A PID file that is already gone is not an error: this runs during shutdown,
# where raising Errno::ENOENT would only get in the way of exiting.
def erase_pid_file
  path = $options[:pid_file]
  File.delete(path) if path
rescue Errno::ENOENT
  nil
end
# Redirects the standard streams when $options[:log] is set:
# STDOUT and STDERR are appended to the log file and STDIN is attached to the
# null device (it was previously reopened onto the log file itself).
#
# Both output streams are switched to unbuffered mode so log entries are
# written as they happen rather than whenever the buffer fills up.
# Does nothing when no log file was requested.
def set_log_file
  log = $options[:log]
  return unless log

  STDIN.reopen File::NULL
  STDOUT.reopen log, "a"
  STDERR.reopen log, "a"
  STDOUT.sync = true
  STDERR.sync = true
end
# Entry sequence of the daemon:
#   1. refuse to start if the PID file already exists,
#   2. make sure the log file can be created,
#   3. optionally detach from the terminal ($options[:daemon]),
#   4. write the PID file, redirect output, install signal handlers,
#   5. enter the work loop (daemon_work).
def daemon_start
  # Process.daemon changes the working directory to "/", so relative paths
  # must be resolved first; otherwise the PID file would be checked in one
  # directory and written in another.
  $options[:pid_file] &&= File.expand_path($options[:pid_file])
  $options[:log]      &&= File.expand_path($options[:log])

  if $options[:pid_file] && File.exist?($options[:pid_file])
    daemon_log("Process already running. If it`s not - remove the pid file")
    exit
  end

  begin
    FileUtils.touch($options[:log]) if $options[:log]
  rescue Errno::EACCES
    daemon_log("Cannot create LOG file. Check the permissions and try again!")
    exit
  end

  daemon_log("Starting process...")
  begin
    ::Process.daemon if $options[:daemon]
  rescue NotImplementedError, SystemCallError
    # `rescue Errno` never matched anything (Errno is only a namespace module).
    # Platforms without fork raise NotImplementedError; OS-level failures are
    # SystemCallError subclasses. Either way, keep running in the foreground.
    daemon_log "Failed to daemonize! Probably your OS is incompatible with fork()"
  end

  create_pid_file
  set_log_file
  daemon_handle_signals
  daemon_work
end
# Shuts the daemon down: logs the received signal (when one is given),
# removes the PID file and exits the process.
#
# signal - optional signal name, used only for the log line.
#
# The exit lives in an `ensure` clause so the process terminates even if
# logging or PID-file cleanup raises; previously such an error skipped the
# exit entirely.
def daemon_terminate(signal = nil)
  daemon_log "#{signal} signal received." if signal
  erase_pid_file
ensure
  ::Process.exit
end
# Installs handlers that shut the daemon down (via daemon_terminate) on
# TERM (termination), INT (keyboard interruption) and QUIT.
#
# SIGKILL is deliberately not listed: it cannot be caught by a process, and
# asking Ruby to trap it raises, which aborted startup before the daemon
# ever reached its work loop.
#
# A signal name the platform does not know is reported and skipped.
def daemon_handle_signals
  %w[TERM INT QUIT].each do |name|
    begin
      trap(name) { daemon_terminate name }
    rescue ArgumentError
      puts "Signal #{name} not supported."
    end
  end
end
# Looks through +resque_processes+ (lines of `ps` output, PID in the first
# column) for a line whose last two whitespace-separated fields are
# "since <job_id>".
#
# resque_processes - Array of Strings, one process line each.
# job_id           - id to look for; compared as text, so a String or an
#                    Integer works.
#
# Returns the PID (first field converted with to_i) of the first matching
# line, or nil when no line matches.
def worker_pid(resque_processes, job_id)
  wanted = job_id.to_s
  resque_processes.each do |line|
    fields = line.split(' ')
    return fields.first.to_i if fields[-2] == 'since' && fields[-1] == wanted
  end
  nil
end
# Main loop of the daemon. On every pass it lists the processes whose command
# line mentions "resque", picks the lines whose second-to-last field is "at"
# (the last field is then read as a Unix start timestamp) and sends USR1 to
# every such process that has been running for at least $options[:timeout]
# minutes. Each kill is reported through daemon_log. The loop then sleeps for
# $options[:sleep] seconds.
#
# Errors inside a pass are logged and the loop carries on.
def daemon_work
  daemon_log "Working!"
  loop do
    begin
      # The pattern is quoted so the shell cannot glob-expand it; the [r]
      # trick keeps grep from matching its own command line.
      resque_processes = `ps -e -o pid,command | grep '[r]esque'`.split("\n")
      resque_processes.each do |line|
        parts = line.split(' ')
        next if parts[-2] != "at"

        started = parts[-1].to_i
        elapsed = Time.now - Time.at(started)
        next if elapsed < $options[:timeout] * 60

        begin
          ::Process.kill('USR1', parts[0].to_i)
        rescue Errno::ESRCH
          # The process went away between `ps` and the signal; keep checking
          # the remaining processes instead of abandoning this pass.
          daemon_log "Process #{parts[0]} exited before it could be signalled"
          next
        end

        msg = "\e[31mJob killed!\e[0m\n"
        msg << " job pid: #{parts[0]}\n"
        w_pid = worker_pid resque_processes, parts[0]
        msg << " worker pid: #{w_pid}\n\n" if w_pid
        msg << " time elapsed: #{(elapsed/60).truncate} minutes #{(elapsed%60).truncate} seconds"
        daemon_log msg
      end
    rescue => e
      # don't die because of stupid exceptions
      # (the exception has to be bound to `e`; without the binding the log
      # line itself raised NameError and took the daemon down)
      daemon_log("Error: #{e.class} - #{e.message}")
    end
    sleep $options[:sleep]
  end
  # Only reached if the loop is ever left (e.g. via StopIteration).
  daemon_terminate
end
# Script entry point: run the daemon start-up sequence.
# (The stray trailing "|" left a dangling operator here, a syntax error.)
daemon_start
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment