Skip to content

Instantly share code, notes, and snippets.

@RaphaelAudet
Created September 30, 2014 13:16
Show Gist options
  • Save RaphaelAudet/4c5bd9e6b3d00021d266 to your computer and use it in GitHub Desktop.
Save RaphaelAudet/4c5bd9e6b3d00021d266 to your computer and use it in GitHub Desktop.
Resque Stalled Alert: this script checks whether a Resque queue is overflowing or a job is "stuck".
require 'date'
require 'redis'
require 'net/smtp'
require 'resque'
require 'resque/version'
# Polls Resque and raises an alert when a queue grows past a length threshold
# or when a worker has been processing the same job for longer than a time
# threshold.
#
# All settings are hard-coded in #initialize. Alerts are printed to stdout
# (subject to the verbosity flags) and, when @send_email is true, mailed
# through Net::SMTP.
class App
  VERSION = '0.0.1'

  # Sets the configuration, connects to Redis and points Resque at that
  # connection and namespace.
  def initialize
    # Development parameters
    redis_host = '127.0.0.1'
    redis_password = 'password'
    redis_port = 6379
    @redis_namespace = 'resque:app_development'
    # Label used in the log file name and in alert subjects. It was read but
    # never assigned before, which left an empty label in both places.
    @rails_env = ENV['RAILS_ENV'] || 'development'
    @verbose = true # set to false in production
    @veryverbose = true # set to false in production and in staging
    @cycle = 3 # check resque every @cycle seconds
    @alert_queue_length_threshold = 300 # over this threshold an alert will be sent
    @alert_job_stuck_threshold = 300 # over this threshold an alert will be sent, in seconds

    # Email settings
    # Senders and recipients
    @send_email = false # set to true if you want to send alert emails
    @from_mail = 'me@email.com'
    @to_mail = 'you@email.com'
    @from_name = 'Resque Queue Monitoring Robot'
    @to_name = 'Monitoring team'
    # Servers and authentication
    @smtp_host = '127.0.0.1'
    @smtp_port = 25
    @smtp_domain = 'email.com'
    @smtp_user = 'mailer@email.com'
    @smtp_pwd = 'password' # NOTE(review): not read anywhere in this class

    @redis = Redis.new(:host => redis_host, :port => redis_port, :password => redis_password, :thread_safe => true)
    Resque.redis = @redis
    Resque.redis.namespace = @redis_namespace

    # Queue name => size seen on the previous cycle. Keyed by name so that any
    # number of queues, in any order, is handled.
    @prev_stat = {}
    # Titles of the running jobs for which a "stuck" alert was already raised.
    @sent_job_alert = []
  end

  # Runs one monitoring cycle (queue lengths, then stuck jobs) and sleeps
  # @cycle seconds before returning.
  def run
    puts "Start at #{DateTime.now}\n\n" if @verbose
    check_queue_length
    check_job_stuck
    puts "\nFinished at #{DateTime.now}" if @verbose
    sleep @cycle
  end

  protected

  # Reads the size of every Resque queue, alerts for each queue that has just
  # crossed @alert_queue_length_threshold, then records the sizes.
  def check_queue_length
    queues = Resque.queues
    stat = queues.map do |queue|
      size = Resque.size(queue)
      puts "#{queue} ; #{size}" if @verbose
      # Alert only on the upward crossing: at or below the threshold on the
      # previous cycle and above it now. Unknown queues count as 0.
      was_within_limit = @prev_stat.fetch(queue, 0) <= @alert_queue_length_threshold
      alert_queue_length(queue, size) if was_within_limit && size > @alert_queue_length_threshold
      size
    end
    puts queues.to_s if @veryverbose
    @prev_stat = Hash[queues.zip(stat)]
    write_stat(queues, stat)
  end

  # Appends one line (timestamp, queue names, queue sizes) to the daily log
  # file under /tmp.
  #
  # queues - Array of queue names.
  # stat   - Array of queue sizes, in the same order as queues.
  def write_stat(queues, stat)
    puts stat.join(', ') if @veryverbose
    log_file = "/tmp/log_file_#{@rails_env}_#{Date.today}"
    File.open(log_file, 'a') do |f|
      f.puts "#{DateTime.now}, #{queues.join(', ')}, #{stat.join(', ')}"
    end
  end

  # Reports that a queue is longer than the configured threshold.
  #
  # queue   - Name of the queue.
  # nb_jobs - Current number of jobs in the queue.
  def alert_queue_length(queue, nb_jobs)
    subj = "#{@rails_env} Warning Resque:#{queue} has #{nb_jobs} jobs"
    msg_body = "the resque #{@redis_namespace} queue #{queue} is #{nb_jobs} jobs long. (alerting over #{@alert_queue_length_threshold} jobs)"
    puts subj if @verbose
    puts msg_body if @veryverbose
    email(subj, msg_body) if @send_email
    @sent_queue_alert = true # set here but not read anywhere in this class
  end

  # Looks at what every worker is processing and alerts once for each job
  # that started more than @alert_job_stuck_threshold seconds ago.
  def check_job_stuck
    running = []
    Resque.workers.sort_by(&:to_s).each do |worker|
      data = worker.processing || {}
      job = nil # reset per worker so an idle worker is not shown with another worker's job
      if data['queue']
        payload = data['payload'] || {}
        job = "#{payload['class']}:#{data['run_at']}:#{payload['args']}"
        running << job
        deadline = DateTime.strptime(data['run_at']).to_time + @alert_job_stuck_threshold
        alert_job_stuck(worker, job) if deadline < Time.now && !@sent_job_alert.include?(job)
      end
      puts "#{worker} ; #{job}" if @verbose
    end
    # Forget jobs that are no longer running so the list cannot grow forever.
    @sent_job_alert &= running
  end

  # Reports that a job has been running for longer than the configured
  # threshold and remembers it so that it is reported only once.
  #
  # worker    - The worker processing the job (interpolated via to_s).
  # job_title - String identifying the job.
  def alert_job_stuck(worker, job_title)
    subj = "#{@rails_env} Warning: #{job_title} stuck on #{worker}"
    msg_body = "the job #{job_title} is running for more than #{@alert_job_stuck_threshold / 60} minutes on on #{worker}"
    puts subj if @verbose
    puts msg_body if @veryverbose
    email(subj, msg_body) if @send_email
    @sent_job_alert << job_title
  end

  # Sends an alert mail to @to_mail through the configured SMTP server.
  # No SMTP authentication is performed; @smtp_user is used as the envelope
  # sender.
  #
  # subj     - Subject line.
  # msg_body - Plain-text body.
  def email(subj, msg_body)
    msg = build_message(subj, msg_body)
    Net::SMTP.start(@smtp_host, @smtp_port, @smtp_domain) do |smtp|
      smtp.send_message msg, @smtp_user, @to_mail
    end
  end

  # Builds the raw mail text: headers, an empty separator line, then the body.
  #
  # Returns the message as a String terminated by a newline.
  def build_message(subj, msg_body)
    # The date/time should look something like: Thu, 03 Jan 2006 12:33:22 -0700
    msg_date = Time.now.strftime('%a, %d %b %Y %H:%M:%S %z')
    [
      "Date: #{msg_date}",
      "From: #{@from_name} <#{@from_mail}>",
      "To: #{@to_name} <#{@to_mail}>",
      "Subject: #{subj}",
      '', # the empty line is what separates the headers from the body
      msg_body
    ].join("\n") + "\n"
  end
end
# Build the monitor once, then keep running monitoring cycles forever
# (each call to #run ends with its own pause).
monitor = App.new
loop { monitor.run }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment