Created
June 5, 2013 08:27
-
-
Save jobwat/5712437 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Kills resque forked children that have been processing a job for too long.
# inspired from: https://gist.github.com/vitobotta/2783513

# Age in seconds (5 min) after which a processing job is considered 'stuck', to kill.
threshold=300
# File that receives one line per killed worker; the path is relative to the
# directory the script is started from.
logfile=log/dead_workers_killed.log
# Convert an elapsed time printed by `ps -o etime` into a number of seconds.
# Accepted shapes: MM:SS, HH:MM:SS and DD-HH:MM:SS.
# Arguments: $1 - the etime value
# Outputs:   the total number of seconds on stdout; nothing at all when the
#            value does not have 2 or 3 colon-separated fields
function ps_etime_to_seconds() # cheers user000001 - http://stackoverflow.com/questions/14652445/parse-ps-etime-output-into-seconds#14653443
{
  printf '%s\n' "$1" | awk -F: '
    {
      if (NF == 2) {
        print $1 * 60 + $2
      } else if (NF == 3) {
        days = 0
        hours = $1
        # A day count, when present, is glued to the hour field as "DD-HH".
        # Decide on the number of pieces rather than on the hour value, so
        # that "1-00:05:03" (zero hours) still counts its day.
        if (split($1, head, "-") == 2) {
          days = head[1]
          hours = head[2]
        }
        print ((days * 24 + hours) * 60 + $2) * 60 + $3
      }
    }'
}
# Scan the process table for resque processes whose title contains
# "Processing" and kill (SIGKILL) every one that has been running for more
# than $threshold seconds, appending one line per kill to $logfile.
# resque-scheduler is filtered out: its title can also contain "Processing"
# while it handles a delayed job, and it must not be killed.
# NOTE(review): assumes the scheduler's process title contains "resque-scheduler" - confirm.
ps -eo pid,etime,command \
  | grep "[r]esque" \
  | grep -v "resque-scheduler" \
  | grep "Processing" \
  | while read -r PID UPTIME COMMAND; do
  # Deliberately not named SECONDS: bash treats that name as a running timer
  # that keeps incrementing after it has been assigned.
  elapsed=$(ps_etime_to_seconds "$UPTIME")
  #echo "$PID, $COMMAND, $UPTIME (${elapsed}s)"

  # Ignore rows whose elapsed time could not be converted to a number.
  [[ "$elapsed" =~ ^[0-9]+$ ]] || continue

  # kill -0 sends no signal; it only checks that the process still exists
  # and that we are allowed to signal it.
  if kill -0 "$PID"; then
    if [ "$elapsed" -gt "$threshold" ]; then
      kill -9 "$PID"
      # Third space-separated word of the command line is logged as the queue.
      QUEUE=$(echo "$COMMAND" | cut -d ' ' -f 3)
      echo " The forked child with pid #$PID (queue: $QUEUE) was found stuck for longer than $threshold seconds. RIP" >> "$logfile"
    fi
  fi
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
FYI: if you are using 'resque-scheduler', this script can kill the scheduler, since the scheduler can be "Processing" a delayed job.