Created
August 22, 2017 13:18
-
-
Save walac/bddbca20afe84611336dba808c75c8e7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import sys
from datetime import datetime

import psycopg2
import pytz
# Report workers whose most recent task suggests they have stopped working.
#
# Usage: <script> LOWER UPPER THRESHOLD_HOURS
#
# Worker ids are generated for every number in the half-open range
# [LOWER, UPPER).  For each worker the most recently modified row of the
# ``tasks`` table is inspected; a worker is reported when it has no task at
# all, or when its latest task started (if still running) or resolved
# (otherwise) more than THRESHOLD_HOURS ago.  The database connection string
# is read from the PGCONNECTIONSTRING environment variable.

WORKER_NAME_FORMAT = "t-yosemite-r7-{0:04d}"

LATEST_TASK_QUERY = (
    "select state, started, resolved from tasks "
    "where worker_id = %s order by modified desc limit 1"
)


def parse_threshold(text):
    """Return *text* as an int when possible, otherwise as a float.

    Raises ValueError when *text* is not a number at all.
    """
    try:
        return int(text)
    except ValueError:
        return float(text)


def worker_names(lower, upper):
    """Return the worker ids for every number in ``range(lower, upper)``.

    The upper bound is exclusive, matching ``range``.
    """
    return [WORKER_NAME_FORMAT.format(number) for number in range(lower, upper)]


def hours_since(moment, now=None):
    """Return the number of hours elapsed between *moment* and *now*.

    *now* defaults to the current naive UTC time.  *moment* may be naive
    (taken to be UTC already) or timezone-aware; an aware value is converted
    to naive UTC first, because subtracting an aware datetime from a naive
    one raises TypeError.
    """
    if now is None:
        now = datetime.utcnow()
    offset = moment.utcoffset()  # None for naive datetimes
    if offset is not None:
        moment = moment.replace(tzinfo=None) - offset
    return (now - moment).total_seconds() / 3600.0


def count_broken_workers(cur, workers, threshold):
    """Print one line per broken worker and return how many were found.

    *cur* is an open database cursor, *workers* an iterable of worker ids and
    *threshold* the maximum tolerated age, in hours, of a worker's latest
    task activity.
    """
    broken = 0
    for worker in workers:
        cur.execute(LATEST_TASK_QUERY, (worker,))
        row = cur.fetchone()  # the query is "limit 1": at most one row
        if row is None:
            broken += 1
            print("{} dead forever".format(worker))
            continue

        state, started, resolved = row
        # A running task is aged from its start time; if the task is not
        # running, assume that it has been resolved and age it from there.
        reference = started if state == 'running' else resolved
        if reference is None:
            # Without a timestamp the age cannot be computed; say so on
            # stderr instead of crashing on ``datetime - None``.
            sys.stderr.write(
                "{}: latest task is '{}' but has no timestamp, skipping\n"
                .format(worker, state))
            continue

        elapsed = hours_since(reference)
        if elapsed > threshold:
            broken += 1
            print("{} dead for {} hours".format(worker, elapsed))
    return broken


def main(argv=None):
    """Run the check and return a process exit status.

    *argv* defaults to ``sys.argv``.  Returns 2 when the arguments are
    missing or not numeric, 0 otherwise.  Raises KeyError when the
    PGCONNECTIONSTRING environment variable is not set.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.stderr.write(
            "usage: {} LOWER UPPER THRESHOLD_HOURS\n".format(argv[0]))
        return 2
    try:
        lower = int(argv[1])
        upper = int(argv[2])
        threshold = parse_threshold(argv[3])
    except ValueError as exc:
        sys.stderr.write("invalid argument: {}\n".format(exc))
        return 2

    conn = psycopg2.connect(os.environ['PGCONNECTIONSTRING'])
    try:
        cur = conn.cursor()
        try:
            broken = count_broken_workers(
                cur, worker_names(lower, upper), threshold)
        finally:
            cur.close()
    finally:
        # Nested so the connection is closed even if closing the cursor fails.
        conn.close()

    # The per-worker lines go to stdout as before; the summary goes to stderr
    # so anything parsing stdout keeps working.
    sys.stderr.write("{} broken worker(s)\n".format(broken))
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment