Skip to content

Instantly share code, notes, and snippets.

@maxfischer2781
Created September 23, 2016 14:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save maxfischer2781/5c3bb079fb730e7242267cdb326866ce to your computer and use it in GitHub Desktop.
Save maxfischer2781/5c3bb079fb730e7242267cdb326866ce to your computer and use it in GitHub Desktop.
Hook for HTCondor job_router that removes its job from a route
#!/usr/bin/python
from __future__ import print_function, with_statement
import sys
import ast
import subprocess
import time
# File that every invocation of this hook appends its progress messages to.
LOG_PATH = '/tmp/htc_exit_hook.log'
# Attributes whose values are parsed as literals (integers, quoted strings).
LITERAL_ATTRIBUTES = frozenset(
    ('routedfromjobid', 'routename', 'clusterid', 'procid')
)
# Attributes whose values are interpreted as ClassAd booleans.
BOOLEAN_ATTRIBUTES = frozenset(('routetestperformed', 'wantroutetest'))
# Attributes that must be present before a removal is attempted.
REQUIRED_ATTRIBUTES = ('clusterid', 'procid', 'routedfromjobid', 'routename')


def classad_bool(value):
    """Interpret the textual value of a job attribute as a boolean.

    ``true``/``false`` are matched case-insensitively, ``undefined``,
    ``error`` and the empty string count as false, and numbers are true
    when non-zero.  Anything else (e.g. an unevaluated expression) is
    treated as true, which is how every non-empty value used to be treated.
    """
    text = value.strip().lower()
    if text == 'true':
        return True
    if text in ('false', 'undefined', 'error', ''):
        return False
    try:
        return bool(float(text))
    except ValueError:
        return True


def parse_job_ad(lines):
    """Collect the attributes this hook cares about from ``Key = Value`` lines.

    Returns a dict keyed by lower-cased attribute name.  The two boolean
    flags are always present (default ``False``); the other attributes are
    only present if they appeared in the input.  Lines without ``=`` (blank
    lines, separators) are ignored instead of aborting the hook.
    """
    job_ad = {'routetestperformed': False, 'wantroutetest': False}
    for line in lines:
        key, separator, value = line.partition('=')
        if not separator:
            continue
        key = key.strip().lower()
        value = value.strip()
        if key in LITERAL_ATTRIBUTES:
            # e.g. ClusterId = 78251, RoutedFromJobId = "78249.0"
            job_ad[key] = ast.literal_eval(value)
        elif key in BOOLEAN_ATTRIBUTES:
            job_ad[key] = classad_bool(value)
    return job_ad


def _log_event(job_id, real_job_id, event):
    """Append one timestamped line about the routed/original job pair."""
    with open(LOG_PATH, 'a') as log_file:
        log_file.write(
            '[%s] job %s => %s %s\n'
            % (time.asctime(), job_id, real_job_id, event)
        )


def main(lines):
    """Run the hook on the job description in ``lines``; return the exit code.

    The whole input is always consumed so the router does not stall.  Only
    ClusterId and ProcId are needed for the removal, but the other
    attributes are useful for debugging and should be defined.
    """
    job_ad = parse_job_ad(lines)
    if not job_ad['wantroutetest']:
        return 0
    missing = [name for name in REQUIRED_ATTRIBUTES if name not in job_ad]
    if missing:
        # Checked by presence, not truthiness: ProcId = 0 is a valid id.
        print(
            'missing job attributes: %s' % ', '.join(missing),
            file=sys.stderr,
        )
        return 0
    job_id = '%d.%d' % (job_ad['clusterid'], job_ad['procid'])
    real_job_id = job_ad['routedfromjobid']
    if job_id == real_job_id:
        # Explicit check instead of ``assert`` so it also holds under -O:
        # never remove the job when routed and original ids coincide.
        print(
            'Somebody set us up the inplace transformed job! Abandon hooks!',
            file=sys.stderr,
        )
        return 1
    if job_ad['routetestperformed']:
        _log_event(job_id, real_job_id, 'survived previous route test')
    # NOTE(review): the removal is attempted again even after a previous
    # test was survived; confirm that this is intended.
    _log_event(job_id, real_job_id, 'about to be removed')
    # mark us as routed already; the name must match the attribute read above
    print('RouteTestPerformed = True')
    # remove the ROUTED job
    condor_rm = subprocess.Popen(
        ('condor_rm', job_id),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    # communicate() returns (stdout, stderr), in that order
    stdout, stderr = condor_rm.communicate()
    exit_code = condor_rm.returncode
    if exit_code == 0:
        _log_event(job_id, real_job_id, 'marked for removal')
    else:
        _log_event(job_id, real_job_id, 'removal failed: %s' % exit_code)
        print('rmexit: ', exit_code, file=sys.stderr)
        print('stdout:\n', stdout, file=sys.stderr)
        print('stderr:\n', stderr, file=sys.stderr)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.stdin))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment