Skip to content

Instantly share code, notes, and snippets.

@Nepomuk
Last active August 29, 2015 14:19
Show Gist options
  • Save Nepomuk/f3e2840705b2b804f403 to your computer and use it in GitHub Desktop.
Save Nepomuk/f3e2840705b2b804f403 to your computer and use it in GitHub Desktop.
Resubmit jobs on Sun Grid Engine
#!/bin/bash
#
# Resubmit jobs on Sun Grid Engine.
#
# Usage:
#   <script> qstat            kill every job listed by `qstat -u $USER` and
#                             resubmit the task IDs found in that listing
#   <script> find             resubmit the IDs of result files that match
#                             `find -size -2M`
#   <script> missing [maxID]  resubmit every ID in 1..maxID (default 100)
#                             that has no result file
#
# Each mode writes the IDs it submits, one per line, to
# cascade_job_resubmit_<mode>.txt in the current directory.
#
# Optional environment overrides:
#   CASCADE_JOB_SCRIPT  command run once per ID
#                       (default: ./cascade_job_100k.sh)
#   CASCADE_DATA_DIR    directory searched for *.root result files
#                       (default: ../data/ana)

# Print a short usage summary to stderr.
usage() {
  printf 'Usage: %s qstat | find | missing [maxID]\n' "${0##*/}" >&2
}

# Fail with a message unless $1 is an existing directory.
# Without this guard a wrong working directory would make "missing" treat
# every ID as missing and resubmit the whole range.
require_data_dir() {
  if [[ ! -d "$1" ]]; then
    printf 'data directory %s not found\n' "$1" >&2
    return 1
  fi
}

# Print the ID of every *.root file below $1, one per line, unsorted.
# Any further arguments are handed to find as additional tests.
# The ID is the text between the first "cascade_" in the path (relative to
# the data directory) and the next "_"; paths without that marker are
# reported on stderr and skipped instead of yielding a bogus ID.
list_result_ids() {
  local data_dir=$1 path rel id
  shift
  # NUL-delimited so that paths containing whitespace stay in one piece.
  while IFS= read -r -d '' path; do
    rel=${path#"$data_dir"}
    if [[ "$rel" != *cascade_* ]]; then
      printf 'skipping %s: no cascade_<id> in path\n' "$path" >&2
      continue
    fi
    id=${rel#*cascade_}
    id=${id%%_*}
    if [[ -n "$id" ]]; then
      printf '%s\n' "$id"
    fi
  done < <(find "$data_dir" -name '*.root' -type f "$@" -print0)
  return 0
}

# Write the IDs given after the file name $1 to that file, one per line.
# With no IDs the file is truncated to empty.
record_ids() {
  local record=$1
  shift
  if (( $# > 0 )); then
    printf '%s\n' "$@"
  fi > "$record" || {
    printf 'cannot write %s\n' "$record" >&2
    return 1
  }
}

# Run the job script once for every ID (or ID range) given as argument.
# A failing submission is reported and the remaining IDs are still tried;
# the return status is non-zero if any submission failed.
submit_ids() {
  local job_script=${CASCADE_JOB_SCRIPT:-./cascade_job_100k.sh}
  local id status=0
  for id in "$@"; do
    if ! "$job_script" "$id"; then
      printf 'submission failed for ID %s\n' "$id" >&2
      status=1
    fi
  done
  return "$status"
}

# Mode "qstat": restart all currently listed jobs which are hanging for
# some reason. Job IDs come from column 1 of the qstat listing, task IDs
# from column 10; the first two lines of the listing are skipped as header.
# NOTE(review): rows with fewer than 10 fields contribute no task ID but
# their job is still deleted - presumably column 10 is the array task ID of
# running tasks only; confirm against the qstat output on the cluster.
resubmit_from_qstat() {
  local listing id
  local -a job_ids=() task_ids=()

  if ! listing=$(qstat -u "$USER"); then
    printf 'qstat failed; nothing killed or resubmitted\n' >&2
    return 1
  fi

  while IFS= read -r id; do
    job_ids+=("$id")
  done < <(awk 'NR > 2 && $1 != "" {print $1}' <<<"$listing" | sort -u)

  while IFS= read -r id; do
    task_ids+=("$id")
  done < <(awk 'NR > 2 && $10 != "" {print $10}' <<<"$listing")

  # Record the task list before anything is killed so it survives a
  # failure further down.
  record_ids cascade_job_resubmit_qstat.txt "${task_ids[@]}" || return 1

  # kill currently running jobs
  for id in "${job_ids[@]}"; do
    echo "kill job $id"
    qdel "$id" || printf 'qdel failed for job %s\n' "$id" >&2
  done

  # restart the jobs
  submit_ids "${task_ids[@]}"
}

# Mode "find": resubmit the IDs of all result files selected by the size
# test, each ID once, in sorted order.
# NOTE(review): GNU find rounds sizes up to whole units before comparing,
# so `-size -2M` selects files of at most 1 MiB rather than everything
# below 2 MiB - confirm that this is the intended threshold.
resubmit_small_results() {
  local data_dir=$1 id
  local -a task_ids=()

  require_data_dir "$data_dir" || return 1

  while IFS= read -r id; do
    task_ids+=("$id")
  done < <(list_result_ids "$data_dir" -size -2M | sort -u)

  record_ids cascade_job_resubmit_find.txt "${task_ids[@]}" || return 1
  submit_ids "${task_ids[@]}"
}

# Mode "missing": resubmit every ID in 1..max that has no result file.
# $1 is the data directory, $2 the optional maximum ID (default 100; a
# non-numeric value is reported and ignored). Consecutive missing IDs are
# submitted as one "first-last" range, isolated ones as a single number.
resubmit_missing() {
  local data_dir=$1 max_id=100 last_id=0 id first last
  local -a have=() gaps=()

  require_data_dir "$data_dir" || return 1

  # retrieve further information for the range
  if [[ -n "${2:-}" ]]; then
    if [[ $2 =~ ^[0-9]+$ ]]; then
      # 10# forces base 10 so that a zero-padded value is not read as octal
      max_id=$(( 10#$2 ))
    else
      printf 'ignoring non-numeric maximum ID "%s", using %d\n' \
        "$2" "$max_id" >&2
    fi
  fi

  # Get the list of IDs that ran through. Only plain decimal IDs may reach
  # the arithmetic below; everything else is reported and dropped.
  while IFS= read -r id; do
    if [[ ! $id =~ ^[0-9]+$ ]]; then
      printf 'ignoring non-numeric ID "%s"\n' "$id" >&2
      continue
    fi
    id=$(( 10#$id ))
    # IDs outside 1..max_id (including 0) play no part in the gap search.
    if (( id >= 1 && id <= max_id )); then
      have[id]=1
    fi
  done < <(list_result_ids "$data_dir")

  # Find the gaps. The indices of the sparse array come back in ascending
  # order without duplicates; max_id + 1 acts as a sentinel that closes
  # the gap between the last completed ID and the maximum.
  for id in "${!have[@]}" "$(( max_id + 1 ))"; do
    first=$(( last_id + 1 ))
    last=$(( id - 1 ))
    if (( first == last )); then
      gaps+=("$first")
    elif (( first < last )); then
      gaps+=("$first-$last")
    fi
    last_id=$id
  done

  # call the job control
  record_ids cascade_job_resubmit_missing.txt "${gaps[@]}" || return 1
  submit_ids "${gaps[@]}"
}

# Dispatch on the mode given as first argument; an unknown or missing mode
# prints the usage summary and returns 2.
main() {
  local data_dir=${CASCADE_DATA_DIR:-../data/ana}
  case "${1:-}" in
    qstat)
      resubmit_from_qstat
      ;;
    find)
      resubmit_small_results "$data_dir"
      ;;
    missing)
      resubmit_missing "$data_dir" "${2:-}"
      ;;
    *)
      usage
      return 2
      ;;
  esac
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment