Created
June 4, 2025 06:52
-
-
Save pvs2401/5e1f4ab8a715b27be3f2630bbc993795 to your computer and use it in GitHub Desktop.
Slurm post-run (epilog) script that writes each job's completion time (JCT) to a file, which Telegraf can then read for plotting in Grafana.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Slurm post-job script: looks up the finished job with sacct and appends its
# job completion time (JCT, in seconds) to a file as one InfluxDB
# line-protocol record.
#
# Environment:
#   SLURM_JOB_ID - ID of the job to report on (required).
# Output:
#   Appends (and echoes to stdout) a record of the form
#     job_metrics,job_id=<id>,job_name=<name>,user=<user>,partition=<part> jct=<seconds>
#   to /tmp/slurm/postjobresult.out.
# Exit status:
#   0 on success, non-zero if the job ID is missing, the elapsed time cannot
#   be parsed, or the output directory cannot be created.

#######################################
# Convert an elapsed-time string to whole seconds.
# Arguments: $1 - elapsed time as HH:MM:SS or D-HH:MM:SS
# Outputs:   number of seconds on stdout; error message on stderr
# Returns:   0 on success, 1 if the string matches neither format
#######################################
elapsed_to_seconds() {
  local elapsed=$1
  local d=0 h m s

  if [[ "$elapsed" =~ ^([0-9]+):([0-9]+):([0-9]+)$ ]]; then
    h=${BASH_REMATCH[1]}
    m=${BASH_REMATCH[2]}
    s=${BASH_REMATCH[3]}
  elif [[ "$elapsed" =~ ^([0-9]+)-([0-9]+):([0-9]+):([0-9]+)$ ]]; then
    # Day-hour:min:sec format
    d=${BASH_REMATCH[1]}
    h=${BASH_REMATCH[2]}
    m=${BASH_REMATCH[3]}
    s=${BASH_REMATCH[4]}
  else
    echo "Invalid elapsed format: $elapsed" >&2
    return 1
  fi

  # Every field gets the 10# prefix: zero-padded values such as "08" or "09"
  # would otherwise be read as (invalid) octal and abort the arithmetic.
  echo $(((10#$d * 24 + 10#$h) * 3600 + 10#$m * 60 + 10#$s))
}

#######################################
# Escape a value for use as an InfluxDB line-protocol tag value.
# Commas, equals signs and spaces are prefixed with a backslash; without this
# a job name like "my job" would produce an unparseable record.
# Arguments: $1 - raw tag value
# Outputs:   escaped value on stdout (no trailing newline)
#######################################
escape_tag() {
  local value=$1
  value=${value//,/\\,}
  value=${value//=/\\=}
  value=${value// /\\ }
  printf '%s' "$value"
}

#######################################
# Query sacct for the job in SLURM_JOB_ID and append its JCT record.
# Globals:   SLURM_JOB_ID (read)
# Outputs:   the record on stdout and appended to the output file
# Returns:   0 on success, non-zero on any failure
#######################################
main() {
  local job_id=${SLURM_JOB_ID:-}
  local out_dir=/tmp/slurm
  local out_file=$out_dir/postjobresult.out
  local job_info id name user partition elapsed elapsed_seconds

  if [[ -z "$job_id" ]]; then
    echo "SLURM_JOB_ID is not set; cannot look up job" >&2
    return 1
  fi

  # Get required job info; only the first sacct row is used.
  job_info=$(sacct -j "$job_id" --format=JobID,JobName,User,Partition,Elapsed -n -P | head -n 1)
  IFS='|' read -r id name user partition elapsed <<< "$job_info"

  elapsed_seconds=$(elapsed_to_seconds "$elapsed") || return 1

  # Output directory must exist with mode 755.
  mkdir -p "$out_dir" || return 1
  if [[ "$(stat -c '%a' "$out_dir")" != "755" ]]; then
    chmod 755 "$out_dir"
  fi

  printf 'job_metrics,job_id=%s,job_name=%s,user=%s,partition=%s jct=%s\n' \
    "$(escape_tag "$id")" \
    "$(escape_tag "$name")" \
    "$(escape_tag "$user")" \
    "$(escape_tag "$partition")" \
    "$elapsed_seconds" \
    | tee -a "$out_file"
}

# Last statement on purpose: the script's exit status is main's return status.
main "$@"
Telegraf: | |
========= | |
# Telegraf tail input: follows the file the post-job script appends to.
[[inputs.tail]]
# Must match the path the post-job script writes its records to.
files = ["/tmp/slurm/postjobresult.out"]
# NOTE(review): presumably skips lines already in the file when Telegraf
# starts, so old jobs are not re-ingested - confirm against the plugin docs.
from_beginning = false
# The script emits InfluxDB line protocol ("job_metrics,... jct=<seconds>").
data_format = "influx"
watch_method = "inotify" # Efficient, triggers on file changes
Slurm: | |
====== | |
EpilogSlurmctld=/home/[xxxxx]/scripts/post_job_script.sh
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment