Skip to content

Instantly share code, notes, and snippets.

@vuillaut
Last active February 19, 2024 14:59
Show Gist options
  • Save vuillaut/6220f3cf46769dbcf617026313d8ecb4 to your computer and use it in GitHub Desktop.
Save vuillaut/6220f3cf46769dbcf617026313d8ecb4 to your computer and use it in GitHub Desktop.
SLURM job dependencies status checker.
"""
This script gets the dependencies of a Slurm job and lists all of those that are not COMPLETED.
"""
import subprocess
import re
import argparse
# Command-line interface: one positional argument naming the job to inspect.
parser = argparse.ArgumentParser(
    description="Check the state of Slurm job dependencies.",
)
parser.add_argument(
    "job_id",
    type=str,
    help="The Job ID for which to check dependencies.",
)

# Read the command line once; the rest of the script refers to `job_id`.
args = parser.parse_args()
job_id = args.job_id
def get_dependency_string(job_id):
    """Return the ``scontrol show job`` output line that mentions dependencies.

    Args:
        job_id: Identifier of the job to inspect. It is converted with
            ``str`` and handed to ``scontrol`` as a single argument.

    Returns:
        The first line of the command's standard output that contains the
        text ``"Dependency"``, or ``""`` when no such line exists (including
        when ``scontrol`` cannot be started or prints nothing).
    """
    # Use an argument vector rather than a shell string so that a job_id
    # containing shell metacharacters cannot inject extra commands.
    command = ["scontrol", "show", "job", str(job_id)]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError:
        # Without a shell, a missing or non-executable scontrol raises
        # instead of producing empty output; keep the "nothing found" result.
        return ""

    for line in result.stdout.splitlines():
        if "Dependency" in line:
            return line
    return ""
def parse_dependency_ids(dependency_string):
    """Extract the job IDs of every ``afterok`` dependency in a string.

    Each ``afterok`` clause may name one job (``afterok:123``) or several
    colon-separated jobs (``afterok:123:456``). Anything that follows the
    digits of an ID up to the next ``:``, ``,``, ``?`` or whitespace (for
    example ``_*`` or a parenthesised state) is ignored.

    Args:
        dependency_string: Text that may contain ``afterok`` clauses.

    Returns:
        A list of job IDs as strings of digits, in order of appearance.
        Empty when the text contains no ``afterok`` clause.
    """
    job_ids = []
    # First isolate each whole "afterok:<id>[:<id>...]" clause, then pull the
    # numeric IDs out of it, so IDs after the first one are not dropped.
    clause_pattern = r'afterok((?::\d+[^:,?\s]*)+)'
    for id_chain in re.findall(clause_pattern, dependency_string):
        job_ids.extend(re.findall(r':(\d+)', id_chain))
    return job_ids
def check_dependency_states(dependency_ids):
    """Return the dependencies whose accounting state is not COMPLETED.

    For each ID, ``sacct`` is asked for the State column of the job
    allocation records (``--allocations`` leaves job steps out). A dependency
    is reported when any returned state lacks the text ``"COMPLETED"``.

    Args:
        dependency_ids: Iterable of job IDs to look up.

    Returns:
        A list of ``(dep_id, state)`` tuples, one per dependency that is not
        completed, in input order. ``state`` is the first non-completed state
        reported, or ``"UNKNOWN"`` when ``sacct`` gave no output or could not
        be started.
    """
    not_completed = []
    for dep_id in dependency_ids:
        # Argument vector instead of a shell string: no quoting or injection
        # concerns, and no shell process per lookup.
        command = [
            "sacct",
            "-j", str(dep_id),
            "--allocations",
            "--format=State",
            "--noheader",
        ]
        try:
            output = subprocess.run(command, capture_output=True, text=True).stdout
        except OSError:
            # sacct is missing or not executable: treat as "no information".
            output = ""

        # The State column is padded, so strip every line; skip blank ones.
        states = [line.strip() for line in output.splitlines() if line.strip()]
        if not states:
            not_completed.append((dep_id, "UNKNOWN"))
            continue

        # Several lines can come back for one ID (e.g. array tasks); all of
        # them must be completed for the dependency to count as completed.
        unfinished = next((s for s in states if "COMPLETED" not in s), None)
        if unfinished is not None:
            not_completed.append((dep_id, unfinished))
    return not_completed
# Report every afterok dependency of the requested job that has not completed.
dependency_string = get_dependency_string(job_id)
dependency_ids = parse_dependency_ids(dependency_string) if dependency_string else []

if not dependency_ids:
    # Covers both a missing Dependency line and a Dependency line that names
    # no afterok job; in neither case is there anything to check, so do not
    # claim that "all dependencies are completed".
    print("No dependencies found.")
else:
    not_completed_dependencies = check_dependency_states(dependency_ids)
    if not_completed_dependencies:
        for dep, state in not_completed_dependencies:
            print(f"Dependency {dep} is in state {state}")
    else:
        print("All dependencies are completed.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment