Last active
February 19, 2024 14:59
-
-
Save vuillaut/6220f3cf46769dbcf617026313d8ecb4 to your computer and use it in GitHub Desktop.
SLURM job dependencies status checker.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This script gets the dependencies of a job and lists all of them that are not COMPLETED.
""" | |
import subprocess | |
import re | |
import argparse | |
# Command-line interface: a single positional argument naming the Slurm job
# whose dependencies should be inspected.
parser = argparse.ArgumentParser(
    description="Check the state of Slurm job dependencies.",
)
parser.add_argument(
    "job_id",
    type=str,
    help="The Job ID for which to check dependencies.",
)

# Read the command line once; the rest of the script refers to `job_id`.
args = parser.parse_args()
job_id = args.job_id
def get_dependency_string(job_id):
    """Return the line of ``scontrol show job`` output that mentions dependencies.

    Args:
        job_id: Identifier of the job to query; converted with ``str()`` and
            passed to ``scontrol`` as a single argument.

    Returns:
        The first output line containing the text ``"Dependency"``, returned
        unmodified, or ``""`` when no such line exists or ``scontrol`` could
        not be started.
    """
    # Run scontrol with an argument list instead of a shell command string so
    # that a job_id containing shell metacharacters cannot inject commands.
    try:
        result = subprocess.run(
            ["scontrol", "show", "job", str(job_id)],
            capture_output=True,
            text=True,
        )
    except OSError:
        # scontrol is missing or not executable: there is no output to search,
        # so report "no dependency information" rather than crashing.
        return ""

    # The first matching line wins; later matches are ignored.
    for line in result.stdout.split("\n"):
        if "Dependency" in line:
            return line
    return ""
def parse_dependency_ids(dependency_string):
    """Extract the numeric job IDs of all ``afterok`` dependencies.

    Handles one ID per clause (``afterok:123``, ``afterok:123_*``,
    ``afterok:123(unfulfilled)``) as well as the colon-separated list form
    ``afterok:123:456``. Only the leading digits of each entry are kept, so
    array-task suffixes (``_4``, ``_*``), ``+time`` offsets and
    ``(state)`` annotations are dropped.

    Args:
        dependency_string: Text containing a Slurm dependency specification;
            it may be a whole ``scontrol`` output line.

    Returns:
        A list of job-ID strings in order of appearance; empty when the text
        contains no ``afterok`` clause.
    """
    job_ids = []
    # A clause runs from "afterok:" up to the next clause separator
    # ("," or "?") or whitespace.
    for clause in re.findall(r'afterok:([^,?\s]+)', dependency_string):
        # A clause may carry several job IDs separated by ":".
        for entry in clause.split(':'):
            leading_digits = re.match(r'\d+', entry)
            if leading_digits:
                job_ids.append(leading_digits.group())
    return job_ids
def check_dependency_states(dependency_ids):
    """Return the dependencies whose ``sacct`` state is not COMPLETED.

    Args:
        dependency_ids: Iterable of job IDs to look up with ``sacct``.

    Returns:
        A list of ``(dep_id, state)`` tuples, one for every job whose first
        reported state does not contain ``"COMPLETED"``. ``state`` is the
        empty string when ``sacct`` printed nothing or could not be started.
    """
    not_completed = []
    for dep_id in dependency_ids:
        # Run sacct with an argument list instead of a shell command string so
        # that an ID containing shell metacharacters cannot inject commands.
        try:
            result = subprocess.run(
                ["sacct", "-j", str(dep_id), "--format=State", "--noheader"],
                capture_output=True,
                text=True,
            )
            output = result.stdout
        except OSError:
            # sacct is missing or not executable: treat as "no state reported".
            output = ""

        # sacct may print several lines for one job; only the first is used.
        # Strip it so column padding does not leak into the reported state.
        state = output.strip().split("\n")[0].strip()
        if "COMPLETED" not in state:
            not_completed.append((dep_id, state))
    return not_completed
# Main flow: look up the job's dependency line, extract the afterok job IDs
# and report every one that has not reached the COMPLETED state.
dependency_string = get_dependency_string(job_id)
dependency_ids = parse_dependency_ids(dependency_string) if dependency_string else []

if not dependency_ids:
    # Covers a missing dependency line as well as a line that lists no
    # afterok job IDs (e.g. "Dependency=(null)"); with nothing to check,
    # claiming that all dependencies completed would be misleading.
    print("No dependencies found.")
else:
    not_completed_dependencies = check_dependency_states(dependency_ids)
    if not_completed_dependencies:
        for dep, state in not_completed_dependencies:
            print(f"Dependency {dep} is in state {state}")
    else:
        print("All dependencies are completed.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment