Skip to content

Instantly share code, notes, and snippets.

@cabb99
Last active May 19, 2023 01:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cabb99/e7cd08955907de50754a53b58c71ae24 to your computer and use it in GitHub Desktop.
Save cabb99/e7cd08955907de50754a53b58c71ae24 to your computer and use it in GitHub Desktop.
Submit a list of SLURM jobs with a maximum number of simultaneous jobs, taking into account previously submitted jobs.
#!/bin/bash
#######################################
# Print the command-line help text.
# Globals:   none
# Arguments: none
# Outputs:   the usage synopsis, option list and argument list on stdout
#######################################
print_usage() {
  # One printf argument per output line; empty arguments yield the blank
  # separator lines between sections.
  printf '%s\n' \
    "Usage: $0 [--max-jobs <max_jobs>] [--user <username>] [--help|-h] <script_path1> <script_path2> ... <script_pathN>" \
    "" \
    "This script submits a list of SLURM jobs with a maximum number of simultaneous jobs, taking into account previously submitted jobs." \
    "" \
    "Options:" \
    " --max-jobs <max_jobs> Set the maximum number of simultaneous jobs (default: 10)" \
    " --user <username> Set the user name for checking the SLURM queue (default: current user)" \
    " --help, -h Show this help message and exit" \
    "" \
    "Arguments:" \
    " script_path Path to the SLURM script to be submitted"
}
# Defaults: allow at most 10 simultaneous jobs and inspect the queue of the
# invoking user (both can be overridden on the command line).
max_jobs=10
user=${USER}
# Called without any argument there is nothing to submit: show the help text
# and report failure.
(( $# >= 1 )) || {
  print_usage
  exit 1
}
#######################################
# Parse the command line into the global settings.
# Globals:
#   max_jobs      (written when --max-jobs is given)
#   user          (written when --user is given)
#   script_paths  (reset to an empty array, then filled with every
#                  non-option argument in order)
# Arguments:
#   the script's command line ("$@")
# Outputs:
#   error messages and usage text on stdout
# Exits:
#   0 after --help/-h; 1 on an unknown option or a missing/invalid value
#######################################
parse_args() {
  # Start from a real, empty array so a variable of the same name inherited
  # from the environment cannot leak in as a bogus first script path.
  script_paths=()
  while [[ "$#" -gt 0 ]]; do
    case "$1" in
      --max-jobs)
        # The value must be all digits AND greater than zero: a window of 0
        # would disable throttling entirely. 10# forces base 10 so values
        # such as "08" are not rejected as invalid octal.
        if ! [[ "${2-}" =~ ^[0-9]+$ ]] || (( 10#$2 < 1 )); then
          echo "Error: --max-jobs value must be a positive integer."
          exit 1
        fi
        max_jobs=$((10#$2))
        shift 2
        ;;
      --user)
        # Without this check "shift 2" fails when --user is the last word,
        # nothing is shifted, and the loop never terminates.
        if [[ "$#" -lt 2 || -z "$2" ]]; then
          echo "Error: --user requires a user name."
          exit 1
        fi
        user="$2"
        shift 2
        ;;
      --help|-h)
        # -h is matched here directly; it does not start with "--", so it
        # must not be filtered out before reaching this case statement.
        print_usage
        exit 0
        ;;
      --*)
        echo "Error: Unrecognized option '$1'"
        print_usage
        exit 1
        ;;
      *)
        # Anything else is a path to a SLURM script.
        script_paths+=("$1")
        shift
        ;;
    esac
  done
}
# Parse command line arguments
parse_args "$@"
# The command line may have consisted of options only; without at least one
# script path there is nothing to submit, so print the help text and fail.
(( ${#script_paths[@]} > 0 )) || {
  echo "Error: No script paths provided."
  print_usage
  exit 1
}
# Get the list of job IDs from the SLURM queue for the specified user.
# A failed query (squeue missing, controller unreachable, unknown user) must
# not be mistaken for an empty queue: that would silently ignore the jobs the
# user already has queued and defeat the throttling, so stop instead.
if ! job_ids_queue=$(squeue -u "$user" --noheader --format="%i"); then
  echo "Error: Could not query the SLURM queue for user '$user'."
  exit 1
fi

# dependency_list: comma-separated job IDs the new submissions are chained on.
# job_counter:     number of IDs currently in that list.
dependency_list=""
job_counter=0

if [[ -n "$job_ids_queue" ]]; then
  # Strip the array-task part ("123_4" -> "123"), de-duplicate numerically,
  # keep only the max_jobs largest IDs and join them with commas.
  dependency_list=$(
    printf '%s\n' "$job_ids_queue" \
      | awk -F '_' '{print $1}' \
      | sort -nu \
      | tail -n "$max_jobs" \
      | paste -sd "," -
  )
fi

# Initialize the job counter with the number of jobs in the dependency list:
# one more than the number of commas it contains.
if [[ -n "$dependency_list" ]]; then
  commas_only=${dependency_list//[^,]/}
  job_counter=$(( ${#commas_only} + 1 ))
fi
# Initialize an array to store the scripts that failed to submit
failed_scripts=()

#######################################
# Submit one SLURM script, throttled by a sliding window of job IDs.
# Once job_counter has reached max_jobs, the new job is made dependent
# (afterany) on the oldest job ID in dependency_list.
# Globals:
#   max_jobs         (read)          size of the window
#   job_counter      (read/written)  number of jobs counted so far
#   dependency_list  (read/written)  comma-separated IDs of the latest jobs
#   failed_scripts   (written)       paths that could not be submitted
# Arguments:
#   $1 - path to the SLURM script
# Outputs:
#   progress and error messages on stdout
# Returns:
#   0 if the job was submitted, 1 otherwise
#######################################
submit_script() {
  local script_path=$1
  local script_dir script_name dependency_job sbatch_output job_id
  local -a dependency_args=()

  # A missing script is a failed submission: record it so the final summary
  # and the exit status of the run reflect it.
  if [[ ! -f "$script_path" ]]; then
    echo "Error: Script not found at $script_path"
    failed_scripts+=("$script_path")
    return 1
  fi

  script_dir=$(dirname -- "$script_path")
  script_name=$(basename -- "$script_path")

  echo "Submitting job for script $script_path"
  # [ ] rather than [[ ]]: it compares as plain decimals, so a max_jobs value
  # with leading zeros is not misread as octal.
  if [ "$job_counter" -ge "$max_jobs" ] && [[ -n "$dependency_list" ]]; then
    # The oldest job of the window is the first element of the list.
    dependency_job=${dependency_list%%,*}
    if [[ -n "$dependency_job" ]]; then
      echo " with dependencies on job IDs: $dependency_job"
      dependency_args=(--dependency=afterany:"$dependency_job")
    fi
  fi

  # sbatch runs from the script's own folder. The cd happens inside the
  # command-substitution subshell, so the caller's working directory is never
  # changed and a failed cd is reported as a failed submission instead of
  # running sbatch in the wrong place. cd's output is discarded because it
  # may print the directory when CDPATH is in use.
  if ! sbatch_output=$(
    cd -- "$script_dir" >/dev/null \
      && sbatch "${dependency_args[@]}" "$script_name"
  ); then
    echo "Error: Job submission failed for script $script_path"
    echo "$sbatch_output"
    failed_scripts+=("$script_path")
    return 1
  fi

  # sbatch reports the new job ID as the last word of its output.
  job_id=$(awk '{print $NF}' <<<"$sbatch_output")
  echo " submitted with job ID: $job_id"

  # Increment the job counter and append the new ID to the window.
  job_counter=$((job_counter + 1))
  dependency_list=${dependency_list:+$dependency_list,}$job_id

  # If the job counter exceeds max_jobs, drop the oldest IDs so that only the
  # last max_jobs entries remain.
  if [ "$job_counter" -gt "$max_jobs" ]; then
    dependency_list=$(echo "$dependency_list" | awk -F ',' -v max_jobs="$max_jobs" '{for (i=(NF-max_jobs+1>1?NF-max_jobs+1:1); i<=NF; i++) printf "%s%s", $i, (i==NF?"":",")}')
  fi
  return 0
}

# Iterate through the script paths and submit the SLURM jobs
for script_path in "${script_paths[@]}"; do
  submit_script "$script_path"
done
# Final summary: confirm success when nothing was recorded in failed_scripts,
# otherwise list every recorded script and finish with exit status 1.
if (( ${#failed_scripts[@]} == 0 )); then
  echo "All jobs submitted."
else
  echo "Error: The following scripts failed to submit:"
  # printf repeats its format once per array element: one path per line.
  printf ' %s\n' "${failed_scripts[@]}"
  exit 1
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment