Wrapper for submitting jobs to SLURM
#!/usr/bin/env python
# This script is a Python wrapper that helps submit jobs that need a conda environment to SLURM.
import subprocess
import sys
import os
import time
import argparse as ap
description = "A script that detects your current conda environment, writes an SBATCH script for SLURM, and submits the job. Example: `%(prog)s -n 2 -p 2 -e macs3 callpeaks -t t.bam -c c.bam`"
# top-level parser
argparser = ap.ArgumentParser( description = description )
argparser.add_argument( "-n", "--nodes", dest = "n_nodes", type = int,
help = "Specify number of nodes the job needs. Default: 1",
default = 1)
argparser.add_argument( "-p", "--processes", dest = "n_processes", type = int,
help = "Specify number of processes PER NODE the job needs. The total 'CPUs' that the job will use is n_nodes x n_processes. Default: 1",
default = 1)
argparser.add_argument( "-m", "--mem", dest = "m_mem", type = int,
help = "Specify the requirement of memory size of the node in MBytes. Default is 8G. Default: 8000",
default = 8000)
argparser.add_argument( "-c", "--conda", dest = "conda_env", type = str,
help = "By default, this script will automatically retrieve the current conda environment. However, if you want to specify a different conda environment that the job needs to run based on, you can use this option.",
default = None)
argparser.add_argument( "-d", "--wd", dest = "working_directory", type = str,
help = "By default, this script will let the job run inside the current directory. However, if you want to specify a different working directory, please use this option.",
default = None)
argparser.add_argument( "-e", "--exec", dest = "command", type = str, nargs = ap.REMAINDER, required = True,
help = "The whole command line of the job. If there is special chraracter in the command, please use `\"` to quote the entire command. For example, use `-e \"runthis \> savethis.txt\"`. This is a required option. Also, this should be the last argument for `%(prog)s`, since the entire string after `-e` or `--exec` will be taken as the command line of the job.")
args = argparser.parse_args()
time_stamp = str(time.time())
n_nodes = args.n_nodes
n_processes = args.n_processes
m_mem = args.m_mem
conda_env = args.conda_env
wd = args.working_directory
cmd = args.command
job_main_name = cmd[0].split()[0].replace('./','').replace('/','') # remove some common confusing characters from cmd[0]
sbatch_filename = "slurm_"+time_stamp+".sh"
sbatch_output_filename = "slurm_"+time_stamp+".stdout"
sbatch_error_filename = "slurm_"+time_stamp+".stderr"
# get working directory
if not wd:
    wd = os.getcwd()
# get env name of conda
if not conda_env:
    conda_env = os.getenv("CONDA_DEFAULT_ENV")
conda_exe = os.getenv("CONDA_EXE")
conda_bin = os.path.dirname(conda_exe) # the bin directory
job_name = job_main_name + "_" + time_stamp
entire_command_line = " ".join( cmd )
slurm_template = f"""#!/bin/bash
#SBATCH --clusters=faculty --partition=rpci --qos=rpci
##SBATCH --time=00:15:00
#SBATCH --nodes={n_nodes}
#SBATCH --ntasks-per-node={n_processes}
#SBATCH --mem={m_mem}
#SBATCH --job-name={job_name}
#SBATCH --output={wd}/{sbatch_output_filename}
#SBATCH --error={wd}/{sbatch_error_filename}
# load modules
#module load ...
# customized python environment
# >>> conda initialize >>>
__conda_setup="$('{conda_exe}' 'shell.bash' 'hook' 2> /dev/null)"
if [ $? -eq 0 ]; then
    eval "$__conda_setup"
else
    export PATH="{conda_bin}:$PATH"
fi
unset __conda_setup
conda activate {conda_env}
# customized env variables
# scripts to be run
cd {wd}
{entire_command_line}
wait;
"""
# Write into the sbatch file
f = open( sbatch_filename, "w" )
f.write( slurm_template )
f.close()
# Submit job to SLURM
subprocess.check_call(['sbatch', sbatch_filename])
print(f"Job Submitted from {sbatch_filename}")
taoliu commented Dec 30, 2021

usage: wrapToSlurm [-h] [-n N_NODES] [-p N_PROCESSES] [-m M_MEM] [-c CONDA_ENV] [-d WORKING_DIRECTORY] -e ...

A script that detects your current conda environment, writes an SBATCH script for SLURM, and submits the job. Example:
`wrapToSlurm -n 2 -p 2 -e macs3 callpeaks -t t.bam -c c.bam`

optional arguments:
  -h, --help            show this help message and exit
  -n N_NODES, --nodes N_NODES
                        Specify number of nodes the job needs. Default: 1
  -p N_PROCESSES, --processes N_PROCESSES
                        Specify number of processes PER NODE the job needs. The total 'CPUs' that the job will use is
                        n_nodes x n_processes. Default: 1
  -m M_MEM, --mem M_MEM
                        Specify the memory required per node, in MBytes. Default: 8000 (8GB)
  -c CONDA_ENV, --conda CONDA_ENV
                        By default, this script automatically detects the current conda environment. Use this option if
                        the job should run in a different conda environment.
  -d WORKING_DIRECTORY, --wd WORKING_DIRECTORY
                        By default, the job runs in the current working directory. Use this option to specify a
                        different working directory.
  -e ..., --exec ...    The whole command line of the job. If the command contains special characters, use `"` to quote
                        the entire command, e.g. `-e "runthis \> savethis.txt"`. This is a required option and must be
                        the last argument for `wrapToSlurm`, since everything after `-e` or `--exec` is taken as the
                        command line of the job.
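
For reference, here is a rough sketch of the SBATCH file the wrapper would write for the first example above, assuming the `macs3` conda environment is active and the job is launched from /home/user/project (the timestamp, paths, and environment name are placeholders; the clusters/partition/qos line and the commented-out time limit come straight from the hard-coded template):

#!/bin/bash
#SBATCH --clusters=faculty --partition=rpci --qos=rpci
##SBATCH --time=00:15:00
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=2
#SBATCH --mem=8000
#SBATCH --job-name=macs3_1640890000.12
#SBATCH --output=/home/user/project/slurm_1640890000.12.stdout
#SBATCH --error=/home/user/project/slurm_1640890000.12.stderr
# ... conda initialization block emitted by the template ...
conda activate macs3
cd /home/user/project
macs3 callpeaks -t t.bam -c c.bam
wait;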
